rewrite_conformer_evalnet (#176)

LiTingyu1997 · web-flow · commit 9b734c9be896 · 2024-01-26T15:45:42.000+08:00
* conformer_change

* update readme

* update eval_net

* fix ds2 bugs
diff --git a/examples/conformer/README_CN.md b/examples/conformer/README_CN.md
@@ -80,11 +80,21 @@ python train.py --config_path ./conformer.yaml
 此样例使用 8张NPU.
 ```shell
 # Distribute_training
-mpirun -n 8 python train.py ----config_path ./conformer.yaml
+mpirun -n 8 python train.py --config_path ./conformer.yaml
 ```
-注意:如果脚本是由root用户执行的，必须在mpirun中添加——allow-run-as-root参数，如下所示:
+注意:
+
+1.采用多卡训练时需确保yaml文件中的is_distributed为True，可通过更改yaml或在命令行中添加参数进行配置。
+
+```shell
+# Distribute_training
+mpirun -n 8 python train.py --config_path ./conformer.yaml  --is_distributed True
+```
+
+2.如果脚本是由root用户执行的，必须在mpirun中添加 `--allow-run-as-root` 参数，如下所示:
+
 ```shell
-mpirun --allow-run-as-root -n 8 python train.py ----config_path ./conformer.yaml
+mpirun --allow-run-as-root -n 8 python train.py --config_path ./conformer.yaml
 ```
 
 如在GPU中进行训练，可更改yaml文件中的配置。
diff --git a/examples/conformer/asr_model.py b/examples/conformer/asr_model.py
@@ -355,11 +355,14 @@ def creadte_asr_model(config, input_dim, vocab_size):
 class create_asr_eval_net(nn.Cell):
     """Create ASR eval network."""
 
-    def __init__(self, network):
+    def __init__(self, network, device_num):
         super(create_asr_eval_net, self).__init__()
         self.network = network
-        self.device_num = 1
-        self.all_reduce = None
+        self.device_num = device_num
+        if device_num > 1:
+            self.all_reduce = ops.AllReduce()
+        else:
+            self.all_reduce = None
 
     def construct(self, *inputs, **kwargs):
         loss = self.network(*inputs, **kwargs)
diff --git a/examples/conformer/conformer.yaml b/examples/conformer/conformer.yaml
@@ -102,7 +102,7 @@ is_distributed: False
 mixed_precision: True
 resume_ckpt: ""
 save_graphs: False
-training_with_eval: True
+training_with_eval: False
 
 # decode option
 test_data: "/data/test.csv"
diff --git a/examples/conformer/dataset.py b/examples/conformer/dataset.py
@@ -694,7 +694,7 @@ def create_dataset(
         group_size=group_size,
     )
 
-    sampler = DistributedSampler(dataset, rank, group_size, shuffle=True)
+    sampler = DistributedSampler(dataset, rank, group_size, shuffle=True, group=False)
 
     ds = de.GeneratorDataset(
         dataset,
diff --git a/examples/conformer/train.py b/examples/conformer/train.py
@@ -145,7 +145,7 @@ def train():
     ]
 
     if config.training_with_eval:
-        eval_net = create_asr_eval_net(net_with_loss)
+        eval_net = create_asr_eval_net(net_with_loss, device_num)
         callback_list.append(
             EvalCallback(
                 eval_net,
diff --git a/examples/deepspeech2/README_CN.md b/examples/deepspeech2/README_CN.md
@@ -34,16 +34,15 @@ DeepSpeech2是一种采用CTC损失训练的语音识别模型。它用神经网
 如未下载数据集，可使用提供的脚本进行一键下载以及数据准备，如下所示：
 
 ```shell
-cd mindaudio/data
 # Download and create json
-python librispeech_prepare.py --root_path "your_data_path"
+python mindaudio/data/librispeech.py --root_path "your_data_path"
 ```
 
 如已下载好压缩文件，请按如下命令操作：
 
 ```shell
 # create json
-python librispeech_prepare.py --root_path "your_data_path"  --data_ready True
+python mindaudio/data/librispeech.py --root_path "your_data_path"  --data_ready True
 ```
 
 LibriSpeech存储flac音频格式的文件。要在MindAudio中使用它们，须将所有flac文件转换为wav文件，用户可以使用[ffmpeg](https://gist.github.com/seungwonpark/4f273739beef2691cd53b5c39629d830)或[sox](https://sourceforge.net/projects/sox/)进行转换。
@@ -94,19 +93,13 @@ mpirun -n 8 python train.py -c "./deepspeech2.yaml"
 mpirun --allow-run-as-root -n 8 python train.py -c "./deepspeech2.yaml"
 ```
 
-#### 在GPU上进行多卡训练
-If you want to use the GPU for distributed training, see the following command：
-```shell
-# Distribute_training
-# assume you have 8 GPUs
-mpirun -n 8 python train.py -c "./deepspeech2.yaml" --device_target "GPU"
-```
 
 ### 3.评估模型
 
+将训练好的权重路径填写到deepspeech2.yaml配置文件的Pretrained_model字段中，然后执行以下命令：
 ```shell
 # Validate a trained model
-python eval.py -c "./deepspeech2.yaml" --pre_trained_model_path "xx.ckpt"
+python eval.py -c "./deepspeech2.yaml"
 ```
 
 
diff --git a/examples/deepspeech2/deepspeech2.yaml b/examples/deepspeech2/deepspeech2.yaml
@@ -45,7 +45,7 @@ EvalConfig:
     save_output: 'librispeech_val_output'
 
 # use to finetune or eval model
-Pretrained_model: './ckpt'
+Pretrained_model: ''
 
 labels: ["'", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
          "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", " ", "_"]
diff --git a/examples/deepspeech2/eval.py b/examples/deepspeech2/eval.py
@@ -6,12 +6,12 @@
 import mindspore.ops as ops
 import numpy as np
 from dataset import create_dataset
-from hparams import parse_args
 from mindspore import context, nn
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 
 from mindaudio.models.decoders.greedydecoder import MSGreedyDecoder
 from mindaudio.models.deepspeech2 import DeepSpeechModel
+from mindaudio.utils.hparams import parse_args
 
 
 class PredictWithSoftmax(nn.Cell):
@@ -73,10 +73,7 @@ def construct(self, inputs, input_length):
     load_param_into_net(model, param_dict)
     print("Successfully loading the pre-trained model")
 
-    if args.Decoder_type == "greedy":
-        decoder = MSGreedyDecoder(labels=labels, blank_index=labels.index("_"))
-    else:
-        raise NotImplementedError("Only greedy decoder is supported now")
+    decoder = MSGreedyDecoder(labels=labels, blank_index=labels.index("_"))
     target_decoder = MSGreedyDecoder(labels, blank_index=labels.index("_"))
 
     model.set_train(False)
@@ -106,8 +103,7 @@ def construct(self, inputs, input_length):
         decoded_output, _ = decoder.decode(out, output_sizes)
         target_strings = target_decoder.convert_to_strings(split_targets)
 
-        if args.save_output is not None:
-            output_data.append((out.asnumpy(), output_sizes.asnumpy(), target_strings))
+        output_data.append((out.asnumpy(), output_sizes.asnumpy(), target_strings))
         for doutput, toutput in zip(decoded_output, target_strings):
             transcript, reference = doutput[0], toutput[0]
             wer_inst = decoder.wer(transcript, reference)
diff --git a/mindaudio/utils/distributed.py b/mindaudio/utils/distributed.py
@@ -6,7 +6,8 @@ class DistributedSampler:
     For mindspore.dataset.GeneratorDataset
     """
 
-    def __init__(self, dataset, rank, group_size, shuffle=True, seed=0):
+    def __init__(self, dataset, rank, group_size, shuffle=True, seed=0, group=True):
+        self.group = group
         self.rank = rank
         self.group_size = group_size
         self.dataset_len = len(dataset)
@@ -20,7 +21,8 @@ def __iter__(self):
             indices = np.random.permutation(self.dataset_len)
         else:
             indices = np.arange(self.dataset_len)
-        indices = indices[self.rank :: self.group_size]
+        if self.group:
+            indices = indices[self.rank :: self.group_size]
         return iter(indices)
 
     def __len__(self):

Original file line number	Diff line number	Diff line change
`@@ -694,7 +694,7 @@ def create_dataset(`
`694`	`694`	`group_size=group_size,`
`695`	`695`	`)`
`696`	`696`
`697`		`- sampler = DistributedSampler(dataset, rank, group_size, shuffle=True)`
	`697`	`+ sampler = DistributedSampler(dataset, rank, group_size, shuffle=True, group=False)`
`698`	`698`
`699`	`699`	`ds = de.GeneratorDataset(`
`700`	`700`	`dataset,`
Original file line number	Diff line number	Diff line change
`@@ -145,7 +145,7 @@ def train():`
`145`	`145`	`]`
`146`	`146`
`147`	`147`	`if config.training_with_eval:`
`148`		`- eval_net = create_asr_eval_net(net_with_loss)`
	`148`	`+ eval_net = create_asr_eval_net(net_with_loss, device_num)`
`149`	`149`	`callback_list.append(`
`150`	`150`	`EvalCallback(`
`151`	`151`	`eval_net,`