diff --git a/README.md b/README.md index 2ecda66..43c2a5c 100644 --- a/README.md +++ b/README.md @@ -113,121 +113,106 @@ cuDNN下载地址:https://developer.nvidia.com/rdp/form/cudnn-download-survey ## 2. 配置化 -1. config.yaml - System Config - ```yaml - # - requirement.txt - GPU: tensorflow-gpu, CPU: tensorflow - # - If you use the GPU version, you need to install some additional applications. - # TrainRegex and TestRegex: Default matching apple_20181010121212.jpg file. - # - The Default is .*?(?=_.*\.) - # TrainsPath and TestPath: The local absolute path of your training and testing set. - # TestSetNum: This is an optional parameter that is used when you want to extract some of the test set - # - from the training set when you are not preparing the test set separately. - System: - DeviceUsage: 0.5 - TrainRegex: '.*?(?=_)' - TestRegex: '.*?(?=_)' - TestSetNum: 300 - - # CNNNetwork: [CNN5, DenseNet] - # RecurrentNetwork: [BLSTM, LSTM] - # - The recommended configuration is CNN5+BLSTM / DenseNet+BLSTM - # HiddenNum: [64, 128, 256] - # - This parameter indicates the number of nodes used to remember and store past states. - NeuralNet: - CNNNetwork: CNN5 - RecurrentNetwork: BLSTM - HiddenNum: 64 - KeepProb: 0.99 - - # SavedEpochs: A Session.run() execution is called a Epochs, - # - Used to save traininsed to calculate accuracy, Default value is 100. - # TestNum: The number of samples for each test batch. - # - A test for every saved steps. - # CompileAcc: When the accuracy reaches the set threshold, - # - the model will be compiled together each time it is archived. - # - Available for specific usage scenarios. - # EndAcc: Finish the training when the accuracy reaches [EndAcc*100]%. - # EndEpochs: Finish the training when the epoch is greater than the defined epoch. - # PreprocessCollapseRepe ated: If True, then a preprocessing step runs - # - before loss calculation, wherein repeated labels passed to the loss - # - are merged into single labels. This is useful if the training labels come - # - from, e.g., forced alignments and therefore have unnecessary repetitions. - # CTCMergeRepeated: If False, then deep within the CTC calculation, - # - repeated non-blank labels will not be merged and are interpreted - # - as individual labels. This is a simplified (non-standard) version of CTC. - Trains: - SavedSteps: 100 - ValidationSteps: 500 - EndAcc: 0.98 - EndCost: 1 - EndEpochs: 2 - BatchSize: 64 - TestBatchSize: 300 - LearningRate: 0.01 - DecayRate: 0.98 - DecaySteps: 100000 - PreprocessCollapseRepeated: False - CTCMergeRepeated: True - CTCBeamWidth: 5 - CTCTopPaths: 1 - - ``` - - There are several common examples of TrainRegex: - i. apple_20181010121212.jpg - - ``` - .*?(?=_.*\.) - ``` - - ii apple.png - - ``` - .*?(?=\.) - ``` - -2. model.yaml - Model Config +1. model.yaml - Model Config ```yaml - # Sites: A bindable parameter used to select a model. - # - If this parameter is defined, - # - it can be identified by using the model_site parameter - # - to identify a model that is inconsistent with the actual size of the current model. - # ModelName: Corresponding to the model file in the model directory, - # - such as YourModelName.pb, fill in YourModelName here. - # ModelType: This parameter is also used to locate the model. - # - The difference from the sites is that if there is no corresponding site, - # - the size will be used to assign the model. 
- # - If a model of the corresponding size and corresponding to the ModelType is not found,
- # - the model belonging to the category is preferentially selected.
- # CharSet: Provides a default optional built-in solution:
- # - [ALPHANUMERIC, ALPHANUMERIC_LOWER, ALPHANUMERIC_UPPER,
- # -- NUMERIC, ALPHABET_LOWER, ALPHABET_UPPER, ALPHABET]
- # - Or you can use your own customized character set like: ['a', '1', '2'].
- # CharExclude: CharExclude should be a list, like: ['a', '1', '2']
- # - which is convenient for users to freely combine character sets.
- # - If you don't want to manually define the character set manually,
- # - you can choose a built-in character set
- # - and set the characters to be excluded by CharExclude parameter.
- Model:
-   Sites: []
-   ModelName: YourModelName
-   ModelType: 150x50
-   CharSet: ALPHANUMERIC_LOWER
-   CharExclude: []
-   CharReplace: {}
-   ImageWidth: 150
-   ImageHeight: 50
-
- # Binaryzation: [-1: Off, >0 and < 255: On].
- # Smoothing: [-1: Off, >0: On].
- # Blur: [-1: Off, >0: On].
- Pretreatment:
-   Binaryzation: -1
-   Smoothing: -1
-   Blur: -1
-   Resize: [150, 50]
+ # - requirement.txt - GPU: tensorflow-gpu, CPU: tensorflow
+ # - If you use the GPU version, you need to install some additional applications.
+ System:
+   DeviceUsage: 0.7
+
+ # ModelName: Corresponding to the model file in the model directory,
+ # - such as YourModelName.pb, fill in YourModelName here.
+ # CharSet: Provides a default optional built-in solution:
+ # - [ALPHANUMERIC, ALPHANUMERIC_LOWER, ALPHANUMERIC_UPPER,
+ # -- NUMERIC, ALPHABET_LOWER, ALPHABET_UPPER, ALPHABET, ALPHANUMERIC_LOWER_MIX_CHINESE_3500]
+ # - Or you can use your own customized character set like: ['a', '1', '2'].
+ # CharMaxLength: Maximum length of characters, used for label padding.
+ # CharExclude: CharExclude should be a list, like: ['a', '1', '2'],
+ # - which is convenient for users to freely combine character sets.
+ # - If you don't want to define the character set manually,
+ # - you can choose a built-in character set
+ # - and set the characters to be excluded by the CharExclude parameter.
+ Model:
+   Sites: [
+     'YourModelName'
+   ]
+   ModelName: YourModelName
+   ModelType: 150x50
+   CharSet: ALPHANUMERIC_LOWER
+   CharExclude: []
+   CharReplace: {}
+   ImageWidth: 150
+   ImageHeight: 50
+
+ # Binaryzation: [-1: Off, >0 and < 255: On].
+ # Smoothing: [-1: Off, >0: On].
+ # Blur: [-1: Off, >0: On].
+ # Resize: [WIDTH, HEIGHT]
+ # - If the image size is too small, the training effect will be poor and you need to zoom in.
+ # ReplaceTransparent: [True, False]
+ # - True: Convert transparent images in RGBA format to opaque RGB format,
+ # - False: Keep the original image
+ Pretreatment:
+   Binaryzation: -1
+   Smoothing: -1
+   Blur: -1
+   Resize: [150, 50]
+   ReplaceTransparent: True
+
+ # CNNNetwork: [CNN5, ResNet, DenseNet]
+ # RecurrentNetwork: [BLSTM, LSTM, SRU, BSRU, GRU]
+ # - The recommended configuration is CNN5+BLSTM / ResNet+BLSTM
+ # HiddenNum: [64, 128, 256]
+ # - This parameter indicates the number of nodes used to remember and store past states.
+ # Optimizer: Loss function algorithm for calculating gradient.
+ # - [AdaBound, Adam, Momentum]
+ NeuralNet:
+   CNNNetwork: CNN5
+   RecurrentNetwork: BLSTM
+   HiddenNum: 64
+   KeepProb: 0.98
+   Optimizer: AdaBound
+   PreprocessCollapseRepeated: False
+   CTCMergeRepeated: True
+   CTCBeamWidth: 1
+   CTCTopPaths: 1
+
+ # TrainsPath and TestPath: The local absolute path of your training and testing set.
+ # DatasetPath: Samples under this path are packaged into the TFRecords format.
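+ # - (Editor's note: this path is consumed by make_dataset.py, which shuffles the samples
+ # -- and writes the *_trains.tfrecords / *_test.tfrecords files referenced by TrainsPath/TestPath.)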
+ # TrainRegex and TestRegex: Default matching apple_20181010121212.jpg file.
+ # - The Default is .*?(?=_.*\.)
+ # TestSetNum: Optional. The number of samples split off from the training set to form
+ # - the test set when an automatic allocation strategy is used (TestPath not set).
+ # SavedSteps: A Session.run() execution is called a Step,
+ # - Used to save training progress, Default value is 100.
+ # ValidationSteps: Used to calculate accuracy, Default value is 500.
+ # EndAcc: Finish the training when the accuracy reaches [EndAcc*100]% and other conditions.
+ # EndCost: Finish the training when the cost reaches EndCost and other conditions.
+ # EndEpochs: Finish the training when the epoch is greater than the defined epoch and other conditions.
+ # BatchSize: Number of samples selected for one training step.
+ # TestBatchSize: Number of samples selected for one validation step.
+ # LearningRate: Recommended value [0.01: MomentumOptimizer/AdamOptimizer, 0.001: AdaBoundOptimizer]
+ Trains:
+   TrainsPath: './dataset/mnist-CNN5BLSTM-H64-28x28_trains.tfrecords'
+   TestPath: './dataset/mnist-CNN5BLSTM-H64-28x28_test.tfrecords'
+   DatasetPath: [
+     "D:/***"
+   ]
+   TrainRegex: '.*?(?=_)'
+   TestSetNum: 300
+   SavedSteps: 100
+   ValidationSteps: 500
+   EndAcc: 0.95
+   EndCost: 0.1
+   EndEpochs: 2
+   BatchSize: 128
+   TestBatchSize: 300
+   LearningRate: 0.001
+   DecayRate: 0.98
+   DecaySteps: 10000
   ```

 # 工具集
@@ -235,10 +220,7 @@ cuDNN下载地址:https://developer.nvidia.com/rdp/form/cudnn-download-survey

 1. 预处理预览工具,只支持为打包的训练集查看
    ```python -m tools.preview```

-2. 新手指南 (只支持字符集推荐,我觉得是个鸡肋各位请忽略)
-   ```python -m tools.navigator```
-
-3. PyInstaller 一键打包(训练的话支持不好,部署的打包效果不错)
+2. PyInstaller one-click packaging (poorly suited to training, but works well for packaging a deployment)

   ```
   pip install pyinstaller
@@ -249,6 +231,9 @@ cuDNN下载地址:https://developer.nvidia.com/rdp/form/cudnn-download-survey

 1. 命令行或终端运行:```python trains.py```
 2. 使用 PyCharm 运行,右键 Run
+3. **For beginners**: edit the configuration in tutorial.py with an IDE and run it; it bundles recommended configuration, sample packaging, and training in one script.
+
+

 # 开源许可

diff --git a/config.py b/config.py
index 9863550..e90dda0 100644
--- a/config.py
+++ b/config.py
@@ -5,11 +5,10 @@
 import os
 import platform
 import re
-from enum import Enum, unique
-
 import yaml

 from character import *
+from constants import *
 from exception import exception, ConfigException

 # Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
@@ -20,30 +19,10 @@

 IGNORE_FILES = ['.DS_Store']


-class RunMode(object):
-    Test = 'test'
-    Trains = 'trains'
-    Predict = 'predict'
-
-
-@unique
-class CNNNetwork(Enum):
-    CNN5 = 'CNN5'
-    ResNet = 'ResNet'
-
-
-@unique
-class RecurrentNetwork(Enum):
-    LSTM = 'LSTM'
-    BLSTM = 'BLSTM'
-    SRU = 'SRU'
-    BSRU = 'BSRU'
-    GRU = 'GRU'
-
-
 NETWORK_MAP = {
     'CNN5': CNNNetwork.CNN5,
     'ResNet': CNNNetwork.ResNet,
+    'DenseNet': CNNNetwork.DenseNet,
     'LSTM': RecurrentNetwork.LSTM,
     'BLSTM': RecurrentNetwork.BLSTM,
     'SRU': RecurrentNetwork.SRU,
@@ -51,16 +30,21 @@ class RecurrentNetwork(Enum):
     'GRU': RecurrentNetwork.GRU,
 }

-TFRECORDS_NAME_MAP = {
-    RunMode.Trains: 'trains',
-    RunMode.Test: 'test'
+
+OPTIMIZER_MAP = {
+    'AdaBound': Optimizer.AdaBound,
+    'Adam': Optimizer.Adam,
+    'Momentum': Optimizer.Momentum,
+    'SGD': Optimizer.SGD,
+    'AdaGrad': Optimizer.AdaGrad,
+    'RMSProp': Optimizer.RMSProp
 }

 PLATFORM = platform.system()
-SYS_CONFIG_DEMO_NAME = 'config_demo.yaml'
+# SYS_CONFIG_DEMO_NAME = 'config_demo.yaml'
 MODEL_CONFIG_DEMO_NAME = 'model_demo.yaml'
-SYS_CONFIG_NAME = 'config.yaml'
+# SYS_CONFIG_NAME = 'config.yaml'
 MODEL_CONFIG_NAME = 'model.yaml'
 MODEL_PATH = os.path.join(PROJECT_PATH, 'model')
 OUTPUT_PATH = os.path.join(PROJECT_PATH, 'out')
@@ -68,19 +52,19 @@ class RecurrentNetwork(Enum):

 PATH_SPLIT = "\\" if PLATFORM == "Windows" else "/"

-SYS_CONFIG_PATH = os.path.join(PROJECT_PATH, SYS_CONFIG_NAME)
-SYS_CONFIG_PATH = SYS_CONFIG_PATH if os.path.exists(SYS_CONFIG_PATH) else os.path.join("../", SYS_CONFIG_NAME)
+# SYS_CONFIG_PATH = os.path.join(PROJECT_PATH, SYS_CONFIG_NAME)
+# SYS_CONFIG_PATH = SYS_CONFIG_PATH if os.path.exists(SYS_CONFIG_PATH) else os.path.join("../", SYS_CONFIG_NAME)
 MODEL_CONFIG_PATH = os.path.join(PROJECT_PATH, MODEL_CONFIG_NAME)
 MODEL_CONFIG_PATH = MODEL_CONFIG_PATH if os.path.exists(MODEL_CONFIG_PATH) else os.path.join("../", MODEL_CONFIG_NAME)

-with open(SYS_CONFIG_PATH, 'r', encoding="utf-8") as sys_fp:
-    sys_stream = sys_fp.read()
-    cf_system = yaml.load(sys_stream)
+# with open(SYS_CONFIG_PATH, 'r', encoding="utf-8") as sys_fp:
+#     sys_stream = sys_fp.read()
+#     cf_system = yaml.load(sys_stream, Loader=yaml.SafeLoader)

 with open(MODEL_CONFIG_PATH, 'r', encoding="utf-8") as sys_fp:
     sys_stream = sys_fp.read()
-    cf_model = yaml.load(sys_stream)
+    cf_model = yaml.load(sys_stream, Loader=yaml.SafeLoader)


 def char_set(_type):
@@ -115,15 +99,31 @@

 IMAGE_WIDTH = cf_model['Model'].get('ImageWidth')
 IMAGE_CHANNEL = cf_model['Model'].get('ImageChannel')
 IMAGE_CHANNEL = IMAGE_CHANNEL if IMAGE_CHANNEL else 1
+MULTI_SHAPE = False
+

 """NEURAL NETWORK"""
-NEU_CNN = cf_system['NeuralNet'].get('CNNNetwork')
+NEU_CNN = cf_model['NeuralNet'].get('CNNNetwork')
 NEU_CNN = NEU_CNN if NEU_CNN else 'CNN5'
-NEU_RECURRENT = cf_system['NeuralNet'].get('RecurrentNetwork')
+NEU_RECURRENT = cf_model['NeuralNet'].get('RecurrentNetwork')
 NEU_RECURRENT = NEU_RECURRENT if NEU_RECURRENT else 'BLSTM'
-NUM_HIDDEN = cf_system['NeuralNet'].get('HiddenNum')
-OUTPUT_KEEP_PROB = cf_system['NeuralNet'].get('KeepProb')
+NUM_HIDDEN = 
cf_model['NeuralNet'].get('HiddenNum') +OUTPUT_KEEP_PROB = cf_model['NeuralNet'].get('KeepProb') LSTM_LAYER_NUM = 2 +NEU_OPTIMIZER = cf_model['NeuralNet'].get('Optimizer') +NEU_OPTIMIZER = NEU_OPTIMIZER if NEU_OPTIMIZER else 'AdaBound' +PREPROCESS_COLLAPSE_REPEATED = cf_model['NeuralNet'].get('PreprocessCollapseRepeated') +PREPROCESS_COLLAPSE_REPEATED = PREPROCESS_COLLAPSE_REPEATED if PREPROCESS_COLLAPSE_REPEATED is not None else False +CTC_MERGE_REPEATED = cf_model['NeuralNet'].get('CTCMergeRepeated') +CTC_MERGE_REPEATED = CTC_MERGE_REPEATED if CTC_MERGE_REPEATED is not None else True +CTC_BEAM_WIDTH = cf_model['NeuralNet'].get('CTCBeamWidth') +CTC_BEAM_WIDTH = CTC_BEAM_WIDTH if CTC_BEAM_WIDTH is not None else 1 +CTC_TOP_PATHS = cf_model['NeuralNet'].get('CTCTopPaths') +CTC_TOP_PATHS = CTC_TOP_PATHS if CTC_TOP_PATHS is not None else 1 +CTC_LOSS_TIME_MAJOR = True +WARP_CTC = cf_model['NeuralNet'].get('WarpCTC') +WARP_CTC = WARP_CTC if WARP_CTC is not None else False + LEAKINESS = 0.01 NUM_CLASSES = CHAR_SET_LEN + 2 @@ -134,25 +134,23 @@ def char_set(_type): SAVE_CHECKPOINT = os.path.join(MODEL_PATH, CHECKPOINT_TAG) """SYSTEM""" -GPU_USAGE = cf_system['System'].get('DeviceUsage') +GPU_USAGE = cf_model['System'].get('DeviceUsage') """PATH & LABEL""" TRAIN_PATH_IN_MODEL = cf_model.get('Trains') -if TRAIN_PATH_IN_MODEL: - TRAINS_PATH = cf_model['Trains'].get('TrainsPath') - TEST_PATH = cf_model['Trains'].get('TestPath') -else: - TRAINS_PATH = cf_system['System'].get('TrainsPath') - TEST_PATH = cf_system['System'].get('TestPath') -TRAINS_REGEX = cf_system['System'].get('TrainRegex') +TRAINS_PATH = cf_model['Trains'].get('TrainsPath') +TEST_PATH = cf_model['Trains'].get('TestPath') +DATASET_PATH = cf_model['Trains'].get('DatasetPath') + +TRAINS_REGEX = cf_model['Trains'].get('TrainRegex') TRAINS_REGEX = TRAINS_REGEX if TRAINS_REGEX else ".*?(?=_)" -TEST_REGEX = cf_system['System'].get('TestRegex') +TEST_REGEX = cf_model['Trains'].get('TestRegex') TEST_REGEX = TEST_REGEX if TEST_REGEX else (TRAINS_REGEX if TRAINS_REGEX else ".*?(?=_)") -TEST_SET_NUM = cf_system['System'].get('TestSetNum') +TEST_SET_NUM = cf_model['Trains'].get('TestSetNum') TEST_SET_NUM = TEST_SET_NUM if TEST_SET_NUM else 1000 HAS_TEST_SET = TEST_PATH and (os.path.exists(TEST_PATH) if isinstance(TEST_PATH, str) else True) @@ -161,27 +159,22 @@ def char_set(_type): TRAINS_USE_TFRECORDS = isinstance(TRAINS_PATH, str) and TRAINS_PATH.endswith("tfrecords") """TRAINS""" -TRAINS_SAVE_STEPS = cf_system['Trains'].get('SavedSteps') -TRAINS_VALIDATION_STEPS = cf_system['Trains'].get('ValidationSteps') -TRAINS_END_ACC = cf_system['Trains'].get('EndAcc') -TRAINS_END_COST = cf_system['Trains'].get('EndCost') +TRAINS_SAVE_STEPS = cf_model['Trains'].get('SavedSteps') +TRAINS_VALIDATION_STEPS = cf_model['Trains'].get('ValidationSteps') +TRAINS_END_ACC = cf_model['Trains'].get('EndAcc') +TRAINS_END_COST = cf_model['Trains'].get('EndCost') TRAINS_END_COST = TRAINS_END_COST if TRAINS_END_COST else 1 -TRAINS_END_EPOCHS = cf_system['Trains'].get('EndEpochs') -TRAINS_LEARNING_RATE = cf_system['Trains'].get('LearningRate') -DECAY_RATE = cf_system['Trains'].get('DecayRate') -DECAY_STEPS = cf_system['Trains'].get('DecaySteps') -BATCH_SIZE = cf_system['Trains'].get('BatchSize') -TEST_BATCH_SIZE = cf_system['Trains'].get('TestBatchSize') -TEST_BATCH_SIZE = TEST_BATCH_SIZE if TEST_BATCH_SIZE else 200 +TRAINS_END_EPOCHS = cf_model['Trains'].get('EndEpochs') +TRAINS_LEARNING_RATE = cf_model['Trains'].get('LearningRate') +DECAY_RATE = 
cf_model['Trains'].get('DecayRate') +DECAY_RATE = DECAY_RATE if DECAY_RATE else 0.98 +DECAY_STEPS = cf_model['Trains'].get('DecaySteps') +DECAY_STEPS = DECAY_STEPS if DECAY_STEPS else 10000 +BATCH_SIZE = cf_model['Trains'].get('BatchSize') +BATCH_SIZE = BATCH_SIZE if BATCH_SIZE else 64 +TEST_BATCH_SIZE = cf_model['Trains'].get('TestBatchSize') +TEST_BATCH_SIZE = TEST_BATCH_SIZE if TEST_BATCH_SIZE else 300 MOMENTUM = 0.9 -PREPROCESS_COLLAPSE_REPEATED = cf_system['Trains'].get('PreprocessCollapseRepeated') -PREPROCESS_COLLAPSE_REPEATED = PREPROCESS_COLLAPSE_REPEATED if PREPROCESS_COLLAPSE_REPEATED is not None else False -CTC_MERGE_REPEATED = cf_system['Trains'].get('CTCMergeRepeated') -CTC_MERGE_REPEATED = CTC_MERGE_REPEATED if CTC_MERGE_REPEATED is not None else True -CTC_BEAM_WIDTH = cf_system['Trains'].get('CTCBeamWidth') -CTC_BEAM_WIDTH = CTC_BEAM_WIDTH if CTC_BEAM_WIDTH is not None else 1 -CTC_TOP_PATHS = cf_system['Trains'].get('CTCTopPaths') -CTC_TOP_PATHS = CTC_TOP_PATHS if CTC_TOP_PATHS is not None else 1 """PRETREATMENT""" BINARYZATION = cf_model['Pretreatment'].get('Binaryzation') @@ -213,15 +206,15 @@ def init(): if not os.path.exists(OUTPUT_PATH): os.makedirs(OUTPUT_PATH) - if not os.path.exists(SYS_CONFIG_PATH): - exception( - 'Configuration File "{}" No Found. ' - 'If it is used for the first time, please copy one from {} as {}'.format( - SYS_CONFIG_NAME, - SYS_CONFIG_DEMO_NAME, - SYS_CONFIG_NAME - ), ConfigException.SYS_CONFIG_PATH_NOT_EXIST - ) + # if not os.path.exists(SYS_CONFIG_PATH): + # exception( + # 'Configuration File "{}" No Found. ' + # 'If it is used for the first time, please copy one from {} as {}'.format( + # SYS_CONFIG_NAME, + # SYS_CONFIG_DEMO_NAME, + # SYS_CONFIG_NAME + # ), ConfigException.SYS_CONFIG_PATH_NOT_EXIST + # ) if not os.path.exists(MODEL_CONFIG_PATH): exception( @@ -248,7 +241,7 @@ def init(): f.write(checkpoint) -if '../' not in SYS_CONFIG_PATH: +if '../' not in MODEL_CONFIG_PATH: print('Loading Configuration...') print('---------------------------------------------------------------------------------') print("PROJECT_PATH", PROJECT_PATH) @@ -259,6 +252,6 @@ def init(): print('IMAGE_WIDTH: {}, IMAGE_HEIGHT: {}'.format( IMAGE_WIDTH, IMAGE_HEIGHT) ) - print('NEURAL NETWORK: {}'.format(cf_system['NeuralNet'])) + print('NEURAL NETWORK: {}'.format(cf_model['NeuralNet'])) - print('---------------------------------------------------------------------------------') + print('---------------------------------------------------------------------------------') \ No newline at end of file diff --git a/config_demo.yaml b/config_demo.yaml deleted file mode 100644 index 6d7080d..0000000 --- a/config_demo.yaml +++ /dev/null @@ -1,50 +0,0 @@ -# - requirement.txt - GPU: tensorflow-gpu, CPU: tensorflow -# - If you use the GPU version, you need to install some additional applications. -# TrainRegex and TestRegex: Default matching apple_20181010121212.jpg file. -# - The Default is .*?(?=_.*\.) -# TrainsPath and TestPath: The local absolute path of your training and testing set. -# TestSetNum: This is an optional parameter that is used when you want to extract some of the test set -# - from the training set when you are not preparing the test set separately. 
-System: - DeviceUsage: 0.7 - TrainRegex: '.*?(?=_)' - TestRegex: '.*?(?=_)' - TrainsPath: './dataset/mnist-CNN5BLSTM-H64-28x28_trains.tfrecords' - TestPath: './dataset/mnist-CNN5BLSTM-H64-28x28_test.tfrecords' - TestSetNum: 300 - -# CNNNetwork: [CNN5, ResNet] -# RecurrentNetwork: [BLSTM, LSTM, SRU, BSRU, GRU] -# - The recommended configuration is CNN5+BLSTM / ResNet+BLSTM -# HiddenNum: [64, 128, 256] -# - This parameter indicates the number of nodes used to remember and store past states. -NeuralNet: - CNNNetwork: CNN5 - RecurrentNetwork: BLSTM - HiddenNum: 64 - KeepProb: 0.98 - -# SavedSteps: A Session.run() execution is called a Epochs, -# - Used to save training progress, Default value is 100. -# ValidationSteps: Used to calculate accuracy, Default value is 100. -# TestNum: The number of samples for each test batch. -# - A test for every saved steps. -# EndAcc: Finish the training when the accuracy reaches [EndAcc*100]%. -# EndEpochs: Finish the training when the epoch is greater than the defined epoch. -Trains: - SavedSteps: 100 - ValidationSteps: 500 - EndAcc: 0.975 - EndCost: 1 - EndEpochs: 1 - BatchSize: 64 - TestBatchSize: 300 - LearningRate: 0.01 - DecayRate: 0.98 - DecaySteps: 10000 - PreprocessCollapseRepeated: False - CTCMergeRepeated: True - CTCBeamWidth: 5 - CTCTopPaths: 1 - - diff --git a/constants.py b/constants.py new file mode 100644 index 0000000..5e6aaa0 --- /dev/null +++ b/constants.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Author: kerlomz +from enum import Enum, unique + + +@unique +class RunMode(Enum): + Test = 'test' + Trains = 'trains' + Predict = 'predict' + + +@unique +class CNNNetwork(Enum): + CNN5 = 'CNN5' + ResNet = 'ResNet' + DenseNet = 'DenseNet' + + +@unique +class RecurrentNetwork(Enum): + LSTM = 'LSTM' + BLSTM = 'BLSTM' + SRU = 'SRU' + BSRU = 'BSRU' + GRU = 'GRU' + + +@unique +class Optimizer(Enum): + AdaBound = 'AdaBound' + Adam = 'Adam' + Momentum = 'Momentum' + SGD = 'SGD' + AdaGrad = 'AdaGrad' + RMSProp = 'RMSProp' + + +@unique +class SimpleCharset(Enum): + NUMERIC = 'NUMBER' + ALPHANUMERIC = 'ALPHANUMERIC' + ALPHANUMERIC_LOWER = 'ALPHANUMERIC_LOWER' + ALPHANUMERIC_UPPER = 'ALPHANUMERIC_UPPER' + ALPHABET_LOWER = 'ALPHABET_LOWER' + ALPHABET_UPPER = 'ALPHABET_UPPER' + ALPHABET = 'ALPHABET' + ARITHMETIC = 'ARITHMETIC' + ALPHANUMERIC_LOWER_MIX_ARITHMETIC = 'ALPHANUMERIC_LOWER_MIX_ARITHMETIC' + FLOAT = 'FLOAT' + CHINESE_3500 = 'CHINESE_3500' + ALPHANUMERIC_LOWER_MIX_CHINESE_3500 = 'ALPHANUMERIC_LOWER_MIX_CHINESE_3500' + diff --git a/framework.py b/framework.py index 4509ffd..a76b3a1 100644 --- a/framework.py +++ b/framework.py @@ -2,16 +2,18 @@ # -*- coding:utf-8 -*- # Author: kerlomz import sys - import tensorflow as tf +from importlib import import_module from distutils.version import StrictVersion from config import * -from network.CNN5 import CNN5 +from network.CNN import CNN5 from network.GRU import GRU from network.LSTM import LSTM, BLSTM from network.ResNet import ResNet50 +from network.DenseNet import DenseNet from network.SRU import SRU, BSRU from network.utils import NetworkUtils +from optimizer.AdaBound import AdaBoundOptimizer class GraphOCR(object): @@ -21,7 +23,7 @@ def __init__(self, mode, cnn: CNNNetwork, recurrent: RecurrentNetwork): self.utils = NetworkUtils(mode) self.network = cnn self.recurrent = recurrent - self.inputs = tf.placeholder(tf.float32, [None, RESIZE[0], RESIZE[1], IMAGE_CHANNEL], name='input') + self.inputs = tf.placeholder(tf.float32, [None, None, RESIZE[1], IMAGE_CHANNEL], name='input') 
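+        # Editor's note: the width dimension is left as None here (it was RESIZE[0] before
+        # this change), so one compiled graph can accept variable-width inputs; only the
+        # height RESIZE[1] and the channel count stay fixed.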
self.labels = tf.sparse_placeholder(tf.int32, name='labels') self.seq_len = None self.merged_summary = None @@ -32,18 +34,25 @@ def build_graph(self): self.merged_summary = tf.summary.merge_all() def _build_model(self): + if self.network == CNNNetwork.CNN5: x = CNN5(inputs=self.inputs, utils=self.utils).build() elif self.network == CNNNetwork.ResNet: x = ResNet50(inputs=self.inputs, utils=self.utils).build() + elif self.network == CNNNetwork.DenseNet: + x = DenseNet(inputs=self.inputs, utils=self.utils).build() + else: - print('This cnn neural network is not supported at this time.') + tf.logging.error('This cnn neural network is not supported at this time.') sys.exit(-1) - shape_list = x.get_shape().as_list() - self.seq_len = tf.fill([tf.shape(x)[0]], shape_list[1], name="seq_len") + # time_major = True: [max_time_step, batch_size, num_classes] + # time_major = False: [batch_size, max_time_step, num_classes] + tf.logging.info("CNN Output: {}".format(x.get_shape())) + + self.seq_len = tf.fill([tf.shape(x)[0]], tf.shape(x)[1], name="seq_len") if self.recurrent == RecurrentNetwork.LSTM: recurrent_network_builder = LSTM(self.utils, x, self.seq_len) @@ -56,7 +65,7 @@ def _build_model(self): elif self.recurrent == RecurrentNetwork.BSRU: recurrent_network_builder = BSRU(self.utils, x, self.seq_len) else: - print('This recurrent neural network is not supported at this time.') + tf.logging.error('This recurrent neural network is not supported at this time.') sys.exit(-1) outputs = recurrent_network_builder.build() @@ -69,10 +78,10 @@ def _build_model(self): name='weight', shape=[outputs.get_shape()[1] if self.network == CNNNetwork.ResNet else NUM_HIDDEN * 2, NUM_CLASSES], dtype=tf.float32, - initializer=tf.truncated_normal_initializer(stddev=0.1), + initializer=tf.contrib.layers.xavier_initializer(), + # initializer=tf.truncated_normal_initializer(stddev=0.1), + # initializer=tf.glorot_normal_initializer(), # initializer=tf.glorot_uniform_initializer(), - # initializer=tf.contrib.layers.xavier_initializer(), - # initializer=tf.truncated_normal([NUM_HIDDEN, NUM_CLASSES], stddev=0.1), ) biases_out = tf.get_variable( name='biases', @@ -91,20 +100,27 @@ def _build_model(self): def _build_train_op(self): self.global_step = tf.train.get_or_create_global_step() # ctc loss function, using forward and backward algorithms and maximum likelihood. 
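+        # Editor's note: both CTC branches below consume self.predict in time-major layout,
+        # [max_time_step, batch_size, num_classes], which is why CTC_LOSS_TIME_MAJOR is True.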
- - self.loss = tf.nn.ctc_loss( - labels=self.labels, - inputs=self.predict, - sequence_length=self.seq_len, - ctc_merge_repeated=CTC_MERGE_REPEATED, - preprocess_collapse_repeated=PREPROCESS_COLLAPSE_REPEATED, - ignore_longer_outputs_than_inputs=False, - time_major=True - ) + if WARP_CTC: + import_module('warpctc_tensorflow') + with tf.get_default_graph()._kernel_label_map({"CTCLoss": "WarpCTC"}): + self.loss = tf.nn.ctc_loss( + inputs=self.predict, + labels=self.labels, + sequence_length=self.seq_len + ) + else: + self.loss = tf.nn.ctc_loss( + labels=self.labels, + inputs=self.predict, + sequence_length=self.seq_len, + ctc_merge_repeated=CTC_MERGE_REPEATED, + preprocess_collapse_repeated=PREPROCESS_COLLAPSE_REPEATED, + ignore_longer_outputs_than_inputs=False, + time_major=CTC_LOSS_TIME_MAJOR + ) self.cost = tf.reduce_mean(self.loss) tf.summary.scalar('cost', self.cost) - self.lrn_rate = tf.train.exponential_decay( TRAINS_LEARNING_RATE, self.global_step, @@ -114,18 +130,59 @@ def _build_train_op(self): ) tf.summary.scalar('learning_rate', self.lrn_rate) - self.optimizer = tf.train.MomentumOptimizer( - learning_rate=self.lrn_rate, - use_nesterov=True, - momentum=MOMENTUM, - ).minimize( - self.cost, - global_step=self.global_step - ) - + update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + # print(update_ops) # Storing adjusted smoothed mean and smoothed variance operations - train_ops = [self.optimizer] + self.utils.extra_train_ops - self.train_op = tf.group(*train_ops) + with tf.control_dependencies(update_ops): + if OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.AdaBound: + self.train_op = AdaBoundOptimizer( + learning_rate=self.lrn_rate, + final_lr=0.1, + beta1=0.9, + beta2=0.999, + amsbound=True + ).minimize( + loss=self.cost, + global_step=self.global_step + ) + elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.Adam: + self.train_op = tf.train.AdamOptimizer( + learning_rate=self.lrn_rate + ).minimize( + self.cost, + global_step=self.global_step + ) + elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.Momentum: + self.train_op = tf.train.MomentumOptimizer( + learning_rate=self.lrn_rate, + use_nesterov=True, + momentum=MOMENTUM, + ).minimize( + self.cost, + global_step=self.global_step + ) + elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.SGD: + self.train_op = tf.train.GradientDescentOptimizer( + learning_rate=self.lrn_rate, + ).minimize( + self.cost, + global_step=self.global_step + ) + elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.AdaGrad: + self.train_op = tf.train.AdagradOptimizer( + learning_rate=self.lrn_rate, + ).minimize( + self.cost, + global_step=self.global_step + ) + elif OPTIMIZER_MAP[NEU_OPTIMIZER] == Optimizer.RMSProp: + self.train_op = tf.train.RMSPropOptimizer( + learning_rate=self.lrn_rate, + decay=DECAY_RATE, + ).minimize( + self.cost, + global_step=self.global_step + ) # Option 2: tf.contrib.ctc.ctc_beam_search_decoder # (it's slower but you'll get better results) @@ -137,13 +194,18 @@ def _build_train_op(self): # Find the optimal path self.decoded, self.log_prob = tf.nn.ctc_beam_search_decoder( - self.predict, - self.seq_len, + inputs=self.predict, + sequence_length=self.seq_len, merge_repeated=False, beam_width=CTC_BEAM_WIDTH, top_paths=CTC_TOP_PATHS, ) + if StrictVersion(tf.__version__) >= StrictVersion('1.12.0'): self.dense_decoded = tf.sparse.to_dense(self.decoded[0], default_value=-1, name="dense_decoded") else: self.dense_decoded = tf.sparse_tensor_to_dense(self.decoded[0], default_value=-1, name="dense_decoded") + + +if __name__ == '__main__': + 
GraphOCR(RunMode.Predict, CNNNetwork.CNN5, RecurrentNetwork.BLSTM).build_graph()
diff --git a/make_dataset.py b/make_dataset.py
index c3dcb6a..bc2617c 100644
--- a/make_dataset.py
+++ b/make_dataset.py
@@ -5,40 +5,33 @@
 import random
 import tensorflow as tf
 from config import *
-
-REGEX_MAP = {
-    RunMode.Trains: TRAINS_REGEX,
-    RunMode.Test: TEST_REGEX
-}
+from constants import RunMode

 _RANDOM_SEED = 0
+label_max_length = 0
+
+TFRECORDS_TYPE = [
+    RunMode.Trains,
+    RunMode.Test
+]

 if not os.path.exists(TFRECORDS_DIR):
     os.makedirs(TFRECORDS_DIR)


 def _image(path):
-
     with open(path, "rb") as f:
         return f.read()


 def _dataset_exists(dataset_dir):
-    for split_name in TFRECORDS_NAME_MAP.values():
-        output_filename = os.path.join(dataset_dir, "{}_{}.tfrecords".format(TARGET_MODEL, split_name))
+    for split_name in TFRECORDS_TYPE:
+        output_filename = os.path.join(dataset_dir, "{}_{}.tfrecords".format(TARGET_MODEL, split_name.value))
         if not tf.gfile.Exists(output_filename):
             return False
     return True


-def _get_all_files(dataset_dir):
-    file_list = []
-    for filename in os.listdir(dataset_dir):
-        path = os.path.join(dataset_dir, filename)
-        file_list.append(path)
-    return file_list
-
-
 def bytes_feature(values):
     return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))

@@ -51,17 +44,20 @@ def image_to_tfrecords(image_data, label):


 def _convert_dataset(file_list, mode):
-
-    output_filename = os.path.join(TFRECORDS_DIR, "{}_{}.tfrecords".format(TARGET_MODEL, TFRECORDS_NAME_MAP[mode]))
+    output_filename = os.path.join(TFRECORDS_DIR, "{}_{}.tfrecords".format(TARGET_MODEL, mode.value))
     with tf.python_io.TFRecordWriter(output_filename) as writer:
         for i, file_name in enumerate(file_list):
             try:
                 sys.stdout.write('\r>> Converting image %d/%d ' % (i + 1, len(file_list)))
                 sys.stdout.flush()
                 image_data = _image(file_name)
-                labels = re.search(REGEX_MAP[mode], file_name.split(PATH_SPLIT)[-1]).group()
-                labels = labels.encode('utf-8')
-
+                labels = re.search(TRAINS_REGEX, file_name.split(PATH_SPLIT)[-1])
+                if labels:
+                    labels = labels.group()
+                else:
+                    raise NameError('invalid filename {}'.format(file_name))
+                labels = labels.split('/')[-1]
+                labels = labels.encode('utf-8')
                 example = image_to_tfrecords(image_data, labels)
                 writer.write(example.SerializeToString())

@@ -73,29 +69,28 @@
     sys.stdout.flush()


-def run():
+def make_dataset():
+    dataset_path = DATASET_PATH
     if _dataset_exists(TFRECORDS_DIR):
         print('Exists!')
     else:
-        if isinstance(TRAINS_PATH, list):
+        if not DATASET_PATH and isinstance(TRAINS_PATH, str) and not TRAINS_PATH.endswith("tfrecords"):
+            dataset_path = TRAINS_PATH
+        elif not DATASET_PATH and isinstance(TRAINS_PATH, str) and TRAINS_PATH.endswith("tfrecords"):
+            print('DATASET_PATH is not configured!')
+            exit(-1)
+
+        if isinstance(dataset_path, list):
             origin_dataset = []
-            for trains_path in TRAINS_PATH:
+            for trains_path in dataset_path:
                 origin_dataset += [os.path.join(trains_path, trains) for trains in os.listdir(trains_path)]
         else:
-            origin_dataset = [os.path.join(TRAINS_PATH, trains) for trains in os.listdir(TRAINS_PATH)]
-        if HAS_TEST_SET:
-            trains_dataset = origin_dataset
-            if isinstance(TEST_PATH, list):
-                test_dataset = []
-                for test_path in TEST_PATH:
-                    test_dataset += [os.path.join(test_path, test) for test in os.listdir(test_path)]
-            else:
-                test_dataset = [os.path.join(TEST_PATH, test) for test in os.listdir(TEST_PATH)]
-        else:
-            random.seed(_RANDOM_SEED)
-            random.shuffle(origin_dataset)
-            test_dataset = 
origin_dataset[:TEST_SET_NUM]
-            trains_dataset = origin_dataset[TEST_SET_NUM:]
+            origin_dataset = [os.path.join(dataset_path, trains) for trains in os.listdir(dataset_path)]
+
+        random.seed(_RANDOM_SEED)
+        random.shuffle(origin_dataset)
+        test_dataset = origin_dataset[:TEST_SET_NUM]
+        trains_dataset = origin_dataset[TEST_SET_NUM:]

     _convert_dataset(test_dataset, mode=RunMode.Test)
     _convert_dataset(trains_dataset, mode=RunMode.Trains)
@@ -103,4 +98,4 @@

 if __name__ == '__main__':
-    run()
+    make_dataset()
diff --git a/model_demo.yaml b/model_demo.yaml
index fb93134..353e49d 100644
--- a/model_demo.yaml
+++ b/model_demo.yaml
@@ -1,25 +1,24 @@
-# Sites: A bindable parameter used to select a model.
-# - If this parameter is defined,
-# - it can be identified by using the model_site parameter
-# - to identify a model that is inconsistent with the actual size of the current model.
+# - requirement.txt - GPU: tensorflow-gpu, CPU: tensorflow
+# - If you use the GPU version, you need to install some additional applications.
+System:
+  DeviceUsage: 0.7
+
 # ModelName: Corresponding to the model file in the model directory,
 # - such as YourModelName.pb, fill in YourModelName here.
-# ModelType: This parameter is also used to locate the model.
-# - The difference from the sites is that if there is no corresponding site,
-# - the size will be used to assign the model.
-# - If a model of the corresponding size and corresponding to the ModelType is not found,
-# - the model belonging to the category is preferentially selected.
 # CharSet: Provides a default optional built-in solution:
 # - [ALPHANUMERIC, ALPHANUMERIC_LOWER, ALPHANUMERIC_UPPER,
-# -- NUMERIC, ALPHABET_LOWER, ALPHABET_UPPER, ALPHABET]
+# -- NUMERIC, ALPHABET_LOWER, ALPHABET_UPPER, ALPHABET, ALPHANUMERIC_LOWER_MIX_CHINESE_3500]
 # - Or you can use your own customized character set like: ['a', '1', '2'].
+# CharMaxLength: Maximum length of characters, used for label padding.
 # CharExclude: CharExclude should be a list, like: ['a', '1', '2']
 # - which is convenient for users to freely combine character sets.
 # - If you don't want to manually define the character set manually,
 # - you can choose a built-in character set
 # - and set the characters to be excluded by CharExclude parameter.
 Model:
-  Sites: []
+  Sites: [
+    'YourModelName'
+  ]
   ModelName: YourModelName
   ModelType: 150x50
   CharSet: ALPHANUMERIC_LOWER
@@ -27,15 +26,74 @@ Model:
   CharReplace: {}
   ImageWidth: 150
   ImageHeight: 50
-  ImageChannel: 1
-  Version: 1.0

 # Binaryzation: [-1: Off, >0 and < 255: On].
 # Smoothing: [-1: Off, >0: On].
 # Blur: [-1: Off, >0: On].
 # Resize: [WIDTH, HEIGHT]
 # - If the image size is too small, the training effect will be poor and you need to zoom in.
+# ReplaceTransparent: [True, False]
+# - True: Convert transparent images in RGBA format to opaque RGB format,
+# - False: Keep the original image
 Pretreatment:
   Binaryzation: -1
   Smoothing: -1
-  Blur: -1
\ No newline at end of file
+  Blur: -1
+  Resize: [150, 50]
+  ReplaceTransparent: True
+
+# CNNNetwork: [CNN5, ResNet, DenseNet]
+# RecurrentNetwork: [BLSTM, LSTM, SRU, BSRU, GRU]
+# - The recommended configuration is CNN5+BLSTM / ResNet+BLSTM
+# HiddenNum: [64, 128, 256]
+# - This parameter indicates the number of nodes used to remember and store past states.
+# Optimizer: Loss function algorithm for calculating gradient.
+# - [AdaBound, Adam, Momentum]
+NeuralNet:
+  CNNNetwork: CNN5
+  RecurrentNetwork: BLSTM
+  HiddenNum: 64
+  KeepProb: 0.98
+  Optimizer: AdaBound
+  PreprocessCollapseRepeated: False
+  CTCMergeRepeated: True
+  CTCBeamWidth: 1
+  CTCTopPaths: 1
+  WarpCTC: False
+
+# TrainsPath and TestPath: The local absolute path of your training and testing set.
+# DatasetPath: Samples under this path are packaged into the TFRecords format.
+# TrainRegex and TestRegex: Default matching apple_20181010121212.jpg file.
+# - The Default is .*?(?=_.*\.)
+# TestSetNum: Optional. The number of samples split off from the training set to form
+# - the test set when an automatic allocation strategy is used (TestPath not set).
+# SavedSteps: A Session.run() execution is called a Step,
+# - Used to save training progress, Default value is 100.
+# ValidationSteps: Used to calculate accuracy, Default value is 500.
+# EndAcc: Finish the training when the accuracy reaches [EndAcc*100]% and other conditions.
+# EndCost: Finish the training when the cost reaches EndCost and other conditions.
+# EndEpochs: Finish the training when the epoch is greater than the defined epoch and other conditions.
+# BatchSize: Number of samples selected for one training step.
+# TestBatchSize: Number of samples selected for one validation step.
+# LearningRate: Recommended value [0.01: MomentumOptimizer/AdamOptimizer, 0.001: AdaBoundOptimizer]
+Trains:
+  TrainsPath: './dataset/mnist-CNN5BLSTM-H64-28x28_trains.tfrecords'
+  TestPath: './dataset/mnist-CNN5BLSTM-H64-28x28_test.tfrecords'
+  DatasetPath: [
+    "D:/***"
+  ]
+  TrainRegex: '.*?(?=_)'
+  TestSetNum: 300
+  SavedSteps: 100
+  ValidationSteps: 500
+  EndAcc: 0.95
+  EndCost: 0.1
+  EndEpochs: 2
+  BatchSize: 128
+  TestBatchSize: 300
+  LearningRate: 0.001
+  DecayRate: 0.98
+  DecaySteps: 10000
+
+
diff --git a/network/CNN.py b/network/CNN.py
new file mode 100644
index 0000000..b59e648
--- /dev/null
+++ b/network/CNN.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Author: kerlomz
+import tensorflow as tf
+from network.utils import NetworkUtils
+from config import IMAGE_CHANNEL
+
+
+class CNN5(object):
+
+    def __init__(self, inputs: tf.Tensor, utils: NetworkUtils):
+        self.inputs = inputs
+        self.utils = utils
+        # (in_channels, out_channels)
+        self.filters = [(IMAGE_CHANNEL, 32), (32, 64), (64, 128), (128, 128), (128, 64)]
+        # (conv2d_strides, max_pool_strides)
+        self.strides = [(1, 1), (1, 2), (1, 2), (1, 2), (1, 2)]
+        self.filter_size = [7, 5, 3, 3, 3]
+
+    def build(self):
+        with tf.variable_scope('cnn'):
+            x = self.inputs
+            x = self.utils.cnn_layers(
+                inputs=x,
+                filter_size=self.filter_size,
+                filters=self.filters,
+                strides=self.strides
+            )
+
+            shape_list = x.get_shape().as_list()
+            x = tf.reshape(x, [tf.shape(x)[0], -1, shape_list[2] * shape_list[3]])
+            return x
diff --git a/network/CNN5.py b/network/CNN5.py
deleted file mode 100644
index 018d52a..0000000
--- a/network/CNN5.py
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding:utf-8 -*-
-# Author: kerlomz
-import tensorflow as tf
-from network.utils import NetworkUtils
-from config import IMAGE_CHANNEL
-
-
-class CNN5(object):
-
-    def __init__(self, inputs: tf.Tensor, utils: NetworkUtils):
-        self.inputs = inputs
-        self.utils = utils
-        self.filters = [32, 64, 128, 128, 64]
-        self.strides = [1, 2]
-
-    def build(self):
-        with tf.variable_scope('cnn'):
-            with tf.variable_scope('unit-1'):
-                x = 
self.utils.conv2d(self.inputs, 'cnn-1', 7, IMAGE_CHANNEL, self.filters[0], self.strides[0]) - x = self.utils.batch_norm('bn1', x) - x = self.utils.leaky_relu(x, 0.01) - x = self.utils.max_pool(x, 2, self.strides[0]) - - with tf.variable_scope('unit-2'): - x = self.utils.conv2d(x, 'cnn-2', 5, self.filters[0], self.filters[1], self.strides[0]) - x = self.utils.batch_norm('bn2', x) - x = self.utils.leaky_relu(x, 0.01) - x = self.utils.max_pool(x, 2, self.strides[1]) - - with tf.variable_scope('unit-3'): - x = self.utils.conv2d(x, 'cnn-3', 3, self.filters[1], self.filters[2], self.strides[0]) - x = self.utils.batch_norm('bn3', x) - x = self.utils.leaky_relu(x, 0.01) - x = self.utils.max_pool(x, 2, self.strides[1]) - - with tf.variable_scope('unit-4'): - x = self.utils.conv2d(x, 'cnn-4', 3, self.filters[2], self.filters[3], self.strides[0]) - x = self.utils.batch_norm('bn4', x) - x = self.utils.leaky_relu(x, 0.01) - x = self.utils.max_pool(x, 2, self.strides[1]) - - with tf.variable_scope('unit-5'): - x = self.utils.conv2d(x, 'cnn-5', 3, self.filters[3], self.filters[4], self.strides[0]) - x = self.utils.batch_norm('bn5', x) - x = self.utils.leaky_relu(x, 0.01) - x = self.utils.max_pool(x, 2, self.strides[1]) - - shape_list = x.get_shape().as_list() - x = tf.reshape(x, [-1, shape_list[1], shape_list[2] * shape_list[3]]) - return x diff --git a/network/DenseNet.py b/network/DenseNet.py new file mode 100644 index 0000000..b81e55f --- /dev/null +++ b/network/DenseNet.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Author: kerlomz +import tensorflow as tf +from network.utils import NetworkUtils + + +class DenseNet(object): + + def __init__(self, inputs: tf.Tensor, utils: NetworkUtils): + self.inputs = inputs + self.utils = utils + self.nb_filter = 12 + self.strides = (2, 2) + self.kernel_size = 5 + self.padding = "SAME" + + def build(self): + with tf.variable_scope('DenseNet'): + x = tf.layers.conv2d( + inputs=self.inputs, + filters=self.nb_filter, + kernel_size=self.kernel_size, + strides=self.strides, + padding=self.padding, + use_bias=False + ) + x, nb_filter = self.utils.dense_block(x, 8, 8, self.nb_filter) + x, nb_filter = self.utils.transition_block(x, 128, pool_type=2) + x, nb_filter = self.utils.dense_block(x, 8, 8, nb_filter) + x, nb_filter = self.utils.transition_block(x, 128, pool_type=3) + x, nb_filter = self.utils.dense_block(x, 8, 8, nb_filter) + + shape_list = x.get_shape().as_list() + x = tf.reshape(x, [tf.shape(x)[0], -1, shape_list[2] * shape_list[3]]) + return x diff --git a/network/ResNet.py b/network/ResNet.py index 8216498..77b3f79 100644 --- a/network/ResNet.py +++ b/network/ResNet.py @@ -29,8 +29,8 @@ def build(self): ) a1 = self.utils.batch_norm(x=a1, name='bn_conv1') - a1 = tf.nn.relu(a1) - # a1 = self._leaky_relu(a1) + # a1 = tf.nn.relu(a1) + a1 = self.utils.leaky_relu(a1) a1 = tf.nn.max_pool(a1, ksize=(1, 3, 3, 1), strides=(1, 2, 2, 1), padding='VALID') @@ -59,5 +59,5 @@ def build(self): x = self.utils.identity_block(a5, 3, [512, 512, 2048], stage=5, block='c') shape_list = x.get_shape().as_list() - x = tf.reshape(x, [-1, shape_list[1] * shape_list[2], shape_list[3]]) + x = tf.reshape(x, [tf.shape(x)[0], tf.shape(x)[1] * shape_list[2], shape_list[3]]) return x diff --git a/network/utils.py b/network/utils.py index 37d6525..0dc7872 100644 --- a/network/utils.py +++ b/network/utils.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- # Author: kerlomz +import math import tensorflow as tf from config import * -from 
tensorflow.python.training import moving_averages class NetworkUtils(object): @@ -18,15 +18,43 @@ def zero_padding(x, pad=(3, 3)): return tf.pad(x, padding, 'CONSTANT') @staticmethod - def conv2d(x, name, filter_size, in_channels, out_channels, strides, padding='SAME'): + def msra_initializer(kl, dl): + """ MSRA weight initializer + (https://arxiv.org/pdf/1502.01852.pdf) + Keyword arguments: + kl -- kernel size + dl -- filter numbers + """ + + stddev = math.sqrt(2. / (kl ** 2 * dl)) + return tf.truncated_normal_initializer(stddev=stddev) + + def cnn_layers(self, inputs, filter_size, filters, strides): + x = inputs + for i in range(len(filter_size)): + with tf.variable_scope('unit-{}'.format(i + 1)): + x = self.conv2d( + x=x, + name='cnn-{}'.format(i + 1), + filter_size=filter_size[i], + in_channels=filters[i][0], + out_channels=filters[i][1], + strides=strides[i][0] + ) + x = self.batch_norm('bn{}'.format(i + 1), x) + x = self.leaky_relu(x, 0.01) + x = self.max_pool(x, 2, strides[i][1]) + return x + + def conv2d(self, x, name, filter_size, in_channels, out_channels, strides, padding='SAME'): # n = filter_size * filter_size * out_channels with tf.variable_scope(name): kernel = tf.get_variable( name='DW', shape=[filter_size, filter_size, in_channels, out_channels], dtype=tf.float32, - initializer=tf.contrib.layers.xavier_initializer() - # initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / n)) + # initializer=tf.contrib.layers.xavier_initializer(), + initializer=self.msra_initializer(filter_size, in_channels), ) b = tf.get_variable( @@ -200,10 +228,14 @@ def leaky_relu(x, leakiness=0.0): @staticmethod def max_pool(x, ksize, strides): + if isinstance(ksize, int): + ksize = [ksize, ksize] + if isinstance(strides, int): + strides = [strides, strides] return tf.nn.max_pool( x, - ksize=[1, ksize, ksize, 1], - strides=[1, strides, strides, 1], + ksize=[1, ksize[0], ksize[1], 1], + strides=[1, strides[0], strides[1], 1], padding='SAME', name='max_pool' ) @@ -239,63 +271,48 @@ def stacked_bidirectional_rnn(rnn, num_units, num_layers, inputs, seq_lengths): return _inputs def batch_norm(self, name, x): - with tf.variable_scope(name): - params_shape = [x.get_shape()[-1]] - # offset - beta = tf.get_variable( - 'beta', - params_shape, - tf.float32, - initializer=tf.constant_initializer(0.0, tf.float32) - ) - # scale - gamma = tf.get_variable( - 'gamma', - params_shape, - tf.float32, - initializer=tf.constant_initializer(1.0, tf.float32) - ) + return tf.layers.batch_normalization(x, training=self.mode == RunMode.Trains, fused=True, name=name) - if self.mode == RunMode.Trains: - # Calculate the mean and standard deviation for each channel. - mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments') - # New or build batch average, standard deviation used in the test phase. 
- moving_mean = tf.get_variable( - 'moving_mean', - params_shape, tf.float32, - initializer=tf.constant_initializer(0.0, tf.float32), - trainable=False - ) - moving_variance = tf.get_variable( - 'moving_variance', - params_shape, tf.float32, - initializer=tf.constant_initializer(1.0, tf.float32), - trainable=False - ) - # Add update operation for batch mean and standard deviation (sliding average) - # moving_mean = moving_mean * decay + mean * (1 - decay) - # moving_variance = moving_variance * decay + variance * (1 - decay) - self.extra_train_ops.append(moving_averages.assign_moving_average(moving_mean, mean, 0.9)) - self.extra_train_ops.append(moving_averages.assign_moving_average(moving_variance, variance, 0.9)) - else: - # Obtain the batch mean and standard deviation accumulated during training. - mean = tf.get_variable( - 'moving_mean', - params_shape, tf.float32, - initializer=tf.constant_initializer(0.0, tf.float32), - trainable=False - ) - variance = tf.get_variable( - 'moving_variance', - params_shape, tf.float32, - initializer=tf.constant_initializer(1.0, tf.float32), - trainable=False - ) - # Add to histogram summary. - tf.summary.histogram(mean.op.name, mean) - tf.summary.histogram(variance.op.name, variance) - - # BN Layer:((x-mean)/var)*gamma+beta - y = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 0.001) - y.set_shape(x.get_shape()) - return y + def conv_block(self, x, growth_rate, dropout_rate=None): + _x = self.batch_norm(name=None, x=x) + _x = self.leaky_relu(_x) + + _x = tf.layers.conv2d( + inputs=_x, + filters=growth_rate, + kernel_size=3, + strides=(1, 1), + padding='SAME', + kernel_initializer=self.msra_initializer(3, growth_rate) + ) + if dropout_rate is not None: + _x = tf.nn.dropout(_x, dropout_rate) + return _x + + def dense_block(self, x, nb_layers, growth_rate, nb_filter, dropout_rate=0.2): + for i in range(nb_layers): + cb = self.conv_block(x, growth_rate, dropout_rate) + x = tf.concat([x, cb], 3) + nb_filter += growth_rate + return x, nb_filter + + def transition_block(self, x, filters, dropout_kp=None, pool_type=1): + _x = self.batch_norm(name=None, x=x) + _x = self.leaky_relu(_x) + _x = tf.layers.conv2d( + inputs=_x, + filters=filters, + kernel_size=1, + strides=(1, 1), + padding='SAME', + kernel_initializer=self.msra_initializer(3, filters) + ) + if dropout_kp is not None: + _x = tf.nn.dropout(_x, dropout_kp) + if pool_type == 2: + _x = tf.nn.avg_pool(_x, [1, 2, 2, 1], [1, 2, 2, 1], "VALID") + elif pool_type == 1: + _x = tf.nn.avg_pool(_x, [1, 2, 2, 1], [1, 2, 1, 1], "SAME") + elif pool_type == 3: + _x = tf.nn.avg_pool(_x, [1, 2, 2, 1], [1, 1, 2, 1], "SAME") + return _x, filters diff --git a/optimizer/AdaBound.py b/optimizer/AdaBound.py new file mode 100644 index 0000000..4cee26b --- /dev/null +++ b/optimizer/AdaBound.py @@ -0,0 +1,264 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Author: kerlomz +import tensorflow as tf +from distutils.version import StrictVersion +from tensorflow.python.eager import context +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.training import optimizer +from tensorflow.python.ops.clip_ops import clip_by_value + +"""Implements AdaBound algorithm. + It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 
+ Arguments: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional): Adam learning rate (default: 1e-3) + betas (Tuple[float, float], optional): coefficients used for computing + running averages of gradient and its square (default: (0.9, 0.999)) + final_lr (float, optional): final (SGD) learning rate (default: 0.1) + gamma (float, optional): convergence speed of the bound functions (default: 1e-3) + eps (float, optional): term added to the denominator to improve + numerical stability (default: 1e-8) + weight_decay (float, optional): weight decay (L2 penalty) (default: 0) + amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm + .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate: + https://openreview.net/forum?id=Bkg3g2R9FX + """ + + +class AdaBoundOptimizer(optimizer.Optimizer): + def __init__(self, learning_rate=0.001, final_lr=0.1, beta1=0.9, beta2=0.999, + gamma=1e-3, epsilon=1e-8, amsbound=False, + use_locking=False, name="AdaBound"): + super(AdaBoundOptimizer, self).__init__(use_locking, name) + self._lr = learning_rate + self._final_lr = final_lr + self._beta1 = beta1 + self._beta2 = beta2 + self._epsilon = epsilon + + self._gamma = gamma + self._amsbound = amsbound + + self._lr_t = None + self._beta1_t = None + self._beta2_t = None + self._epsilon_t = None + + def _create_slots(self, var_list): + first_var = min(var_list, key=lambda x: x.name) + if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'): + graph = None if context.executing_eagerly() else ops.get_default_graph() + else: + graph = ops.get_default_graph() + create_new = self._get_non_slot_variable("beta1_power", graph) is None + if not create_new and context.in_graph_mode(): + create_new = (self._get_non_slot_variable("beta1_power", graph).graph is not first_var.graph) + + if create_new: + self._create_non_slot_variable(initial_value=self._beta1, + name="beta1_power", + colocate_with=first_var) + self._create_non_slot_variable(initial_value=self._beta2, + name="beta2_power", + colocate_with=first_var) + self._create_non_slot_variable(initial_value=self._gamma, + name="gamma_multi", + colocate_with=first_var) + # Create slots for the first and second moments. 
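+        # Editor's note: "m" and "v" mirror Adam's first and second moment estimates;
+        # "vhat" tracks the running maximum of "v" used by the AMSBound variant (amsbound=True).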
+ for v in var_list : + self._zeros_slot(v, "m", self._name) + self._zeros_slot(v, "v", self._name) + self._zeros_slot(v, "vhat", self._name) + + def _prepare(self): + self._lr_t = ops.convert_to_tensor(self._lr) + self._base_lr_t = ops.convert_to_tensor(self._lr) + self._beta1_t = ops.convert_to_tensor(self._beta1) + self._beta2_t = ops.convert_to_tensor(self._beta2) + self._epsilon_t = ops.convert_to_tensor(self._epsilon) + self._gamma_t = ops.convert_to_tensor(self._gamma) + + def _apply_dense(self, grad, var): + if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'): + graph = None if context.executing_eagerly() else ops.get_default_graph() + else: + graph = ops.get_default_graph() + beta1_power = math_ops.cast(self._get_non_slot_variable("beta1_power", graph=graph), var.dtype.base_dtype) + beta2_power = math_ops.cast(self._get_non_slot_variable("beta2_power", graph=graph), var.dtype.base_dtype) + lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) + base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype) + beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype) + beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype) + epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype) + gamma_multi = math_ops.cast(self._get_non_slot_variable("gamma_multi", graph=graph), var.dtype.base_dtype) + + step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) + final_lr = self._final_lr * lr_t / base_lr_t + lower_bound = final_lr * (1. - 1. / (gamma_multi + 1.)) + upper_bound = final_lr * (1. + 1. / (gamma_multi)) + + # m_t = beta1 * m + (1 - beta1) * g_t + m = self.get_slot(var, "m") + m_scaled_g_values = grad * (1 - beta1_t) + m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking) + + # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) + v = self.get_slot(var, "v") + v_scaled_g_values = (grad * grad) * (1 - beta2_t) + v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking) + + # amsgrad + vhat = self.get_slot(var, "vhat") + if self._amsbound : + vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat)) + v_sqrt = math_ops.sqrt(vhat_t) + else: + vhat_t = state_ops.assign(vhat, vhat) + v_sqrt = math_ops.sqrt(v_t) + + # Compute the bounds + step_size_bound = step_size / (v_sqrt + epsilon_t) + bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound) + + var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking) + return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t]) + + def _resource_apply_dense(self, grad, var): + if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'): + graph = None if context.executing_eagerly() else ops.get_default_graph() + else: + graph = ops.get_default_graph() + beta1_power = math_ops.cast(self._get_non_slot_variable("beta1_power", graph=graph), grad.dtype.base_dtype) + beta2_power = math_ops.cast(self._get_non_slot_variable("beta2_power", graph=graph), grad.dtype.base_dtype) + lr_t = math_ops.cast(self._lr_t, grad.dtype.base_dtype) + base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype) + beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype) + beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype) + epsilon_t = math_ops.cast(self._epsilon_t, grad.dtype.base_dtype) + gamma_multi = math_ops.cast(self._get_non_slot_variable("gamma_multi", graph=graph), var.dtype.base_dtype) + + step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) + final_lr = self._final_lr * lr_t / 
base_lr_t + lower_bound = final_lr * (1. - 1. / (gamma_multi + 1.)) + upper_bound = final_lr * (1. + 1. / (gamma_multi)) + + # m_t = beta1 * m + (1 - beta1) * g_t + m = self.get_slot(var, "m") + m_scaled_g_values = grad * (1 - beta1_t) + m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking) + + # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) + v = self.get_slot(var, "v") + v_scaled_g_values = (grad * grad) * (1 - beta2_t) + v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking) + + # amsgrad + vhat = self.get_slot(var, "vhat") + if self._amsbound: + vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat)) + v_sqrt = math_ops.sqrt(vhat_t) + else: + vhat_t = state_ops.assign(vhat, vhat) + v_sqrt = math_ops.sqrt(v_t) + + # Compute the bounds + step_size_bound = step_size / (v_sqrt + epsilon_t) + bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound) + + var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking) + + return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t]) + + def _apply_sparse_shared(self, grad, var, indices, scatter_add): + if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'): + graph = None if context.executing_eagerly() else ops.get_default_graph() + else: + graph = ops.get_default_graph() + beta1_power = math_ops.cast(self._get_non_slot_variable("beta1_power", graph=graph), var.dtype.base_dtype) + beta2_power = math_ops.cast(self._get_non_slot_variable("beta2_power", graph=graph), var.dtype.base_dtype) + lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) + base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype) + beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype) + beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype) + epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype) + gamma_t = math_ops.cast(self._gamma_t, var.dtype.base_dtype) + + step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) + final_lr = self._final_lr * lr_t / base_lr_t + lower_bound = final_lr * (1. - 1. / (gamma_t + 1.)) + upper_bound = final_lr * (1. + 1. 
+    def _apply_sparse(self, grad, var):
+        return self._apply_sparse_shared(
+            grad.values, var, grad.indices,
+            lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
+                x, i, v, use_locking=self._use_locking))
+
+    def _resource_scatter_add(self, x, i, v):
+        with ops.control_dependencies(
+                [resource_variable_ops.resource_scatter_add(x, i, v)]):
+            return x.value()
+
+    def _resource_apply_sparse(self, grad, var, indices):
+        return self._apply_sparse_shared(
+            grad, var, indices, self._resource_scatter_add)
+
+    def _finish(self, update_ops, name_scope):
+        # Update the power accumulators.
+        with ops.control_dependencies(update_ops):
+            if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'):
+                graph = None if context.executing_eagerly() else ops.get_default_graph()
+            else:
+                graph = ops.get_default_graph()
+            beta1_power = self._get_non_slot_variable("beta1_power", graph=graph)
+            beta2_power = self._get_non_slot_variable("beta2_power", graph=graph)
+            gamma_multi = self._get_non_slot_variable("gamma_multi", graph=graph)
+            with ops.colocate_with(beta1_power):
+                update_beta1 = beta1_power.assign(
+                    beta1_power * self._beta1_t,
+                    use_locking=self._use_locking)
+                update_beta2 = beta2_power.assign(
+                    beta2_power * self._beta2_t,
+                    use_locking=self._use_locking)
+                update_gamma = gamma_multi.assign(
+                    gamma_multi + self._gamma_t,
+                    use_locking=self._use_locking)
+        return control_flow_ops.group(*update_ops + [update_beta1, update_beta2, update_gamma], name=name_scope)
diff --git a/optimizer/__init__.py b/optimizer/__init__.py
new file mode 100644
index 0000000..6c85277
--- /dev/null
+++ b/optimizer/__init__.py
@@ -0,0 +1,3 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Author: kerlomz
\ No newline at end of file
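For context, a usage sketch of the optimizer this file defines. The import path and keyword names here are assumptions inferred from the fields the methods reference (`self._lr`, `self._final_lr`, `self._beta1`, `self._beta2`, `self._gamma`, `self._amsbound`); the constructor itself is not part of this hunk:

```python
import tensorflow as tf
from optimizer.AdaBound import AdaBoundOptimizer  # hypothetical import path

w = tf.Variable([1.0, 2.0])
loss = tf.reduce_sum(tf.square(w))
global_step = tf.train.get_or_create_global_step()

# Keyword names below are assumed, not confirmed by this diff.
train_op = AdaBoundOptimizer(
    learning_rate=0.001, final_lr=0.1,
    beta1=0.9, beta2=0.999, gamma=1e-3, amsbound=True
).minimize(loss, global_step=global_step)
```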
diff --git a/predict_testing.py b/predict_testing.py
index e9c6c08..b49c097 100644
--- a/predict_testing.py
+++ b/predict_testing.py
@@ -6,8 +6,11 @@
 import numpy as np
 import PIL.Image as PIL_Image
 import tensorflow as tf
+from importlib import import_module
 from config import *
+from constants import RunMode
 from pretreatment import preprocessing
+from framework import GraphOCR
 
 
 def get_image_batch(img_bytes):
@@ -28,7 +31,12 @@ def load_image(image_bytes):
     im = np.array(pil_image)
     im = preprocessing(im, BINARYZATION, SMOOTH, BLUR).astype(np.float32)
-    im = cv2.resize(im, (RESIZE[0], RESIZE[1]))
+    if RESIZE[0] == -1:
+        ratio = RESIZE[1] / size[1]
+        resize_width = int(ratio * size[0])
+        im = cv2.resize(im, (resize_width, RESIZE[1]))
+    else:
+        im = cv2.resize(im, (RESIZE[0], RESIZE[1]))
     im = im.swapaxes(0, 1)
     return (im[:, :, np.newaxis] if IMAGE_CHANNEL == 1 else im[:, :]) / 255.
@@ -58,6 +66,8 @@ def predict_func(image_batch, _sess, dense_decoded, op_input):
 if __name__ == '__main__':
+    if WARP_CTC:
+        import_module('warpctc_tensorflow')
     graph = tf.Graph()
     tf_checkpoint = tf.train.latest_checkpoint(MODEL_PATH)
     sess = tf.Session(
@@ -66,17 +76,29 @@ def predict_func(image_batch, _sess, dense_decoded, op_input):
         # allow_soft_placement=True,
         # log_device_placement=True,
         gpu_options=tf.GPUOptions(
+            allocator_type='BFC',
             # allow_growth=True,  # it will cause fragmentation.
-            per_process_gpu_memory_fraction=0.1
+            per_process_gpu_memory_fraction=0.01
         ))
     )
     graph_def = graph.as_graph_def()
     with graph.as_default():
         sess.run(tf.global_variables_initializer())
+        # with tf.gfile.GFile(COMPILE_MODEL_PATH.replace('.pb', '_{}.pb'.format(int(0.95 * 10000))), "rb") as f:
+        #     graph_def_file = f.read()
+        # graph_def.ParseFromString(graph_def_file)
+        # print('{}.meta'.format(tf_checkpoint))
+        model = GraphOCR(
+            RunMode.Predict,
+            NETWORK_MAP[NEU_CNN],
+            NETWORK_MAP[NEU_RECURRENT]
+        )
+        model.build_graph()
+        saver = tf.train.Saver(tf.global_variables())
+
+        saver.restore(sess, tf.train.latest_checkpoint(MODEL_PATH))
         _ = tf.import_graph_def(graph_def, name="")
-        saver = tf.train.import_meta_graph('{}.meta'.format(tf_checkpoint))
-        saver.restore(sess, tf_checkpoint)
 
     dense_decoded_op = sess.graph.get_tensor_by_name("dense_decoded:0")
     x_op = sess.graph.get_tensor_by_name('input:0')
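The `RESIZE[0] == -1` branch above keeps the aspect ratio and fixes only the height. A self-contained sketch of that resize rule, with `target_height` standing in for `RESIZE[1]`:

```python
import cv2
import numpy as np

def resize_keep_ratio(im: np.ndarray, target_height: int) -> np.ndarray:
    """Scale the image so its height matches target_height; width follows proportionally."""
    height, width = im.shape[:2]
    ratio = target_height / height
    resize_width = int(ratio * width)
    return cv2.resize(im, (resize_width, target_height))

im = np.zeros((50, 180), dtype=np.float32)   # e.g. a 180x50 grayscale captcha
print(resize_keep_ratio(im, 50).shape)        # -> (50, 180), unchanged here
```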
diff --git a/trains.py b/trains.py
index e742e22..d172e24 100644
--- a/trains.py
+++ b/trains.py
@@ -3,7 +3,6 @@
 # Author: kerlomz
 import time
 import random
-import logging
 import numpy as np
 import tensorflow as tf
 import framework
@@ -13,9 +12,7 @@
 from PIL import ImageFile
 
 ImageFile.LOAD_TRUNCATED_IMAGES = True
-
-logger = logging.getLogger('Training for OCR using {}+{}+CTC'.format(NEU_CNN, NEU_RECURRENT))
-logger.setLevel(logging.INFO)
+tf.logging.set_verbosity(tf.logging.INFO)
 
 
 def compile_graph(acc):
@@ -30,7 +27,8 @@ def compile_graph(acc):
     )
     model.build_graph()
     input_graph_def = sess.graph.as_graph_def()
-    saver = tf.train.Saver()
+    saver = tf.train.Saver(var_list=tf.global_variables())
+    tf.logging.info(tf.train.latest_checkpoint(MODEL_PATH))
     saver.restore(sess, tf.train.latest_checkpoint(MODEL_PATH))
 
     output_graph_def = convert_variables_to_constants(
@@ -40,7 +38,7 @@
     )
 
     last_compile_model_path = COMPILE_MODEL_PATH.replace('.pb', '_{}.pb'.format(int(acc * 10000)))
-    with tf.gfile.FastGFile(last_compile_model_path, mode='wb') as gf:
+    with tf.gfile.GFile(last_compile_model_path, mode='wb') as gf:
         gf.write(output_graph_def.SerializeToString())
     generate_config(acc)
@@ -50,11 +48,11 @@ def train_process(mode=RunMode.Trains):
     model = framework.GraphOCR(mode, NETWORK_MAP[NEU_CNN], NETWORK_MAP[NEU_RECURRENT])
     model.build_graph()
 
-    print('Loading Trains DataSet...')
+    tf.logging.info('Loading Trains DataSet...')
     train_feeder = utils.DataIterator(mode=RunMode.Trains)
     if TRAINS_USE_TFRECORDS:
         train_feeder.read_sample_from_tfrecords(TRAINS_PATH)
-        print('Loading Test DataSet...')
+        tf.logging.info('Loading Test DataSet...')
         test_feeder = utils.DataIterator(mode=RunMode.Test)
         test_feeder.read_sample_from_tfrecords(TEST_PATH)
     else:
@@ -64,7 +62,7 @@
             origin_list += [os.path.join(trains_path, trains) for trains in os.listdir(trains_path)]
         else:
             origin_list = [os.path.join(TRAINS_PATH, trains) for trains in os.listdir(TRAINS_PATH)]
-        random.shuffle(origin_list)
+        np.random.shuffle(origin_list)
         if not HAS_TEST_SET:
             test_list = origin_list[:TEST_SET_NUM]
             trains_list = origin_list[TEST_SET_NUM:]
@@ -75,15 +73,15 @@
                 test_list += [os.path.join(test_path, test) for test in os.listdir(test_path)]
             else:
                 test_list = [os.path.join(TEST_PATH, test) for test in os.listdir(TEST_PATH)]
-            random.shuffle(test_list)
+            np.random.shuffle(test_list)
             trains_list = origin_list
         train_feeder.read_sample_from_files(trains_list)
-        print('Loading Test DataSet...')
+        tf.logging.info('Loading Test DataSet...')
         test_feeder = utils.DataIterator(mode=RunMode.Test)
         test_feeder.read_sample_from_files(test_list)
 
-    print('Total {} Trains DataSets'.format(train_feeder.size))
-    print('Total {} Test DataSets'.format(test_feeder.size))
+    tf.logging.info('Total {} Trains DataSets'.format(train_feeder.size))
+    tf.logging.info('Total {} Test DataSets'.format(test_feeder.size))
     if test_feeder.size >= train_feeder.size:
         exception("The number of training sets cannot be less than the test set.", )
@@ -97,10 +95,11 @@
     num_batches_per_epoch = int(num_train_samples / BATCH_SIZE)
 
     config = tf.ConfigProto(
-        allow_soft_placement=True,
+        # allow_soft_placement=True,
        log_device_placement=False,
        gpu_options=tf.GPUOptions(
-            # allow_growth=True,  # it will cause fragmentation.
+            allocator_type='BFC',
+            allow_growth=True,  # it will cause fragmentation.
            per_process_gpu_memory_fraction=GPU_USAGE)
    )
    accuracy = 0
@@ -116,14 +115,12 @@
            saver.restore(sess, tf.train.latest_checkpoint(MODEL_PATH))
        except ValueError:
            pass
-
-        print('Start training...')
+        tf.logging.info('Start training...')
 
        while 1:
            shuffle_trains_idx = np.random.permutation(num_train_samples)
-            train_cost = 0
            start_time = time.time()
-            _avg_train_cost = 0
+            last_train_avg_cost = 0
            for cur_batch in range(num_batches_per_epoch):
                batch_time = time.time()
                index_list = [
@@ -131,32 +128,39 @@
                    shuffle_trains_idx[i % num_train_samples] for i in
                    range(cur_batch * BATCH_SIZE, (cur_batch + 1) * BATCH_SIZE)
                ]
                if TRAINS_USE_TFRECORDS:
-                    batch_inputs, batch_seq_len, batch_labels = train_feeder.generate_batch_by_tfrecords(sess)
+                    classified_batch = train_feeder.generate_batch_by_tfrecords(sess)
                else:
-                    batch_inputs, batch_seq_len, batch_labels = train_feeder.generate_batch_by_files(index_list)
-
-                feed = {
-                    model.inputs: batch_inputs,
-                    model.labels: batch_labels,
-                }
-
-                summary_str, batch_cost, step, _ = sess.run(
-                    [model.merged_summary, model.cost, model.global_step, model.train_op],
-                    feed_dict=feed
-                )
-                train_cost += batch_cost * BATCH_SIZE
-                avg_train_cost = train_cost / ((cur_batch + 1) * BATCH_SIZE)
-
-                train_writer.add_summary(summary_str, step)
-
-                if step % 100 == 0 and step != 0:
-                    print('Step: {} Time: {:.3f}, Cost = {:.5f}'.format(step, time.time() - batch_time, avg_train_cost))
-
-                if step % TRAINS_SAVE_STEPS == 0 and step != 0:
-                    saver.save(sess, SAVE_MODEL, global_step=step)
-                    logger.info('save checkpoint at step {0}', format(step))
+                    classified_batch = train_feeder.generate_batch_by_files(index_list)
+                step = 0
+                class_num = len(classified_batch)
+                avg_cost = 0
+                for index, (shape, batch) in enumerate(classified_batch.items()):
+                    batch_inputs, batch_seq_len, batch_labels = batch
+                    feed = {
+                        model.inputs: batch_inputs,
+                        model.labels: batch_labels,
+                    }
-                if step % TRAINS_VALIDATION_STEPS == 0 and step != 0:
+                    summary_str, batch_cost, step, _ = sess.run(
+                        [model.merged_summary, model.cost, model.global_step, model.train_op],
+                        feed_dict=feed
+                    )
+                    avg_cost += batch_cost
+                    last_train_avg_cost = avg_cost / class_num
+                    train_writer.add_summary(summary_str, step)
+                    if step % 100 == index and step not in range(class_num):
+                        tf.logging.info('Step: {} Time: {:.3f} sec/batch, Cost = {:.5f}, {}-BatchSize: {}'.format(
+                            step,
+                            time.time() - batch_time,
+                            batch_cost,
+                            shape,
+                            len(batch_inputs)
+                        ))
+                    if step % TRAINS_SAVE_STEPS == index and index == (class_num - 1) and step not in range(class_num):
+                        saver.save(sess, SAVE_MODEL, global_step=step)
+                        # tf.logging.info('save checkpoint at step {0}'.format(step))
+
+                if step % TRAINS_VALIDATION_STEPS == (class_num - 1) and step not in range(class_num):
                    shuffle_test_idx = np.random.permutation(num_test_samples)
                    batch_time = time.time()
                    index_test = [
@@ -164,34 +168,44 @@
                        range(cur_batch * TEST_BATCH_SIZE, (cur_batch + 1) * TEST_BATCH_SIZE)
                    ]
                    if TRAINS_USE_TFRECORDS:
-                        test_inputs, batch_seq_len, test_labels = test_feeder.generate_batch_by_tfrecords(sess)
+                        classified_batch = test_feeder.generate_batch_by_tfrecords(sess)
                    else:
-                        test_inputs, batch_seq_len, test_labels = test_feeder.generate_batch_by_files(index_test)
-
-                    val_feed = {
-                        model.inputs: test_inputs,
-                        model.labels: test_labels
-                    }
-                    dense_decoded, lr = sess.run(
-                        [model.dense_decoded, model.lrn_rate],
-                        feed_dict=val_feed
-                    )
+                        classified_batch = test_feeder.generate_batch_by_files(index_test)
+
+                    all_dense_decoded = []
+                    lr = 0
+
+                    for index, (shape, batch) in enumerate(classified_batch.items()):
+                        test_inputs, batch_seq_len, test_labels = batch
+                        val_feed = {
+                            model.inputs: test_inputs,
+                            model.labels: test_labels
+                        }
+                        dense_decoded, sub_lr = sess.run(
+                            [model.dense_decoded, model.lrn_rate],
+                            feed_dict=val_feed
+                        )
+                        all_dense_decoded += dense_decoded.tolist()
+                        lr += sub_lr
 
                    accuracy = utils.accuracy_calculation(
-                        test_feeder.labels(None if TRAINS_USE_TFRECORDS else index_test),
-                        dense_decoded,
+                        test_feeder.labels,
+                        all_dense_decoded,
                        ignore_value=[0, -1],
                    )
                    log = "Epoch: {}, Step: {}, Accuracy = {:.4f}, Cost = {:.5f}, " \
-                          "Time = {:.3f}, LearningRate: {}"
-                    print(log.format(
-                        epoch_count, step, accuracy, avg_train_cost, time.time() - batch_time, lr
+                          "Time = {:.3f} sec/batch, LearningRate: {}"
+                    tf.logging.info(log.format(
+                        epoch_count,
+                        step,
+                        accuracy,
+                        last_train_avg_cost, time.time() - batch_time, lr / len(classified_batch)
                    ))
-                    _avg_train_cost = avg_train_cost
-                    if accuracy >= TRAINS_END_ACC and epoch_count >= TRAINS_END_EPOCHS and avg_train_cost <= TRAINS_END_COST:
+
+                    if accuracy >= TRAINS_END_ACC and epoch_count >= TRAINS_END_EPOCHS and last_train_avg_cost <= TRAINS_END_COST:
                        break
-            if accuracy >= TRAINS_END_ACC and epoch_count >= TRAINS_END_EPOCHS and _avg_train_cost <= TRAINS_END_COST:
+            if accuracy >= TRAINS_END_ACC and epoch_count >= TRAINS_END_EPOCHS and last_train_avg_cost <= TRAINS_END_COST:
                compile_graph(accuracy)
-                print('Total Time: {}'.format(time.time() - start_time))
+                tf.logging.info('Total Time: {} sec.'.format(time.time() - start_time))
                break
            epoch_count += 1
 
@@ -207,7 +221,7 @@ def generate_config(acc):
 
 def main(_):
    init()
    train_process()
-    print('Training completed.')
+    tf.logging.info('Training completed.')
    pass
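`compile_graph` above freezes the trained session into a single `.pb`: `convert_variables_to_constants` bakes the current variable values into the GraphDef, which is then serialized. A minimal sketch of that step, assuming `dense_decoded` as the output node name (it is the tensor name used for prediction elsewhere in this project) and an illustrative output path:

```python
import tensorflow as tf
from tensorflow.python.framework.graph_util import convert_variables_to_constants

def freeze(sess: tf.Session, output_path: str):
    """Bake variables into constants and write a frozen GraphDef to disk."""
    frozen = convert_variables_to_constants(
        sess,
        sess.graph.as_graph_def(),
        output_node_names=['dense_decoded']
    )
    with tf.gfile.GFile(output_path, mode='wb') as gf:
        gf.write(frozen.SerializeToString())

# freeze(sess, 'model/YourModelName_9500.pb')  # illustrative path
```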
diff --git a/tutorial.py b/tutorial.py
index ac8ae95..fe74166 100644
--- a/tutorial.py
+++ b/tutorial.py
@@ -3,29 +3,44 @@
 # Author: kerlomz
 import os
 import json
-import PIL.Image as pil_image
+import PIL.Image as pilImage
+from constants import *
 
+# - [ALPHANUMERIC, ALPHANUMERIC_LOWER, ALPHANUMERIC_UPPER,
+# -- NUMERIC, ALPHABET_LOWER, ALPHABET_UPPER, ALPHABET, ALPHANUMERIC_LOWER_MIX_CHINESE_3500]
+charset = SimpleCharset.ALPHANUMERIC_LOWER
 
-class RecurrentNetwork:
-    LSTM = 'LSTM'
-    BLSTM = 'BLSTM'
-    SRU = 'SRU'
-    BSRU = 'BSRU'
-
+cnn_network = CNNNetwork.CNN5
+recurrent_network = RecurrentNetwork.BLSTM
+optimizer = Optimizer.AdaBound
 
-charset = "ALPHANUMERIC_LOWER"
-network = RecurrentNetwork.BLSTM
 trains_path = [
     r"D:\TrainSet\***",
 ]
+test_num = 500
+hidden_num = 64
+beam_width = 1
+learning_rate = None
+
+name_prefix = None
+name_suffix = None
+name_prefix = name_prefix if name_prefix else "tutorial"
+name_suffix = '-' + str(name_suffix) if name_suffix else ''
+
 
 model = """
+# - requirement.txt  -  GPU: tensorflow-gpu, CPU: tensorflow
+# - If you use the GPU version, you need to install some additional applications.
+System:
+  DeviceUsage: 0.7
+
 # ModelName: Corresponding to the model file in the model directory,
 # - such as YourModelName.pb, fill in YourModelName here.
 # CharSet: Provides a default optional built-in solution:
 # - [ALPHANUMERIC, ALPHANUMERIC_LOWER, ALPHANUMERIC_UPPER,
 # -- NUMERIC, ALPHABET_LOWER, ALPHABET_UPPER, ALPHABET, ALPHANUMERIC_LOWER_MIX_CHINESE_3500]
 # - Or you can use your own customized character set like: ['a', '1', '2'].
+# CharMaxLength: Maximum length of characters, used for label padding.
 # CharExclude: CharExclude should be a list, like: ['a', '1', '2']
 # - which is convenient for users to freely combine character sets.
 # - If you don't want to manually define the character set manually,
@@ -47,46 +62,142 @@
 # Blur: [-1: Off, >0: On].
 # Resize: [WIDTH, HEIGHT]
 # - If the image size is too small, the training effect will be poor and you need to zoom in.
+# ReplaceTransparent: [True, False]
+# - True: Convert transparent images in RGBA format to opaque RGB format,
+# - False: Keep the original image
 Pretreatment:
   Binaryzation: -1
   Smoothing: -1
   Blur: -1
   Resize: @resize
+  ReplaceTransparent: True
 
-Trains:
-#  TrainsPath: './dataset/@model_name_trains.tfrecords'
-#  TestPath: './dataset/@model_name_test.tfrecords'
-  TrainsPath: @trains_path
+# CNNNetwork: [CNN5, ResNet, DenseNet]
+# RecurrentNetwork: [BLSTM, LSTM, SRU, BSRU, GRU]
+# - The recommended configuration is CNN5+BLSTM / ResNet+BLSTM
+# HiddenNum: [64, 128, 256]
+# - This parameter indicates the number of nodes used to remember and store past states.
+# Optimizer: Loss function algorithm for calculating gradient.
+# - [AdaBound, Adam, Momentum, SGD, AdaGrad, RMSProp]
+NeuralNet:
+  CNNNetwork: @cnn_network
+  RecurrentNetwork: @recurrent_network
+  HiddenNum: @hidden_num
+  KeepProb: 0.98
+  Optimizer: @optimizer
+  PreprocessCollapseRepeated: False
+  CTCMergeRepeated: True
+  CTCBeamWidth: @beam_width
+  CTCTopPaths: 1
+  WarpCTC: False
 
+# TrainsPath and TestPath: The local absolute path of your training and testing set.
+# DatasetPath: Package a sample of the TFRecords format from this path.
+# TrainRegex and TestRegex: Default matching apple_20181010121212.jpg file.
+# - The Default is .*?(?=_.*\.)
+# TestSetNum: This is an optional parameter that is used when you want to extract some of the test set
+# - from the training set when you are not preparing the test set separately.
+# SavedSteps: A Session.run() execution is called a Step,
+# - Used to save training progress, Default value is 100.
+# ValidationSteps: Used to calculate accuracy, Default value is 500.
+# TestSetNum: The number of test sets, if an automatic allocation strategy is used (TestPath not set).
+# EndAcc: Finish the training when the accuracy reaches [EndAcc*100]% and other conditions.
+# EndCost: Finish the training when the cost reaches EndCost and other conditions.
+# EndEpochs: Finish the training when the epoch is greater than the defined epoch and other conditions.
+# BatchSize: Number of samples selected for one training step.
+# TestBatchSize: Number of samples selected for one validation step.
+# LearningRate: Recommended value[0.01: MomentumOptimizer/AdamOptimizer, 0.001: AdaBoundOptimizer]
+Trains:
+  TrainsPath: './dataset/@model_name_trains.tfrecords'
+  TestPath: './dataset/@model_name_test.tfrecords'
+  DatasetPath: @trains_path
+  TrainRegex: '.*?(?=_)'
+  TestSetNum: @test_num
+  SavedSteps: 100
+  ValidationSteps: 500
+  EndAcc: 0.95
+  EndCost: 0.1
+  EndEpochs: 2
+  BatchSize: 128
+  TestBatchSize: 300
+  LearningRate: @learning_rate
+  DecayRate: 0.98
+  DecaySteps: 10000
 """
 
-# - [ALPHANUMERIC, ALPHANUMERIC_LOWER, ALPHANUMERIC_UPPER,
-# -- NUMERIC, ALPHABET_LOWER, ALPHABET_UPPER, ALPHABET, ALPHANUMERIC_LOWER_MIX_CHINESE_3500]
-
 trains_path = [i.replace("\\", "/") for i in trains_path]
 file_name = os.listdir(trains_path[0])[0]
-size = pil_image.open(os.path.join(trains_path[0], file_name)).size
+size = pilImage.open(os.path.join(trains_path[0], file_name)).size
 width = size[0]
 height = size[1]
+
 size_str = "{}x{}".format(width, height)
-if width > 180 or width < 120:
+if width > 160 or width < 120:
     r_height = int(height * 150 / width)
 else:
     r_height = height
 resize = "[{}, {}]".format(width if r_height == height else 150, r_height)
-model_name = 'sell-mix-CNN5{}-{}'.format(network, size_str)
-trains_path = json.dumps(trains_path, ensure_ascii=False).replace("]", " ]")
-result = model.replace("@trains_path", trains_path).replace("@model_name", model_name).replace("@resize", resize).replace("@size_str", size_str).replace("@width", str(width)).replace("@height", str(height)).replace("@charset", charset)
+
+model_name = '{}-mix-{}{}-{}-H{}{}'.format(
+    name_prefix,
+    cnn_network.value,
+    recurrent_network.value,
+    size_str,
+    hidden_num,
+    name_suffix
+)
+trains_path = json.dumps(trains_path, ensure_ascii=False, indent=2).replace('\n', '\n  ')
+
+BEST_LEARNING_RATE = {
+    Optimizer.AdaBound: 0.001,
+    Optimizer.Momentum: 0.01,
+    Optimizer.Adam: 0.01,
+    Optimizer.SGD: 0.01,
+    Optimizer.RMSProp: 0.01,
+    Optimizer.AdaGrad: 0.01,
+}
+
+learning_rate = BEST_LEARNING_RATE[optimizer] if not learning_rate else learning_rate
+
+
+result = model.replace(
+    "@trains_path", trains_path
+).replace(
+    "@model_name", model_name
+).replace(
+    "@resize", resize
+).replace(
+    "@size_str", size_str
+).replace(
+    "@width", str(width)
+).replace(
+    "@height", str(height)
+).replace(
+    "@charset", str(charset.value) if isinstance(charset, SimpleCharset) else str(charset)
+).replace(
+    "@test_num", str(test_num)
+).replace(
+    "@optimizer", str(optimizer.value)
+).replace(
+    "@hidden_num", str(hidden_num)
+).replace(
+    "@cnn_network", str(cnn_network.value)
+).replace(
+    "@recurrent_network", str(recurrent_network.value)
+).replace(
+    "@beam_width", str(beam_width)
+).replace(
+    "@learning_rate", str(learning_rate)
+)
 print(result)
+
 with open("model.yaml".format(size_str), "w", encoding="utf8") as f:
     f.write(result)
 
-from make_dataset import run
+from make_dataset import make_dataset
 from trains import main
-run()
-with open("model.yaml".format(size_str), "w") as f:
-    f.write("\n".join(result.split("\n")[:-3]).replace("#  TrainsPath", "  TrainsPath").replace("#  TestPath", "  TestPath"))
+make_dataset()
 main(None)
\ No newline at end of file
diff --git a/utils.py b/utils.py
index 537b075..c1b9f36 100644
--- a/utils.py
+++ b/utils.py
@@ -4,10 +4,12 @@
 import io
 import PIL.Image
 import cv2
+import random
 import numpy as np
 import tensorflow as tf
-
+from tensorflow import keras
 from config import *
+from constants import RunMode
 from pretreatment import preprocessing
 
 PATH_MAP = {
@@ -15,11 +17,6 @@
     RunMode.Test: TEST_PATH
 }
 
-REGEX_MAP = {
-    RunMode.Trains: TRAINS_REGEX,
-    RunMode.Test: TEST_REGEX
-}
-
 
 def encode_maps():
     return {char: i for i, char in enumerate(GEN_CHAR_SET, 0)}
@@ -38,7 +35,10 @@ def __init__(self, mode: RunMode):
         self.next_element = None
         self.image_path = []
         self.label_list = []
+        self._label_list = []
         self._size = 0
+        self.max_length = 0
+        self.is_first = True
 
     @staticmethod
     def _encoder(code):
@@ -49,10 +49,10 @@ def _encoder(code):
             if not k or not v:
                 break
             code.replace(k, v)
-        code = code.lower() if 'LOWER' in CHAR_SET or not CASE_SENSITIVE else code
+        code = code.lower() if 'LOWER' in CHAR_SET else code
         code = code.upper() if 'UPPER' in CHAR_SET else code
         try:
-            return [SPACE_INDEX if code == SPACE_TOKEN else encode_maps()[c] for c in list(code)]
+            return [encode_maps()[c] for c in list(code)]
         except KeyError as e:
             exception(
                 'The sample label {} contains invalid charset: {}.'.format(
@@ -64,16 +64,16 @@ def read_sample_from_files(self, data_set=None):
         if data_set:
             self.image_path = data_set
             try:
-                self.label_list = [
-                    self._encoder(re.search(REGEX_MAP[self.mode], i.split(PATH_SPLIT)[-1]).group()) for i in data_set
+                self._label_list = [
+                    self._encoder(re.search(TRAINS_REGEX, i.split(PATH_SPLIT)[-1]).group()) for i in data_set
                 ]
             except AttributeError as e:
                 regex_not_found = "group" in e.args[0]
                 if regex_not_found:
                     exception(
                         "Configured {} is '{}', it may be wrong and unable to get label properly.".format(
-                            "TrainRegex" if self.mode == RunMode.Trains else "TestRegex",
-                            TRAINS_REGEX if self.mode == RunMode.Trains else TEST_REGEX
+                            "TrainRegex",
+                            TRAINS_REGEX
                         ),
                         ConfigException.GET_LABEL_REGEX_ERROR
                     )
@@ -86,13 +86,13 @@
                 self.image_path.append(image_name)
                 # Get the label from the file name based on the regular expression.
                 code = re.search(
-                    REGEX_MAP[self.mode], image_name.split(PATH_SPLIT)[-1]
+                    TRAINS_REGEX, image_name.split(PATH_SPLIT)[-1]
                 )
                 if not code:
                     exception(
                         "Configured {} is '{}', it may be wrong and unable to get label properly.".format(
-                            "TrainRegex" if self.mode == RunMode.Trains else "TestRegex",
-                            TRAINS_REGEX if self.mode == RunMode.Trains else TEST_REGEX
+                            "TrainRegex",
+                            TRAINS_REGEX
                         ),
                         ConfigException.GET_LABEL_REGEX_ERROR
                     )
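The tutorial renders `model.yaml` by chaining `str.replace` over `@placeholder` tokens. A compact alternative sketch (not the project's code) keeps the substitutions in one dict and applies them in a loop:

```python
# Self-contained illustration with dummy values; the real tutorial substitutes
# many more placeholders (@charset, @learning_rate, ...).
template = "ModelName: @model_name\nResize: @resize\n"
substitutions = {
    "@model_name": "tutorial-mix-CNN5BLSTM-150x50-H64",
    "@resize": "[150, 50]",
}

result = template
for placeholder, value in substitutions.items():
    result = result.replace(placeholder, value)
print(result)
```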
@@ -100,8 +100,8 @@
             # The manual verification code platform is not case sensitive,
             # - it will affect the accuracy of the training set.
             # Here is a case conversion based on the selected character set.
-            self.label_list.append(self._encoder(code))
-        self._size = len(self.label_list)
+            self._label_list.append(self._encoder(code))
+        self._size = len(self._label_list)
 
     @staticmethod
     def parse_example(serial_example):
@@ -124,8 +124,13 @@ def read_sample_from_tfrecords(self, path):
         min_after_dequeue = 1000
         batch = BATCH_SIZE if self.mode == RunMode.Trains else TEST_BATCH_SIZE
 
-        dataset_train = tf.data.TFRecordDataset(path).map(self.parse_example)
-        dataset_train = dataset_train.shuffle(min_after_dequeue).batch(batch).repeat()
+        dataset_train = tf.data.TFRecordDataset(
+            filenames=path,
+            # num_parallel_reads=20
+        ).map(self.parse_example)
+        dataset_train = dataset_train.shuffle(
+            min_after_dequeue
+        ).batch(batch).repeat()
         iterator = dataset_train.make_one_shot_iterator()
         self.next_element = iterator.get_next()
 
@@ -133,14 +138,12 @@
     def size(self):
         return self._size
 
-    def labels(self, index):
-        if (TRAINS_USE_TFRECORDS and self.mode == RunMode.Trains) or (TEST_USE_TFRECORDS and self.mode == RunMode.Test):
-            return self.label_list
-        else:
-            return [self.label_list[i] for i in index]
+    @property
+    def labels(self):
+        return self.label_list
 
     @staticmethod
-    def _image(path_or_bytes):
+    def _image(path_or_bytes, is_random=False):
 
         # im = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
         # The OpenCV cannot handle gif format images, it will return None.
@@ -160,7 +163,16 @@
         im = np.array(pil_image)
         im = preprocessing(im, BINARYZATION, SMOOTH, BLUR).astype(np.float32)
-        im = cv2.resize(im, (RESIZE[0], RESIZE[1]))
+
+        if RESIZE[0] == -1:
+            random_ratio = random.choice([2.5, 3, 3.5, 3.2, 2.7, 2.75])
+            ratio = RESIZE[1] / size[1]
+            random_width = int(random_ratio * RESIZE[1])
+            resize_width = int(ratio * size[0])
+            resize_width = random_width if is_random else resize_width
+            im = cv2.resize(im, (resize_width, RESIZE[1]))
+        else:
+            im = cv2.resize(im, (RESIZE[0], RESIZE[1]))
         im = im.swapaxes(0, 1)
         return np.array((im[:, :, np.newaxis] if IMAGE_CHANNEL == 1 else im[:, :]) / 255.)
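When widths vary (`RESIZE[0] == -1`), batches are made rectangular by zero-padding along the width axis with `keras.preprocessing.sequence.pad_sequences`, as the batching methods below do. A small demonstration with dummy images of shape (width, height, channel), the layout `_image` returns after its axis swap:

```python
import numpy as np
from tensorflow import keras

# Three images with the same height/channel but different widths.
images = [np.ones((w, 50, 1), dtype='float32') for w in (120, 150, 180)]
batch = keras.preprocessing.sequence.pad_sequences(
    sequences=images,
    maxlen=None,        # pad to the widest image in the batch
    dtype='float32',
    padding='post',     # zeros are appended after the real pixels
    truncating='post',
    value=0
)
print(batch.shape)  # -> (3, 180, 50, 1)
```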
@@ -169,64 +181,190 @@ def _get_input_lens(sequences):
         lengths = np.asarray([len(_) for _ in sequences], dtype=np.int64)
         return sequences, lengths
 
-    def generate_batch_by_files(self, index=None):
-        if index:
-            image_batch = [self._image(self.image_path[i]) for i in index]
-            label_batch = [self.label_list[i] for i in index]
+    def generate_batch_by_files(self, image_index=None):
+        batch = {}
+        image_batch = []
+        label_batch = []
+
+        if image_index:
+            # if len(image_index) == TEST_BATCH_SIZE:
+            #     ii = image_index[0]
+            #     ll = self._label_list[ii]
+            #     ll = "".join([GEN_CHAR_SET[_] for _ in ll])
+            #     import shutil
+            #     shutil.copy(self.image_path[ii], "image/{}.png".format(ll))
+            for i, index in enumerate(image_index):
+                try:
+                    is_training = len(image_index) == BATCH_SIZE
+                    is_random = bool(random.getrandbits(1))
+
+                    image_array = self._image(self.image_path[index], is_random=is_training and is_random)
+                    label_array = self._label_list[index]
+                    if MULTI_SHAPE:
+                        image_shape = "{}x{}".format(image_array.shape[0], image_array.shape[1])
+                        if image_shape in batch:
+                            batch[image_shape].append((image_array, label_array))
+                        else:
+                            batch[image_shape] = [(image_array, label_array)]
+                    else:
+                        image_batch.append(image_array)
+                        label_batch.append(label_array)
+                except OSError:
+                    continue
+        # else:
+        #     for i, path in enumerate(self.image_path):
+        #         try:
+        #             if i == 0:
+        #                 import shutil
+        #                 print('----')
+        #
+        #                 shutil.copy(self.image_path[path], "{}.png".format(self._label_list[path]))
+        #             is_random = bool(random.getrandbits(1))
+        #             image_array = self._image(self.image_path[path], is_random=is_random)
+        #             label_array = self._label_list[path]
+        #             if MULTI_SHAPE:
+        #                 image_shape = "{}x{}".format(image_array.shape[0], image_array.shape[1])
+        #                 if image_shape in batch:
+        #                     batch[image_shape].append((image_array, label_array))
+        #                 else:
+        #                     batch[image_shape] = [(image_array, label_array)]
+        #             else:
+        #                 image_batch.append(image_array)
+        #                 label_batch.append(label_array)
+        #         except OSError:
+        #             continue
+
+        if MULTI_SHAPE:
+            self.label_list = sum([v for k, v in batch.items()], [])
+            self.label_list = [i[1] for i in self.label_list]
+            return self.classified_generate_batch(batch)
         else:
-            image_batch = [self._image(i) for i in self.image_path]
-            label_batch = self.label_list
-        return self._generate_batch(image_batch, label_batch)
-
-    def _generate_batch(self, image_batch, label_batch):
+            if RESIZE[0] == -1:
+                image_batch = keras.preprocessing.sequence.pad_sequences(
+                    sequences=image_batch,
+                    maxlen=None,
+                    dtype='float32',
+                    padding='post',
+                    truncating='post',
+                    value=0
+                )
+                # image_batch = self.padding(image_batch)
+            self.label_list = label_batch
+            return self.padded_generate_batch(image_batch, label_batch)
+
+    def padded_generate_batch(self, image_batch, label_batch):
+        classified_batch = {}
         batch_inputs, batch_seq_len = self._get_input_lens(np.array(image_batch))
         batch_labels = sparse_tuple_from_label(label_batch)
-        self._label_batch = batch_labels
-        return batch_inputs, batch_seq_len, batch_labels
+        classified_batch['{}x{}'.format(RESIZE[0], RESIZE[1])] = [batch_inputs, batch_seq_len, batch_labels]
+        return classified_batch
+
+    def classified_generate_batch(self, batch):
+        classified_batch = {}
+        for shape, v in batch.items():
+            batch_inputs, batch_seq_len = self._get_input_lens(np.array([i[0] for i in v]))
+            batch_labels = sparse_tuple_from_label([i[1] for i in v])
+            if shape in classified_batch:
+                classified_batch[shape].append([batch_inputs, batch_seq_len, batch_labels])
+            else:
+                classified_batch[shape] = [batch_inputs, batch_seq_len, batch_labels]
+        return classified_batch
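`classified_generate_batch` relies on samples having been grouped by their `WxH` shape key so each bucket stacks into a dense tensor without cross-shape padding. A sketch of that bucketing with `collections.defaultdict`:

```python
from collections import defaultdict
import numpy as np

def bucket_by_shape(samples):
    """Group (image, label) pairs by the image's 'WxH' shape key."""
    buckets = defaultdict(list)
    for image, label in samples:
        key = "{}x{}".format(image.shape[0], image.shape[1])
        buckets[key].append((image, label))
    return buckets

samples = [(np.zeros((120, 50)), [1, 2]), (np.zeros((150, 50)), [3])]
print({k: len(v) for k, v in bucket_by_shape(samples).items()})
# -> {'120x50': 1, '150x50': 1}
```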
+
+    @staticmethod
+    def padding(image_batch):
+
+        max_width = max([np.shape(_)[0] for _ in image_batch])
+        padded_image_batch = []
+        for image in image_batch:
+            output_img = np.zeros([max_width, RESIZE[1], IMAGE_CHANNEL])
+            output_img[0: np.shape(image)[0]] = image
+            padded_image_batch.append(output_img)
+        return padded_image_batch
 
     def generate_batch_by_tfrecords(self, sess):
+
         _image, _label = sess.run(self.next_element)
+        batch = {}
+        image_batch = []
+        label_batch = []
 
-        image_batch, label_batch = [], []
-        for (i1, i2) in zip(_image, _label):
+        for index, (i1, i2) in enumerate(zip(_image, _label)):
             try:
-                image_batch.append(self._image(i1))
-                label_batch.append(self._encoder(i2))
+                is_random = bool(random.getrandbits(1))
+                random_and_training = is_random and self.mode == RunMode.Trains
+                image_array = self._image(i1, is_random=random_and_training)
+                label_array = self._encoder(i2)
+                if MULTI_SHAPE:
+                    image_shape = "{}x{}".format(image_array.shape[0], image_array.shape[1])
+                    if image_shape in batch:
+                        batch[image_shape].append((image_array, label_array))
+                    else:
+                        batch[image_shape] = [(image_array, label_array)]
+                else:
+                    image_batch.append(image_array)
+                    label_batch.append(label_array)
             except OSError:
                 continue
-        self.label_list = label_batch
-        return self._generate_batch(image_batch, label_batch)
+
+        if MULTI_SHAPE:
+            self.label_list = sum([v for k, v in batch.items()], [])
+            self.label_list = [i[1] for i in self.label_list]
+            return self.classified_generate_batch(batch)
+        else:
+            if RESIZE[0] == -1:
+                # image_batch = self.padding(image_batch)
+                image_batch = keras.preprocessing.sequence.pad_sequences(
+                    sequences=image_batch,
+                    maxlen=None,
+                    dtype='float32',
+                    padding='post',
+                    truncating='post',
+                    value=0
+                )
+            self.label_list = label_batch
+            return self.padded_generate_batch(image_batch, label_batch)
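The validation metric that follows, `accuracy_calculation`, is exact-match at the sequence level: a sample only counts as correct if the whole decoded label equals the ground truth after dropping blank/padding ids. A condensed sketch:

```python
def sequence_accuracy(originals, decoded, ignore=(0, -1)):
    """Fraction of samples whose full decoded label matches exactly."""
    hits = sum(
        1 for o, d in zip(originals, decoded)
        if o == [j for j in d if j not in ignore]
    )
    return hits / len(originals)

print(sequence_accuracy([[1, 2]], [[1, 0, 2, -1]]))  # -> 1.0
```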
"decode": "".join([GEN_CHAR_SET[_] for _ in decoded_label]) + }) + tf.logging.error(error_sample) return count * 1.0 / len(original_seq) @@ -241,4 +379,3 @@ def sparse_tuple_from_label(sequences, dtype=np.int32): values = np.asarray(values, dtype=dtype) shape = np.asarray([len(sequences), np.asarray(indices).max(0)[1] + 1], dtype=np.int64) return indices, values, shape -