@@ -10,34 +10,33 @@ The test example can be seen in ``test/test_package.py`` as below:

 .. code-block:: python

-    import scipy.io.wavfile as wav
-    import numpy as np
-    import speechpy
-    import os
-
     file_name = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Alesis-Sanctuary-QCard-AcoustcBas-C2.wav')
     fs, signal = wav.read(file_name)
     signal = signal[:, 0]

+    # Example of staching frames
+    frames = speechpy.processing.stack_frames(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, Filter=lambda x: np.ones((x,)),
+                 zero_padding=True)
+
+    # Example of extracting power spectrum
+    power_spectrum = speechpy.processing.power_spectrum(frames, fft_points=512)
+    print('power spectrum shape=', power_spectrum.shape)
+
     ############# Extract MFCC features #############
-    mfcc = speechpy.mfcc(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01,
-                 num_filters=40, fft_length=512, low_frequency=0, high_frequency=None)
-    mfcc_cmvn = speechpy.cmvnw(mfcc, win_size=301, variance_normalization=True)
+    mfcc = speechpy.feature.mfcc(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01,
+                 num_filters=40, fft_length=512, low_frequency=0, high_frequency=None)
+    mfcc_cmvn = speechpy.processing.cmvnw(mfcc, win_size=301, variance_normalization=True)
     print('mfcc(mean + variance normalized) feature shape=', mfcc_cmvn.shape)

-    mfcc_feature_cube = speechpy.extract_derivative_feature(mfcc)
+    mfcc_feature_cube = speechpy.feature.extract_derivative_feature(mfcc)
     print('mfcc feature cube shape=', mfcc_feature_cube.shape)

     ############# Extract logenergy features #############
-    logenergy = speechpy.lmfe(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01,
-                 num_filters=40, fft_length=512, low_frequency=0, high_frequency=None)
-    logenergy_feature_cube = speechpy.extract_derivative_feature(logenergy)
+    logenergy = speechpy.feature.lmfe(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01,
+                 num_filters=40, fft_length=512, low_frequency=0, high_frequency=None)
+    logenergy_feature_cube = speechpy.feature.extract_derivative_feature(logenergy)
     print('logenergy features=', logenergy.shape)

-    # Example of staching frames
-    signal = speechpy.stack_frames(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, Filter=lambda x: np.ones((x,)),
-                 zero_padding=True)
-
-----------
Test Local
-----------
@@ -47,39 +46,33 @@ The local test example can be found in ``test/test_package.py`` as follows:

 .. code-block:: python

-    import scipy.io.wavfile as wav
-    import numpy as np
-    import os
-    import sys
-    lib_path = os.path.abspath(os.path.join('..'))
-    print(lib_path)
-    sys.path.append(lib_path)
-    import speechpy
-    import os
-
     file_name = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Alesis-Sanctuary-QCard-AcoustcBas-C2.wav')
     fs, signal = wav.read(file_name)
     signal = signal[:, 0]

+    # Example of staching frames
+    frames = speechpy.processing.stack_frames(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, Filter=lambda x: np.ones((x,)),
+                 zero_padding=True)
+
+    # Example of extracting power spectrum
+    power_spectrum = speechpy.processing.power_spectrum(frames, fft_points=512)
+    print('power spectrum shape=', power_spectrum.shape)
+
     ############# Extract MFCC features #############
-    mfcc = speechpy.mfcc(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01,
+    mfcc = speechpy.feature.mfcc(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01,
                  num_filters=40, fft_length=512, low_frequency=0, high_frequency=None)
-    mfcc_cmvn = speechpy.cmvnw(mfcc, win_size=301, variance_normalization=True)
+    mfcc_cmvn = speechpy.processing.cmvnw(mfcc, win_size=301, variance_normalization=True)
     print('mfcc(mean + variance normalized) feature shape=', mfcc_cmvn.shape)

-    mfcc_feature_cube = speechpy.extract_derivative_feature(mfcc)
+    mfcc_feature_cube = speechpy.feature.extract_derivative_feature(mfcc)
     print('mfcc feature cube shape=', mfcc_feature_cube.shape)

     ############# Extract logenergy features #############
-    logenergy = speechpy.lmfe(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01,
+    logenergy = speechpy.feature.lmfe(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01,
                  num_filters=40, fft_length=512, low_frequency=0, high_frequency=None)
-    logenergy_feature_cube = speechpy.extract_derivative_feature(logenergy)
+    logenergy_feature_cube = speechpy.feature.extract_derivative_feature(logenergy)
     print('logenergy features=', logenergy.shape)

-    # Example of staching frames
-    signal = speechpy.stack_frames(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, Filter=lambda x: np.ones((x,)),
-                 zero_padding=True)
-

To extract the features, the signal samples are first stacked into frames. The features are then computed for each frame in the stacked-frames collection.
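As a rough illustration of that stacking step, the sketch below shows how a 20 ms frame length and a 10 ms stride translate into samples per frame and an approximate frame count. It uses a synthetic signal as a stand-in for the wav file above and leaves the frame-weighting filter of ``stack_frames`` at its default; the estimated frame count is only approximate, since the exact value depends on the library's zero padding and rounding.

.. code-block:: python

    import numpy as np
    import speechpy

    # Synthetic stand-in for the recording used above: 1 second of noise at 16 kHz.
    fs = 16000
    signal = np.random.randn(fs)

    frame_length = 0.020   # frame size in seconds (20 ms)
    frame_stride = 0.01    # hop between frame starts in seconds (10 ms)

    samples_per_frame = int(round(frame_length * fs))    # 320 samples per frame
    samples_per_stride = int(round(frame_stride * fs))   # 160 samples between frame starts

    # Rough frame-count estimate; the exact count depends on zero padding/rounding.
    approx_num_frames = int(np.ceil((len(signal) - samples_per_frame) / samples_per_stride)) + 1

    frames = speechpy.processing.stack_frames(signal, sampling_frequency=fs,
                                              frame_length=frame_length,
                                              frame_stride=frame_stride,
                                              zero_padding=True)

    # Each row is one frame of raw samples: (num_frames, samples_per_frame).
    print('stacked frames shape =', frames.shape)
    print('approximate frame count =', approx_num_frames)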