@@ -3,26 +3,50 @@ package segmenter
3
3
import (
4
4
"context"
5
5
"errors"
6
- "fmt"
7
6
"io"
7
+ "time"
8
8
9
9
// Packages
10
10
media "github.com/mutablelogic/go-media"
11
11
ffmpeg "github.com/mutablelogic/go-media/pkg/ffmpeg"
12
+
13
+ // Namespace imports
14
+ . "github.com/djthorpe/go-errors"
12
15
)
13
16
14
17
type Segmenter struct {
15
- reader * ffmpeg.Reader
18
+ ts time.Duration
19
+ sample_rate int
20
+ n int
21
+ buf []float32
22
+ reader * ffmpeg.Reader
16
23
}
17
24
25
// SegmentFunc is the callback invoked by Decode whenever a segment of audio
// is ready to be processed. The first argument is the timestamp of the
// segment and the second holds its float32 samples. The sample slice is
// backed by the segmenter's internal buffer, which is reused for subsequent
// segments, so a callback must copy any samples it needs to keep after it
// returns.
type SegmentFunc func(time.Duration, []float32)
28
+
18
29
//////////////////////////////////////////////////////////////////////////////
19
30
// LIFECYCLE
20
31
21
32
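// NewSegmenter creates a segmenter which reads audio from r and emits segments of duration dur (truncated to whole seconds) at sample_rate samples per second
22
33
// If dur is zero (or truncates to zero whole seconds) then no segmenting is performed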
23
- func NewSegmenter (r io.Reader ) (* Segmenter , error ) {
34
+ func NewSegmenter (r io.Reader , dur time. Duration , sample_rate int ) (* Segmenter , error ) {
24
35
segmenter := new (Segmenter )
25
36
37
+ // Check arguments
38
+ if dur < 0 || sample_rate <= 0 {
39
+ return nil , ErrBadParameter .With ("invalid duration or sample rate arguments" )
40
+ } else {
41
+ segmenter .sample_rate = sample_rate
42
+ }
43
+
44
+ // Sample buffer is duration * sample rate
45
+ if dur > 0 {
46
+ segmenter .n = int (dur .Seconds ()) * sample_rate
47
+ segmenter .buf = make ([]float32 , 0 , int (dur .Seconds ())* sample_rate )
48
+ }
49
+
26
50
// Open the file
27
51
media , err := ffmpeg .NewReader (r )
28
52
if err != nil {
@@ -42,6 +66,7 @@ func (s *Segmenter) Close() error {
42
66
result = errors .Join (result , s .reader .Close ())
43
67
}
44
68
s .reader = nil
69
+ s .buf = nil
45
70
46
71
// Return any errors
47
72
return result
@@ -53,19 +78,51 @@ func (s *Segmenter) Close() error {
53
78
// TODO: segments are output through a callback, with the samples and a timestamp
54
79
// TODO: we could do some basic silence and voice detection to segment to ensure
55
80
// we don't overtax the CPU/GPU with silence and non-speech
56
- // TODO: We have hard-coded the sample format, sample rate and number of channels
57
- // here. We should make this configurable
58
- func (s * Segmenter ) Decode (ctx context.Context ) error {
81
+ func (s * Segmenter ) Decode (ctx context.Context , fn SegmentFunc ) error {
82
+ // Check input parameters
83
+ if fn == nil {
84
+ return ErrBadParameter .With ("SegmentFunc is nil" )
85
+ }
86
+
87
+ // Map function chooses the best audio stream
59
88
mapFunc := func (stream int , params * ffmpeg.Par ) (* ffmpeg.Par , error ) {
60
89
if stream == s .reader .BestStream (media .AUDIO ) {
61
- return ffmpeg .NewAudioPar ("flt" , "mono" , 16000 )
90
+ return ffmpeg .NewAudioPar ("flt" , "mono" , s . sample_rate )
62
91
}
63
92
// Ignore no-audio streams
64
93
return nil , nil
65
94
}
66
- return s .reader .Decode (ctx , mapFunc , func (stream int , frame * ffmpeg.Frame ) error {
67
- // Append float32 samples to buffer
68
- fmt .Println ("TODO: Implement Decode" , frame )
95
+
96
+ // Decode samples and segment
97
+ if err := s .reader .Decode (ctx , mapFunc , func (stream int , frame * ffmpeg.Frame ) error {
98
+ // We get null frames sometimes, ignore them
99
+ if frame == nil {
100
+ return nil
101
+ }
102
+
103
+ // Append float32 samples from plane 0 to buffer
104
+ s .buf = append (s .buf , frame .Float32 (0 )... )
105
+
106
+ // n != 0 and len(buf) >= n we have a segment to process
107
+ if s .n != 0 && len (s .buf ) >= s .n {
108
+ fn (s .ts , s .buf )
109
+ // Clear the buffer
110
+ s .buf = s .buf [:0 ]
111
+ // Increment the timestamp
112
+ s .ts += time .Duration (float64 (s .n )/ float64 (s .sample_rate )) * time .Second
113
+ }
114
+
115
+ // Continue processing
69
116
return nil
70
- })
117
+ }); err != nil {
118
+ return err
119
+ }
120
+
121
+ // Output any remaining samples
122
+ if len (s .buf ) > 0 {
123
+ fn (s .ts , s .buf )
124
+ }
125
+
126
+ // Return success
127
+ return nil
71
128
}
0 commit comments