Skip to content

Commit fbcd0cf

Browse files
committed
Updated segmenter
1 parent 886d60a commit fbcd0cf

File tree

2 files changed

+66
-12
lines changed

2 files changed

+66
-12
lines changed

pkg/segmenter/segmenter.go

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,15 @@ type SegmentFunc func(time.Duration, []float32) error
3131
//////////////////////////////////////////////////////////////////////////////
3232
// LIFECYCLE
3333

34-
// Create a new segmenter for a specific "dur" duration of samples with
35-
// a reader r. If dur is zero then no segmenting is performed, the whole
34+
// Create a new segmenter which reads from r and splits the raw audio into segments of 'dur'
35+
// length. If dur is zero then no segmenting is performed, the whole
3636
// audio file is read, which could cause some memory issues.
37+
//
3738
// The sample rate is the number of samples per second.
39+
//
3840
// At the moment, the audio format is auto-detected, but there should be
3941
// a way to specify the audio format.
40-
func New(r io.Reader, dur time.Duration, sample_rate int) (*Segmenter, error) {
42+
func NewReader(r io.Reader, dur time.Duration, sample_rate int) (*Segmenter, error) {
4143
segmenter := new(Segmenter)
4244

4345
// Check arguments
@@ -136,12 +138,3 @@ func (s *Segmenter) Decode(ctx context.Context, fn SegmentFunc) error {
136138
// Return success
137139
return nil
138140
}
139-
140-
// Return the file duration from the file or timestamp
141-
func (s *Segmenter) Duration() time.Duration {
142-
if s.reader != nil {
143-
return s.reader.Duration()
144-
} else {
145-
return s.ts + time.Duration(len(s.buf))*time.Second/time.Duration(s.sample_rate)
146-
}
147-
}

pkg/segmenter/silence.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package segmenter
2+
3+
import (
4+
"math"
5+
"time"
6+
// Packages
7+
)
8+
9+
////////////////////////////////////////////////////////////////////////////////
10+
// TYPES
11+
12+
// silence detects silence in, and boosts the level of, frames of raw audio
// samples. Typical values are Gain=20, Threshold=0.003 and Timeout=2s.
type silence struct {
	// Gain is the boost in decibels; Threshold is the energy a frame has to
	// exceed in order to count as sound
	Gain, Threshold float64

	// Timeout is how long the energy has to stay at or below Threshold
	// before recording stops
	Timeout time.Duration

	t time.Time // time of the most recent frame above Threshold, zero while silent
	r bool      // true while frames should be recorded
}
23+
24+
////////////////////////////////////////////////////////////////////////////////
25+
// PUBLIC METHODS
26+
27+
// Increase gain and compute energy of a frame of audio data, return true
28+
// if the frame of data should be recorded, false if it should be ignored
29+
func (s *silence) Process(data []float32) bool {
30+
energy := process(data, float32(math.Pow(10, s.Gain/20.0)))
31+
32+
// Compute the gain
33+
if energy > s.Threshold {
34+
if s.t.IsZero() {
35+
// Transition from silence to recording
36+
s.r = true
37+
}
38+
s.t = time.Now()
39+
} else if !s.t.IsZero() {
40+
if time.Since(s.t) > s.Timeout {
41+
// Transition from recording to silence
42+
s.t = time.Time{}
43+
s.r = false
44+
}
45+
}
46+
return s.r
47+
}
48+
49+
////////////////////////////////////////////////////////////////////////////////
50+
// PRIVATE METHODS
51+
52+
// process multiplies every sample of data by gain, in place, and returns the
// energy of the amplified frame: the sum of the squared samples divided by
// the square root of the number of samples. An empty frame has zero energy.
func process(data []float32, gain float32) float64 {
	// Without this guard an empty frame would evaluate 0/0 and return NaN
	if len(data) == 0 {
		return 0
	}

	var energy float64
	for i := range data {
		data[i] *= gain
		sample := float64(data[i])
		energy += sample * sample
	}

	// NOTE(review): this is sum/sqrt(n), not a true RMS (sqrt(sum/n)); it is
	// left unchanged because existing threshold values depend on this scale
	return energy / math.Sqrt(float64(len(data)))
}

0 commit comments

Comments
 (0)