Skip to content

Commit 106cbd3

Browse files
committed
Added audio resampler first pass
1 parent 44508c3 commit 106cbd3

File tree

9 files changed

+204
-25
lines changed

9 files changed

+204
-25
lines changed

decoder.go

Lines changed: 86 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,12 @@ func (d *demuxer) newDecoder(stream *ff.AVStream, dest Parameters) (*decoder, er
9797
} else if !equals {
9898
switch src.CodecType() {
9999
case ff.AVMEDIA_TYPE_AUDIO:
100-
fmt.Println("TODO: set up resampler", dest)
100+
if resampler, frame, err := newResampler(dest, src); err != nil {
101+
return nil, err
102+
} else {
103+
decoder.resampler = resampler
104+
decoder.reframe = frame
105+
}
101106
case ff.AVMEDIA_TYPE_VIDEO:
102107
if rescaler, frame, err := newResizer(dest, src); err != nil {
103108
return nil, err
@@ -155,10 +160,61 @@ func newResizer(dest Parameters, src *ff.AVCodecParameters) (*ff.SWSContext, *ff
155160
ff.SWScale_free_context(ctx)
156161
return nil, nil, errors.New("failed to allocate frame")
157162
} else {
163+
// Set frame parameters
164+
frame.SetPixFmt(dest_pixel_format)
165+
frame.SetWidth(dest.Width())
166+
frame.SetHeight(dest.Height())
167+
168+
// Return success
158169
return ctx, frame, nil
159170
}
160171
}
161172

173+
func newResampler(dest Parameters, src *ff.AVCodecParameters) (*ff.SWRContext, *ff.AVFrame, error) {
174+
// Get native sample format and channel layout
175+
var dest_channel_layout ff.AVChannelLayout
176+
dest_sample_format := ff.AVUtil_get_sample_fmt(dest.SampleFormat())
177+
if err := ff.AVUtil_channel_layout_from_string(&dest_channel_layout, dest.ChannelLayout()); err != nil {
178+
return nil, nil, fmt.Errorf("failed to get channel layout: %w", err)
179+
}
180+
181+
// Create a new resampler
182+
ctx := ff.SWResample_alloc()
183+
if ctx == nil {
184+
return nil, nil, errors.New("failed to allocate resampler")
185+
}
186+
187+
// Set options to covert from the codec frame to the decoder frame
188+
if err := ff.SWResample_set_opts(ctx,
189+
dest_channel_layout, dest_sample_format, dest.Samplerate(), // destination
190+
src.ChannelLayout(), src.SampleFormat(), src.Samplerate(), // source
191+
); err != nil {
192+
ff.SWResample_free(ctx)
193+
return nil, nil, fmt.Errorf("SWResample_set_opts: %w", err)
194+
}
195+
196+
// Initialize the resampling context
197+
if err := ff.SWResample_init(ctx); err != nil {
198+
ff.SWResample_free(ctx)
199+
return nil, nil, fmt.Errorf("SWResample_init: %w", err)
200+
}
201+
202+
// Create a new frame for the resampled audio
203+
frame := ff.AVUtil_frame_alloc()
204+
if frame == nil {
205+
ff.SWResample_free(ctx)
206+
return nil, nil, errors.New("failed to allocate frame")
207+
}
208+
209+
// Set frame parameters
210+
frame.SetSampleRate(dest.Samplerate())
211+
frame.SetSampleFormat(dest_sample_format)
212+
frame.SetChannelLayout(dest_channel_layout)
213+
214+
// Return success
215+
return ctx, frame, nil
216+
}
217+
162218
func (d *demuxer) close() error {
163219
var result error
164220

@@ -327,7 +383,14 @@ func (d *decoder) decode(packet *ff.AVPacket, demuxfn DecoderFunc, framefn Frame
327383
func (d *decoder) re(src *ff.AVFrame) (*ff.AVFrame, error) {
328384
switch d.codec.Codec().Type() {
329385
case ff.AVMEDIA_TYPE_AUDIO:
330-
fmt.Println("TODO: resample audio", src)
386+
if d.resampler != nil && src != nil {
387+
// Resample the audio
388+
if err := resample(d.resampler, d.reframe, src); err != nil {
389+
return nil, err
390+
} else {
391+
return d.reframe, nil
392+
}
393+
}
331394
case ff.AVMEDIA_TYPE_VIDEO:
332395
if d.rescaler != nil && src != nil {
333396
// Rescale the video
@@ -339,10 +402,6 @@ func (d *decoder) re(src *ff.AVFrame) (*ff.AVFrame, error) {
339402
}
340403
}
341404

342-
// if err := decoder.rescale(decoder.frame, src); err != nil {
343-
// return nil, err
344-
// }
345-
346405
// NO-OP - just return the source frame
347406
return src, nil
348407
}
@@ -359,6 +418,27 @@ func rescale(ctx *ff.SWSContext, dest, src *ff.AVFrame) error {
359418
return nil
360419
}
361420

421+
func resample(ctx *ff.SWRContext, dest, src *ff.AVFrame) error {
422+
// Copy properties from source
423+
//if err := ff.AVUtil_frame_copy_props(dest, src); err != nil {
424+
// return fmt.Errorf("failed to copy props: %w", err)
425+
//}
426+
427+
dest_samples, err := ff.SWResample_get_out_samples(ctx, src.NumSamples())
428+
if err != nil {
429+
return fmt.Errorf("SWResample_get_out_samples: %w", err)
430+
}
431+
dest.SetNumSamples(dest_samples)
432+
fmt.Println("dest frame=", dest)
433+
434+
// Perform resampling
435+
if err := ff.SWResample_convert_frame(ctx, src, dest); err != nil {
436+
return fmt.Errorf("SWResample_convert_frame: %w", err)
437+
}
438+
439+
return nil
440+
}
441+
362442
// Return an error if the parameters don't match the stream type (AUDIO, VIDEO)
363443
// Return true if the codec parameters are compatible with the stream
364444
func equalsStream(dest Parameters, src *ff.AVCodecParameters) (bool, error) {

decoder_test.go

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ func Test_decoder_004(t *testing.T) {
168168

169169
// Open the file
170170
manager := NewManager()
171-
media, err := manager.Open("./etc/test/sample.mp4", nil)
171+
media, err := manager.Open("./etc/test/audio_22050_1ch_5m35.s16le.sw", nil)
172172
if !assert.NoError(err) {
173173
t.SkipNow()
174174
}
@@ -188,8 +188,81 @@ func Test_decoder_004(t *testing.T) {
188188
}
189189

190190
// This is the function which processes the audio frames
191+
tmp, err := os.MkdirTemp("", "media_test_")
192+
if !assert.NoError(err) {
193+
t.SkipNow()
194+
}
195+
filename := filepath.Join(tmp, "audio.sw")
196+
f, err := os.Create(filename)
197+
if !assert.NoError(err) {
198+
t.SkipNow()
199+
}
200+
defer f.Close()
201+
202+
bytes_written := 0
203+
framefn := func(frame Frame) error {
204+
n, err := f.Write(frame.Bytes(0))
205+
if err != nil {
206+
return err
207+
} else {
208+
bytes_written += n
209+
t.Logf("Written %d bytes to %v", bytes_written, filename)
210+
}
211+
return nil
212+
}
213+
214+
// Finally, this is where we actually decode frames from the stream
215+
assert.NoError(decoder.Decode(context.Background(), framefn))
216+
}
217+
218+
func Test_decoder_005(t *testing.T) {
219+
// Decode audio frames
220+
assert := assert.New(t)
221+
222+
// Open the file
223+
manager := NewManager()
224+
media, err := manager.Open("./test.mp3", nil)
225+
if !assert.NoError(err) {
226+
t.SkipNow()
227+
}
228+
defer media.Close()
229+
230+
// Create a decoder to decompress the audio
231+
decoder, err := media.Decoder(func(stream Stream) (Parameters, error) {
232+
// Audio - convert to mono, s16 at 44100 Hz
233+
if stream.Type() == AUDIO {
234+
return manager.AudioParameters("mono", "s16", 44100)
235+
}
236+
// Ignore other streams
237+
return nil, nil
238+
})
239+
if !assert.NoError(err) {
240+
t.SkipNow()
241+
}
242+
243+
// This is the function which processes the audio frames
244+
tmp, err := os.MkdirTemp("", "media_test_")
245+
if !assert.NoError(err) {
246+
t.SkipNow()
247+
}
248+
249+
// TODO: Byte order may be little-endian (le) or big-endian (be), depending on the native endianness
250+
filename := filepath.Join(tmp, "audio.s16le.sw")
251+
f, err := os.Create(filename)
252+
if !assert.NoError(err) {
253+
t.SkipNow()
254+
}
255+
defer f.Close()
256+
257+
bytes_written := 0
191258
framefn := func(frame Frame) error {
192-
t.Log(frame)
259+
n, err := f.Write(frame.Bytes(0))
260+
if err != nil {
261+
return err
262+
} else {
263+
bytes_written += n
264+
t.Logf("Written %d bytes to %v", bytes_written, filename)
265+
}
193266
return nil
194267
}
195268

interfaces.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ type Frame interface {
249249

250250
// Return a frame as an image, which supports the following
251251
// pixel formats: AV_PIX_FMT_GRAY8, AV_PIX_FMT_RGBA,
252-
// AV_PIX_FMT_RGB24, AV_PIX_FMT_YUV420P (4:2:0)
252+
// AV_PIX_FMT_RGB24, AV_PIX_FMT_YUV420P
253253
Image() (image.Image, error)
254254
}
255255

manager.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@ import (
77
// Package imports
88
version "github.com/mutablelogic/go-media/pkg/version"
99
ff "github.com/mutablelogic/go-media/sys/ffmpeg61"
10-
11-
// Namespace imports
12-
. "github.com/djthorpe/go-errors"
1310
)
1411

1512
////////////////////////////////////////////////////////////////////////////

parameters.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,26 @@ func newAudioParametersEx(channels string, samplefmt string, samplerate int) (*p
8787
return par, nil
8888
}
8989

90+
func newCodecAudioParameters(codec *ff.AVCodecParameters) *par {
91+
par := new(par)
92+
par.t = AUDIO
93+
par.audiopar.Ch = codec.ChannelLayout()
94+
par.audiopar.SampleFormat = codec.SampleFormat()
95+
par.audiopar.Samplerate = codec.Samplerate()
96+
par.planepar.NumPlanes = par.NumPlanes()
97+
return par
98+
}
99+
100+
func newCodecVideoParameters(codec *ff.AVCodecParameters) *par {
101+
par := new(par)
102+
par.t = VIDEO
103+
par.videopar.Width = codec.Width()
104+
par.videopar.Height = codec.Height()
105+
par.videopar.PixelFormat = codec.PixelFormat()
106+
par.planepar.NumPlanes = par.NumPlanes()
107+
return par
108+
}
109+
90110
// Create new parameters for video from a width, height and pixel format
91111
func newVideoParametersEx(width int, height int, pixelfmt string) (*par, error) {
92112
par := new(par)

stream.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ type writerstream struct {
1717
}
1818

1919
var _ Stream = (*stream)(nil)
20-
var _ Stream = (*writerstream)(nil)
20+
21+
//var _ Stream = (*writerstream)(nil)
2122

2223
////////////////////////////////////////////////////////////////////////////////
2324
// LIFECYCLE
@@ -27,6 +28,7 @@ func newStream(ctx *ff.AVStream) *stream {
2728
return &stream{ctx}
2829
}
2930

31+
/*
3032
// Stream wrapper for encoding
3133
func newWriterStream(ctx *ff.AVFormatContext, param Parameters) (*writerstream, error) {
3234
// Parameters - Codec
@@ -44,7 +46,7 @@ func newWriterStream(ctx *ff.AVFormatContext, param Parameters) (*writerstream,
4446
4547
return nil, ErrNotImplemented
4648
}
47-
49+
*/
4850
////////////////////////////////////////////////////////////////////////////////
4951
// PUBLIC METHODS
5052

@@ -61,6 +63,13 @@ func (stream *stream) Type() MediaType {
6163
}
6264

6365
func (stream *stream) Parameters() Parameters {
64-
// TODO
65-
return new(par)
66+
switch stream.Type() {
67+
case AUDIO:
68+
return newCodecAudioParameters(stream.CodecPar())
69+
case VIDEO:
70+
return newCodecVideoParameters(stream.CodecPar())
71+
}
72+
73+
// Other types not yet supported
74+
return nil
6675
}

test.mp3

4 MB
Binary file not shown.

writer.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package media
22

33
import (
44
"errors"
5-
"fmt"
65
"io"
76

87
// Packages
@@ -55,15 +54,16 @@ func createMedia(url string, format Format, metadata []Metadata, params ...Param
5554
}
5655

5756
// Add streams
58-
for _, param := range params {
59-
stream, err := newWriterStream(ctx, param)
60-
if err != nil {
61-
return nil, errors.Join(err, writer.Close())
62-
} else {
63-
fmt.Println("TODO: STREAM", stream)
57+
/*
58+
for _, param := range params {
59+
stream, err := newWriterStream(ctx, param)
60+
if err != nil {
61+
return nil, errors.Join(err, writer.Close())
62+
} else {
63+
fmt.Println("TODO: STREAM", stream)
64+
}
6465
}
65-
}
66-
66+
*/
6767
// Open the output file, if needed
6868
if !ctx.Flags().Is(ff.AVFMT_NOFILE) {
6969
w, err := ff.AVFormat_avio_open(url, ff.AVIO_FLAG_WRITE)

0 commit comments

Comments
 (0)