Skip to content

Commit da5f20c

Browse files
committed
Added media_stream component which would be used to record/play audio_video
1 parent 398f125 commit da5f20c

32 files changed

+4821
-0
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
if (ENABLE_SIGNALLING_ONLY)
2+
set(srcs )
3+
else()
4+
set(srcs
5+
"src/esp_video_if.c"
6+
"src/esp_h264_hw_enc.c"
7+
"src/app_camera_esp.c"
8+
"src/esp32p4_frame_grabber.c"
9+
"src/H264FrameGrabber.c"
10+
"src/OpusFrameGrabber.c"
11+
"src/OpusAudioPlayer.c"
12+
"src/resampling.c"
13+
"src/ringbuf.c"
14+
"src/video_capture_adapter.c"
15+
"src/audio_capture_adapter.c"
16+
"src/audio_player_adapter.c"
17+
"src/video_player_adapter.c"
18+
)
19+
endif()
20+
21+
idf_component_register(
22+
SRCS ${srcs}
23+
INCLUDE_DIRS "include"
24+
PRIV_INCLUDE_DIRS "src"
25+
)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
menu "ESP Video Interface"
2+
config USE_ESP_VIDEO_IF
3+
bool "Use ESP Video Interface"
4+
default y
5+
help
6+
Use ESP Video Interface for video capture.
7+
8+
config ESP_VIDEO_IF_HOR_FLIP
9+
bool "Horizontal Flip"
10+
default y
11+
depends on USE_ESP_VIDEO_IF
12+
help
13+
Flip the image horizontally.
14+
endmenu
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Media Stream
2+
3+
This component provides a media stream interface for the KVS SDK for ESP.
4+
5+
## Components
6+
7+
- `OpusAudioPlayer`: Plays a single Opus frame. Internally, the frame is decoded and inserted into a ring buffer. A dedicated I2S task reads the ring buffer and plays the audio.
8+
- `H264FrameGrabber`: Grabs one H.264-encoded frame. Camera frames are captured, encoded with the H.264 encoder, and continuously inserted into a frame_queue.
9+
- `OpusFrameGrabber`: Grabs one Opus frame from the frame queue. I2S data is recorded by the module into a ring buffer, encoded with the Opus encoder, and continuously inserted into a frame_queue.
10+
11+
## Usage
12+
13+
- Please refer to the header files in media_stream/include for the API descriptions.
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
dependencies:
2+
espressif/esp_video:
3+
version: "*"
4+
rules:
5+
- if: "target in [esp32p4]"
6+
7+
espressif/esp_h264:
8+
public: true
9+
matches:
10+
- if: "target in [esp32s3]"
11+
version: "~0.1.1"
12+
- if: "target in [esp32p4]"
13+
version: "1.0.4"
14+
15+
espressif/esp_audio_codec:
16+
version: "*"
17+
18+
espressif/esp_codec_dev:
19+
version: "*"
20+
21+
espressif/esp32-camera:
22+
version: ^2.0.2
23+
24+
# local components
25+
esp32_p4_function_ev_board:
26+
path: ../esp32_p4_function_ev_board
27+
version: "*"
28+
rules:
29+
- if: "target in [esp32p4]"
30+
31+
esp_webrtc_utils:
32+
path: ../esp_webrtc_utils
33+
version: "*"
34+
35+
description: Media stream component
36+
version: "0.0.8"
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
/**
8+
* @brief Grab camera frames and encode them with the H.264 encoder
9+
*
10+
*/
11+
12+
#pragma once
13+
14+
#include <stdint.h>
15+
#include <esp_h264_types.h>
16+
17+
#define ENCODER_TASK (1)
18+
19+
#if ENCODER_TASK
20+
/** Descriptor for one H.264 output buffer, as returned by get_h264_encoded_frame(). */
typedef struct {
21+
uint8_t *buffer; /*!< Data buffer */
22+
uint32_t len; /*!< Buffer length in bytes */
23+
esp_h264_frame_type_t type; /*!< Frame type */
24+
} esp_h264_out_buf_t;
25+
26+
/*
 * Returns a pointer to an esp_h264_out_buf_t describing an encoded frame.
 * Declared with (void) so the compiler rejects calls that pass arguments.
 * NOTE(review): buffer ownership and the return value when no frame is
 * available are not visible in this header - confirm against the implementation.
 */
esp_h264_out_buf_t *get_h264_encoded_frame(void);
27+
28+
/* Explicitly initialize camera and encoder */
29+
void camera_and_encoder_init(void);
30+
#else
31+
void get_h264_encoded_frame(uint8_t *out_buf, uint32_t *frame_len);
32+
#endif
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/**
2+
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
#include "esp_err.h"
8+
#include "inttypes.h"
9+
10+
#pragma once
11+
12+
/**
13+
* @brief Initialize OPUS audio player
14+
*
15+
* @return esp_err_t ESP_OK on success, otherwise an error code
16+
*/
17+
esp_err_t OpusAudioPlayerInit(void);
18+
19+
/**
20+
* @brief Deinitialize OPUS audio player
21+
*
22+
* @return esp_err_t ESP_OK on success, otherwise an error code
23+
*/
24+
esp_err_t OpusAudioPlayerDeinit(void);
25+
26+
/**
27+
* @brief Decode one frame of OPUS to PCM
28+
*
29+
* @param data Pointer to encoded OPUS data
30+
* @param size Size of encoded OPUS data
31+
* @return esp_err_t ESP_OK on success, otherwise an error code
32+
*/
33+
esp_err_t OpusAudioPlayerDecode(uint8_t *data, size_t size);
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
/**
8+
* @brief Grab mic data and encode it with the Opus encoder
9+
*
10+
*/
11+
12+
#pragma once
13+
14+
#include <stdint.h>
15+
16+
/** Descriptor for one Opus output buffer, as returned by get_opus_encoded_frame(). */
typedef struct {
17+
uint8_t *buffer; /*!< Data buffer */
18+
uint32_t len; /*!< Buffer length in bytes */
19+
} esp_opus_out_buf_t;
20+
21+
/*
 * Returns a pointer to an esp_opus_out_buf_t describing an encoded frame.
 * Declared with (void) so the compiler rejects calls that pass arguments.
 * NOTE(review): buffer ownership and the return value when no frame is
 * available are not visible in this header - confirm against the implementation.
 */
esp_opus_out_buf_t *get_opus_encoded_frame(void);
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
/**
8+
* @brief Generic audio capture interface for microphone input and encoding
9+
*/
10+
11+
#pragma once
12+
13+
#include <stdint.h>
14+
#include "esp_err.h"
15+
16+
#ifdef __cplusplus
17+
extern "C" {
18+
#endif
19+
20+
/**
21+
* @brief Audio codec type enumeration
22+
*/
23+
typedef enum {
24+
AUDIO_CODEC_OPUS,
25+
AUDIO_CODEC_PCM,
26+
AUDIO_CODEC_AAC,
27+
/* Add more codecs as needed */
28+
} audio_codec_type_t;
29+
30+
/**
31+
* @brief Audio format configuration
32+
*/
33+
typedef struct {
34+
uint32_t sample_rate; /* Sample rate in Hz (e.g., 16000, 44100) */
35+
uint8_t channels; /* Number of channels (1=mono, 2=stereo) */
36+
uint16_t bits_per_sample; /* Bits per sample (8, 16, 24, 32) */
37+
} audio_format_t;
38+
39+
/**
40+
* @brief Audio capture configuration
41+
*/
42+
typedef struct {
43+
audio_codec_type_t codec; /* Audio codec to use */
44+
audio_format_t format; /* Audio format */
45+
uint32_t bitrate; /* Target bitrate in kbps (for compressed formats) */
46+
uint16_t frame_duration_ms; /* Frame duration in milliseconds */
47+
void *codec_specific; /* Codec-specific parameters if needed */
48+
} audio_capture_config_t;
49+
50+
/**
51+
* @brief Audio frame buffer structure
52+
*/
53+
typedef struct {
54+
uint8_t *buffer; /* Data buffer */
55+
uint32_t len; /* Buffer length in bytes */
56+
uint64_t timestamp; /* Timestamp in microseconds */
57+
} audio_frame_t;
58+
59+
/**
60+
* @brief Audio capture handle
61+
*/
62+
typedef void* audio_capture_handle_t;
63+
64+
/**
65+
* @brief Initialize audio capture with specified configuration
66+
*
67+
* @param config Audio capture configuration
68+
* @param ret_handle Pointer to store the created handle
69+
* @return esp_err_t ESP_OK on success, otherwise an error code
70+
*/
71+
esp_err_t audio_capture_init(audio_capture_config_t *config, audio_capture_handle_t *ret_handle);
72+
73+
/**
74+
* @brief Start audio capture
75+
*
76+
* @param handle Audio capture handle
77+
* @return esp_err_t ESP_OK on success, otherwise an error code
78+
*/
79+
esp_err_t audio_capture_start(audio_capture_handle_t handle);
80+
81+
/**
82+
* @brief Stop audio capture
83+
*
84+
* @param handle Audio capture handle
85+
* @return esp_err_t ESP_OK on success, otherwise an error code
86+
*/
87+
esp_err_t audio_capture_stop(audio_capture_handle_t handle);
88+
89+
/**
90+
* @brief Get the next captured audio frame
91+
*
92+
* @param handle Audio capture handle
93+
* @param frame Pointer to store the audio frame
94+
* @param wait_ms Time to wait for a frame in milliseconds (0 for non-blocking)
95+
* @return esp_err_t ESP_OK on success, ESP_ERR_TIMEOUT if no frame available, otherwise an error code
96+
*/
97+
esp_err_t audio_capture_get_frame(audio_capture_handle_t handle, audio_frame_t **frame, uint32_t wait_ms);
98+
99+
/**
100+
* @brief Release an audio frame when no longer needed
101+
*
102+
* @param handle Audio capture handle
103+
* @param frame Frame to release
104+
* @return esp_err_t ESP_OK on success, otherwise an error code
105+
*/
106+
esp_err_t audio_capture_release_frame(audio_capture_handle_t handle, audio_frame_t *frame);
107+
108+
/**
109+
* @brief Deinitialize audio capture and free resources
110+
*
111+
* @param handle Audio capture handle
112+
* @return esp_err_t ESP_OK on success, otherwise an error code
113+
*/
114+
esp_err_t audio_capture_deinit(audio_capture_handle_t handle);
115+
116+
#ifdef __cplusplus
117+
}
118+
#endif

0 commit comments

Comments
 (0)