Skip to content

Commit e257af3

Browse files
committed Apr 16, 2025
fix: encoder args #225 #224
- 修复 ffmpeg 参数覆盖问题
1 parent c080af7 commit e257af3

File tree

2 files changed

+21
-12
lines changed

2 files changed

+21
-12
lines changed
 

‎modules/core/handler/AudioHandler.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def remove_wav_bytes_header(wav_bytes: bytes):
3232
return wav_file.get_body_data()
3333

3434

35-
def read_np_to_wav(audio_data: np.ndarray) -> bytes:
35+
def covert_to_s16le(audio_data: np.ndarray) -> bytes:
    """Peak-normalize audio samples and return them as raw s16le PCM bytes.

    The samples are divided by their largest absolute value, scaled to the
    signed 16-bit range (+/-32767), truncated to integers and serialized as
    little-endian 16-bit values. The result is headerless and always uses
    2 bytes per sample, whatever the dtype of ``audio_data`` is, so any
    header describing this data should declare a sample width of 2.

    Args:
        audio_data: Array of audio samples.

    Returns:
        The raw PCM bytes. An empty input yields ``b""``; an all-zero
        (silent) input yields zero-valued samples of the same length.
    """
    samples = np.asarray(audio_data)

    # np.max raises ValueError on an empty array; there is nothing to encode.
    if samples.size == 0:
        return b""

    peak = np.max(np.abs(samples))

    # Pure silence: dividing by a zero peak would produce NaN, and casting
    # NaN to int16 is undefined. Emit silence of the same length instead.
    if peak == 0:
        return np.zeros(samples.shape, dtype="<i2").tobytes()

    normalized = samples / peak
    # "<i2" pins the byte order to little-endian on every platform, matching
    # the "s16le" in the function name (np.int16 is native-endian).
    pcm = (normalized * 32767).astype("<i2")
    return pcm.tobytes()
@@ -104,8 +104,10 @@ async def enqueue_to_stream(self) -> AsyncGenerator[bytes, None]:
104104

105105
chunk_data = bytes()
106106
async for sample_rate, audio_data in self.enqueue_stream():
107-
encoder.set_header(sample_rate=sample_rate)
108-
audio_bytes = read_np_to_wav(audio_data=audio_data)
107+
encoder.set_header(
108+
sample_rate=sample_rate, sample_width=audio_data.dtype.itemsize
109+
)
110+
audio_bytes = covert_to_s16le(audio_data=audio_data)
109111

110112
logger.debug(f"write audio_bytes len: {len(audio_bytes)}")
111113
encoder.write(audio_bytes)
@@ -151,8 +153,10 @@ async def enqueue_to_stream_join(self) -> AsyncGenerator[bytes, None]:
151153
encoder = self.get_encoder()
152154
chunk_data = bytes()
153155
async for sample_rate, audio_data in self.enqueue_stream():
154-
encoder.set_header(sample_rate=sample_rate)
155-
audio_bytes = read_np_to_wav(audio_data=audio_data)
156+
encoder.set_header(
157+
sample_rate=sample_rate, sample_width=audio_data.dtype.itemsize
158+
)
159+
audio_bytes = covert_to_s16le(audio_data=audio_data)
156160
encoder.write(audio_bytes)
157161

158162
encoder.close()
@@ -172,8 +176,10 @@ async def enqueue_to_bytes(self) -> bytes:
172176
async with cancel_on_disconnect(self.current_request):
173177
try:
174178
sample_rate, audio_data = await self.enqueue()
175-
audio_bytes = read_np_to_wav(audio_data=audio_data)
176-
encoder.set_header(sample_rate=sample_rate)
179+
audio_bytes = covert_to_s16le(audio_data=audio_data)
180+
encoder.set_header(
181+
sample_rate=sample_rate, sample_width=audio_data.dtype.itemsize
182+
)
177183
encoder.write(audio_bytes)
178184
encoder.close()
179185
buffer = encoder.read_all()

‎modules/core/handler/encoder/StreamEncoder.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,15 @@ def open(
6565
"-re",
6666
"-threads",
6767
str(os.cpu_count() or 4),
68+
# NOTE: 指定输入格式为 16 位 PCM
69+
# NOTE: 其实文件头里面有写,但是没有文件名,所以需要手动指定
6870
"-f",
69-
"s16le", # 指定输入格式为 16 位 PCM
70-
"-ar",
71-
str(self.sample_rate), # 输入采样率
72-
"-ac",
73-
"1", # 输入单声道
71+
"s16le",
72+
# NOTE: 不要在这里传递 ar/ac ,我们写在wav文件头上,这里会覆盖掉文件头读取的数据
73+
# "-ar",
74+
# str(self.sample_rate), # 输入采样率
75+
# "-ac",
76+
# "1", # 输入单声道
7477
"-i",
7578
"pipe:0",
7679
"-f",

0 commit comments

Comments
 (0)