-const path = require("path");
-const { whisper } = require(path.join(
-  __dirname,
-  "../../../build/Release/addon.node"
-));
-const { promisify } = require("util");
+const { join } = require('path');
+const { whisper } = require('../../../build/Release/addon.node');
+const { promisify } = require('util');
 
 const whisperAsync = promisify(whisper);
 
-const whisperParamsMock = {
-  language: "en",
-  model: path.join(__dirname, "../../../models/ggml-base.en.bin"),
-  fname_inp: path.join(__dirname, "../../../samples/jfk.wav"),
+const commonParams = {
+  language: 'en',
+  model: join(__dirname, '../../../models/ggml-base.en.bin'),
+  fname_inp: join(__dirname, '../../../samples/jfk.wav'),
   use_gpu: true,
   flash_attn: false,
   no_prints: true,
-  comma_in_time: false,
-  translate: true,
   no_timestamps: false,
   detect_language: false,
   audio_ctx: 0,
-  max_len: 0,
-  prompt: "",
-  print_progress: false,
-  progress_callback: (progress) => {
-    console.log(`Progress: ${progress}`);
-  },
-  max_context: -1
+  max_len: 0
 };
 
-describe("Run whisper.node", () => {
-  test("it should receive a non-empty value", async () => {
-    let result = await whisperAsync(whisperParamsMock);
-    console.log(result);
+describe('Whisper.cpp Node.js addon with VAD support', () => {
+  test('Basic whisper transcription without VAD', async () => {
+    const params = {
+      ...commonParams,
+      vad: false
+    };
 
-    expect(result['transcription'].length).toBeGreaterThan(0);
-  }, 10000);
+    const result = await whisperAsync(params);
+
+    expect(typeof result).toBe('object');
+    expect(Array.isArray(result.transcription)).toBe(true);
+    expect(result.transcription.length).toBeGreaterThan(0);
+
+    // Check that we got some transcription text
+    const text = result.transcription.map(segment => segment[2]).join(' ');
+    expect(text.length).toBeGreaterThan(0);
+    expect(text.toLowerCase()).toContain('ask not');
+  }, 30000);
+
+  test('VAD parameters validation', async () => {
+    // Test with invalid VAD model - should return empty transcription
+    const invalidParams = {
+      ...commonParams,
+      vad: true,
+      vad_model: 'non-existent-model.bin',
+      vad_threshold: 0.5
+    };
+
+    // This should handle the error gracefully and return empty transcription
+    const result = await whisperAsync(invalidParams);
+    expect(typeof result).toBe('object');
+    expect(Array.isArray(result.transcription)).toBe(true);
+    // When VAD model doesn't exist, it should return empty transcription
+    expect(result.transcription.length).toBe(0);
+  }, 10000);
+
+  test('VAD parameter parsing', async () => {
+    // Test that VAD parameters are properly parsed (even if VAD model doesn't exist)
+    const vadParams = {
+      ...commonParams,
+      vad: false, // Disabled so no model required
+      vad_threshold: 0.7,
+      vad_min_speech_duration_ms: 300,
+      vad_min_silence_duration_ms: 150,
+      vad_max_speech_duration_s: 45.0,
+      vad_speech_pad_ms: 50,
+      vad_samples_overlap: 0.15
+    };
+
+    const result = await whisperAsync(vadParams);
+
+    expect(typeof result).toBe('object');
+    expect(Array.isArray(result.transcription)).toBe(true);
+  }, 30000);
+
+  test('Progress callback with VAD disabled', async () => {
+    let progressCalled = false;
+    let lastProgress = 0;
+
+    const params = {
+      ...commonParams,
+      vad: false,
+      progress_callback: (progress) => {
+        progressCalled = true;
+        lastProgress = progress;
+        expect(progress).toBeGreaterThanOrEqual(0);
+        expect(progress).toBeLessThanOrEqual(100);
+      }
+    };
+
+    const result = await whisperAsync(params);
+
+    expect(progressCalled).toBe(true);
+    expect(lastProgress).toBe(100);
+    expect(typeof result).toBe('object');
+  }, 30000);
+
+  test('Language detection without VAD', async () => {
+    const params = {
+      ...commonParams,
+      vad: false,
+      detect_language: true,
+      language: 'auto'
+    };
+
+    const result = await whisperAsync(params);
+
+    expect(typeof result).toBe('object');
+    expect(typeof result.language).toBe('string');
+    expect(result.language.length).toBeGreaterThan(0);
+  }, 30000);
+
+  test('Basic transcription with all VAD parameters set', async () => {
+    // Test with VAD disabled but all parameters set to ensure no crashes
+    const params = {
+      ...commonParams,
+      vad: false, // Disabled so it works without VAD model
+      vad_model: '', // Empty model path
+      vad_threshold: 0.6,
+      vad_min_speech_duration_ms: 200,
+      vad_min_silence_duration_ms: 80,
+      vad_max_speech_duration_s: 25.0,
+      vad_speech_pad_ms: 40,
+      vad_samples_overlap: 0.08
+    };
+
+    const result = await whisperAsync(params);
+
+    expect(typeof result).toBe('object');
+    expect(Array.isArray(result.transcription)).toBe(true);
+    expect(result.transcription.length).toBeGreaterThan(0);
+  }, 30000);
 });
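
For reference, below is a minimal standalone sketch (not part of this change) of what calling the addon with VAD actually enabled might look like; the tests above deliberately keep vad: false or point at a non-existent model so they do not depend on a real VAD model. The parameter names and the result shape mirror the tests, but the Silero VAD model filename, the relative paths, and the script location are assumptions and should be adjusted to files you actually have.

// Hypothetical standalone usage, assuming this script sits next to the test file
// and that a Silero VAD model has been downloaded locally (filename is assumed).
const { join } = require('path');
const { promisify } = require('util');
const { whisper } = require('../../../build/Release/addon.node');

const whisperAsync = promisify(whisper);

async function main() {
  const result = await whisperAsync({
    language: 'en',
    model: join(__dirname, '../../../models/ggml-base.en.bin'),
    fname_inp: join(__dirname, '../../../samples/jfk.wav'),
    use_gpu: true,
    flash_attn: false,
    no_prints: true,
    no_timestamps: false,
    detect_language: false,
    audio_ctx: 0,
    max_len: 0,
    vad: true,
    vad_model: join(__dirname, '../../../models/ggml-silero-v5.1.2.bin'), // assumed local path
    vad_threshold: 0.5
  });

  // As in the tests: result.transcription is an array of segments where
  // segment[2] holds the text, and result.language is a string.
  console.log(result.transcription.map((segment) => segment[2]).join(' '));
}

main();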