Skip to content

Commit 9d7ebe3

Browse files
committed
vad : update dimensions of VAD model tensors
1 parent f4423f4 commit 9d7ebe3

File tree

1 file changed

+13
-19
lines changed

1 file changed

+13
-19
lines changed

src/whisper.cpp

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4369,35 +4369,35 @@ struct whisper_vad_model {
43694369
e_vad_model type = VAD_MODEL_UNKNOWN;
43704370
whisper_vad_hparams hparams;
43714371

4372-
struct ggml_tensor * stft_forward_basis; // [258, 256]
4372+
struct ggml_tensor * stft_forward_basis; // [256, 258]
43734373

43744374
// Encoder tensors - 4 convolutional layers
4375-
struct ggml_tensor * encoder_0_weight; // [128, 129, 3]
4375+
struct ggml_tensor * encoder_0_weight; // [3, 129, 128]
43764376
struct ggml_tensor * encoder_0_bias; // [128]
43774377

43784378
// Second encoder layer
4379-
struct ggml_tensor * encoder_1_weight; // [64, 128, 3]
4379+
struct ggml_tensor * encoder_1_weight; // [3, 128, 64]
43804380
struct ggml_tensor * encoder_1_bias; // [64]
43814381

43824382
// Third encoder layer
4383-
struct ggml_tensor * encoder_2_weight; // [64, 64, 3]
4383+
struct ggml_tensor * encoder_2_weight; // [3, 64, 64]
43844384
struct ggml_tensor * encoder_2_bias; // [64]
43854385

43864386
// Fourth encoder layer
4387-
struct ggml_tensor * encoder_3_weight; // [128, 64, 3]
4387+
struct ggml_tensor * encoder_3_weight; // [3, 64, 128]
43884388
struct ggml_tensor * encoder_3_bias; // [128]
43894389

43904390
// LSTM decoder tensors
4391-
struct ggml_tensor * lstm_ih_weight; // [512, 128] input-to-hidden
4391+
struct ggml_tensor * lstm_ih_weight; // [128, 512] input-to-hidden
43924392
struct ggml_tensor * lstm_ih_bias; // [512]
4393-
struct ggml_tensor * lstm_hh_weight; // [512, 128] hidden-to-hidden
4393+
struct ggml_tensor * lstm_hh_weight; // [128, 512] hidden-to-hidden
43944394
struct ggml_tensor * lstm_hh_bias; // [512]
43954395

43964396
// Final conv layer
4397-
struct ggml_tensor * final_conv_weight; // [1, 128, 1]
4397+
struct ggml_tensor * final_conv_weight; // [128]
43984398
struct ggml_tensor * final_conv_bias; // [1]
43994399

4400-
// ggml context
4400+
// ggml contexts
44014401
std::vector<ggml_context *> ctxs;
44024402

44034403
// buffer for the model tensors
@@ -4887,20 +4887,16 @@ whisper_vad_context * whisper_vad_init_from_file_with_params_no_state(
48874887
VAD_TENSOR_LSTM_WEIGHT_IH,
48884888
ggml_new_tensor_2d(ctx, GGML_TYPE_F32, hparams.lstm_hidden_size, hstate_dim)
48894889
);
4890+
model.lstm_ih_bias = create_tensor(
4891+
VAD_TENSOR_LSTM_BIAS_IH,
4892+
ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hstate_dim)
4893+
);
48904894

48914895
// LSTM weights - hidden to hidden
48924896
model.lstm_hh_weight = create_tensor(
48934897
VAD_TENSOR_LSTM_WEIGHT_HH,
48944898
ggml_new_tensor_2d(ctx, GGML_TYPE_F32, hparams.lstm_hidden_size, hstate_dim)
48954899
);
4896-
4897-
// LSTM bias - input to hidden
4898-
model.lstm_ih_bias = create_tensor(
4899-
VAD_TENSOR_LSTM_BIAS_IH,
4900-
ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hstate_dim)
4901-
);
4902-
4903-
// LSTM bias - hidden to hidden
49044900
model.lstm_hh_bias = create_tensor(
49054901
VAD_TENSOR_LSTM_BIAS_HH,
49064902
ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hstate_dim)
@@ -4911,8 +4907,6 @@ whisper_vad_context * whisper_vad_init_from_file_with_params_no_state(
49114907
VAD_TENSOR_FINAL_CONV_WEIGHT,
49124908
ggml_new_tensor_2d(ctx, GGML_TYPE_F32, hparams.final_conv_in, 1)
49134909
);
4914-
4915-
// Final conv layer bias
49164910
model.final_conv_bias = create_tensor(
49174911
VAD_TENSOR_FINAL_CONV_BIAS,
49184912
ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1)

0 commit comments

Comments
 (0)