
Commit 7ce5785

replace model of v0 spec decode ci

Signed-off-by: Shanshan Shen <87969357+shen-shanshan@users.noreply.github.com>

1 parent: 18495f4

6 files changed, +17 -17 lines

tests/e2e/long_term/spec_decode_v0/e2e/test_eagle_correctness.py (1 addition, 1 deletion)

@@ -43,7 +43,7 @@
     run_equality_correctness_test

 # main model
-MAIN_MODEL = "JackFram/llama-68m"
+MAIN_MODEL = "LLM-Research/Meta-Llama-3.1-8B-Instruct"

 # speculative model
 SPEC_MODEL = "abhigoyal/vllm-eagle-llama-68m-random"
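For context, MAIN_MODEL and SPEC_MODEL feed vLLM's v0 speculative decoding path: the target model verifies tokens proposed by the EAGLE draft model. A minimal sketch of that pairing, assuming the v0 LLM(speculative_model=...) keyword API and an illustrative num_speculative_tokens value (not taken from this test file):

from vllm import LLM, SamplingParams

# Sketch only: the target model scores and accepts/rejects tokens proposed
# by the EAGLE draft model. num_speculative_tokens=5 is an assumed value.
llm = LLM(
    model="LLM-Research/Meta-Llama-3.1-8B-Instruct",
    speculative_model="abhigoyal/vllm-eagle-llama-68m-random",
    num_speculative_tokens=5,
)
outputs = llm.generate(["Speculative decoding lets us"],
                       SamplingParams(temperature=0.0, max_tokens=32))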

tests/e2e/long_term/spec_decode_v0/e2e/test_medusa_correctness.py (1 addition, 1 deletion)

@@ -49,7 +49,7 @@
 # main model
 # lmsys/vicuna-7b-v1.3 was to be used but it's causing
 # OOM in CI pipeline, so using a smaller model.
-MAIN_MODEL = "JackFram/llama-68m"
+MAIN_MODEL = "LLM-Research/Meta-Llama-3.1-8B-Instruct"

 # speculative model
 SPEC_MODEL = "abhigoyal/vllm-medusa-llama-68m-random"

tests/e2e/long_term/spec_decode_v0/e2e/test_ngram_correctness.py (5 additions, 5 deletions)

@@ -61,7 +61,7 @@
 }])
 @pytest.mark.parametrize("per_test_common_llm_kwargs", [
     {
-        "model_name": "JackFram/llama-68m",
+        "model_name": "LLM-Research/Meta-Llama-3.1-8B-Instruct",
     },
 ])
 @pytest.mark.parametrize("baseline_llm_kwargs", [{}])

@@ -124,7 +124,7 @@ def test_ngram_e2e_greedy_correctness(vllm_runner, common_llm_kwargs,
 }])
 @pytest.mark.parametrize("per_test_common_llm_kwargs", [
     {
-        "model_name": "JackFram/llama-68m",
+        "model_name": "LLM-Research/Meta-Llama-3.1-8B-Instruct",
     },
 ])
 @pytest.mark.parametrize("baseline_llm_kwargs", [{}])

@@ -242,7 +242,7 @@ def test_ngram_e2e_greedy_correctness_with_preemption(
 @pytest.mark.parametrize(
     "common_llm_kwargs",
     [{
-        "model_name": "JackFram/llama-68m",
+        "model_name": "LLM-Research/Meta-Llama-3.1-8B-Instruct",

         # Skip cuda graph recording for fast test.
         "enforce_eager": True,

@@ -302,7 +302,7 @@ def test_ngram_different_k(vllm_runner, common_llm_kwargs,
 @pytest.mark.parametrize(
     "common_llm_kwargs",
     [{
-        "model_name": "JackFram/llama-68m",
+        "model_name": "LLM-Research/Meta-Llama-3.1-8B-Instruct",

         # Skip cuda graph recording for fast test.
         "enforce_eager": True,

@@ -364,7 +364,7 @@ def test_ngram_disable_queue(vllm_runner, common_llm_kwargs,
 @pytest.mark.parametrize(
     "common_llm_kwargs",
     [{
-        "model_name": "JackFram/llama-68m",
+        "model_name": "LLM-Research/Meta-Llama-3.1-8B-Instruct",

         # Skip cuda graph recording for fast test.
         "enforce_eager": True,

tests/e2e/long_term/spec_decode_v0/test_multi_step_worker.py (6 additions, 6 deletions)

@@ -96,7 +96,7 @@ def test_same_output_for_single_step():
     worker for num_steps=1.
     """
     seed = 100
-    model_name = 'JackFram/llama-68m'
+    model_name = 'LLM-Research/Meta-Llama-3.1-8B-Instruct'

     block_size = 32
     num_gpu_blocks = 2048 // block_size

@@ -181,7 +181,7 @@ def test_same_output_for_multi_step():
     then runs the worker num_steps times, and compares the output.
     """
     seed = 100
-    model_name = 'JackFram/llama-68m'
+    model_name = 'LLM-Research/Meta-Llama-3.1-8B-Instruct'

     block_size = 16
     num_gpu_blocks = 2048 // block_size

@@ -308,7 +308,7 @@ def test_multi_step_with_batch_expansion_correct_output():
     expanded batch is then used for predicting the next tokens.
     """
     seed = 100
-    model_name = 'JackFram/llama-68m'
+    model_name = 'LLM-Research/Meta-Llama-3.1-8B-Instruct'

     block_size = 16
     num_gpu_blocks = 2048 // block_size

@@ -403,7 +403,7 @@ def test_multi_step_with_batch_expansion_incorrect_output():
     the sequence ID is specified incorrectly.
     """
     seed = 100
-    model_name = 'JackFram/llama-68m'
+    model_name = 'LLM-Research/Meta-Llama-3.1-8B-Instruct'

     block_size = 16
     num_gpu_blocks = 2048 // block_size

@@ -504,7 +504,7 @@ def test_multi_step_correct_kvcache(num_steps):
     is correctly updated for sequences with bonus token.
     """
     seed = 100
-    model_name = "JackFram/llama-68m"
+    model_name = "LLM-Research/Meta-Llama-3.1-8B-Instruct"

     block_size = 16
     num_gpu_blocks = 2048 // block_size

@@ -768,7 +768,7 @@ def test_use_draft_model_runner_advance_step():
     when applicable.
     """
     seed = 100
-    model_name = 'JackFram/llama-68m'
+    model_name = 'LLM-Research/Meta-Llama-3.1-8B-Instruct'

     k = 5
     batch_size = 32
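A side note on the num_gpu_blocks arithmetic recurring in these hunks: the tests size the paged KV cache from a fixed 2048-token budget, so halving block_size doubles the block count while total token capacity stays constant. A small illustration:

block_size = 16
num_gpu_blocks = 2048 // block_size  # 128 blocks
assert num_gpu_blocks * block_size == 2048  # total KV capacity in tokens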

tests/e2e/long_term/spec_decode_v0/test_ngram_worker.py (3 additions, 3 deletions)

@@ -34,7 +34,7 @@ def test_ngram_algo_correctness_for_single_no_match():
     block_size = 32
     num_gpu_blocks = 2048 // block_size
     seed = 100
-    model_name = 'JackFram/llama-68m'
+    model_name = 'LLM-Research/Meta-Llama-3.1-8B-Instruct'
     vocab_size = 32_000
     device = 'npu:0'

@@ -92,7 +92,7 @@ def test_ngram_algo_correctness_for_batches_not_match_all():
     block_size = 32
     num_gpu_blocks = 2048 // block_size
     seed = 100
-    model_name = 'JackFram/llama-68m'
+    model_name = 'LLM-Research/Meta-Llama-3.1-8B-Instruct'
     vocab_size = 32_000
     device = 'npu:0'

@@ -173,7 +173,7 @@ def test_ngram_algo_correctness_for_batches_match_all():
     block_size = 32
     num_gpu_blocks = 2048 // block_size
     seed = 100
-    model_name = 'JackFram/llama-68m'
+    model_name = 'LLM-Research/Meta-Llama-3.1-8B-Instruct'
     vocab_size = 32_000
     device = 'npu:0'
tests/e2e/long_term/spec_decode_v0/test_spec_decode_worker.py (1 addition, 1 deletion)

@@ -932,7 +932,7 @@ def test_correctly_load_weight_for_eagle():
     num_gpu_blocks = 8096 // block_size
     target_worker = create_worker(
         NPUWorker,
-        "JackFram/llama-68m",
+        "LLM-Research/Meta-Llama-3.1-8B-Instruct",
         block_size,
         num_gpu_blocks,
         seed,
