Skip to content

Commit 0c3720d

Browse files
authored
[CI] Update CI docker for latest ml_dtypes (#3162)
This PR updates the CI docker images so that the latest packages of `ml_dtypes` can be installed.
1 parent 56a5ff9 commit 0c3720d

File tree

22 files changed

+43
-45
lines changed

22 files changed

+43
-45
lines changed

.pylintrc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[MESSAGES CONTROL]
2+
disable=too-many-positional-arguments,duplicate-code

ci/jenkinsfile.groovy

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717

1818
import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
1919

20-
run_cpu = "bash ci/bash.sh mlcaidev/ci-cpu:4d61e5d -e GPU cpu -e MLC_CI_SETUP_DEPS 1"
21-
run_cuda = "bash ci/bash.sh mlcaidev/ci-cu121:4d61e5d -e GPU cuda-12.1 -e MLC_CI_SETUP_DEPS 1"
22-
run_rocm = "bash ci/bash.sh mlcaidev/ci-rocm57:4d61e5d -e GPU rocm-5.7 -e MLC_CI_SETUP_DEPS 1"
20+
run_cpu = "bash ci/bash.sh mlcaidev/ci-cpu:26d65cc -e GPU cpu -e MLC_CI_SETUP_DEPS 1"
21+
run_cuda = "bash ci/bash.sh mlcaidev/ci-cu128:26d65cc -e GPU cuda-12.8 -e MLC_CI_SETUP_DEPS 1"
22+
// run_rocm = "bash ci/bash.sh mlcaidev/ci-rocm57:26d65cc -e GPU rocm-5.7 -e MLC_CI_SETUP_DEPS 1"
2323

2424
pkg_cpu = "bash ci/bash.sh mlcaidev/package-rocm61:5b6f876 -e GPU cpu -e MLC_CI_SETUP_DEPS 1"
2525
pkg_cuda = "bash ci/bash.sh mlcaidev/package-cu128:5b6f876 -e GPU cuda-12.8 -e MLC_CI_SETUP_DEPS 1"
@@ -123,10 +123,10 @@ stage('Build') {
123123
ws(per_exec_ws('mlc-llm-build-cuda')) {
124124
init_git(true)
125125
sh(script: "ls -alh", label: 'Show work directory')
126-
sh(script: "${pkg_cuda} conda env export --name py38", label: 'Checkout version')
127-
sh(script: "${pkg_cuda} -j 8 -v \$HOME/.ccache /ccache conda run -n py38 ./ci/task/build_lib.sh", label: 'Build MLC LLM runtime')
128-
sh(script: "${pkg_cuda} -j 8 conda run -n py38 ./ci/task/build_wheel.sh", label: 'Build MLC LLM wheel')
129-
sh(script: "${pkg_cuda} -j 1 conda run -n py38 ./ci/task/build_clean.sh", label: 'Clean up after build')
126+
sh(script: "${pkg_cuda} conda env export --name py312", label: 'Checkout version')
127+
sh(script: "${pkg_cuda} -j 8 -v \$HOME/.ccache /ccache conda run -n py312 ./ci/task/build_lib.sh", label: 'Build MLC LLM runtime')
128+
sh(script: "${pkg_cuda} -j 8 conda run -n py312 ./ci/task/build_wheel.sh", label: 'Build MLC LLM wheel')
129+
sh(script: "${pkg_cuda} -j 1 conda run -n py312 ./ci/task/build_clean.sh", label: 'Clean up after build')
130130
sh(script: "ls -alh ./wheels/", label: 'Build artifact')
131131
pack_lib('mlc_wheel_cuda', 'wheels/*.whl')
132132
}
@@ -165,10 +165,10 @@ stage('Build') {
165165
ws(per_exec_ws('mlc-llm-build-vulkan')) {
166166
init_git(true)
167167
sh(script: "ls -alh", label: 'Show work directory')
168-
sh(script: "${pkg_cpu} conda env export --name py38", label: 'Checkout version')
169-
sh(script: "${pkg_cpu} -j 8 conda run -n py38 ./ci/task/build_lib.sh", label: 'Build MLC LLM runtime')
170-
sh(script: "${pkg_cpu} -j 8 conda run -n py38 ./ci/task/build_wheel.sh", label: 'Build MLC LLM wheel')
171-
sh(script: "${pkg_cpu} -j 1 conda run -n py38 ./ci/task/build_clean.sh", label: 'Clean up after build')
168+
sh(script: "${pkg_cpu} conda env export --name py312", label: 'Checkout version')
169+
sh(script: "${pkg_cpu} -j 8 conda run -n py312 ./ci/task/build_lib.sh", label: 'Build MLC LLM runtime')
170+
sh(script: "${pkg_cpu} -j 8 conda run -n py312 ./ci/task/build_wheel.sh", label: 'Build MLC LLM wheel')
171+
sh(script: "${pkg_cpu} -j 1 conda run -n py312 ./ci/task/build_clean.sh", label: 'Clean up after build')
172172
sh(script: "ls -alh ./wheels/", label: 'Build artifact')
173173
pack_lib('mlc_wheel_vulkan', 'wheels/*.whl')
174174
}

ci/task/pylint.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ if [[ -n ${MLC_CI_SETUP_DEPS:-} ]]; then
1010
echo "MLC_CI_SETUP_DEPS=1 start setup deps"
1111
# TVM Unity is a dependency to this testing
1212
pip install --quiet --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-cpu
13+
pip install requests
1314
pip install --quiet --pre -U cuda-python
1415
fi
1516

ci/task/test_model_compile.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ elif [[ ${GPU} == ios ]]; then
3131
elif [[ ${GPU} == android* ]]; then
3232
TARGET=android
3333
pip install --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-cpu
34-
source /android_env_vars.sh
3534
else
3635
TARGET=vulkan
3736
pip install --pre -U --no-index -f https://mlc.ai/wheels mlc-ai-nightly-cpu

python/mlc_llm/cli/delivery.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ def _main( # pylint: disable=too-many-locals, too-many-arguments
282282
failed_cases: List[Tuple[str, str]] = []
283283
delivered_log = _get_current_log(log)
284284
for task_index, task in enumerate(delivery_diff.tasks, 1):
285-
logger.info(
285+
logger.info( # pylint: disable=logging-not-lazy
286286
bold("[{task_index}/{total_tasks}] Processing model: ").format(
287287
task_index=task_index,
288288
total_tasks=len(delivery_diff.tasks),

python/mlc_llm/cli/lib_delivery.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def _main( # pylint: disable=too-many-locals
125125
"""Compile the model libs in the spec and save them to the binary_libs_dir."""
126126
failed_cases: List[Any] = []
127127
for task_index, task in enumerate(spec["tasks"], 1):
128-
logger.info(
128+
logger.info( # pylint: disable=logging-not-lazy
129129
bold("[{task_index}/{total_tasks}] Processing model: ").format(
130130
task_index=task_index,
131131
total_tasks=len(spec["tasks"]),

python/mlc_llm/compiler_pass/attach_logit_processor.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,7 @@ def _apply_logit_bias_inplace(
7272
def _get_apply_logit_bias_inplace(target: tvm.target.Target):
7373
tx = 1024 # default
7474
max_num_threads_per_block = get_max_num_threads_per_block(target)
75-
if max_num_threads_per_block < tx:
76-
tx = max_num_threads_per_block
75+
tx = min(tx, max_num_threads_per_block)
7776
check_thread_limits(target, bdx=tx, bdy=1, bdz=1, gdz=1)
7877

7978
@T.prim_func
@@ -157,8 +156,7 @@ def _apply_penalty_inplace( # pylint: disable=too-many-arguments,too-many-local
157156
def _get_apply_penalty_inplace(target: tvm.target.Target):
158157
tx = 1024 # default
159158
max_num_threads_per_block = get_max_num_threads_per_block(target)
160-
if max_num_threads_per_block < tx:
161-
tx = max_num_threads_per_block
159+
tx = min(tx, max_num_threads_per_block)
162160
check_thread_limits(target, bdx=tx, bdy=1, bdz=1, gdz=1)
163161

164162
@T.prim_func
@@ -248,8 +246,7 @@ def _apply_bitmask_inplace(
248246
def _get_apply_bitmask_inplace(target: tvm.target.Target):
249247
tx = 1024 # default
250248
max_num_threads_per_block = get_max_num_threads_per_block(target)
251-
if max_num_threads_per_block < tx:
252-
tx = max_num_threads_per_block
249+
tx = min(tx, max_num_threads_per_block)
253250
check_thread_limits(target, bdx=tx, bdy=1, bdz=1, gdz=1)
254251

255252
@T.prim_func

python/mlc_llm/contrib/embeddings/embeddings.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,8 @@ def _tokenize_queries(self, queries: List[str]) -> Tuple[np.ndarray, np.ndarray]
170170
tokens = engine_utils.process_prompts(queries, self.tokenizer.encode) # type: ignore
171171
max_query_length = max(len(token_seq) for token_seq in tokens)
172172

173-
token_inputs = np.zeros((len(tokens), max_query_length), dtype=np.int32)
174-
attention_mask = np.zeros((len(tokens), max_query_length), dtype=np.int32)
173+
token_inputs: np.ndarray = np.zeros((len(tokens), max_query_length), dtype=np.int32)
174+
attention_mask: np.ndarray = np.zeros((len(tokens), max_query_length), dtype=np.int32)
175175

176176
for i, token_seq in enumerate(tokens):
177177
token_inputs[i, : len(token_seq)] = token_seq

python/mlc_llm/json_ffi/engine.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ def create( # pylint: disable=too-many-arguments,too-many-locals
194194
),
195195
request_id=request_id,
196196
)
197-
for response in chatcmpl_generator:
197+
for response in chatcmpl_generator: # pylint: disable=use-yield-from
198198
yield response
199199

200200

python/mlc_llm/model/phi/phi_model.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,7 @@ def __post_init__(self):
135135
)
136136
if self.prefill_chunk_size == 0:
137137
self.prefill_chunk_size = self.context_window_size
138-
if self.prefill_chunk_size > self.context_window_size:
139-
self.prefill_chunk_size = self.context_window_size
138+
self.prefill_chunk_size = min(self.prefill_chunk_size, self.context_window_size)
140139
if self.n_head_kv == 0 or self.n_head_kv is None:
141140
self.n_head_kv = self.n_head
142141
if self.n_inner == 0 or self.n_inner is None:

0 commit comments

Comments (0)