From 7672a40a85b537d58052dc19ce20ecc5a9b2d9ba Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Mon, 31 Mar 2025 04:34:36 +0530 Subject: [PATCH 1/3] add dry run = generalise rclone download --- script/download-and-extract/customize.py | 4 +++ script/download-and-extract/meta.yaml | 3 ++ script/download-file/customize.py | 5 +++- script/download-file/run.sh | 2 +- .../customize.py | 7 ----- .../get-dataset-waymo-calibration/meta.yaml | 30 +++++++++++++++++++ .../run-rclone.sh | 4 --- script/get-dataset-waymo-calibration/run.sh | 5 ++++ script/get-dataset-waymo/customize.py | 7 ----- script/get-dataset-waymo/meta.yaml | 30 +++++++++++++++++++ script/get-dataset-waymo/run-rclone.sh | 7 ----- script/get-dataset-waymo/run.sh | 6 ++++ script/get-ml-model-llama3/customize.py | 13 +++----- script/get-ml-model-llama3/meta.yaml | 30 +++++++++++++++++++ script/get-ml-model-llama3/run-rclone.sh | 4 --- .../get-preprocessed-dataset-criteo/meta.yaml | 20 +++++++++++-- 16 files changed, 135 insertions(+), 42 deletions(-) delete mode 100644 script/get-dataset-waymo-calibration/run-rclone.sh create mode 100644 script/get-dataset-waymo-calibration/run.sh delete mode 100644 script/get-dataset-waymo/run-rclone.sh delete mode 100644 script/get-ml-model-llama3/run-rclone.sh diff --git a/script/download-and-extract/customize.py b/script/download-and-extract/customize.py index 86f8b1d77..45e2d6fdc 100644 --- a/script/download-and-extract/customize.py +++ b/script/download-and-extract/customize.py @@ -54,6 +54,10 @@ def preprocess(i): def postprocess(i): env = i['env'] + + if env.get('MLC_DOWNLOAD_MODE') == "dry": + return {'return': 0} + filepath = env.get('MLC_EXTRACT_EXTRACTED_PATH', '') if filepath == '': filepath = env.get('MLC_DOWNLOAD_DOWNLOADED_PATH', '') diff --git a/script/download-and-extract/meta.yaml b/script/download-and-extract/meta.yaml index 2b8748b2e..10b7527d1 100644 --- a/script/download-and-extract/meta.yaml +++ b/script/download-and-extract/meta.yaml @@ -31,6 +31,9 @@ posthook_deps: MLC_DAE_EXTRACT_DOWNLOADED: - 'yes' - 'True' + skip_if_env: + MLC_DOWNLOAD_MODE: + - 'dry' names: - extract-script tags: extract,file diff --git a/script/download-file/customize.py b/script/download-file/customize.py index 1081c31e6..1ae0c16a3 100644 --- a/script/download-file/customize.py +++ b/script/download-file/customize.py @@ -237,7 +237,7 @@ def preprocess(i): "%", "%%") env['MLC_DOWNLOAD_CMD'] = f"rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), temp_download_file)}{q} -P --error-on-no-transfer" else: - env['MLC_DOWNLOAD_CMD'] = f"rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), env['MLC_DOWNLOAD_FILENAME'])}{q} -P --error-on-no-transfer" + env['MLC_DOWNLOAD_CMD'] = f"rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), env['MLC_DOWNLOAD_FILENAME'])}{q} -P --error-on-no-transfer {extra_download_options}" if not verify_ssl: env['MLC_DOWNLOAD_CMD'] += f" --no-check-certificate" @@ -313,6 +313,9 @@ def postprocess(i): env = i['env'] + if env.get('MLC_DOWNLOAD_MODE') == "dry": + return {'return': 0} + filepath = env['MLC_DOWNLOAD_DOWNLOADED_PATH'] if not os.path.exists(filepath): diff --git a/script/download-file/run.sh b/script/download-file/run.sh index 2664ac52b..b792a9e61 100644 --- a/script/download-file/run.sh +++ b/script/download-file/run.sh @@ -50,7 +50,7 @@ if [[ ${require_download} == 1 ]]; then fi # Verify checksum again if necessary -if [[ ${MLC_DOWNLOAD_TOOL} == "mlcutil" || ${require_download} == 1 ]]; then +if [[ "${MLC_DOWNLOAD_MODE}" != "dry" && ( "${MLC_DOWNLOAD_TOOL}" == "mlcutil" || ${require_download} == 1 ) ]]; then if [[ -n "${MLC_DOWNLOAD_CHECKSUM_CMD}" ]]; then echo -e "\nVerifying checksum after download: ${MLC_DOWNLOAD_CHECKSUM_CMD}" eval "${MLC_DOWNLOAD_CHECKSUM_CMD}" || exit $? diff --git a/script/get-dataset-waymo-calibration/customize.py b/script/get-dataset-waymo-calibration/customize.py index e404aa512..b181ce895 100644 --- a/script/get-dataset-waymo-calibration/customize.py +++ b/script/get-dataset-waymo-calibration/customize.py @@ -17,13 +17,6 @@ def preprocess(i): 'return': 1, 'error': f"Path {env['MLC_DATASET_WAYMO_CALIBRATION_PATH']} does not exists!"} else: env['MLC_TMP_REQUIRE_DOWNLOAD'] = "yes" - if env['MLC_DOWNLOAD_SRC'] == "mlcommons": - i['run_script_input']['script_name'] = 'run-rclone' - if env.get('MLC_OUTDIRNAME', '') != '': - env['MLC_DATASET_WAYMO_CALIBRATION_PATH'] = env['MLC_OUTDIRNAME'] - else: - env['MLC_DATASET_WAYMO_CALIBRATION_PATH'] = os.path.join( - os.getcwd(), "kitti_format", "calibration") return {'return': 0} diff --git a/script/get-dataset-waymo-calibration/meta.yaml b/script/get-dataset-waymo-calibration/meta.yaml index dbc17d494..cf9976fa7 100644 --- a/script/get-dataset-waymo-calibration/meta.yaml +++ b/script/get-dataset-waymo-calibration/meta.yaml @@ -33,3 +33,33 @@ variations: - true force_cache: true tags: get,rclone-config,_waymo + - enable_if_env: + MLC_TMP_REQUIRE_DOWNLOAD: + - 'yes' + env: + MLC_DOWNLOAD_FINAL_ENV_NAME: MLC_DATASET_WAYMO_CALIBRATION_PATH + MLC_EXTRACT_FINAL_ENV_NAME: MLC_DATASET_WAYMO_CALIBRATION_PATH + MLC_DOWNLOAD_URL: mlc-waymo:waymo_preprocessed_dataset/kitti_format/testing + extra_cache_tags: waymo,dataset + force_cache: true + names: + - dae + tags: download-and-extract + force_env_keys: + - MLC_OUTDIRNAME + update_tags_from_env_with_prefix: + _url.: + - MLC_DOWNLOAD_URL + rclone: + group: download-tool + add_deps_recursive: + dae: + tags: _rclone + default: true + dry-run: + group: run-mode + env: + MLC_DOWNLOAD_MODE: dry + dry-run,rclone: + env: + MLC_DOWNLOAD_EXTRA_OPTIONS: --dry-run \ No newline at end of file diff --git a/script/get-dataset-waymo-calibration/run-rclone.sh b/script/get-dataset-waymo-calibration/run-rclone.sh deleted file mode 100644 index fd289eff4..000000000 --- a/script/get-dataset-waymo-calibration/run-rclone.sh +++ /dev/null @@ -1,4 +0,0 @@ -cmd="rclone sync mlc-waymo:waymo_preprocessed_dataset/kitti_format/testing ${MLC_DATASET_WAYMO_CALIBRATION_PATH} -P" -echo $cmd -eval $cmd -test $? -eq 0 || exit $? \ No newline at end of file diff --git a/script/get-dataset-waymo-calibration/run.sh b/script/get-dataset-waymo-calibration/run.sh new file mode 100644 index 000000000..8fdfea598 --- /dev/null +++ b/script/get-dataset-waymo-calibration/run.sh @@ -0,0 +1,5 @@ +if [[ "$MLC_DOWNLOAD_MODE" != "dry" && "$MLC_TMP_REQUIRE_DOWNLOAD" = "true" ]]; then + cd "${MLC_DATASET_WAYMO_CALIBRATION_PATH}/testing" || exit + for f in *.tar.gz; do tar -xzvf "$f"; done + cd - || exit +fi \ No newline at end of file diff --git a/script/get-dataset-waymo/customize.py b/script/get-dataset-waymo/customize.py index cb625f443..833fcbf6c 100644 --- a/script/get-dataset-waymo/customize.py +++ b/script/get-dataset-waymo/customize.py @@ -17,13 +17,6 @@ def preprocess(i): 'return': 1, 'error': f"Path {env['MLC_DATASET_WAYMO_PATH']} does not exists!"} else: env['MLC_TMP_REQUIRE_DOWNLOAD'] = "yes" - if env['MLC_DOWNLOAD_SRC'] == "mlcommons": - i['run_script_input']['script_name'] = 'run-rclone' - if env.get('MLC_OUTDIRNAME', '') != '': - env['MLC_DATASET_WAYMO_PATH'] = env['MLC_OUTDIRNAME'] - else: - env['MLC_DATASET_WAYMO_PATH'] = os.path.join( - os.getcwd(), "kitti_format") return {'return': 0} diff --git a/script/get-dataset-waymo/meta.yaml b/script/get-dataset-waymo/meta.yaml index 63bbf2472..bfeb56a8c 100644 --- a/script/get-dataset-waymo/meta.yaml +++ b/script/get-dataset-waymo/meta.yaml @@ -30,5 +30,35 @@ variations: enable_if_env: MLC_TMP_REQUIRE_DOWNLOAD: - yes + - enable_if_env: + MLC_TMP_REQUIRE_DOWNLOAD: + - 'yes' + env: + MLC_DOWNLOAD_FINAL_ENV_NAME: MLC_DATASET_WAYMO_PATH + MLC_EXTRACT_FINAL_ENV_NAME: MLC_DATASET_WAYMO_PATH + MLC_DOWNLOAD_URL: mlc-waymo:waymo_preprocessed_dataset/kitti_format + extra_cache_tags: waymo,dataset + force_cache: true + names: + - dae + tags: download-and-extract + force_env_keys: + - MLC_OUTDIRNAME + update_tags_from_env_with_prefix: + _url.: + - MLC_DOWNLOAD_URL env: MLC_DOWNLOAD_SRC: mlcommons + rclone: + group: download-tool + add_deps_recursive: + dae: + tags: _rclone + default: true + dry-run: + group: run-mode + env: + MLC_DOWNLOAD_MODE: dry + dry-run,rclone: + env: + MLC_DOWNLOAD_EXTRA_OPTIONS: --dry-run diff --git a/script/get-dataset-waymo/run-rclone.sh b/script/get-dataset-waymo/run-rclone.sh deleted file mode 100644 index 12aac3377..000000000 --- a/script/get-dataset-waymo/run-rclone.sh +++ /dev/null @@ -1,7 +0,0 @@ -cmd="rclone sync mlc-waymo:waymo_preprocessed_dataset/kitti_format ${MLC_DATASET_WAYMO_PATH} -P" -echo $cmd -eval $cmd -test $? -eq 0 || exit $? -cd ${MLC_DATASET_WAYMO_PATH}/kitti_format/training -for f in *.tar.gz; do tar -xzvf "$f"; done -cd - diff --git a/script/get-dataset-waymo/run.sh b/script/get-dataset-waymo/run.sh index 3197bb8ad..ba1412ac9 100644 --- a/script/get-dataset-waymo/run.sh +++ b/script/get-dataset-waymo/run.sh @@ -6,3 +6,9 @@ #echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out #${MLC_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency + +if [[ "$MLC_DOWNLOAD_MODE" != "dry" && "$MLC_TMP_REQUIRE_DOWNLOAD" = "true" ]]; then + cd "${MLC_DATASET_WAYMO_PATH}/kitti_format/training" || exit + for f in *.tar.gz; do tar -xzvf "$f"; done + cd - || exit +fi \ No newline at end of file diff --git a/script/get-ml-model-llama3/customize.py b/script/get-ml-model-llama3/customize.py index 17ebedabd..91c22df1b 100644 --- a/script/get-ml-model-llama3/customize.py +++ b/script/get-ml-model-llama3/customize.py @@ -15,17 +15,11 @@ def preprocess(i): path = env.get('MLC_OUTDIRNAME', '').strip() - if path != "": + if path != "" and env.get('MLC_DOWNLOAD_SRC', '') == "huggingface": os.makedirs(path, exist_ok=True) env['MLC_GIT_CHECKOUT_FOLDER'] = os.path.join( path, env['MLC_ML_MODEL_NAME']) - if env['MLC_DOWNLOAD_SRC'] == "mlcommons": - i['run_script_input']['script_name'] = 'run-rclone' - if env.get('MLC_OUTDIRNAME', '') != '': - env['LLAMA3_CHECKPOINT_PATH'] = env['MLC_OUTDIRNAME'] - else: - env['LLAMA3_CHECKPOINT_PATH'] = os.getcwd() env['MLC_TMP_REQUIRE_DOWNLOAD'] = 'yes' return {'return': 0} @@ -35,7 +29,8 @@ def postprocess(i): env = i['env'] - env['MLC_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] = env['LLAMA3_CHECKPOINT_PATH'] - env['MLC_GET_DEPENDENT_CACHED_PATH'] = env['MLC_ML_MODEL_PATH'] + if env.get('MLC_DOWNLOAD_MODE', '') != "dry": + env['MLC_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] = env['LLAMA3_CHECKPOINT_PATH'] + env['MLC_GET_DEPENDENT_CACHED_PATH'] = env['MLC_ML_MODEL_PATH'] return {'return': 0} diff --git a/script/get-ml-model-llama3/meta.yaml b/script/get-ml-model-llama3/meta.yaml index 2ae6ba84e..4d52456cb 100644 --- a/script/get-ml-model-llama3/meta.yaml +++ b/script/get-ml-model-llama3/meta.yaml @@ -61,8 +61,38 @@ variations: enable_if_env: MLC_TMP_REQUIRE_DOWNLOAD: - yes + - enable_if_env: + MLC_TMP_REQUIRE_DOWNLOAD: + - 'yes' + env: + MLC_DOWNLOAD_FINAL_ENV_NAME: LLAMA3_CHECKPOINT_PATH + MLC_EXTRACT_FINAL_ENV_NAME: LLAMA3_CHECKPOINT_PATH + MLC_DOWNLOAD_URL: mlc-llama3-1:inference/<<>> + extra_cache_tags: waymo,dataset + force_cache: true + names: + - dae + tags: download-and-extract + force_env_keys: + - MLC_OUTDIRNAME + update_tags_from_env_with_prefix: + _url.: + - MLC_DOWNLOAD_URL env: MLC_DOWNLOAD_SRC: mlcommons + rclone: + group: download-tool + add_deps_recursive: + dae: + tags: _rclone + default: true + dry-run: + group: run-mode + env: + MLC_DOWNLOAD_MODE: dry + dry-run,rclone: + env: + MLC_DOWNLOAD_EXTRA_OPTIONS: --dry-run hf: group: download-src default_variations: diff --git a/script/get-ml-model-llama3/run-rclone.sh b/script/get-ml-model-llama3/run-rclone.sh deleted file mode 100644 index e1943a00f..000000000 --- a/script/get-ml-model-llama3/run-rclone.sh +++ /dev/null @@ -1,4 +0,0 @@ -cmd="rclone sync mlc-llama3-1:inference/${MLC_ML_MODEL_NAME} ${LLAMA3_CHECKPOINT_PATH}/${MLC_ML_MODEL_NAME} -P" -echo $cmd -eval $cmd -test $? -eq 0 || exit $? diff --git a/script/get-preprocessed-dataset-criteo/meta.yaml b/script/get-preprocessed-dataset-criteo/meta.yaml index b4219c7a6..7455121c2 100644 --- a/script/get-preprocessed-dataset-criteo/meta.yaml +++ b/script/get-preprocessed-dataset-criteo/meta.yaml @@ -142,12 +142,28 @@ variations: MLC_EXTRACT_FINAL_ENV_NAME: MLC_DATASET_PREPROCESSED_PATH MLC_EXTRACT_TO_FOLDER: criteo-preprocessed MLC_RCLONE_CONFIG_NAME: mlc-inference - MLC_RCLONE_URL: mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed + MLC_DOWNLOAD_URL: mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed extra_cache_tags: criteo,preprocessed,dataset force_cache: true names: - dae - tags: download-and-extract,_rclone,_url.mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - MLC_DOWNLOAD_URL + rclone: + group: download-tool + add_deps_recursive: + dae: + tags: _rclone + default: true + dry-run: + group: run-mode + env: + MLC_DOWNLOAD_MODE: dry + dry-run,rclone: + env: + MLC_DOWNLOAD_EXTRA_OPTIONS: --dry-run preprocess: group: src validation: From 1d85bd95422e6cfe4cc4c5a6b8a36712e5e76844 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 30 Mar 2025 23:06:50 +0000 Subject: [PATCH 2/3] [Automated Commit] Format Codebase [skip ci] --- script/download-file/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/download-file/customize.py b/script/download-file/customize.py index 3f5c8fe2e..bb97f5467 100644 --- a/script/download-file/customize.py +++ b/script/download-file/customize.py @@ -315,7 +315,7 @@ def postprocess(i): if env.get('MLC_DOWNLOAD_MODE') == "dry": return {'return': 0} - + filepath = env['MLC_DOWNLOAD_DOWNLOADED_PATH'] if not os.path.exists(filepath): From 862a1d46fb44adc896e408fcb0a2c2c7c790d228 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Mon, 31 Mar 2025 04:38:29 +0530 Subject: [PATCH 3/3] fix sentence --- script/get-dataset-waymo-calibration/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/get-dataset-waymo-calibration/customize.py b/script/get-dataset-waymo-calibration/customize.py index b181ce895..a831e4c3f 100644 --- a/script/get-dataset-waymo-calibration/customize.py +++ b/script/get-dataset-waymo-calibration/customize.py @@ -14,7 +14,7 @@ def preprocess(i): if env.get('MLC_DATASET_WAYMO_CALIBRATION_PATH', '') != '': if not os.path.exists(env['MLC_DATASET_WAYMO_CALIBRATION_PATH']): return { - 'return': 1, 'error': f"Path {env['MLC_DATASET_WAYMO_CALIBRATION_PATH']} does not exists!"} + 'return': 1, 'error': f"Path {env['MLC_DATASET_WAYMO_CALIBRATION_PATH']} does not exist!"} else: env['MLC_TMP_REQUIRE_DOWNLOAD'] = "yes"