From adde35fcc2509022776ca338c1f767d6e5ccfd4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bob=20Dr=C3=B6ge?= Date: Tue, 22 Apr 2025 11:36:43 +0200 Subject: [PATCH 1/9] initial version/draft of scripts for generating easystacks based on an existing stack --- .../json_to_easystacks.sh | 28 +++++++ .../stack_to_json.sh | 84 +++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100755 scripts/generate_easystacks_for_existing_stack/json_to_easystacks.sh create mode 100755 scripts/generate_easystacks_for_existing_stack/stack_to_json.sh diff --git a/scripts/generate_easystacks_for_existing_stack/json_to_easystacks.sh b/scripts/generate_easystacks_for_existing_stack/json_to_easystacks.sh new file mode 100755 index 0000000000..31a524e5a8 --- /dev/null +++ b/scripts/generate_easystacks_for_existing_stack/json_to_easystacks.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +input_file=$1 + +prev_eb_version="0.0.0" +#prev_toolchain="none" +easystack_num=0 + +while read app +do + eval $(echo $app | jq -r '. | to_entries | .[] | .key + "=" + (.value | @sh)') + + #if [[ ${prev_toolchain} != ${toolchain} ]] || [[ ${prev_eb_version} != ${easybuild} ]]; then + if [[ ${prev_eb_version} != ${easybuild} ]]; then + easystack_num=$(( easystack_num + 1)) + #prev_toolchain=${toolchain} + prev_eb_version=${easybuild} + fi + + #easystack="$(printf '%03d\n' ${easystack_num})-eb-${easybuild}-${toolchain}.yml" + easystack="$(printf '%03d\n' ${easystack_num})-eb-${easybuild}.yml" + if [ ! 
-f "${easystack}" ]; then + echo "easyconfigs:" > ${easystack} + fi + echo " - ${easyconfig}:" >> ${easystack} + echo " options:" >> ${easystack} + echo " include-easyblocks: ${easyblocks}" >> ${easystack} +done < <(jq -c '.[]' ${input_file}) diff --git a/scripts/generate_easystacks_for_existing_stack/stack_to_json.sh b/scripts/generate_easystacks_for_existing_stack/stack_to_json.sh new file mode 100755 index 0000000000..9daa2dba77 --- /dev/null +++ b/scripts/generate_easystacks_for_existing_stack/stack_to_json.sh @@ -0,0 +1,84 @@ +#!/bin/bash + +DEBUG=0 +BASE_STACK=/cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/intel/haswell/software +EB_BOOTSTRAP=4.9.4 + +declare -A gcc_to_foss=( ["12.2.0"]="2022b" ["12.3.0"]="2023a" ["13.2.0"]="2023b" ) + +if [[ ! -d ${BASE_STACK} ]]; then + echo "The given base stack (${BASE_STACK}) is not a directory." + exit 1 +fi + +apps=$(find ${BASE_STACK} -mindepth 2 -maxdepth 2 -type d | head -n 3) + +json_output="[" +for app_dir in $apps; do + app_version=$(basename ${app_dir}) + app_name=$(basename $(dirname ${app_dir})) + + if [[ ${app_name} == "EESSI-extend" ]]; then + # Skip EESSI-extend, as it will be installed automatically. + continue + fi + + easyblocks=${app_dir}/easybuild/reprod/easyblocks/*.py + easyconfig=${app_dir}/easybuild/reprod/${app_name}-${app_version}.eb + if [[ ! -f ${easyconfig} ]]; then + echo "ERROR: cannot find easyconfig for ${app_name}/${app_version}" + fi + # If rebuilds would not remove the original log file, we should take the build time from the first log. + # As we cannot guarantee that at the moment, we are cautious and use the last one. 
+ log_file=$(ls -1 ${app_dir}/easybuild/easybuild-${app_name}*.log* | tail -n 1) + build_time_start=$(bzcat ${log_file} | head -n 1 | awk '{print $2 "T" $3}' | cut -d, -f1) + build_time_end=$(bzcat ${log_file} | tail -n 1 | awk '{print $2 "T" $3}' | cut -d, -f1) + #build_time_unix=$( date +%s -d ${build_time}) + build_duration=$(( ($(date +%s -d ${build_time_end}) - $(date +%s -d ${build_time_start}))/60 )) + + eb_version=$(bzgrep -oP "This is EasyBuild \K([0-9].[0-9].[0-9])" ${log_file} | head -n 1) + # Some EB versions have been installed with a temporary EB installation of the same version. + # If that's the case, use the version specified with ${EB_BOOTSTRAP} instead. + # This needs to correspond to the version that gets installed initially by EESSI-install-software.sh, + # which should be the latest EB version available when that script is being run. + if [[ ${app_name} == "EasyBuild" ]] && [[ ${app_version} == ${eb_version} ]]; then + eb_version=${EB_BOOTSTRAP} + fi + + if [[ ${app_version} != *-* ]]; then + toolchain=SYSTEM + else + if [[ ${app_version} == *-GCC* ]]; then + gcc_ver=$(echo ${app_version} | grep -oP "(GCC|GCCcore)-\K.*?(?=-|$)") + toolchain=${gcc_to_foss[$gcc_ver]} + else + toolchain=$(echo ${app_version} | grep -oP "(foss|gfbf|gompi)-\K.*?(?=-|$)") + fi + fi + + json=$( + jq --null-input \ + --arg build_time "${build_time_start}" \ + --arg build_duration_minutes "${build_duration}" \ + --arg name "${app_name}" \ + --arg version "${app_version}" \ + --arg easybuild "${eb_version}" \ + --arg toolchain "${toolchain}" \ + --arg easyconfig "${easyconfig}" \ + --arg easyblocks "${easyblocks}" \ + '$ARGS.named' # requires jq 1.7 or newer + #'{build_time: $build_time, build_duration_minutes: $build_duration, name: $name, version: $version, easybuild: $easybuild, + # toolchain: $toolchain, easyconfig: $easyconfig, easyblocks: $easyblocks}' + ) + + if [[ ${json_output} == "[" ]]; then + json_output="${json_output}${json}" + else + 
+ json_output="${json_output},${json}" + fi + [[ ${DEBUG} -ne 0 ]] && echo ${build_time_unix} ${app_name} ${app_version} ${eb_version} ${toolchain} ${easyconfig} ${easyblocks} +done #| sort -nu +json_output="${json_output}]" + +[[ ${DEBUG} -ne 0 ]] && echo ${json_output} +echo ${json_output} | jq 'sort_by(.build_time)' From ef7f671b4860eecbba6ca0bd08c8bfd72a975d02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bob=20Dr=C3=B6ge?= Date: Tue, 22 Apr 2025 11:38:03 +0200 Subject: [PATCH 2/9] remove head -n 3 command --- scripts/generate_easystacks_for_existing_stack/stack_to_json.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_easystacks_for_existing_stack/stack_to_json.sh b/scripts/generate_easystacks_for_existing_stack/stack_to_json.sh index 9daa2dba77..ffb9c18dde 100755 --- a/scripts/generate_easystacks_for_existing_stack/stack_to_json.sh +++ b/scripts/generate_easystacks_for_existing_stack/stack_to_json.sh @@ -11,7 +11,7 @@ if [[ ! -d ${BASE_STACK} ]]; then exit 1 fi -apps=$(find ${BASE_STACK} -mindepth 2 -maxdepth 2 -type d | head -n 3) +apps=$(find ${BASE_STACK} -mindepth 2 -maxdepth 2 -type d) json_output="[" for app_dir in $apps; do From c5b1aa004473ba0943f85597fb26e98f786d8221 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 22 Apr 2025 17:32:46 +0200 Subject: [PATCH 3/9] Added parallel version of stack_to_json, and added script to also support splitting by duration --- .../json_to_easystacks_split_by_duration.sh | 47 ++++++++++ .../stack_to_json_parallel.sh | 90 +++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100755 scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh create mode 100755 scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh diff --git a/scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh b/scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh new file mode 100755 
index 0000000000..124b11e2d0 --- /dev/null +++ b/scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +input_file=$1 +duration_threshold=${2:-180} # Default threshold to 180 minutes if not given + +prev_eb_version="0.0.0" +easystack_num=0 +current_duration_sum=0 +total_duration_sum=0 +current_stack_name="" + +while read app; do + # Extract JSON keys to shell variables + eval $(echo $app | jq -r '. | to_entries | .[] | .key + "=" + (.value | @sh)') + + # Check if we need to start a new easystack + if [[ ${prev_eb_version} != ${easybuild} ]] || (( current_duration_sum + build_duration_minutes > duration_threshold )); then + if [[ ${current_stack_name} != "" ]]; then + echo "${current_stack_name}: total build duration = ${current_duration_sum} minutes" + fi + easystack_num=$(( easystack_num + 1 )) + prev_eb_version=${easybuild} + current_duration_sum=0 + current_stack_name="$(printf '%03d\n' ${easystack_num})-eb-${easybuild}.yml" + fi + + easystack="${current_stack_name}" + if [[ ! 
-f "${easystack}" ]]; then + echo "easyconfigs:" > "${easystack}" + fi + + echo " - ${easyconfig}:" >> "${easystack}" + echo " options:" >> "${easystack}" + echo " include-easyblocks: ${easyblocks}" >> "${easystack}" + + current_duration_sum=$(( current_duration_sum + build_duration_minutes )) + total_duration_sum=$(( total_duration_sum + build_duration_minutes )) + +done < <(jq -c '.[]' "${input_file}") + +# Print final stack duration +if [[ ${current_stack_name} != "" ]]; then + echo "${current_stack_name}: total build duration = ${current_duration_sum} minutes" +fi + +# Print overall total +echo "Overall total build duration = ${total_duration_sum} minutes" diff --git a/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh b/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh new file mode 100755 index 0000000000..dd7e62b6f3 --- /dev/null +++ b/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh @@ -0,0 +1,90 @@ +#!/bin/bash + +# Usage: ./script.sh [MAX_JOBS] +MAX_JOBS=${1:-4} # Default to 4 concurrent jobs if not specified +DEBUG=0 +BASE_STACK=/cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/intel/haswell/software +EB_BOOTSTRAP=4.9.4 +TMPDIR=$(mktemp -d) + +declare -A gcc_to_foss=( ["12.2.0"]="2022b" ["12.3.0"]="2023a" ["13.2.0"]="2023b" ) + +if [[ ! -d ${BASE_STACK} ]]; then + echo "The given base stack (${BASE_STACK}) is not a directory." 
+ exit 1 +fi + +apps=$(find ${BASE_STACK} -mindepth 2 -maxdepth 2 -type d) + +# Job limiter +job_count=0 +run_limited() { + ((job_count++)) + if (( job_count >= MAX_JOBS )); then + wait -n # wait for one job to finish + ((job_count--)) + fi +} + +for app_dir in $apps; do +run_limited +( + app_version=$(basename "${app_dir}") + app_name=$(basename "$(dirname "${app_dir}")") + + if [[ ${app_name} == "EESSI-extend" ]]; then + exit 0 + fi + + easyblocks=${app_dir}/easybuild/reprod/easyblocks/*.py + easyconfig=${app_dir}/easybuild/reprod/${app_name}-${app_version}.eb + + if [[ ! -f ${easyconfig} ]]; then + echo "ERROR: cannot find easyconfig for ${app_name}/${app_version}" >&2 + exit 1 + fi + + log_file=$(ls -1 ${app_dir}/easybuild/easybuild-${app_name}*.log* 2>/dev/null | tail -n 1) + build_time_start=$(bzcat "${log_file}" | head -n 1 | awk '{print $2 "T" $3}' | cut -d, -f1) + build_time_end=$(bzcat "${log_file}" | tail -n 1 | awk '{print $2 "T" $3}' | cut -d, -f1) + build_duration=$(( ($(date +%s -d "${build_time_end}") - $(date +%s -d "${build_time_start}")) / 60 )) + + eb_version=$(bzgrep -oP "This is EasyBuild \K([0-9]+\.[0-9]+\.[0-9]+)" "${log_file}" | head -n 1) + if [[ ${app_name} == "EasyBuild" ]] && [[ ${app_version} == ${eb_version} ]]; then + eb_version=${EB_BOOTSTRAP} + fi + + if [[ ${app_version} != *-* ]]; then + toolchain="SYSTEM" + else + if [[ ${app_version} == *-GCC* ]]; then + gcc_ver=$(echo ${app_version} | grep -oP "(GCC|GCCcore)-\K.*?(?=-|$)") + toolchain=${gcc_to_foss[$gcc_ver]} + else + toolchain=$(echo ${app_version} | grep -oP "(foss|gfbf|gompi)-\K.*?(?=-|$)") + fi + fi + + jq --null-input \ + --arg build_time "${build_time_start}" \ + --arg build_duration_minutes "${build_duration}" \ + --arg name "${app_name}" \ + --arg version "${app_version}" \ + --arg easybuild "${eb_version}" \ + --arg toolchain "${toolchain}" \ + --arg easyconfig "${easyconfig}" \ + --arg easyblocks "${easyblocks}" \ + '$ARGS.named' > 
"${TMPDIR}/${app_name}_${app_version}.json" + + [[ ${DEBUG} -ne 0 ]] && echo "Processed ${app_name}/${app_version}" >&2 +) & +done + +wait + +# Combine all results and sort by build time +jq -s 'sort_by(.build_time)' "${TMPDIR}"/*.json + +# Optional cleanup +rm -r "${TMPDIR}" + From 84817f0b10e7722f30958910f03c1fdab471e14d Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 22 Apr 2025 17:39:56 +0200 Subject: [PATCH 4/9] I don't believe this bootstrap is needed, we should be able to install e.g. 4.8.2 with 4.8.2. Let's just see if that works --- .../stack_to_json_parallel.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh b/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh index dd7e62b6f3..d4797767f0 100755 --- a/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh +++ b/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh @@ -50,9 +50,9 @@ run_limited build_duration=$(( ($(date +%s -d "${build_time_end}") - $(date +%s -d "${build_time_start}")) / 60 )) eb_version=$(bzgrep -oP "This is EasyBuild \K([0-9]+\.[0-9]+\.[0-9]+)" "${log_file}" | head -n 1) - if [[ ${app_name} == "EasyBuild" ]] && [[ ${app_version} == ${eb_version} ]]; then - eb_version=${EB_BOOTSTRAP} - fi +# if [[ ${app_name} == "EasyBuild" ]] && [[ ${app_version} == ${eb_version} ]]; then +# eb_version=${EB_BOOTSTRAP} +# fi if [[ ${app_version} != *-* ]]; then toolchain="SYSTEM" From cebdf182425ae34c4f43895ab24a0dd281d99da6 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 22 Apr 2025 21:55:47 +0200 Subject: [PATCH 5/9] Updated EB bootstrap version and reactivate using the bootstrap --- .../stack_to_json_parallel.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh 
b/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh index d4797767f0..8415238a5c 100755 --- a/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh +++ b/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh @@ -4,7 +4,7 @@ MAX_JOBS=${1:-4} # Default to 4 concurrent jobs if not specified DEBUG=0 BASE_STACK=/cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/intel/haswell/software -EB_BOOTSTRAP=4.9.4 +EB_BOOTSTRAP=5.0.0 TMPDIR=$(mktemp -d) declare -A gcc_to_foss=( ["12.2.0"]="2022b" ["12.3.0"]="2023a" ["13.2.0"]="2023b" ) @@ -50,9 +50,9 @@ run_limited build_duration=$(( ($(date +%s -d "${build_time_end}") - $(date +%s -d "${build_time_start}")) / 60 )) eb_version=$(bzgrep -oP "This is EasyBuild \K([0-9]+\.[0-9]+\.[0-9]+)" "${log_file}" | head -n 1) -# if [[ ${app_name} == "EasyBuild" ]] && [[ ${app_version} == ${eb_version} ]]; then -# eb_version=${EB_BOOTSTRAP} -# fi + if [[ ${app_name} == "EasyBuild" ]] && [[ ${app_version} == ${eb_version} ]]; then + eb_version=${EB_BOOTSTRAP} + fi if [[ ${app_version} != *-* ]]; then toolchain="SYSTEM" From 7cc477f8d1163fbb0d6dd9fd503b49440e5be171 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Thu, 24 Apr 2025 23:55:55 +0200 Subject: [PATCH 6/9] Make sure the easystack duration gets prepended to the easystack file as a comment --- .../json_to_easystacks_split_by_duration.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh b/scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh index 124b11e2d0..07aec0cbcb 100755 --- a/scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh +++ b/scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh @@ -16,7 +16,7 @@ while read app; do # Check if we need to start a new easystack if [[ ${prev_eb_version} != 
${easybuild} ]] || (( current_duration_sum + build_duration_minutes > duration_threshold )); then if [[ ${current_stack_name} != "" ]]; then - echo "${current_stack_name}: total build duration = ${current_duration_sum} minutes" + { echo "# ${current_stack_name}: total build duration = ${current_duration_sum} minutes"; cat "${easystack}"; } > temp && mv temp "${easystack}" fi easystack_num=$(( easystack_num + 1 )) prev_eb_version=${easybuild} @@ -40,7 +40,7 @@ done < <(jq -c '.[]' "${input_file}") # Print final stack duration if [[ ${current_stack_name} != "" ]]; then - echo "${current_stack_name}: total build duration = ${current_duration_sum} minutes" + { echo "# ${current_stack_name}: total build duration = ${current_duration_sum} minutes"; cat "${easystack}"; } > temp && mv temp "${easystack}" fi # Print overall total From 56ae512f8cc55e38d987600850e6e858e3467f15 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Fri, 25 Apr 2025 00:05:16 +0200 Subject: [PATCH 7/9] Print the durations to stdout at the end of the script --- .../json_to_easystacks_split_by_duration.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh b/scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh index 07aec0cbcb..42ae5f8b6a 100755 --- a/scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh +++ b/scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh @@ -43,5 +43,9 @@ if [[ ${current_stack_name} != "" ]]; then { echo "# ${current_stack_name}: total build duration = ${current_duration_sum} minutes"; cat "${easystack}"; } > temp && mv temp "${easystack}" fi +for file in *.yml; do + cat "$file" | head -n 1 +done + # Print overall total echo "Overall total build duration = ${total_duration_sum} minutes" From a3c0cdf3efd87ca41ea23de3153de15f3452323d Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Bob=20Dr=C3=B6ge?= Date: Fri, 25 Apr 2025 14:41:34 +0200 Subject: [PATCH 8/9] don't use easyconfig from `reprod` --- .../stack_to_json_parallel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh b/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh index 8415238a5c..813a4941af 100755 --- a/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh +++ b/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh @@ -37,7 +37,7 @@ run_limited fi easyblocks=${app_dir}/easybuild/reprod/easyblocks/*.py - easyconfig=${app_dir}/easybuild/reprod/${app_name}-${app_version}.eb + easyconfig=${app_dir}/easybuild/${app_name}-${app_version}.eb if [[ ! -f ${easyconfig} ]]; then echo "ERROR: cannot find easyconfig for ${app_name}/${app_version}" >&2 From c08a4728ec33d41c32c6e4d7b7361cae48e89677 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bob=20Dr=C3=B6ge?= Date: Fri, 25 Apr 2025 14:41:42 +0200 Subject: [PATCH 9/9] don't use easyconfig from `reprod` --- scripts/generate_easystacks_for_existing_stack/stack_to_json.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_easystacks_for_existing_stack/stack_to_json.sh b/scripts/generate_easystacks_for_existing_stack/stack_to_json.sh index ffb9c18dde..89b5aa8c96 100755 --- a/scripts/generate_easystacks_for_existing_stack/stack_to_json.sh +++ b/scripts/generate_easystacks_for_existing_stack/stack_to_json.sh @@ -24,7 +24,7 @@ for app_dir in $apps; do fi easyblocks=${app_dir}/easybuild/reprod/easyblocks/*.py - easyconfig=${app_dir}/easybuild/reprod/${app_name}-${app_version}.eb + easyconfig=${app_dir}/easybuild/${app_name}-${app_version}.eb if [[ ! -f ${easyconfig} ]]; then echo "ERROR: cannot find easyconfig for ${app_name}/${app_version}" fi