diff --git a/scripts/generate_easystacks_for_existing_stack/json_to_easystacks.sh b/scripts/generate_easystacks_for_existing_stack/json_to_easystacks.sh
new file mode 100755
--- /dev/null
+++ b/scripts/generate_easystacks_for_existing_stack/json_to_easystacks.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+#
+# Split a JSON list of installed applications into EasyBuild easystack files.
+#
+# Usage: json_to_easystacks.sh <input.json>
+#
+# The input is a JSON array of objects, each providing at least the keys
+# 'easybuild', 'easyconfig' and 'easyblocks'. Entries are processed in input
+# order; a new easystack file (NNN-eb-<easybuild>.yml, written to the current
+# directory) is started whenever the 'easybuild' value differs from that of
+# the previous entry.
+
+input_file=$1
+
+prev_eb_version="0.0.0"
+#prev_toolchain="none"
+easystack_num=0
+
+while IFS= read -r app; do
+    # Turn every key/value pair of the JSON object into a shell variable.
+    # NOTE: values are shell-quoted by jq's @sh, but key names are passed to
+    # eval verbatim, so only feed this script JSON from a trusted source.
+    eval "$(jq -r 'to_entries | .[] | .key + "=" + (.value | @sh)' <<< "${app}")"
+
+    #if [[ ${prev_toolchain} != ${toolchain} ]] || [[ ${prev_eb_version} != ${easybuild} ]]; then
+    if [[ ${prev_eb_version} != "${easybuild}" ]]; then
+        easystack_num=$(( easystack_num + 1 ))
+        #prev_toolchain=${toolchain}
+        prev_eb_version=${easybuild}
+    fi
+
+    #easystack="$(printf '%03d' "${easystack_num}")-eb-${easybuild}-${toolchain}.yml"
+    easystack="$(printf '%03d' "${easystack_num}")-eb-${easybuild}.yml"
+    if [[ ! -f "${easystack}" ]]; then
+        echo "easyconfigs:" > "${easystack}"
+    fi
+    # The indentation inside these strings is significant: 'options' has to be
+    # nested below the easyconfig key for the file to be a valid easystack.
+    {
+        echo "  - ${easyconfig}:"
+        echo "      options:"
+        echo "        include-easyblocks: ${easyblocks}"
+    } >> "${easystack}"
+# ${input_file:+...} expands to nothing when no file argument was given, in
+# which case jq is run without a file argument.
+done < <(jq -c '.[]' ${input_file:+"${input_file}"})
diff --git a/scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh b/scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh
new file mode 100755
--- /dev/null
+++ b/scripts/generate_easystacks_for_existing_stack/json_to_easystacks_split_by_duration.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+#
+# Split a JSON list of installed applications into EasyBuild easystack files,
+# limiting the accumulated build duration per file.
+#
+# Usage: json_to_easystacks_split_by_duration.sh <input.json> [threshold_minutes]
+#
+# The input is a JSON array of objects, each providing at least the keys
+# 'easybuild', 'easyconfig', 'easyblocks' and 'build_duration_minutes'. A new
+# easystack file (NNN-eb-<easybuild>.yml, written to the current directory) is
+# started when the 'easybuild' value changes, or when adding the next entry
+# would push the accumulated duration of the current file over the threshold.
+# Every generated file gets a leading comment with its total build duration;
+# those comments and the overall total are printed to stdout at the end.
+
+input_file=$1
+duration_threshold=${2:-180}  # Default threshold to 180 minutes if not given
+
+prev_eb_version="0.0.0"
+easystack_num=0
+current_duration_sum=0
+total_duration_sum=0
+current_stack_name=""
+generated_stacks=()
+
+# Prepend the "total build duration" comment to the current easystack file.
+# Does nothing as long as no easystack has been started.
+finalize_current_stack() {
+    [[ -n ${current_stack_name} ]] || return 0
+    local stack_body
+    # Read the file into memory and rewrite it in place; this avoids going
+    # through a fixed scratch file name that could clobber an existing file.
+    stack_body=$(< "${current_stack_name}") || return 1
+    {
+        echo "# ${current_stack_name}: total build duration = ${current_duration_sum} minutes"
+        printf '%s\n' "${stack_body}"
+    } > "${current_stack_name}"
+}
+
+while IFS= read -r app; do
+    # Extract JSON keys to shell variables.
+    # NOTE: values are shell-quoted by jq's @sh, but key names are passed to
+    # eval verbatim, so only feed this script JSON from a trusted source.
+    eval "$(jq -r 'to_entries | .[] | .key + "=" + (.value | @sh)' <<< "${app}")"
+
+    # Check if we need to start a new easystack
+    if [[ ${prev_eb_version} != "${easybuild}" ]] \
+        || (( current_duration_sum + build_duration_minutes > duration_threshold )); then
+        finalize_current_stack
+        easystack_num=$(( easystack_num + 1 ))
+        prev_eb_version=${easybuild}
+        current_duration_sum=0
+        current_stack_name="$(printf '%03d' "${easystack_num}")-eb-${easybuild}.yml"
+        generated_stacks+=("${current_stack_name}")
+    fi
+
+    easystack="${current_stack_name}"
+    if [[ ! -f "${easystack}" ]]; then
+        echo "easyconfigs:" > "${easystack}"
+    fi
+
+    # The indentation inside these strings is significant: 'options' has to be
+    # nested below the easyconfig key for the file to be a valid easystack.
+    {
+        echo "  - ${easyconfig}:"
+        echo "      options:"
+        echo "        include-easyblocks: ${easyblocks}"
+    } >> "${easystack}"
+
+    current_duration_sum=$(( current_duration_sum + build_duration_minutes ))
+    total_duration_sum=$(( total_duration_sum + build_duration_minutes ))
+done < <(jq -c '.[]' "${input_file}")
+
+# Add the duration comment to the last easystack as well
+finalize_current_stack
+
+# Print the duration comment of every easystack generated by this run
+for file in "${generated_stacks[@]}"; do
+    head -n 1 "${file}"
+done
+
+# Print overall total
+echo "Overall total build duration = ${total_duration_sum} minutes"
diff --git a/scripts/generate_easystacks_for_existing_stack/stack_to_json.sh b/scripts/generate_easystacks_for_existing_stack/stack_to_json.sh
new file mode 100755
--- /dev/null
+++ b/scripts/generate_easystacks_for_existing_stack/stack_to_json.sh
@@ -0,0 +1,106 @@
+#!/bin/bash
+#
+# Describe every installation found in an existing software stack as JSON.
+#
+# Usage: stack_to_json.sh
+#
+# For each <name>/<version> directory below ${BASE_STACK}, the last EasyBuild
+# log is used to determine the build start time, the build duration and the
+# EasyBuild version of the build. The result is printed to stdout as a single
+# JSON array sorted by build start time; diagnostics are written to stderr.
+#
+# The environment variables DEBUG, BASE_STACK and EB_BOOTSTRAP can be set to
+# override the defaults below.
+
+DEBUG=${DEBUG:-0}
+BASE_STACK=${BASE_STACK:-/cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/intel/haswell/software}
+# NOTE(review): stack_to_json_parallel.sh defaults to a different bootstrap
+# version; confirm which one applies to the stack being processed.
+EB_BOOTSTRAP=${EB_BOOTSTRAP:-4.9.4}
+
+declare -A gcc_to_foss=( ["12.2.0"]="2022b" ["12.3.0"]="2023a" ["13.2.0"]="2023b" )
+
+if [[ ! -d ${BASE_STACK} ]]; then
+    echo "The given base stack (${BASE_STACK}) is not a directory." >&2
+    exit 1
+fi
+
+mapfile -t app_dirs < <(find "${BASE_STACK}" -mindepth 2 -maxdepth 2 -type d)
+
+json_entries=()
+for app_dir in "${app_dirs[@]}"; do
+    app_version=$(basename "${app_dir}")
+    app_name=$(basename "$(dirname "${app_dir}")")
+
+    if [[ ${app_name} == "EESSI-extend" ]]; then
+        # Skip EESSI-extend, as it will be installed automatically.
+        continue
+    fi
+
+    # The glob is not expanded in an assignment; the pattern itself is what
+    # ends up in the JSON output.
+    easyblocks="${app_dir}/easybuild/reprod/easyblocks/*.py"
+    easyconfig="${app_dir}/easybuild/${app_name}-${app_version}.eb"
+    if [[ ! -f ${easyconfig} ]]; then
+        echo "ERROR: cannot find easyconfig for ${app_name}/${app_version}" >&2
+    fi
+
+    # If rebuilds would not remove the original log file, we should take the build time from the first log.
+    # As we cannot guarantee that at the moment, we are cautious and use the last one.
+    log_files=( "${app_dir}"/easybuild/easybuild-"${app_name}"*.log* )
+    log_file=${log_files[-1]}
+    if [[ ! -f ${log_file} ]]; then
+        echo "ERROR: cannot find EasyBuild log for ${app_name}/${app_version}" >&2
+        continue
+    fi
+    build_time_start=$(bzcat "${log_file}" | head -n 1 | awk '{print $2 "T" $3}' | cut -d, -f1)
+    build_time_end=$(bzcat "${log_file}" | tail -n 1 | awk '{print $2 "T" $3}' | cut -d, -f1)
+    build_duration=$(( ($(date +%s -d "${build_time_end}") - $(date +%s -d "${build_time_start}")) / 60 ))
+
+    # Dots are escaped and every component may have several digits, so that
+    # versions such as 4.10.0 are recognised as well.
+    eb_version=$(bzgrep -oP 'This is EasyBuild \K([0-9]+\.[0-9]+\.[0-9]+)' "${log_file}" | head -n 1)
+    # Some EB versions have been installed with a temporary EB installation of the same version.
+    # If that's the case, use the version specified with ${EB_BOOTSTRAP} instead.
+    # This needs to correspond to the version that gets installed initially by EESSI-install-software.sh,
+    # which should be the latest EB version available when that script is being run.
+    if [[ ${app_name} == "EasyBuild" ]] && [[ ${app_version} == "${eb_version}" ]]; then
+        eb_version=${EB_BOOTSTRAP}
+    fi
+
+    if [[ ${app_version} != *-* ]]; then
+        toolchain=SYSTEM
+    elif [[ ${app_version} == *-GCC* ]]; then
+        gcc_ver=$(grep -oP '(GCC|GCCcore)-\K.*?(?=-|$)' <<< "${app_version}")
+        # An empty key is not a valid associative array subscript.
+        toolchain=""
+        if [[ -n ${gcc_ver} ]]; then
+            toolchain=${gcc_to_foss[${gcc_ver}]}
+        fi
+    else
+        toolchain=$(grep -oP '(foss|gfbf|gompi)-\K.*?(?=-|$)' <<< "${app_version}")
+    fi
+
+    json=$(
+        jq --null-input \
+            --arg build_time "${build_time_start}" \
+            --arg build_duration_minutes "${build_duration}" \
+            --arg name "${app_name}" \
+            --arg version "${app_version}" \
+            --arg easybuild "${eb_version}" \
+            --arg toolchain "${toolchain}" \
+            --arg easyconfig "${easyconfig}" \
+            --arg easyblocks "${easyblocks}" \
+            '$ARGS.named' # requires jq 1.7 or newer
+    )
+    json_entries+=("${json}")
+
+    if [[ ${DEBUG} -ne 0 ]]; then
+        echo "${build_time_start} ${app_name} ${app_version} ${eb_version} ${toolchain} ${easyconfig} ${easyblocks}" >&2
+    fi
+done
+
+if [[ ${DEBUG} -ne 0 ]]; then
+    printf '%s\n' "${json_entries[@]}" >&2
+fi
+printf '%s\n' "${json_entries[@]}" | jq -s 'sort_by(.build_time)'
diff --git a/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh b/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh
new file mode 100755
--- /dev/null
+++ b/scripts/generate_easystacks_for_existing_stack/stack_to_json_parallel.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+#
+# Parallel variant of stack_to_json.sh: describe every installation found in
+# an existing software stack as JSON, inspecting several build logs at once.
+#
+# Usage: stack_to_json_parallel.sh [MAX_JOBS]
+#
+# For each <name>/<version> directory below ${BASE_STACK}, a background job
+# writes one JSON object to a scratch directory. When all jobs are done, the
+# objects are combined into a single JSON array sorted by build start time and
+# printed to stdout; diagnostics are written to stderr.
+#
+# The environment variables DEBUG, BASE_STACK and EB_BOOTSTRAP can be set to
+# override the defaults below.
+
+MAX_JOBS=${1:-4}  # Default to 4 concurrent jobs if not specified
+DEBUG=${DEBUG:-0}
+BASE_STACK=${BASE_STACK:-/cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/intel/haswell/software}
+# NOTE(review): stack_to_json.sh defaults to a different bootstrap version;
+# confirm which one applies to the stack being processed.
+EB_BOOTSTRAP=${EB_BOOTSTRAP:-5.0.0}
+
+declare -A gcc_to_foss=( ["12.2.0"]="2022b" ["12.3.0"]="2023a" ["13.2.0"]="2023b" )
+
+if ! [[ ${MAX_JOBS} =~ ^[1-9][0-9]*$ ]]; then
+    echo "MAX_JOBS must be a positive integer (got '${MAX_JOBS}')." >&2
+    exit 1
+fi
+
+if [[ ! -d ${BASE_STACK} ]]; then
+    echo "The given base stack (${BASE_STACK}) is not a directory." >&2
+    exit 1
+fi
+
+# Scratch directory for the per-application JSON files. The EXIT trap removes
+# it on every exit path of the main shell; subshells do not inherit the trap.
+results_dir=$(mktemp -d) || exit 1
+trap 'rm -rf -- "${results_dir}"' EXIT
+
+mapfile -t app_dirs < <(find "${BASE_STACK}" -mindepth 2 -maxdepth 2 -type d)
+
+# Job limiter: call right before starting a background job. Once MAX_JOBS jobs
+# have been started, it blocks until one of them finishes, so that at most
+# MAX_JOBS jobs run at the same time.
+job_count=0
+run_limited() {
+    if (( job_count >= MAX_JOBS )); then
+        wait -n  # wait for one job to finish
+        job_count=$(( job_count - 1 ))
+    fi
+    job_count=$(( job_count + 1 ))
+}
+
+for app_dir in "${app_dirs[@]}"; do
+    run_limited
+    (
+        app_version=$(basename "${app_dir}")
+        app_name=$(basename "$(dirname "${app_dir}")")
+
+        if [[ ${app_name} == "EESSI-extend" ]]; then
+            # Skip EESSI-extend, as it will be installed automatically.
+            exit 0
+        fi
+
+        # The glob is not expanded in an assignment; the pattern itself is
+        # what ends up in the JSON output.
+        easyblocks="${app_dir}/easybuild/reprod/easyblocks/*.py"
+        easyconfig="${app_dir}/easybuild/${app_name}-${app_version}.eb"
+
+        if [[ ! -f ${easyconfig} ]]; then
+            echo "ERROR: cannot find easyconfig for ${app_name}/${app_version}" >&2
+            exit 1
+        fi
+
+        # Use the last log (in glob order) of this installation.
+        log_files=( "${app_dir}"/easybuild/easybuild-"${app_name}"*.log* )
+        log_file=${log_files[-1]}
+        if [[ ! -f ${log_file} ]]; then
+            echo "ERROR: cannot find EasyBuild log for ${app_name}/${app_version}" >&2
+            exit 1
+        fi
+        build_time_start=$(bzcat "${log_file}" | head -n 1 | awk '{print $2 "T" $3}' | cut -d, -f1)
+        build_time_end=$(bzcat "${log_file}" | tail -n 1 | awk '{print $2 "T" $3}' | cut -d, -f1)
+        build_duration=$(( ($(date +%s -d "${build_time_end}") - $(date +%s -d "${build_time_start}")) / 60 ))
+
+        eb_version=$(bzgrep -oP 'This is EasyBuild \K([0-9]+\.[0-9]+\.[0-9]+)' "${log_file}" | head -n 1)
+        # Some EB versions have been installed with a temporary EB installation of the same version.
+        # If that's the case, use the version specified with ${EB_BOOTSTRAP} instead.
+        if [[ ${app_name} == "EasyBuild" ]] && [[ ${app_version} == "${eb_version}" ]]; then
+            eb_version=${EB_BOOTSTRAP}
+        fi
+
+        if [[ ${app_version} != *-* ]]; then
+            toolchain="SYSTEM"
+        elif [[ ${app_version} == *-GCC* ]]; then
+            gcc_ver=$(grep -oP '(GCC|GCCcore)-\K.*?(?=-|$)' <<< "${app_version}")
+            # An empty key is not a valid associative array subscript.
+            toolchain=""
+            if [[ -n ${gcc_ver} ]]; then
+                toolchain=${gcc_to_foss[${gcc_ver}]}
+            fi
+        else
+            toolchain=$(grep -oP '(foss|gfbf|gompi)-\K.*?(?=-|$)' <<< "${app_version}")
+        fi
+
+        jq --null-input \
+            --arg build_time "${build_time_start}" \
+            --arg build_duration_minutes "${build_duration}" \
+            --arg name "${app_name}" \
+            --arg version "${app_version}" \
+            --arg easybuild "${eb_version}" \
+            --arg toolchain "${toolchain}" \
+            --arg easyconfig "${easyconfig}" \
+            --arg easyblocks "${easyblocks}" \
+            '$ARGS.named' > "${results_dir}/${app_name}_${app_version}.json"
+
+        if [[ ${DEBUG} -ne 0 ]]; then
+            echo "Processed ${app_name}/${app_version}" >&2
+        fi
+    ) &
+done
+
+wait
+
+# Combine all results and sort by build time
+results=( "${results_dir}"/*.json )
+if [[ -e ${results[0]} ]]; then
+    jq -s 'sort_by(.build_time)' "${results[@]}"
+else
+    # No application produced a result: still print valid (empty) JSON.
+    echo "[]"
+fi