Skip to content

[WIP] Scripts for generating easystacks based on an existing stack #1035

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
#
# Split the applications listed in a JSON file into numbered easystack files.
#
# Usage: <this script> <input.json>
#
# The input must be a JSON array of objects that provide at least the keys
# "easybuild", "easyconfig" and "easyblocks". Entries are processed in array
# order. Whenever the "easybuild" value differs from the one of the previous
# entry, a new file named <NNN>-eb-<easybuild>.yml is started in the current
# working directory. A file of that name left over from an earlier run is
# overwritten rather than appended to.

input_file=$1

if [[ -z "${input_file}" || ! -r "${input_file}" ]]; then
  echo "Usage: $0 <input.json>" >&2
  exit 1
fi
if ! command -v jq > /dev/null; then
  echo "ERROR: jq is required but was not found in PATH." >&2
  exit 1
fi
if ! jq -e 'type == "array"' "${input_file}" > /dev/null; then
  echo "ERROR: ${input_file} does not contain a JSON array." >&2
  exit 1
fi

prev_eb_version="0.0.0"
easystack_num=0
# Name of the easystack that is currently being filled; used to decide when a
# fresh file (with its "easyconfigs:" header) has to be started.
current_easystack=""

# NOTE: an additional split on toolchain changes only existed as commented-out
# code; the EasyBuild version is the sole criterion for starting a new file.

# Only the three fields that are needed are extracted, separated by the ASCII
# unit separator (0x1f). Unlike eval'ing generated assignments, this cannot
# overwrite arbitrary shell variables, and a non-whitespace separator keeps
# empty fields in place.
while IFS=$'\x1f' read -r easybuild easyconfig easyblocks; do
  if [[ "${prev_eb_version}" != "${easybuild}" ]]; then
    easystack_num=$(( easystack_num + 1 ))
    prev_eb_version=${easybuild}
  fi

  printf -v easystack '%03d-eb-%s.yml' "${easystack_num}" "${easybuild}"
  if [[ "${easystack}" != "${current_easystack}" ]]; then
    # The name changes only when a new stack starts, so truncating is safe.
    echo "easyconfigs:" > "${easystack}"
    current_easystack=${easystack}
  fi

  # "options" has to be nested below the easyconfig entry, and the individual
  # option below "options", for the file to be a valid easystack.
  {
    echo "  - ${easyconfig}:"
    echo "      options:"
    echo "        include-easyblocks: ${easyblocks}"
  } >> "${easystack}"
done < <(jq -r '.[] | [.easybuild, .easyconfig, .easyblocks] | join("\u001f")' "${input_file}")
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash
#
# Split the applications listed in a JSON file into numbered easystack files,
# limiting the accumulated build time per file.
#
# Usage: <this script> <input.json> [duration_threshold_minutes]
#
# The input must be a JSON array of objects that provide at least the keys
# "easybuild", "easyconfig", "easyblocks" and "build_duration_minutes".
# Entries are processed in array order. A new easystack
# (<NNN>-eb-<easybuild>.yml in the current working directory) is started when
# the "easybuild" value changes, or when adding the entry would push the
# accumulated duration of the current easystack over the threshold.
#
# Every generated file starts with a comment line stating its total build
# duration. Those lines, followed by the overall total, are printed to stdout.

input_file=$1
duration_threshold=${2:-180}  # Default threshold to 180 minutes if not given

if [[ -z "${input_file}" || ! -r "${input_file}" ]]; then
  echo "Usage: $0 <input.json> [duration_threshold_minutes]" >&2
  exit 1
fi
if [[ ! "${duration_threshold}" =~ ^[0-9]+$ ]]; then
  echo "ERROR: the duration threshold must be a non-negative integer (got '${duration_threshold}')." >&2
  exit 1
fi
if ! command -v jq > /dev/null; then
  echo "ERROR: jq is required but was not found in PATH." >&2
  exit 1
fi
if ! jq -e 'type == "array"' "${input_file}" > /dev/null; then
  echo "ERROR: ${input_file} does not contain a JSON array." >&2
  exit 1
fi

prev_eb_version=""
easystack_num=0
current_duration_sum=0
total_duration_sum=0
current_stack_name=""   # empty until the first easystack has been started
current_entries=""      # buffered YAML entries of the current easystack
stack_summaries=()      # one summary comment per generated easystack

# Write the buffered easystack to disk, preceded by its duration summary.
# Buffering avoids rewriting the file through a temporary file just to put
# the summary (only known once the stack is complete) on the first line.
# Globals: current_stack_name, current_duration_sum, current_entries (read),
#          stack_summaries (appended to)
flush_stack() {
  [[ -n "${current_stack_name}" ]] || return 0
  local summary="# ${current_stack_name}: total build duration = ${current_duration_sum} minutes"
  {
    printf '%s\n' "${summary}"
    printf 'easyconfigs:\n'
    printf '%s' "${current_entries}"
  } > "${current_stack_name}"
  stack_summaries+=("${summary}")
}

# Only the fields that are needed are extracted, separated by the ASCII unit
# separator (0x1f). Unlike eval'ing generated assignments, this cannot
# overwrite the bookkeeping variables above.
while IFS=$'\x1f' read -r easybuild easyconfig easyblocks build_duration_minutes; do
  if [[ ! "${build_duration_minutes}" =~ ^-?[0-9]+$ ]]; then
    echo "WARNING: no usable build duration for ${easyconfig}, assuming 0 minutes." >&2
    build_duration_minutes=0
  fi

  # Check if we need to start a new easystack. The first entry always starts
  # one, whatever its EasyBuild version is.
  if [[ -z "${current_stack_name}" || "${prev_eb_version}" != "${easybuild}" ]] \
      || (( current_duration_sum + build_duration_minutes > duration_threshold )); then
    flush_stack
    easystack_num=$(( easystack_num + 1 ))
    prev_eb_version=${easybuild}
    current_duration_sum=0
    current_entries=""
    printf -v current_stack_name '%03d-eb-%s.yml' "${easystack_num}" "${easybuild}"
  fi

  # "options" has to be nested below the easyconfig entry, and the individual
  # option below "options", for the file to be a valid easystack.
  current_entries+="  - ${easyconfig}:"$'\n'
  current_entries+="      options:"$'\n'
  current_entries+="        include-easyblocks: ${easyblocks}"$'\n'

  current_duration_sum=$(( current_duration_sum + build_duration_minutes ))
  total_duration_sum=$(( total_duration_sum + build_duration_minutes ))
done < <(jq -r '.[] | [.easybuild, .easyconfig, .easyblocks, .build_duration_minutes] | join("\u001f")' "${input_file}")

# Write the last easystack, which no later entry has closed.
flush_stack

# Print the per-stack durations of the files generated by this run only;
# unrelated *.yml files in the working directory are not touched or listed.
if (( ${#stack_summaries[@]} > 0 )); then
  printf '%s\n' "${stack_summaries[@]}"
fi

# Print overall total
echo "Overall total build duration = ${total_duration_sum} minutes"
84 changes: 84 additions & 0 deletions scripts/generate_easystacks_for_existing_stack/stack_to_json.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/bin/bash
#
# Describe every installation found in an existing software stack as JSON.
#
# For each <name>/<version> directory below BASE_STACK the script records the
# build start time and build duration (taken from the EasyBuild log), the
# EasyBuild version that did the build, a toolchain label, and the paths of
# the easyconfig and the reproducibility easyblocks. The result is printed to
# stdout as one JSON array sorted by build time. All diagnostics go to stderr
# so that stdout only ever carries JSON.
#
# BASE_STACK and EB_BOOTSTRAP can be overridden through the environment.

DEBUG=0
BASE_STACK=${BASE_STACK:-/cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/intel/haswell/software}
EB_BOOTSTRAP=${EB_BOOTSTRAP:-4.9.4}

# Maps the GCC version found in an installation's version string to a foss
# toolchain generation.
declare -A gcc_to_foss=( ["12.2.0"]="2022b" ["12.3.0"]="2023a" ["13.2.0"]="2023b" )

if [[ ! -d "${BASE_STACK}" ]]; then
  echo "The given base stack (${BASE_STACK}) is not a directory." >&2
  exit 1
fi

# Print a message to stderr when DEBUG is non-zero.
debug() {
  if [[ ${DEBUG} -ne 0 ]]; then
    printf '%s\n' "$*" >&2
  fi
}

# Derive a toolchain label from an installation's version string.
# Arguments: $1 - version string (the name of the installation directory)
# Outputs:   "SYSTEM" when the version has no dash; the foss generation that
#            matches a GCC/GCCcore version; otherwise the version following
#            foss-/gfbf-/gompi-. Prints an empty line if nothing matches.
detect_toolchain() {
  local version=$1
  local gcc_re='(GCCcore|GCC)-([^-]*)'
  local foss_re='(foss|gfbf|gompi)-([^-]*)'
  local label=""

  if [[ "${version}" != *-* ]]; then
    label="SYSTEM"
  elif [[ "${version}" == *-GCC* ]]; then
    # An empty key is not a valid associative array subscript, so only look
    # up versions that were actually found.
    if [[ "${version}" =~ ${gcc_re} && -n "${BASH_REMATCH[2]}" ]]; then
      label=${gcc_to_foss[${BASH_REMATCH[2]}]:-}
    fi
  elif [[ "${version}" =~ ${foss_re} ]]; then
    label=${BASH_REMATCH[2]}
  fi
  printf '%s\n' "${label}"
}

json_objects=()
while IFS= read -r app_dir; do
  app_version=${app_dir##*/}
  app_parent=${app_dir%/*}
  app_name=${app_parent##*/}

  if [[ "${app_name}" == "EESSI-extend" ]]; then
    # Skip EESSI-extend, as it will be installed automatically.
    continue
  fi

  # Deliberately kept as a glob pattern (it is never expanded here); the
  # pattern itself is what ends up in the JSON output.
  easyblocks="${app_dir}/easybuild/reprod/easyblocks/*.py"
  easyconfig="${app_dir}/easybuild/${app_name}-${app_version}.eb"
  if [[ ! -f "${easyconfig}" ]]; then
    echo "ERROR: cannot find easyconfig for ${app_name}/${app_version}" >&2
    continue
  fi

  # If rebuilds would not remove the original log file, we should take the build time from the first log.
  # As we cannot guarantee that at the moment, we are cautious and use the last one.
  log_files=( "${app_dir}"/easybuild/easybuild-"${app_name}"*.log* )
  log_file=${log_files[${#log_files[@]}-1]}
  if [[ ! -f "${log_file}" ]]; then
    # Without this check bzcat would be called without a file and wait for
    # data on stdin.
    echo "ERROR: cannot find build log for ${app_name}/${app_version}" >&2
    continue
  fi

  # The 2nd and 3rd field of a log line hold date and time; everything from
  # the first comma on (the sub-second part) is dropped.
  build_time_start=$(bzcat -- "${log_file}" | head -n 1 | awk '{print $2 "T" $3}' | cut -d, -f1)
  build_time_end=$(bzcat -- "${log_file}" | tail -n 1 | awk '{print $2 "T" $3}' | cut -d, -f1)
  if start_epoch=$(date +%s -d "${build_time_start}" 2> /dev/null) \
      && end_epoch=$(date +%s -d "${build_time_end}" 2> /dev/null); then
    build_duration=$(( (end_epoch - start_epoch) / 60 ))
  else
    # Never carry over the duration of the previously processed application.
    echo "WARNING: cannot determine build duration for ${app_name}/${app_version}, using 0" >&2
    build_duration=0
  fi

  # Dots are escaped and every component may have several digits, so that
  # versions such as 4.10.0 are captured completely.
  eb_version=$(bzgrep -oP 'This is EasyBuild \K[0-9]+\.[0-9]+\.[0-9]+' "${log_file}" | head -n 1)
  # Some EB versions have been installed with a temporary EB installation of the same version.
  # If that's the case, use the version specified with ${EB_BOOTSTRAP} instead.
  # This needs to correspond to the version that gets installed initially by EESSI-install-software.sh,
  # which should be the latest EB version available when that script is being run.
  if [[ "${app_name}" == "EasyBuild" && "${app_version}" == "${eb_version}" ]]; then
    eb_version=${EB_BOOTSTRAP}
  fi

  toolchain=$(detect_toolchain "${app_version}")

  json=$(
    jq --null-input --compact-output \
      --arg build_time "${build_time_start}" \
      --arg build_duration_minutes "${build_duration}" \
      --arg name "${app_name}" \
      --arg version "${app_version}" \
      --arg easybuild "${eb_version}" \
      --arg toolchain "${toolchain}" \
      --arg easyconfig "${easyconfig}" \
      --arg easyblocks "${easyblocks}" \
      '$ARGS.named' # requires jq 1.7 or newer
  )
  json_objects+=("${json}")

  debug "${build_time_start} ${app_name} ${app_version} ${eb_version} ${toolchain} ${easyconfig} ${easyblocks}"
done < <(find "${BASE_STACK}" -mindepth 2 -maxdepth 2 -type d)

debug "${json_objects[*]}"

# Let jq assemble the array; with no objects at all this still prints "[]".
printf '%s\n' "${json_objects[@]}" | jq --slurp 'sort_by(.build_time)'
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/bin/bash
#
# Describe every installation found in an existing software stack as JSON,
# processing several installations concurrently.
#
# Usage: ./script.sh [MAX_JOBS]
#
# For each <name>/<version> directory below BASE_STACK a background job
# records the build start time and build duration (taken from the EasyBuild
# log), the EasyBuild version that did the build, a toolchain label, and the
# paths of the easyconfig and the reproducibility easyblocks. Each job writes
# one JSON object to a private temporary directory; at the end all objects are
# combined into one array sorted by build time and printed to stdout.
#
# BASE_STACK and EB_BOOTSTRAP can be overridden through the environment.

MAX_JOBS=${1:-4}  # Default to 4 concurrent jobs if not specified
DEBUG=0
BASE_STACK=${BASE_STACK:-/cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/intel/haswell/software}
EB_BOOTSTRAP=${EB_BOOTSTRAP:-5.0.0}

if [[ ! "${MAX_JOBS}" =~ ^[1-9][0-9]*$ ]]; then
  echo "Usage: $0 [MAX_JOBS]  (MAX_JOBS must be a positive integer, got '${MAX_JOBS}')" >&2
  exit 1
fi

# Maps the GCC version found in an installation's version string to a foss
# toolchain generation.
declare -A gcc_to_foss=( ["12.2.0"]="2022b" ["12.3.0"]="2023a" ["13.2.0"]="2023b" )

if [[ ! -d "${BASE_STACK}" ]]; then
  echo "The given base stack (${BASE_STACK}) is not a directory." >&2
  exit 1
fi

# A dedicated variable is used instead of reassigning TMPDIR, which tools
# started from this script may consult for their own temporary files.
results_dir=$(mktemp -d) || { echo "ERROR: cannot create temporary directory" >&2; exit 1; }
# Remove the per-job results on every exit path of the main shell.
trap 'rm -rf -- "${results_dir}"' EXIT

# Derive a toolchain label from an installation's version string.
# Arguments: $1 - version string (the name of the installation directory)
# Outputs:   "SYSTEM" when the version has no dash; the foss generation that
#            matches a GCC/GCCcore version; otherwise the version following
#            foss-/gfbf-/gompi-. Prints an empty line if nothing matches.
detect_toolchain() {
  local version=$1
  local gcc_re='(GCCcore|GCC)-([^-]*)'
  local foss_re='(foss|gfbf|gompi)-([^-]*)'
  local label=""

  if [[ "${version}" != *-* ]]; then
    label="SYSTEM"
  elif [[ "${version}" == *-GCC* ]]; then
    # An empty key is not a valid associative array subscript, so only look
    # up versions that were actually found.
    if [[ "${version}" =~ ${gcc_re} && -n "${BASH_REMATCH[2]}" ]]; then
      label=${gcc_to_foss[${BASH_REMATCH[2]}]:-}
    fi
  elif [[ "${version}" =~ ${foss_re} ]]; then
    label=${BASH_REMATCH[2]}
  fi
  printf '%s\n' "${label}"
}

# Collect the data of one installation and store it as a JSON object.
# Arguments: $1 - installation directory (<BASE_STACK>/<name>/<version>)
#            $2 - file to write the JSON object to
# Returns:   0 on success or when the installation is skipped on purpose,
#            1 when the easyconfig or the build log cannot be found.
process_app() {
  local app_dir=$1
  local out_file=$2
  local app_version=${app_dir##*/}
  local app_parent=${app_dir%/*}
  local app_name=${app_parent##*/}
  local easyblocks easyconfig log_file eb_version toolchain
  local build_time_start build_time_end start_epoch end_epoch build_duration
  local -a log_files

  if [[ "${app_name}" == "EESSI-extend" ]]; then
    return 0
  fi

  # Deliberately kept as a glob pattern (it is never expanded here); the
  # pattern itself is what ends up in the JSON output.
  easyblocks="${app_dir}/easybuild/reprod/easyblocks/*.py"
  easyconfig="${app_dir}/easybuild/${app_name}-${app_version}.eb"
  if [[ ! -f "${easyconfig}" ]]; then
    echo "ERROR: cannot find easyconfig for ${app_name}/${app_version}" >&2
    return 1
  fi

  # When several logs exist, the last one in glob order is used.
  log_files=( "${app_dir}"/easybuild/easybuild-"${app_name}"*.log* )
  log_file=${log_files[${#log_files[@]}-1]}
  if [[ ! -f "${log_file}" ]]; then
    echo "ERROR: cannot find build log for ${app_name}/${app_version}" >&2
    return 1
  fi

  # The 2nd and 3rd field of a log line hold date and time; everything from
  # the first comma on (the sub-second part) is dropped.
  build_time_start=$(bzcat -- "${log_file}" | head -n 1 | awk '{print $2 "T" $3}' | cut -d, -f1)
  build_time_end=$(bzcat -- "${log_file}" | tail -n 1 | awk '{print $2 "T" $3}' | cut -d, -f1)
  if start_epoch=$(date +%s -d "${build_time_start}" 2> /dev/null) \
      && end_epoch=$(date +%s -d "${build_time_end}" 2> /dev/null); then
    build_duration=$(( (end_epoch - start_epoch) / 60 ))
  else
    echo "WARNING: cannot determine build duration for ${app_name}/${app_version}, using 0" >&2
    build_duration=0
  fi

  eb_version=$(bzgrep -oP 'This is EasyBuild \K[0-9]+\.[0-9]+\.[0-9]+' "${log_file}" | head -n 1)
  # An EasyBuild installation whose log reports its own version is attributed
  # to the bootstrap version instead.
  if [[ "${app_name}" == "EasyBuild" && "${app_version}" == "${eb_version}" ]]; then
    eb_version=${EB_BOOTSTRAP}
  fi

  toolchain=$(detect_toolchain "${app_version}")

  jq --null-input \
    --arg build_time "${build_time_start}" \
    --arg build_duration_minutes "${build_duration}" \
    --arg name "${app_name}" \
    --arg version "${app_version}" \
    --arg easybuild "${eb_version}" \
    --arg toolchain "${toolchain}" \
    --arg easyconfig "${easyconfig}" \
    --arg easyblocks "${easyblocks}" \
    '$ARGS.named' > "${out_file}"

  if [[ ${DEBUG} -ne 0 ]]; then
    echo "Processed ${app_name}/${app_version}" >&2
  fi
}

# Job limiter: call right before starting a background job. Blocks until
# fewer than MAX_JOBS jobs are accounted for, then reserves a slot. The
# comparison happens before the increment so that exactly MAX_JOBS jobs can
# run at the same time.
job_count=0
run_limited() {
  if (( job_count >= MAX_JOBS )); then
    wait -n  # wait for one job to finish
    job_count=$(( job_count - 1 ))
  fi
  job_count=$(( job_count + 1 ))
}

# The directory list is captured up front so that no helper process is still
# running next to the jobs that "wait -n" is meant to reap.
apps=$(find "${BASE_STACK}" -mindepth 2 -maxdepth 2 -type d)

app_index=0
while IFS= read -r app_dir; do
  [[ -n "${app_dir}" ]] || continue
  app_index=$(( app_index + 1 ))
  # Sequence numbers give every job a unique result file.
  printf -v result_file '%s/%06d.json' "${results_dir}" "${app_index}"
  run_limited
  process_app "${app_dir}" "${result_file}" &
done <<< "${apps}"

wait

# Combine all results and sort by build time
shopt -s nullglob
result_files=( "${results_dir}"/*.json )
shopt -u nullglob
if (( ${#result_files[@]} == 0 )); then
  echo "[]"
else
  jq -s 'sort_by(.build_time)' "${result_files[@]}"
fi