From f0c2cc42822ef0a2f62d54d392c3559b112fbecb Mon Sep 17 00:00:00 2001 From: Peter Veentjer Date: Thu, 19 Jun 2025 15:24:00 +0300 Subject: [PATCH 1/2] Minor modifications await_process_start & pin_thread. The echos have been restored, and the check at the end is no longer count-based; it is now based on whether the tid/pid is empty. --- scripts/remote-benchmarks-runner | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/scripts/remote-benchmarks-runner b/scripts/remote-benchmarks-runner index ff24fb92..2c547b21 100755 --- a/scripts/remote-benchmarks-runner +++ b/scripts/remote-benchmarks-runner @@ -170,16 +170,17 @@ function await_process_start() # note: this multiline string is converted to single line string at the end of this function local script=" - pid=${pid_cmd}; + echo 'Await process start'; count=0; + pid=${pid_cmd}; while [ -z \"\${pid}\" ] && [ \${count} -lt 120 ]; do sleep 0.5; pid=${pid_cmd}; count=\$((count + 1)); done; - if [ \${count} -ge 120 ]; then - echo \"Timeout waiting for process to start\" >&2; - exit 1; + if [ -z \"\${pid}\" ]; then + echo 'Timeout: process not found after 60 seconds.'; + exit 1; fi; echo \"pid='\${pid}'\" " @@ -207,15 +208,16 @@ function pin_thread() # note: this multiline string is converted to single line string at the end of this function local script=" - tid=${tid_cmd}; + echo 'Pinning thread: ${thread_name}'; count=0; + tid=${tid_cmd}; while [ -z \"\${tid}\" ] && [ \${count} -lt 600 ]; do sleep 0.1; tid=${tid_cmd}; count=\$((count + 1)); done; - if [ \${count} -ge 600 ]; then - echo \"Timeout waiting for thread ${thread_name} to start\" >&2; + if [ -z \"\${tid}\" ]; then + echo 'Timeout: thread ${thread_name} not found after 60 seconds.'; exit 1; fi; echo \"tid_${thread_name}='\${tid}'\"; From 5883798b6dbe6d3a645263ed613a795c2c5b0f58 Mon Sep 17 00:00:00 2001 From: Peter Veentjer Date: Fri, 20 Jun 2025 10:22:22 +0300 Subject: [PATCH 2/2] Removed code for server_driver_async_executor_cpu_core_var This variable 
isn't filled by an environment variable because the async core pinning has been removed. Currently I'm running into the following error: Starting Cluster node 0... remote-cluster-benchmarks: line 566: server_driver_async_executor_cpu_core_var: invalid indirect expansion Lock released. --- scripts/aeron/remote-cluster-benchmarks | 2 -- scripts/aeron/remote-echo-mdc-benchmarks | 1 - 2 files changed, 3 deletions(-) diff --git a/scripts/aeron/remote-cluster-benchmarks b/scripts/aeron/remote-cluster-benchmarks index a2ad1cf3..398c91aa 100755 --- a/scripts/aeron/remote-cluster-benchmarks +++ b/scripts/aeron/remote-cluster-benchmarks @@ -563,7 +563,6 @@ do server_driver="${server_driver//driver_conductor_cpu_core_var/${!server_driver_conductor_cpu_core}}" server_driver="${server_driver//driver_sender_cpu_core_var/${!server_driver_sender_cpu_core}}" server_driver="${server_driver//driver_receiver_cpu_core_var/${!server_driver_receiver_cpu_core}}" - server_driver="${server_driver//driver_async_executor_cpu_core_var/${!server_driver_async_executor_cpu_core_var}}" server_driver="${server_driver//driver_aeron_dpdk_gateway_ipv4_address_var/${!server_driver_aeron_dpdk_gateway_ipv4_address}}" server_driver="${server_driver//driver_aeron_dpdk_local_ipv4_address_var/${!server_driver_aeron_dpdk_local_ipv4_address}}" execute_remote_command "${!ssh_user}" "${!ssh_key_file}" "${!ssh_node}" "($(start_cluster_node "${n}" "${cluster_node_class_name}" "${server_driver}" "${fsync}" "${cluster_service}" "${!server_benchmarks_path}/${output_dir}") &) > /tmp/benchmarks-cluster-node-${n}.log 2>&1 && exit" @@ -588,7 +587,6 @@ do server_driver="${server_driver//driver_conductor_cpu_core_var/${!server_driver_conductor_cpu_core}}" server_driver="${server_driver//driver_sender_cpu_core_var/${!server_driver_sender_cpu_core}}" server_driver="${server_driver//driver_receiver_cpu_core_var/${!server_driver_receiver_cpu_core}}" - 
server_driver="${server_driver//driver_async_executor_cpu_core_var/${!server_driver_async_executor_cpu_core_var}}" server_driver="${server_driver//driver_aeron_dpdk_gateway_ipv4_address_var/${!server_driver_aeron_dpdk_gateway_ipv4_address}}" server_driver="${server_driver//driver_aeron_dpdk_local_ipv4_address_var/${!server_driver_aeron_dpdk_local_ipv4_address}}" execute_remote_command "${!ssh_user}" "${!ssh_key_file}" "${!ssh_node}" "($(start_cluster_backup_node "${n}" "${cluster_backup_node_class_name}" "${server_driver}" "${!server_benchmarks_path}/${output_dir}") &) > /tmp/benchmarks-cluster-backup-node-${n}.log 2>&1 && exit" diff --git a/scripts/aeron/remote-echo-mdc-benchmarks b/scripts/aeron/remote-echo-mdc-benchmarks index e777fb0d..4ed29d02 100755 --- a/scripts/aeron/remote-echo-mdc-benchmarks +++ b/scripts/aeron/remote-echo-mdc-benchmarks @@ -425,7 +425,6 @@ do server_driver="${server_driver//driver_conductor_cpu_core_var/${!server_driver_conductor_cpu_core}}" server_driver="${server_driver//driver_sender_cpu_core_var/${!server_driver_sender_cpu_core}}" server_driver="${server_driver//driver_receiver_cpu_core_var/${!server_driver_receiver_cpu_core}}" - server_driver="${server_driver//driver_async_executor_cpu_core_var/${!server_driver_async_executor_cpu_core_var}}" server_driver="${server_driver//driver_aeron_dpdk_gateway_ipv4_address_var/${!server_driver_aeron_dpdk_gateway_ipv4_address}}" server_driver="${server_driver//driver_aeron_dpdk_local_ipv4_address_var/${!server_driver_aeron_dpdk_local_ipv4_address}}" execute_remote_command "${!ssh_user}" "${!ssh_key_file}" "${!ssh_node}" "($(start_node "${n}" "${node_class_name}" "${server_driver}" "${!server_benchmarks_path}/${output_dir}") &) > /tmp/benchmarks-node-${n}.log 2>&1 && exit"