Skip to content

Commit f4936e0

Browse files
committed
remove hwloc, fix benchmark bugs
1 parent 733e0b2 commit f4936e0

19 files changed

+303
-338
lines changed

CMakeLists.txt

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,19 +36,18 @@ find_package(MPI COMPONENTS C Fortran REQUIRED)
3636

3737
include(cmake/gcd.cmake) # math functions
3838

39-
# --- external libraries: HDF5, HWLOC
39+
# --- external libraries: HDF5
4040

4141
include(cmake/check_mpi.cmake)
4242
check_mpi_version()
4343

44-
find_package(HDF5 1.10.2 COMPONENTS Fortran HL parallel REQUIRED)
44+
find_package(HDF5 COMPONENTS Fortran HL parallel REQUIRED)
45+
if(HDF5_VERSION VERSION_LESS 1.10.2)
46+
message(WARNING "HDF5 >= 1.10.2 is needed for HDF5-MPI")
47+
endif()
4548

4649
if(ENABLE_BENCHMARKS)
47-
find_package(HWLOC 2.4 REQUIRED)
48-
find_package(HWLOCfortran CONFIG REQUIRED)
49-
5050
add_subdirectory(benchmark)
51-
add_subdirectory(benchmark/test)
5251
endif()
5352

5453
# --- HDF5-MPI object oriented API
@@ -62,7 +61,6 @@ endif()
6261
# additional Find*.cmake necessary
6362
install(FILES
6463
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/FindHDF5.cmake
65-
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/check_hdf5_mpi.f90
6664
DESTINATION cmake
6765
)
6866

benchmark/CMakeLists.txt

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1-
cmake_minimum_required(VERSION 3.20)
1+
cmake_minimum_required(VERSION 3.20...3.24)
22

33
project(h5mpi-benchmark
4-
LANGUAGES C Fortran
4+
LANGUAGES C CXX Fortran
55
)
66

7+
set_directory_properties(PROPERTIES LABELS benchmark)
8+
79
cmake_path(SET CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/../cmake/Modules)
810

911
find_package(MPI COMPONENTS Fortran REQUIRED)
@@ -14,8 +16,6 @@ if(NOT EXISTS ${PROJECT_BINARY_DIR}/.gitignore)
1416
file(WRITE ${PROJECT_BINARY_DIR}/.gitignore "*")
1517
endif()
1618

17-
add_subdirectory(test)
18-
1919
set(runner_os \"${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}\")
2020
configure_file(perf.in.f90 perf.f90 @ONLY)
2121
add_library(perf OBJECT
@@ -29,13 +29,21 @@ add_library(partition OBJECT partition.f90)
2929
3030
add_library(cli OBJECT cli.f90)
3131
32-
add_executable(runner frontend.f90)
33-
target_link_libraries(runner PRIVATE partition cli HWLOCfortran::hwloc_ifc)
34-
set_target_properties(runner PROPERTIES
35-
RUNTIME_OUTPUT_DIRECTORY $<TARGET_FILE_DIR:slab_mpi_write>
36-
)
32+
add_executable(runner frontend.f90 cpu.cpp)
33+
target_link_libraries(runner PRIVATE partition cli)
34+
# set_target_properties(runner PROPERTIES
35+
# RUNTIME_OUTPUT_DIRECTORY $<TARGET_FILE_DIR:slab_mpi_write>
36+
# )
37+
if(CMAKE_Fortran_COMPILER_ID MATCHES "^Intel")
38+
set_target_properties(runner PROPERTIES LINKER_LANGUAGE Fortran)
39+
else()
40+
set_target_properties(runner PROPERTIES LINKER_LANGUAGE CXX)
41+
endif()
3742
3843
foreach(t cli kernel perf)
3944
set_target_properties(${t} PROPERTIES Fortran_MODULE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include)
4045
target_include_directories(${t} INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/include)
4146
endforeach()
47+
48+
49+
add_subdirectory(test)

benchmark/cpu.cpp

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
// adapted from Kitware kwsys, with BSD 3-Clause license
2+
// https://gitlab.kitware.com/utils/kwsys/-/blob/master/SystemInformation.cxx
3+
4+
// Tested with:
5+
// Windows (g++, clang++, icx, cl)
6+
// MacOS (g++, clang++, icpc)
7+
// Linux (g++, clang++, icpx)
8+
9+
#include <algorithm>
#include <bitset>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <set>
#include <string>
#include <thread>
#include <vector>

#ifdef _WIN32
#include <windows.h>
#elif defined (__APPLE__)
#include <sys/sysctl.h>
#else
#include <unistd.h>
#endif
25+
26+
extern "C" unsigned int cpu_count();
27+
28+
std::string ExtractValueFromCpuInfoFile(std::string buffer, const char* word,
29+
size_t& CurrentPositionInFile, size_t init = 0);
30+
31+
32+
unsigned int cpu_count(){
33+
34+
unsigned int NumberOfPhysicalCPU = 0;
35+
unsigned int NumberOfLogicalCPU = 0;
36+
37+
#ifdef _WIN32
38+
39+
typedef BOOL(WINAPI * GetLogicalProcessorInformationType)(
40+
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
41+
static GetLogicalProcessorInformationType pGetLogicalProcessorInformation =
42+
(GetLogicalProcessorInformationType)GetProcAddress(
43+
GetModuleHandleW(L"kernel32"), "GetLogicalProcessorInformation");
44+
45+
if (!pGetLogicalProcessorInformation) {
46+
return 0;
47+
}
48+
49+
std::vector<SYSTEM_LOGICAL_PROCESSOR_INFORMATION> ProcInfo;
50+
{
51+
DWORD Length = 0;
52+
DWORD rc = pGetLogicalProcessorInformation(nullptr, &Length);
53+
assert(rc == 0);
54+
assert(GetLastError() == ERROR_INSUFFICIENT_BUFFER);
55+
ProcInfo.resize(Length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION));
56+
rc = pGetLogicalProcessorInformation(&ProcInfo[0], &Length);
57+
assert(rc != 0);
58+
}
59+
60+
typedef std::vector<SYSTEM_LOGICAL_PROCESSOR_INFORMATION>::iterator
61+
pinfoIt_t;
62+
for (pinfoIt_t it = ProcInfo.begin(); it != ProcInfo.end(); ++it) {
63+
SYSTEM_LOGICAL_PROCESSOR_INFORMATION PInfo = *it;
64+
if (PInfo.Relationship != RelationProcessorCore) {
65+
continue;
66+
}
67+
68+
std::bitset<std::numeric_limits<ULONG_PTR>::digits> ProcMask(
69+
(unsigned long long)PInfo.ProcessorMask);
70+
unsigned int count = (unsigned int)ProcMask.count();
71+
if (count == 0) { // I think this should never happen, but just to be safe.
72+
continue;
73+
}
74+
NumberOfPhysicalCPU++;
75+
NumberOfLogicalCPU += (unsigned int)count;
76+
}
77+
78+
#elif defined(__linux)
79+
80+
std::string buffer;
81+
82+
FILE* fd = fopen("/proc/cpuinfo", "r");
83+
if (!fd) return 0;
84+
85+
size_t fileSize = 0;
86+
while (!feof(fd)) {
87+
buffer += static_cast<char>(fgetc(fd));
88+
fileSize++;
89+
}
90+
fclose(fd);
91+
buffer.resize(fileSize - 2);
92+
// Number of logical CPUs (combination of multiple processors, multi-core
93+
// and SMT)
94+
size_t pos = buffer.find("processor\t");
95+
while (pos != std::string::npos) {
96+
NumberOfLogicalCPU++;
97+
pos = buffer.find("processor\t", pos + 1);
98+
}
99+
100+
// Count sockets.
101+
size_t CurrentPositionInFile;
102+
std::set<int> PhysicalIDs;
103+
std::string idc = ExtractValueFromCpuInfoFile(buffer, "physical id", CurrentPositionInFile);
104+
while (CurrentPositionInFile != std::string::npos) {
105+
int id = atoi(idc.c_str());
106+
PhysicalIDs.insert(id);
107+
idc = ExtractValueFromCpuInfoFile(buffer, "physical id",
108+
CurrentPositionInFile, CurrentPositionInFile + 1);
109+
}
110+
111+
uint64_t NumberOfSockets = PhysicalIDs.size();
112+
NumberOfSockets = std::max(NumberOfSockets, (uint64_t)1);
113+
// Physical ids returned by Linux don't distinguish cores.
114+
// We want to record the total number of cores in this->NumberOfPhysicalCPU
115+
// (checking only the first proc)
116+
std::string Cores = ExtractValueFromCpuInfoFile(buffer, "cpu cores", CurrentPositionInFile);
117+
if (Cores.empty()) {
118+
// Linux Sparc is different
119+
Cores = ExtractValueFromCpuInfoFile(buffer, "ncpus probed", CurrentPositionInFile);
120+
}
121+
auto NumberOfCoresPerSocket = (unsigned int)atoi(Cores.c_str());
122+
NumberOfCoresPerSocket = std::max(NumberOfCoresPerSocket, 1u);
123+
NumberOfPhysicalCPU = NumberOfCoresPerSocket * (unsigned int)NumberOfSockets;
124+
125+
#elif defined(__APPLE__)
126+
127+
int N;
128+
size_t size = sizeof(N);
129+
130+
if (sysctlbyname("hw.physicalcpu", &N, &size, nullptr, 0) == 0)
131+
NumberOfPhysicalCPU = N;
132+
133+
#elif defined(_SC_NPROCESSORS_ONLN)
134+
135+
long N = sysconf(_SC_NPROCESSORS_ONLN);
136+
if (N > 0)
137+
NumberOfPhysicalCPU = static_cast<unsigned int>(N);
138+
139+
#else
140+
NumberOfPhysicalCPU = std::thread::hardware_concurrency();
141+
#endif
142+
143+
return NumberOfPhysicalCPU;
144+
145+
}
146+
147+
/**
 * Extract the value of a "key : value" line from /proc/cpuinfo-style text.
 *
 * @param buffer  text to search (taken by value to match the declaration).
 * @param word    key to look for; it must be followed only by spaces/tabs up
 *                to the ':' separator, so searching "cpu" does not match the
 *                "cpu family" line.
 * @param CurrentPositionInFile  [out] offset in buffer where the matching key
 *                starts, or std::string::npos when no match was found.
 * @param init    offset in buffer at which to start searching.
 * @return the text between ": " and the end of the line; an empty string when
 *         the key is absent or the line carries no value.
 */
std::string ExtractValueFromCpuInfoFile(std::string buffer, const char* word,
  size_t & CurrentPositionInFile, size_t init)
{
  const size_t wordLength = std::strlen(word);
  size_t searchFrom = init;

  for (;;) {
    const size_t keyPos = buffer.find(word, searchFrom);
    if (keyPos == std::string::npos) break;

    const size_t colonPos = buffer.find(':', keyPos);
    if (colonPos == std::string::npos) break;

    const size_t eolPos = buffer.find('\n', colonPos);
    if (eolPos == std::string::npos) break;

    // The beginning may match while this is still not the requested key
    // (e.g. "cpu" vs. "cpu family"): only blanks may sit between key and ':'.
    bool exactKey = true;
    for (size_t i = keyPos + wordLength; i < colonPos; ++i) {
      if (buffer[i] != ' ' && buffer[i] != '\t') {
        exactKey = false;
        break;
      }
    }
    if (!exactKey) {
      // Resume the search after this line.
      searchFrom = eolPos;
      continue;
    }

    CurrentPositionInFile = keyPos;

    // The value starts after the ": " separator. A line such as
    // "power management:\n" has no value; return empty rather than
    // computing a negative length.
    const size_t valuePos = colonPos + 2;
    if (valuePos >= eolPos) {
      return "";
    }
    return buffer.substr(valuePos, eolPos - valuePos);
  }

  CurrentPositionInFile = std::string::npos;
  return "";
}

benchmark/frontend.f90

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,19 @@
11
program frontend
22

33
use, intrinsic :: iso_fortran_env, only: compiler_version
4-
use hwloc_ifc, only : get_cpu_count
4+
use, intrinsic :: iso_c_binding, only : C_INT
55
use partition, only : max_gcd
66
use cli, only : get_cli, get_simsize
77

88
implicit none (type, external)
99

10+
interface
11+
integer(c_int) function cpu_count() bind(c, name="cpu_count")
12+
import c_int
13+
end function cpu_count
14+
end interface
15+
16+
1017
integer :: lid, lx1, lx2, lx3, Ncpu, ierr, Nrun, i, comp_lvl
1118
character(2000) :: buf, exe, mpiexec, outfn, refh5fn
1219
character(:), allocatable :: cmd, args
@@ -68,7 +75,7 @@ program frontend
6875
inquire(file=exe, exist=exists)
6976
if(.not. exists) error stop trim(exe) // " is not a file."
7077

71-
if(Ncpu < 1) Ncpu = get_cpu_count()
78+
if(Ncpu < 1) Ncpu = cpu_count()
7279
lid = max_gcd(lx2, Ncpu)
7380

7481
print '(A,I0)', 'MPI images: ', lid

benchmark/test/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -177,16 +177,16 @@ set(pyargs ${PROJECT_SOURCE_DIR}/scripts/bench_slab.py --datadir ${CMAKE_CURRENT
177177
add_test(NAME PythonRunner COMMAND Python::Interpreter ${pyargs})
178178
set_tests_properties(PythonRunner PROPERTIES
179179
FIXTURES_SETUP PyRun_fxt
180-
LABELS python
181-
TIMEOUT 60
182-
DISABLED ${PySkip}
183180
RUN_SERIAL true
184181
)
185182

186183
add_test(NAME Plotter COMMAND Python::Interpreter ${PROJECT_SOURCE_DIR}/scripts/bench_plot.py --datadir ${CMAKE_CURRENT_BINARY_DIR} -lx ${lx3})
187184
set_tests_properties(Plotter PROPERTIES
188185
FIXTURES_REQUIRED PyRun_fxt
186+
)
187+
188+
set_tests_properties(PythonRunner Plotter PROPERTIES
189189
LABELS python
190-
TIMEOUT 30
190+
TIMEOUT 60
191191
DISABLED ${PySkip}
192192
)

benchmark/test/slab_mpi_read.f90

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,10 @@ program read_slab_mpi
148148
!> RESULTS
149149

150150
if(mpi_id == mpi_root_id) then
151+
call h5%open(trim(refh5fn), action="r", mpi=.false.)
151152
call print_timing(Nmpi, h5%comp_lvl, storage_size(A3), int(dims_full), t_elapsed, h5%filesize(), debug, &
152153
trim(h5fn) // ".read_stat.h5")
154+
call h5%close()
153155
endif
154156

155157
if (debug) print '(a,i0)', "mpi finalize: worker: ", mpi_id

benchmark/test/slab_mpi_serial_read.f90

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -147,17 +147,12 @@ program read_slab_mpi_root
147147
allocate(t3(lx1, lx2, lx3))
148148
call h5%open(trim(refh5fn), action="r", mpi=.false.)
149149
call h5%read("/A3", t3)
150-
call h5%close()
151150

152151
if (any(abs(t3 - A3) > 0.01)) error stop "3D ref mismatch " // trim(refh5fn) // " /= " // trim(h5fn)
153-
endif
154-
155152

156-
!> RESULTS
157-
158-
if(mpi_id == mpi_root_id) then
159153
call print_timing(Nmpi, h5%comp_lvl, storage_size(A3), int(dims_full), t_elapsed, h5%filesize(), debug, &
160154
trim(h5fn) // ".read_stat.h5")
155+
call h5%close()
161156
endif
162157

163158
if (debug) print '(a,i0)', "mpi finalize: worker: ", mpi_id

benchmark/test/slab_mpi_serial_write.f90

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -177,21 +177,17 @@ program write_slab_mpi_root
177177

178178
call h5%open(trim(h5fn), action="r", mpi=.false.)
179179
call h5%read("/A3", ts3)
180-
call h5%close()
181180

182181
if (any(abs(ts3 - S3) > 0.01)) then
183182
write(stderr,'(a,i0,1x,i0)') "ERROR: 3D disk vs. memory mismatch."
184183
write(stderr,'(a,100f5.1)') "disk: ", ts3
185184
write(stderr,'(a,100f5.1)') "memory: ", S3
186185
error stop trim(h5fn)
187186
endif
188-
endif
189-
190-
!> RESULTS
191187

192-
if(mpi_id == mpi_root_id) then
193188
call print_timing(Nmpi, h5%comp_lvl, storage_size(S3), int([lx1, lx2, lx3]), t_elapsed, h5%filesize(), debug, &
194189
trim(h5fn) // ".write_stat.h5")
190+
call h5%close()
195191
endif
196192

197193
if (debug) print '(a,i0)', "mpi finalize: worker: ", mpi_id

benchmark/test/slab_mpi_write.f90

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,10 @@ program write_slab_mpi
185185
!> RESULTS
186186

187187
if(mpi_id == mpi_root_id) then
188+
call h5%open(trim(h5fn), action="r", mpi=.false.)
188189
call print_timing(Nmpi, h5%comp_lvl, storage_size(S3), [lx1, lx2, lx3], t_elapsed, h5%filesize(), debug, &
189190
trim(h5fn) // ".write_stat.h5")
191+
call h5%close()
190192
endif
191193

192194
if (debug) print '(a,i0)', "mpi finalize: worker: ", mpi_id

benchmark/test/slab_serial_read.f90

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,9 @@ program read_slab_serial
9393

9494
!> RESULTS
9595

96+
call h5%open(trim(h5fn), action="r", mpi=.false.)
9697
call print_timing(1, h5%comp_lvl, real_bits, int(dims_full), t_elapsed, h5%filesize(), debug, &
9798
trim(h5fn) // ".read_stat.h5")
99+
call h5%close()
98100

99101
end program

0 commit comments

Comments
 (0)