Skip to content

Commit 0eda789

Browse files
Septa2112 authored and Nexesenex committed
common : add support for cpu_get_num_physical_cores() on Windows (ggml-org#8771)
* Add support for cpu_get_num_physical_cores() on Windows
* fix build bug on msys2-clang64 and ucrt64
* avoid adding new function
* add new macros to avoid windows+mingw64
* Add error checking to return default value
1 parent 1193e08 commit 0eda789

File tree

1 file changed

+34
-2
lines changed

1 file changed

+34
-2
lines changed

common/common.cpp

Lines changed: 34 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -110,8 +110,34 @@ int32_t cpu_get_num_physical_cores() {
110110
if (result == 0) {
111111
return num_physical_cores;
112112
}
113-
#elif defined(_WIN32)
114-
//TODO: Implement
113+
#elif defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
114+
// TODO: windows + arm64 + mingw64
115+
unsigned int n_threads_win = std::thread::hardware_concurrency();
116+
unsigned int default_threads = n_threads_win > 0 ? (n_threads_win <= 4 ? n_threads_win : n_threads_win / 2) : 4;
117+
118+
DWORD buffer_size = 0;
119+
if (!GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size)) {
120+
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
121+
return default_threads;
122+
}
123+
}
124+
125+
std::vector<char> buffer(buffer_size);
126+
if (!GetLogicalProcessorInformationEx(RelationProcessorCore, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data()), &buffer_size)) {
127+
return default_threads;
128+
}
129+
130+
int32_t num_physical_cores = 0;
131+
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data());
132+
while (buffer_size > 0) {
133+
if (info->Relationship == RelationProcessorCore) {
134+
num_physical_cores += info->Processor.GroupCount;
135+
}
136+
buffer_size -= info->Size;
137+
info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(reinterpret_cast<char*>(info) + info->Size);
138+
}
139+
140+
return num_physical_cores > 0 ? num_physical_cores : default_threads;
115141
#endif
116142
unsigned int n_threads = std::thread::hardware_concurrency();
117143
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
@@ -1740,7 +1766,13 @@ std::string gpt_params_get_system_info(const gpt_params & params) {
17401766
if (params.n_threads_batch != -1) {
17411767
os << " (n_threads_batch = " << params.n_threads_batch << ")";
17421768
}
1769+
#if defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
1770+
// TODO: windows + arm64 + mingw64
1771+
DWORD logicalProcessorCount = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
1772+
os << " / " << logicalProcessorCount << " | " << llama_print_system_info();
1773+
#else
17431774
os << " / " << std::thread::hardware_concurrency() << " | " << llama_print_system_info();
1775+
#endif
17441776

17451777
return os.str();
17461778
}

0 commit comments

Comments
 (0)