Skip to content

Commit

Permalink
ThreadIdOffsetの最大値が、複数のNUMAノードを持つCPUで正しく設定されていなかった問題を修正した。
Browse files Browse the repository at this point in the history
  • Loading branch information
KazApps committed Sep 9, 2024
1 parent 2266fb6 commit db70c2e
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 46 deletions.
151 changes: 106 additions & 45 deletions source/misc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,111 @@ const std::string config_info()
return config;
}

// -----------------
// CPU Threads
// -----------------

#if defined(_WIN32)

// NUMAノード数、NUMAノードを考慮したコア数、スレッド数を取得する。
bool get_cpu_info(int* nodes, int* cores, int* threads)
{
*nodes = *cores = *threads = 0;

DWORD returnLength = 0;
DWORD byteOffset = 0;

// Early exit if the needed API is not available at runtime
HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll"));
auto fun1 = (fun1_t)(void(*)())GetProcAddress(k32, "GetLogicalProcessorInformationEx");
if (!fun1)
return false;

// First call to GetLogicalProcessorInformationEx() to get returnLength.
// We expect the call to fail due to null buffer.
if (fun1(RelationAll, nullptr, &returnLength))
return false;

// Once we know returnLength, allocate the buffer
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr;
ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength);

// Second call, now we expect to succeed
if (!fun1(RelationAll, buffer, &returnLength))
{
free(buffer);
return false;
}

while (byteOffset < returnLength)
{
// NUMA NODEの数
if (ptr->Relationship == RelationNumaNode)
(*nodes)++;

else if (ptr->Relationship == RelationProcessorCore)
{
// 物理コアの数
(*cores)++;

// 論理コア数の加算。HT対応なら2を足す。HT非対応なら1を足す。
*threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1;
}

ASSERT_LV3(ptr->Size);
byteOffset += ptr->Size;
ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size);
}

free(buffer);

return true;
}

// Windows implementation: returns the logical thread count reported by
// get_cpu_info(). If that query fails, returns the value of
// std::thread::hardware_concurrency() instead.
int total_thread_count()
{
	int numaNodes = 0;
	int physicalCores = 0;
	int logicalThreads = 0;

	// Only the thread count is used here; nodes and cores are ignored.
	if (!get_cpu_info(&numaNodes, &physicalCores, &logicalThreads))
		return static_cast<int>(std::thread::hardware_concurrency());

	return logicalThreads;
}

#elif defined(__linux__)

// Linux implementation: returns the number of lines in /proc/cpuinfo that
// begin with "processor".
//
// Falls back to std::thread::hardware_concurrency() when /proc/cpuinfo cannot
// be opened (a message is written to std::cerr in that case) or when no such
// line is found, so that a readable but unrecognised file does not yield 0.
int total_thread_count() {
	const auto fallback = [] {
		return static_cast<int>(std::thread::hardware_concurrency());
	};

	// Open /proc/cpuinfo.
	std::ifstream cpuinfo("/proc/cpuinfo");

	if (!cpuinfo) {
		std::cerr << "Failed to open /proc/cpuinfo" << std::endl;
		return fallback();
	}

	const std::string key = "processor";

	int threads = 0;
	std::string line;

	// Read line by line, counting those that start with the key.
	// compare() inspects only the prefix instead of searching the whole line.
	while (std::getline(cpuinfo, line)) {
		if (line.compare(0, key.size(), key) == 0)
			++threads;
	}

	// A count of 0 means the file had no matching line; do not report 0 threads.
	return threads > 0 ? threads : fallback();
}

#else

// Generic implementation for platforms without a dedicated code path:
// returns std::thread::hardware_concurrency() converted to int.
int total_thread_count()
{
	const unsigned int hardwareThreads = std::thread::hardware_concurrency();
	return static_cast<int>(hardwareThreads);
}

#endif

// --------------------
// 統計情報
// --------------------
Expand Down Expand Up @@ -861,53 +966,9 @@ namespace WinProcGroup {
// 物理コア数
int cores = 0;

DWORD returnLength = 0;
DWORD byteOffset = 0;

// Early exit if the needed API is not available at runtime
HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll"));
auto fun1 = (fun1_t)(void(*)())GetProcAddress(k32, "GetLogicalProcessorInformationEx");
if (!fun1)
if (!get_cpu_info(&nodes, &cores, &threads))
return -1;

// First call to GetLogicalProcessorInformationEx() to get returnLength.
// We expect the call to fail due to null buffer.
if (fun1(RelationAll, nullptr, &returnLength))
return -1;

// Once we know returnLength, allocate the buffer
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr;
ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength);

// Second call, now we expect to succeed
if (!fun1(RelationAll, buffer, &returnLength))
{
free(buffer);
return -1;
}

while (byteOffset < returnLength)
{
// NUMA NODEの数
if (ptr->Relationship == RelationNumaNode)
nodes++;

else if (ptr->Relationship == RelationProcessorCore)
{
// 物理コアの数
cores++;

// 論理コア数の加算。HT対応なら2を足す。HT非対応なら1を足す。
threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1;
}

ASSERT_LV3(ptr->Size);
byteOffset += ptr->Size;
ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size);
}

free(buffer);

std::vector<int> groups;

// Run as many threads as possible on the same node until core limit is
Expand Down
7 changes: 7 additions & 0 deletions source/misc.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ void prefetch(const void* addr);
// cin/coutへの入出力をファイルにリダイレクトを開始/終了する。
void start_logger(const std::string& fname);

// -----------------
// CPU Threads
// -----------------

// Returns the number of CPU threads; also handles environments with NUMA nodes.
int total_thread_count();

// --------------------
// Large Page確保
// --------------------
Expand Down
2 changes: 1 addition & 1 deletion source/usi_option.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ namespace USI {
// (プロセッサグループは64論理コアごとに1つ作られる。上のケースでは、ThreadIdOffset = 0,0,64,64でも同じ意味。)
// ※ 1つのPCで複数の思考エンジンを同時に起動して対局させる場合はこれを適切に設定すべき。

o["ThreadIdOffset"] << Option(0, 0, std::thread::hardware_concurrency() - 1);
o["ThreadIdOffset"] << Option(0, 0, total_thread_count() - 1);
#endif

#if defined(_WIN64)
Expand Down

0 comments on commit db70c2e

Please sign in to comment.