Skip to content

Commit

Permalink
Upstream cpuinfo updates in XNNPACK as of XNNPACK:d793f6c2ec145be3ddb…
Browse files Browse the repository at this point in the history
…ffea951e6e5480f4646b8.
  • Loading branch information
AshkanAliabadi authored and dreiss committed May 11, 2020
1 parent 2b14e44 commit c209221
Show file tree
Hide file tree
Showing 23 changed files with 423 additions and 69 deletions.
16 changes: 13 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ IF(NOT CMAKE_SYSTEM_NAME)
"Target operating system is not specified. "
"cpuinfo will compile, but cpuinfo_initialize() will always fail.")
SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|Darwin|Linux|Android)$")
ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS|Darwin|Linux|Android)$")
IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
MESSAGE(WARNING
"Target operating system \"${CMAKE_SYSTEM_NAME}\" is not supported in cpuinfo. "
Expand Down Expand Up @@ -125,7 +125,7 @@ SET(CPUINFO_SRCS
src/cache.c)

IF(CPUINFO_SUPPORTED_PLATFORM)
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")
IF(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$"))
LIST(APPEND CPUINFO_SRCS
src/x86/init.c
src/x86/info.c
Expand All @@ -143,7 +143,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM)
src/x86/linux/cpuinfo.c)
ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
LIST(APPEND CPUINFO_SRCS src/x86/mach/init.c)
ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Windows")
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$")
LIST(APPEND CPUINFO_SRCS src/x86/windows/init.c)
ENDIF()
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$" OR IOS_ARCH MATCHES "^(armv7.*|arm64.*)$")
Expand Down Expand Up @@ -175,6 +175,11 @@ IF(CPUINFO_SUPPORTED_PLATFORM)
ENDIF()
ENDIF()

IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
LIST(APPEND CPUINFO_SRCS
src/emscripten/init.c)
ENDIF()

IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
LIST(APPEND CPUINFO_SRCS
src/linux/smallfile.c
Expand Down Expand Up @@ -205,6 +210,11 @@ ADD_LIBRARY(cpuinfo_internals STATIC ${CPUINFO_SRCS})
CPUINFO_TARGET_ENABLE_C99(cpuinfo)
CPUINFO_TARGET_ENABLE_C99(cpuinfo_internals)
CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo)
IF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$")
# Target Windows 7+ API
TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE _WIN32_WINNT=0x0601)
TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE _WIN32_WINNT=0x0601)
ENDIF()
SET_TARGET_PROPERTIES(cpuinfo PROPERTIES PUBLIC_HEADER include/cpuinfo.h)
TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PUBLIC include)
TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PRIVATE src)
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Detect if target is a 32-bit or 64-bit ARM system:
```

Check if the host CPU supports ARM NEON

```c
cpuinfo_initialize();
if (cpuinfo_has_arm_neon()) {
Expand All @@ -57,6 +58,7 @@ if (cpuinfo_has_arm_neon()) {
```

Check if the host CPU supports x86 AVX

```c
cpuinfo_initialize();
if (cpuinfo_has_x86_avx()) {
Expand All @@ -65,6 +67,7 @@ if (cpuinfo_has_x86_avx()) {
```

Check if the thread runs on a Cortex-A53 core

```c
cpuinfo_initialize();
switch (cpuinfo_get_current_core()->uarch) {
Expand All @@ -78,12 +81,14 @@ switch (cpuinfo_get_current_core()->uarch) {
```

Get the size of level 1 data cache on the fastest core in the processor (e.g. big core in big.LITTLE ARM systems):

```c
cpuinfo_initialize();
const size_t l1_size = cpuinfo_get_processor(0)->cache.l1d->size;
```

Pin thread to cores sharing L2 cache with the current core (Linux or Android)

```c
cpuinfo_initialize();
cpu_set_t cpu_set;
Expand Down
9 changes: 9 additions & 0 deletions bench/get-current.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,13 @@ static void cpuinfo_get_current_uarch_index(benchmark::State& state) {
}
BENCHMARK(cpuinfo_get_current_uarch_index)->Unit(benchmark::kNanosecond);

/*
 * Benchmark body: repeatedly calls cpuinfo_get_current_uarch_index_with_default()
 * with 0 as the fallback index. The call inside the loop resolves to the
 * uint32_t overload declared by the library, not to this benchmark function.
 */
static void cpuinfo_get_current_uarch_index_with_default(benchmark::State& state) {
	/* Initialize the library before entering the measured loop. */
	cpuinfo_initialize();

	while (state.KeepRunning()) {
		const uint32_t current_index = cpuinfo_get_current_uarch_index_with_default(0);
		/* Keep the compiler from discarding the otherwise unused result. */
		benchmark::DoNotOptimize(current_index);
	}
}
BENCHMARK(cpuinfo_get_current_uarch_index_with_default)->Unit(benchmark::kNanosecond);

BENCHMARK_MAIN();
3 changes: 1 addition & 2 deletions configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def main(args):
build.export_cpath("include", ["cpuinfo.h"])

with build.options(source_dir="src", macros=macros, extra_include_dirs="src", deps=build.deps.clog):
sources = ["init.c", "api.c"]
sources = ["api.c", "init.c", "cache.c"]
if build.target.is_x86 or build.target.is_x86_64:
sources += [
"x86/init.c", "x86/info.c", "x86/isa.c", "x86/vendor.c",
Expand Down Expand Up @@ -61,7 +61,6 @@ def main(args):
sources += ["mach/topology.c"]
if build.target.is_linux or build.target.is_android:
sources += [
"linux/current.c",
"linux/cpulist.c",
"linux/smallfile.c",
"linux/multiline.c",
Expand Down
19 changes: 14 additions & 5 deletions include/cpuinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -499,11 +499,11 @@ enum cpuinfo_uarch {
/** Applied Micro X-Gene. */
cpuinfo_uarch_xgene = 0x00B00100,

/** Huawei hisilicon Kunpeng Series CPU. */
cpuinfo_uarch_taishanv110 = 0x00C00100,

/* Hygon Dhyana (a modification of AMD Zen for Chinese market). */
cpuinfo_uarch_dhyana = 0x01000100,

/** HiSilicon TaiShan v110 (Huawei Kunpeng 920 series processors). */
cpuinfo_uarch_taishan_v110 = 0x00C00100,
};

struct cpuinfo_processor {
Expand All @@ -523,7 +523,7 @@ struct cpuinfo_processor {
*/
int linux_id;
#endif
#if defined(_WIN32)
#if defined(_WIN32) || defined(__CYGWIN__)
/** Windows-specific ID for the group containing the logical processor. */
uint16_t windows_group_id;
/**
Expand Down Expand Up @@ -1799,13 +1799,22 @@ const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void);

/**
* Identify the microarchitecture index of the core that executes the current thread.
* If the system does not support such identification, the function return 0.
* If the system does not support such identification, the function returns 0.
*
* There is no guarantee that the thread will stay on the same type of core for any time.
* Callers should treat the result as only a hint.
*/
uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void);

/**
* Identify the microarchitecture index of the core that executes the current thread.
* If the system does not support such identification, the function returns the user-specified default value.
*
* There is no guarantee that the thread will stay on the same type of core for any time.
* Callers should treat the result as only a hint.
*/
uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index);

#ifdef __cplusplus
} /* extern "C" */
#endif
Expand Down
30 changes: 30 additions & 0 deletions src/api.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,3 +374,33 @@ uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) {
return 0;
#endif
}

/*
 * Returns the microarchitecture index of the core running the calling thread,
 * or a fallback value when that index cannot be determined.
 *
 * @param default_uarch_index - value returned when the current core cannot be
 *        identified: no query API on this OS, the getcpu syscall failed, or it
 *        reported a CPU number outside the known range.
 *
 * Note: two paths return 0 rather than default_uarch_index: builds that are
 * not ARM/ARM64, and ARM Linux builds where no cpu-to-uarch map was set up.
 */
uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index) {
	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index_with_default");
	}
#if !(CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64)
	/* Only ARM/ARM64 processors may include cores of different types in the same package. */
	return 0;
#elif !defined(__linux__)
	/* Fallback: no API to query current core, use default uarch index. */
	return default_uarch_index;
#else
	/* Special case: avoid syscall on systems with only a single type of cores */
	if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) {
		return 0;
	}

	/*
	 * General case: ask the kernel which CPU we are on. The range check is
	 * evaluated only if the syscall succeeded, so current_cpu is never read
	 * uninitialized.
	 */
	unsigned current_cpu;
	if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &current_cpu, NULL, NULL) != 0 || (uint32_t) current_cpu >= cpuinfo_linux_cpu_max) {
		return default_uarch_index;
	}
	return cpuinfo_linux_cpu_to_uarch_index_map[current_cpu];
#endif
}
43 changes: 22 additions & 21 deletions src/arm/cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -1448,23 +1448,24 @@ void cpuinfo_arm_decode_cache(
.line_size = 64 /* assumption */
};
break;
case cpuinfo_uarch_taishanv110:
case cpuinfo_uarch_taishan_v110:
/*
* Kunpeng920 series CPU designed by Huawei hisilicon for server,
* L1 and L2 cache is private to each core, L3 is shared with all cores.
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference |
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
* | Kunpeng920-3226 | 32 | 64K | 64K | 512K | 32M | [1] |
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
* | Kunpeng920-4826 | 48 | 64K | 64K | 512K | 48M | [2] |
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
* | Kunpeng920-6426 | 64 | 64K | 64K | 512K | 64M | [3] |
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
*
* [1] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-3226
* [2] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-4826
* [3] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426
* It features private 64 KiB L1 instruction and data caches as well as 512 KiB of private L2. [1]
*
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference |
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
* | Kunpeng 920-3226 | 32 | 64K | 64K | 512K | 32M | [2] |
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
* | Kunpeng 920-4826 | 48 | 64K | 64K | 512K | 48M | [3] |
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
* | Kunpeng 920-6426 | 64 | 64K | 64K | 512K | 64M | [4] |
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
*
* [1] https://en.wikichip.org/wiki/hisilicon/microarchitectures/taishan_v110
* [2] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-3226
* [3] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-4826
* [4] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426
*/
*l1i = (struct cpuinfo_cache) {
.size = 64 * 1024,
Expand All @@ -1482,11 +1483,11 @@ void cpuinfo_arm_decode_cache(
.line_size = 128 /* assumption */,
.flags = CPUINFO_CACHE_INCLUSIVE /* assumption */,
};
*l3 = (struct cpuinfo_cache) {
.size = cluster_cores * 1024 * 1024,
.associativity = 16 /* assumption */,
.line_size = 128 /* assumption */,
};
*l3 = (struct cpuinfo_cache) {
.size = cluster_cores * 1024 * 1024,
.associativity = 16 /* assumption */,
.line_size = 128 /* assumption */,
};
break;
#endif
case cpuinfo_uarch_cortex_a12:
Expand Down
2 changes: 1 addition & 1 deletion src/arm/linux/aarch32-isa.c
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON;
if ((architecture_version >= 7) || (features & vfpv3_mask)) {
isa->vfpv3 = true;

const uint32_t d32_mask = CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_NEON;
if (features & d32_mask) {
isa->d32 = true;
Expand Down
10 changes: 5 additions & 5 deletions src/arm/linux/clusters.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
*
* @param usable_processors - number of processors in the @p processors array with CPUINFO_LINUX_FLAG_VALID flags.
* @param max_processors - number of elements in the @p processors array.
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
* frequency, MIDR information, and core cluster (package siblings list) information.
*
* @retval true if the heuristic successfully assigned all processors into clusters of cores.
Expand Down Expand Up @@ -308,7 +308,7 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
* @p processors array have cluster information.
*
* @param max_processors - number of elements in the @p processors array.
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
* frequency, MIDR information, and core cluster (package siblings list) information.
*
* @retval true if the heuristic successfully assigned all processors into clusters of cores.
Expand Down Expand Up @@ -466,7 +466,7 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
* This function should be called after all processors are assigned to core clusters.
*
* @param max_processors - number of elements in the @p processors array.
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags,
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags,
* and decoded core cluster (package_leader_id) information.
* The function expects the value of processors[i].package_processor_count to be zero.
* Upon return, processors[i].package_processor_count will contain the number of logical
Expand All @@ -482,12 +482,12 @@ void cpuinfo_arm_linux_count_cluster_processors(
const uint32_t package_leader_id = processors[i].package_leader_id;
processors[package_leader_id].package_processor_count += 1;
}
}
}
/* Second pass: copy the package_processor_count from the group leader processor */
for (uint32_t i = 0; i < max_processors; i++) {
if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
const uint32_t package_leader_id = processors[i].package_leader_id;
processors[i].package_processor_count = processors[package_leader_id].package_processor_count;
}
}
}
}
6 changes: 3 additions & 3 deletions src/arm/linux/cpuinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ static uint32_t parse_processor_number(

/*
* Full list of ARM features reported in /proc/cpuinfo:
*
*
* * swp - support for SWP instruction (deprecated in ARMv7, can be removed in future)
* * half - support for half-word loads and stores. These instruction are part of ARMv4,
* so no need to check it on supported CPUs.
Expand Down Expand Up @@ -620,7 +620,7 @@ static void parse_cache_number(
break;
default:
cpuinfo_log_warning("invalid %s %.*s is ignored: a value of 16, 32, 64, or 128 expected",
number_name, (int) (number_end - number_start), number_start);
number_name, (int) (number_end - number_start), number_start);
}
}

Expand Down Expand Up @@ -670,7 +670,7 @@ static bool parse_line(
if (line_start == line_end) {
return true;
}

/* Search for ':' on the line. */
const char* separator = line_start;
for (; separator != line_end; separator++) {
Expand Down
2 changes: 1 addition & 1 deletion src/arm/tlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ switch (uarch) {
* Cortex-A5 Technical Reference Manual:
* 6.3.1. Micro TLB
* The first level of caching for the page table information is a micro TLB of
* 10 entries that is implemented on each of the instruction and data sides.
* 10 entries that is implemented on each of the instruction and data sides.
* 6.3.2. Main TLB
* Misses from the instruction and data micro TLBs are handled by a unified main TLB.
* The main TLB is 128-entry two-way set-associative.
Expand Down
6 changes: 4 additions & 2 deletions src/arm/uarch.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,11 @@ void cpuinfo_arm_decode_vendor_uarch(
case 'H':
*vendor = cpuinfo_vendor_huawei;
switch (midr_get_part(midr)) {
case 0xD01: /* Kunpeng920 ARM-base CPU*/
*uarch = cpuinfo_uarch_taishanv110;
#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
case 0xD01: /* Kunpeng 920 series */
*uarch = cpuinfo_uarch_taishan_v110;
break;
#endif
case 0xD40: /* Kirin 980 Big/Medium cores -> Cortex-A76 */
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a76;
Expand Down
4 changes: 2 additions & 2 deletions src/cpuinfo/internal-api.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include <stdint.h>
#include <stdbool.h>

#ifdef _WIN32
#if defined(_WIN32) || defined(__CYGWIN__)
#include <windows.h>
#endif

Expand Down Expand Up @@ -50,7 +50,7 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size;

CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void);
CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
#ifdef _WIN32
#if defined(_WIN32) || defined(__CYGWIN__)
CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context);
#endif
CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void);
Expand Down
Loading

0 comments on commit c209221

Please sign in to comment.