RISC-V Support SE-148 (#234)

This pull request adds support for rv64iam, that is, the 64-bit RISC-V base ISA with the atomic and multiply extensions, working in all three simulation modes. Expanding on previous work, the general code base has been adjusted to be more ISA-agnostic. Specific config option parsing has been moved to the ISA-specific classes, and the exception handler has become more generic. Some more work may be needed on this, as exception types are currently ISA-specific; this causes the need for repeated functions, which is undesirable. Almost all of the base, atomic and multiply extension instructions have been implemented, along with their pseudoinstructions.

### Architecture updates
- rv64i - RISC-V base extension
- a - RISC-V atomic extension
- m - RISC-V multiply/divide extension

### Changes to config options
- Addition of `ISA` option with valid arguments <`aarch64`, `rv64`>

### To note
- FENCE is implemented as a NOP, which is considered allowable by the spec
- EBREAK is unimplemented but only needed for debugging
- Atomics don't operate truly atomically; this is currently functional for single-threaded programs but not accurate
- MULH and MULHSU aren't implemented as they will need a library such as GMP. Tests are implemented and expected to fail

This functionality is sufficient to run many benchmarks including STREAM, Dhrystone and an implementation of the Smith-Waterman algorithm.
UoB-HPC · Nov 28, 2022 · 53f99f9 · 53f99f9
1 parent 7e149d0
commit 53f99f9
Show file tree

Hide file tree

Showing 64 changed files with 8,353 additions and 484 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 .clang_complete
 .vscode
+.idea
 .DS_Store
 obj
 build
@@ -18,4 +19,4 @@ CPackSourceConfig.cmake
 CMakeFiles/
 
 **/capstone-config-version.cmake
-**/capstone-config.cmake
+**/capstone-config.cmake
diff --git a/.jenkins/build_test_run.sh b/.jenkins/build_test_run.sh
@@ -41,6 +41,7 @@ test () {
     cd "$SIMENG_BUILD" || exit
     ./test/unit/unittests --gtest_output=xml:unittests.xml || true
     ./test/regression/aarch64/regression-aarch64 --gtest_output=xml:regressiontests.xml || true
+    ./test/regression/riscv/regression-riscv --gtest_output=xml:regressiontests.xml || true
 }
 
 # Run default program with and without specified configuration

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -111,7 +111,6 @@ set(CAPSTONE_EVM_SUPPORT OFF CACHE BOOL "Disable EVM support")
 set(CAPSTONE_MOS65XX_SUPPORT OFF CACHE BOOL "Disable MSO65XX support")
 set(CAPSTONE_WASM_SUPPORT OFF CACHE BOOL "Disable WASM support")
 set(CAPSTONE_BPF_SUPPORT OFF CACHE BOOL "Disable BPF support")
-set(CAPSTONE_RISCV_SUPPORT OFF CACHE BOOL "Disable RISCV support")
 
 FetchContent_MakeAvailable_Args(capstone-lib EXCLUDE_FROM_ALL)
 include_directories("${capstone_BINARY_DIR}/include" "${capstone_SOURCE_DIR}/include")
@@ -165,7 +164,7 @@ if(SIMENG_ENABLE_TESTS)
 
   else()
 
-    set(LLVM_TARGETS_TO_BUILD "AArch64" CACHE INTERNAL "")
+    set(LLVM_TARGETS_TO_BUILD "AArch64;RISCV" CACHE INTERNAL "")
 
     set(LLVM_BUILD_RUNTIME OFF)
 
@@ -198,8 +197,7 @@ if(SIMENG_ENABLE_TESTS)
 
     # NOTE: we don't do the usual version checks here because it needs vars exported in find_LLVM
     # we just assume it's good because it must be whitelisted in FetchContent_Declare
-
-  endif()
+endif()
 
   set(SIMENG_LLVM_VERSION ${LLVM_VERSION_MAJOR} CACHE INTERNAL "LLVM major version number used.")
   message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
@@ -209,6 +207,9 @@ if(SIMENG_ENABLE_TESTS)
   if (NOT "AArch64" IN_LIST LLVM_TARGETS_TO_BUILD)
     message(FATAL_ERROR "LLVM was built without AArch64 target")
   endif()
+  if (NOT "RISCV" IN_LIST LLVM_TARGETS_TO_BUILD)
+      message(FATAL_ERROR "LLVM was built without RISC-V target")
+  endif()
 
   ## Setup googletest ##
   FetchContent_MakeAvailable_Args(googletest EXCLUDE_FROM_ALL)
@@ -218,7 +219,7 @@ if(SIMENG_ENABLE_TESTS)
   # saves us from having to build all targets before running the tests
   add_custom_target(test-all
     COMMAND ${CMAKE_CTEST_COMMAND}
-    DEPENDS unittests regression-aarch64
+    DEPENDS unittests regression-aarch64 regression-riscv
   )
 endif()
 

diff --git a/configs/DEMO_RISCV.yaml b/configs/DEMO_RISCV.yaml
@@ -0,0 +1,141 @@
+---
+# This file is based off of the current tx2.yaml config and serves as an example configuration for RISC-V cores.
+# The following resources were utilised to create the config file and naming schemes:
+# https://en.wikichip.org/wiki/cavium/microarchitectures/vulcan
+
+Core:
+  ISA: rv64
+  Simulation-Mode: outoforder
+  Clock-Frequency: 2.5
+  Fetch-Block-Size: 32
+Fetch:
+  Fetch-Block-Size: 32
+  Loop-Buffer-Size: 0
+  Loop-Detection-Threshold: 0
+Process-Image:
+  Heap-Size: 1073741824
+  Stack-Size: 1048576
+Register-Set:
+  GeneralPurpose-Count: 154
+  FloatingPoint-Count: 90
+Pipeline-Widths:
+  Commit: 4
+  Dispatch-Rate: 4
+  FrontEnd: 4
+  LSQ-Completion: 2
+Queue-Sizes:
+  ROB: 180
+  Load: 64
+  Store: 36
+Branch-Predictor:
+  BTB-Tag-Bits: 11
+  Saturating-Count-Bits: 2
+  Global-History-Length: 10
+  RAS-entries: 1
+  Fallback-Static-Predictor: "Always-Taken"
+  Branch-Predictor:
+  BTB-bitlength: 16
+L1-Data-Memory:
+  Interface-Type: Fixed
+L1-Instruction-Memory:
+  Interface-Type: Flat
+LSQ-L1-Interface:
+  Access-Latency: 4
+  Exclusive: False
+  Load-Bandwidth: 32
+  Store-Bandwidth: 16
+  Permitted-Requests-Per-Cycle: 2
+  Permitted-Loads-Per-Cycle: 2
+  Permitted-Stores-Per-Cycle: 1
+Ports:
+  0:
+    Portname: Port 0
+    Instruction-Support:
+      - INT_SIMPLE
+      - INT_MUL
+  1:
+    Portname: Port 1
+    Instruction-Support:
+      - INT
+  2:
+    Portname: Port 2
+    Instruction-Support:
+      - INT_SIMPLE
+      - INT_MUL
+      - BRANCH
+  3:
+    Portname: Port 4
+    Instruction-Support:
+      - LOAD
+  4:
+    Portname: Port 5
+    Instruction-Support:
+      - LOAD
+  5:
+    Portname: Port 3
+    Instruction-Support:
+      - STORE
+Reservation-Stations:
+  0:
+    Size: 60
+    Dispatch-Rate: 4
+    Ports:
+      - Port 0
+      - Port 1
+      - Port 2
+      - Port 4
+      - Port 5
+      - Port 3
+Execution-Units:
+  0:
+    Pipelined: True
+  1:
+    Pipelined: True
+  2:
+    Pipelined: True
+  3:
+    Pipelined: True
+  4:
+    Pipelined: True
+  5:
+    Pipelined: True
+Latencies:
+  0:
+    Instruction-Groups:
+      - INT_SIMPLE_ARTH
+      - INT_SIMPLE_LOGICAL
+    Execution-Latency: 1
+    Execution-Throughput: 1
+  1:
+    Instruction-Groups:
+      - INT_MUL
+    Execution-Latency: 5
+    Execution-Throughput: 1
+  2:
+    Instruction-Groups:
+      - INT_DIV
+    Execution-Latency: 39
+    Execution-Throughput: 39
+# CPU-Info mainly used to generate a replica of the special (or system) file directory
+# structure
+CPU-Info:
+  # Set Generate-Special-Dir to true to generate the special files directory, or to false to not.
+  # (Not generating the special files directory may require the user to copy over files manually)
+  Generate-Special-Dir: true
+  # Core-Count MUST be 1 as multi-core is not supported at this time. (TX2 true value is 32)
+  Core-Count: 1
+  # Socket-Count MUST be 1 as multi-socket simulations are not supported at this time. (TX2 true value is 2)
+  Socket-Count: 1
+  # SMT MUST be 1 as Simultaneous Multi-Threading is not supported at this time. (TX2 true value is 4)
+  SMT: 1
+  # Below are the values needed to generate /proc/cpuinfo
+  BogoMIPS: 400.00
+  Features: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics cpuid asimdrdm
+  CPU-Implementer: "0x43"
+  CPU-Architecture: 8
+  CPU-Variant: "0x1"
+  CPU-Part: "0x0af"
+  CPU-Revision: 2
+  # Package-Count is used to generate
+  # /sys/devices/system/cpu/cpu{0..Core-Count}/topology/{physical_package_id, core_id}
+  Package-Count: 1
diff --git a/configs/a64fx.yaml b/configs/a64fx.yaml
@@ -3,6 +3,7 @@
 # https://github.com/fujitsu/A64FX
 
 Core:
+  ISA: AArch64
   Simulation-Mode: outoforder
   # Clock Frequency is in GHz.
   Clock-Frequency: 1.8
@@ -134,71 +135,71 @@ Execution-Units:
     - SVE_DIV_OR_SQRT
   1:
     Pipelined: True
-    Blocking-Groups: 
+    Blocking-Groups:
     - INT_DIV_OR_SQRT
     - FP_DIV_OR_SQRT
     - SVE_DIV_OR_SQRT
   2:
     Pipelined: True
-    Blocking-Groups: 
+    Blocking-Groups:
     - INT_DIV_OR_SQRT
     - FP_DIV_OR_SQRT
     - SVE_DIV_OR_SQRT
   3:
     Pipelined: True
-    Blocking-Groups: 
+    Blocking-Groups:
     - INT_DIV_OR_SQRT
     - FP_DIV_OR_SQRT
     - SVE_DIV_OR_SQRT
   4:
     Pipelined: True
-    Blocking-Groups: 
+    Blocking-Groups:
     - INT_DIV_OR_SQRT
     - FP_DIV_OR_SQRT
     - SVE_DIV_OR_SQRT
   5:
     Pipelined: True
-    Blocking-Groups: 
+    Blocking-Groups:
     - INT_DIV_OR_SQRT
     - FP_DIV_OR_SQRT
     - SVE_DIV_OR_SQRT
   6:
     Pipelined: True
-    Blocking-Groups: 
+    Blocking-Groups:
     - INT_DIV_OR_SQRT
     - FP_DIV_OR_SQRT
     - SVE_DIV_OR_SQRT
   7:
     Pipelined: True
-    Blocking-Groups: 
+    Blocking-Groups:
     - INT_DIV_OR_SQRT
     - FP_DIV_OR_SQRT
     - SVE_DIV_OR_SQRT
 Latencies:
   0:
-    Instruction-Groups: 
+    Instruction-Groups:
     - INT
     Execution-Latency: 2
     Execution-Throughput: 2
   1:
-    Instruction-Groups: 
+    Instruction-Groups:
     - INT_SIMPLE_ARTH_NOSHIFT
     - INT_SIMPLE_LOGICAL_NOSHIFT
     - INT_SIMPLE_CVT
     Execution-Latency: 1
     Execution-Throughput: 1
   2:
-    Instruction-Groups: 
+    Instruction-Groups:
     - INT_MUL
     Execution-Latency: 5
     Execution-Throughput: 1
   3:
-    Instruction-Groups: 
+    Instruction-Groups:
     - INT_DIV_OR_SQRT
     Execution-Latency: 41
     Execution-Throughput: 41
   4:
-    Instruction-Groups: 
+    Instruction-Groups:
     - SCALAR_SIMPLE
     - VECTOR_SIMPLE_LOGICAL
     - SVE_SIMPLE_LOGICAL
@@ -207,12 +208,12 @@ Latencies:
     Execution-Latency: 4
     Execution-Throughput: 1
   5:
-    Instruction-Groups: 
+    Instruction-Groups:
     - FP_DIV_OR_SQRT
     Execution-Latency: 29
     Execution-Throughput: 29
   6:
-    Instruction-Groups: 
+    Instruction-Groups:
     - VECTOR_SIMPLE
     - SVE_SIMPLE
     - SCALAR_SIMPLE_CVT
@@ -221,30 +222,30 @@ Latencies:
     Execution-Latency: 9
     Execution-Throughput: 1
   7:
-    Instruction-Groups: 
+    Instruction-Groups:
     - SVE_DIV_OR_SQRT
     Execution-Latency: 98
     Execution-Throughput: 98
   8:
-    Instruction-Groups: 
+    Instruction-Groups:
     - PREDICATE
     Execution-Latency: 3
     Execution-Throughput: 1
   9:
-    Instruction-Groups: 
+    Instruction-Groups:
     - LOAD_SCALAR
     - LOAD_VECTOR
     - STORE_ADDRESS_SCALAR
     - STORE_ADDRESS_VECTOR
     Execution-Latency: 3
     Execution-Throughput: 1
   10:
-    Instruction-Groups: 
+    Instruction-Groups:
     - LOAD_SVE
     - STORE_ADDRESS_SVE
     Execution-Latency: 6
     Execution-Throughput: 1
-# CPU-Info mainly used to generate a replica of the special (or system) file directory 
+# CPU-Info mainly used to generate a replica of the special (or system) file directory
 # structure
 CPU-Info:
   # Set Generate-Special-Dir to True to generate the special files directory, or to False to not.
@@ -264,6 +265,6 @@ CPU-Info:
   CPU-Variant: "0x1"
   CPU-Part: "0x001"
   CPU-Revision: 0
-  # Package-Count is used to generate 
+  # Package-Count is used to generate
   # /sys/devices/system/cpu/cpu{0..Core-Count}/topology/{physical_package_id, core_id}
   Package-Count: 1
diff --git a/configs/m1_firestorm.yaml b/configs/m1_firestorm.yaml
@@ -1,5 +1,6 @@
 # M1 Firestorm core
 Core:
+  ISA: AArch64
   Simulation-Mode: outoforder
   Clock-Frequency: 3.2
   Timer-Frequency: 100

diff --git a/configs/sst-cores/a64fx-sst.yaml b/configs/sst-cores/a64fx-sst.yaml
@@ -3,6 +3,7 @@
 # https://github.com/fujitsu/A64FX
 
 Core:
+  ISA: AArch64
   Simulation-Mode: outoforder
   # Clock Frequency is in GHz.
   Clock-Frequency: 1.8

diff --git a/configs/sst-cores/m1_firestorm-sst.yaml b/configs/sst-cores/m1_firestorm-sst.yaml
@@ -1,5 +1,6 @@
 # M1 Firestorm core
 Core:
+  ISA: AArch64
   Simulation-Mode: outoforder
   Clock-Frequency: 3.2
   Timer-Frequency: 100

diff --git a/configs/sst-cores/tx2-sst.yaml b/configs/sst-cores/tx2-sst.yaml
@@ -3,6 +3,7 @@
 # https://en.wikichip.org/wiki/cavium/microarchitectures/vulcan
 
 Core:
+  ISA: AArch64
   Simulation-Mode: outoforder
   # Clock Frequency is in GHz.
   Clock-Frequency: 2.5