diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0e6f7e637e..542049f2f8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -72,7 +72,7 @@ set(CMAKE_MACOSX_RPATH 1)
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
 # Create variable to enable additional compiler warnings for SimEng targets only
-set(SIMENG_COMPILE_OPTIONS -Wall -pedantic) #-Wextra
+set(SIMENG_COMPILE_OPTIONS -Wall -pedantic -Werror) #-Wextra
 
 # Disable RTTI for all targets
 add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fno-rtti>)
diff --git a/configs/a64fx_SME.yaml b/configs/a64fx_SME.yaml
index b9159b8848..859fe2283a 100644
--- a/configs/a64fx_SME.yaml
+++ b/configs/a64fx_SME.yaml
@@ -62,12 +62,7 @@ Ports:
     Instruction-Group-Support:
     - INT_SIMPLE
     - INT_MUL
-    - STORE_DATA_INT
-    - STORE_DATA_SCALAR
-    - STORE_DATA_VECTOR
-    - STORE_DATA_SVE
-    - STORE_DATA_STREAMING_SVE
-    - STORE_DATA_SME
+    - STORE_DATA
   3:
     Portname: FLB
     Instruction-Group-Support:
@@ -83,36 +78,16 @@ Ports:
   5:
     Portname: EAGA
     Instruction-Group-Support:
-    - LOAD_INT
-    - LOAD_SCALAR
-    - LOAD_VECTOR
-    - LOAD_SVE
-    - LOAD_STREAMING_SVE
-    - LOAD_SME
-    - STORE_ADDRESS_INT
-    - STORE_ADDRESS_SCALAR
-    - STORE_ADDRESS_VECTOR
-    - STORE_ADDRESS_SVE
-    - STORE_ADDRESS_STREAMING_SVE
-    - STORE_ADDRESS_SME
+    - LOAD
+    - STORE_ADDRESS
     - INT_SIMPLE_ARTH_NOSHIFT
     - INT_SIMPLE_LOGICAL_NOSHIFT
     - INT_SIMPLE_CMP
   6:
     Portname: EAGB
     Instruction-Group-Support:
-    - LOAD_INT
-    - LOAD_SCALAR
-    - LOAD_VECTOR
-    - LOAD_SVE
-    - LOAD_STREAMING_SVE
-    - LOAD_SME
-    - STORE_ADDRESS_INT
-    - STORE_ADDRESS_SCALAR
-    - STORE_ADDRESS_VECTOR
-    - STORE_ADDRESS_SVE
-    - STORE_ADDRESS_STREAMING_SVE
-    - STORE_ADDRESS_SME
+    - LOAD
+    - STORE_ADDRESS
     - INT_SIMPLE_ARTH_NOSHIFT
     - INT_SIMPLE_LOGICAL_NOSHIFT
     - INT_SIMPLE_CMP
diff --git a/docs/sphinx/assets/instruction_groups_AArch64.png b/docs/sphinx/assets/instruction_groups_AArch64.png
index ba3319435f..f877fd7795 100644
Binary files a/docs/sphinx/assets/instruction_groups_AArch64.png and b/docs/sphinx/assets/instruction_groups_AArch64.png differ
diff --git a/src/include/simeng/Register.hh b/src/include/simeng/Register.hh
index 5758d8e67b..0152813268 100644
--- a/src/include/simeng/Register.hh
+++ b/src/include/simeng/Register.hh
@@ -1,6 +1,5 @@
 #pragma once
 
 #include
-#include
 
 namespace simeng {
diff --git a/src/include/simeng/arch/aarch64/Architecture.hh b/src/include/simeng/arch/aarch64/Architecture.hh
index 1041bf85fe..a654fc897a 100644
--- a/src/include/simeng/arch/aarch64/Architecture.hh
+++ b/src/include/simeng/arch/aarch64/Architecture.hh
@@ -74,7 +74,7 @@ class Architecture : public arch::Architecture {
   bool isStreamingModeEnabled() const;
 
   /** Returns if the SME ZA Register is enabled. */
-  bool isZA_RegisterEnabled() const;
+  bool isZARegisterEnabled() const;
 
   /** Update the value of SVCRval_. */
   void setSVCRval(const uint64_t newVal) const;
diff --git a/src/include/simeng/arch/aarch64/Instruction.hh b/src/include/simeng/arch/aarch64/Instruction.hh
index 94941b552d..44263432a1 100644
--- a/src/include/simeng/arch/aarch64/Instruction.hh
+++ b/src/include/simeng/arch/aarch64/Instruction.hh
@@ -374,7 +374,7 @@ class Instruction : public simeng::Instruction {
    * this instruction was first decoded, and updates the instruction group
    * accordingly if required.
    * Returns TRUE if the group was updated, FALSE otherwise.
    */
-  bool checkStreamingGroup();
+  bool checkStreamingGroupAndUpdate();
 
  private:
  /** Process the instruction's metadata to determine source/destination
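The a64fx_SME.yaml hunks above replace the six STORE_DATA_* (and LOAD_*/STORE_ADDRESS_*) leaf groups with the single parent groups introduced by this change. As a minimal sketch of how a config parser could expand such a parent group into its leaves — the helper name `expandGroup` and the string-based table are hypothetical, not SimEng's API:

```cpp
#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical helper: expand a "parent" group named in a port's
// Instruction-Group-Support list into the leaf groups the simulator
// actually assigns to instructions.
std::vector<std::string> expandGroup(const std::string& group) {
  // Illustrative subset of the hierarchy; the full mapping lives in SimEng.
  static const std::unordered_map<std::string, std::vector<std::string>>
      parentGroups = {
          {"STORE_DATA",
           {"STORE_DATA_INT", "STORE_DATA_SCALAR", "STORE_DATA_VECTOR",
            "STORE_DATA_SVE", "STORE_DATA_STREAMING_SVE", "STORE_DATA_SME"}},
          {"LOAD",
           {"LOAD_INT", "LOAD_SCALAR", "LOAD_VECTOR", "LOAD_SVE",
            "LOAD_STREAMING_SVE", "LOAD_SME"}}};
  auto it = parentGroups.find(group);
  if (it == parentGroups.end()) return {group};  // Already a leaf group.
  return it->second;
}
```

With such an expansion, listing `STORE_DATA` on port EXD is equivalent to the six entries the hunk removes, which is why the config shrinks without changing behaviour.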
diff --git a/src/include/simeng/arch/aarch64/InstructionGroups.hh b/src/include/simeng/arch/aarch64/InstructionGroups.hh
index 3c28712537..6c58ff4976 100644
--- a/src/include/simeng/arch/aarch64/InstructionGroups.hh
+++ b/src/include/simeng/arch/aarch64/InstructionGroups.hh
@@ -4,7 +4,33 @@ namespace simeng {
 namespace arch {
 namespace aarch64 {
 
-/** The IDs of the instruction groups for AArch64 instructions. */
+/** The IDs of the instruction groups for AArch64 instructions.
+ * Each new group must contain 14 entries to ensure correct group assignment
+ * and general functionality.
+ * Their order must be as follows:
+ *  - BASE
+ *  - BASE_SIMPLE
+ *  - BASE_SIMPLE_ARTH
+ *  - BASE_SIMPLE_ARTH_NOSHIFT
+ *  - BASE_SIMPLE_LOGICAL
+ *  - BASE_SIMPLE_LOGICAL_NOSHIFT
+ *  - BASE_SIMPLE_CMP
+ *  - BASE_SIMPLE_CVT
+ *  - BASE_MUL
+ *  - BASE_DIV_OR_SQRT
+ *  - LOAD_BASE
+ *  - STORE_ADDRESS_BASE
+ *  - STORE_DATA_BASE
+ *  - STORE_BASE
+ *
+ * An exception to the above is "Parent" groups, which do not require the
+ * LOAD_* or STORE_* groups.
+ * "Parent" groups allow for easier grouping of similar groups that may have
+ * identical execution latencies, ports, etc. For example, FP is the parent
+ * group of SCALAR and VECTOR.
+ * In simulation, an instruction's allocated group will never be a "Parent"
+ * group; they are only used to simplify config file creation and management.
+ */
 namespace InstructionGroups {
 const uint16_t INT = 0;
 const uint16_t INT_SIMPLE = 1;
diff --git a/src/lib/arch/aarch64/Architecture.cc b/src/lib/arch/aarch64/Architecture.cc
index 9b8a20dd6d..fea53d7767 100644
--- a/src/lib/arch/aarch64/Architecture.cc
+++ b/src/lib/arch/aarch64/Architecture.cc
@@ -193,7 +193,7 @@ uint8_t Architecture::predecode(const uint8_t* ptr, uint16_t bytesAvailable,
     // Check if SVE or Predicate instructions need their group updating due to
     // SVE Streaming Mode activeness being different from when the instruction
     // was first decoded.
-    if (cachedInsn.checkStreamingGroup()) {
+    if (cachedInsn.checkStreamingGroupAndUpdate()) {
       // If the instruction's group has changed then update its execution info.
       // The newly set group is most likely to be the most accurate, as an
       // incorrect group allocation is only achieved when an exception/flush is
@@ -299,7 +299,7 @@ void Architecture::setSVCRval(const uint64_t newVal) const {
 bool Architecture::isStreamingModeEnabled() const { return SVCRval_ & 1; }
 
 // 1st bit of SVCR register determines if ZA register is enabled.
-bool Architecture::isZA_RegisterEnabled() const { return SVCRval_ & 2; }
+bool Architecture::isZARegisterEnabled() const { return SVCRval_ & 2; }
 
 }  // namespace aarch64
 }  // namespace arch
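The two accessors above decode SVCR as a plain bit-field: bit 0 gates SVE streaming mode, bit 1 gates the SME ZA register, so SVCR=3 enables both. A standalone restatement (not SimEng code, just the same two expressions checked at compile time):

```cpp
#include <cstdint>

// Bit 0 of SVCR: SVE streaming mode; bit 1: SME ZA register.
constexpr bool streamingModeEnabled(uint64_t svcr) { return svcr & 1; }
constexpr bool zaRegisterEnabled(uint64_t svcr) { return svcr & 2; }

// The four states exercised by ArchitectureTest's isSM_ZA_enabled test.
static_assert(!streamingModeEnabled(0) && !zaRegisterEnabled(0));
static_assert(streamingModeEnabled(1) && !zaRegisterEnabled(1));
static_assert(!streamingModeEnabled(2) && zaRegisterEnabled(2));
static_assert(streamingModeEnabled(3) && zaRegisterEnabled(3));
```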
diff --git a/src/lib/arch/aarch64/Instruction.cc b/src/lib/arch/aarch64/Instruction.cc
index 967a72b7ba..2811c72625 100644
--- a/src/lib/arch/aarch64/Instruction.cc
+++ b/src/lib/arch/aarch64/Instruction.cc
@@ -164,7 +164,7 @@ const Architecture& Instruction::getArchitecture() const {
 
 InstructionException Instruction::getException() const { return exception_; }
 
-bool Instruction::checkStreamingGroup() {
+bool Instruction::checkStreamingGroupAndUpdate() {
   // Only instruction groups that depend on SVE Streaming Mode are SVE and
   // PREDICATE
   const uint16_t currentGroup = instructionGroup_;
diff --git a/src/lib/arch/aarch64/Instruction_address.cc b/src/lib/arch/aarch64/Instruction_address.cc
index ef5f73f15c..f3026b7b50 100644
--- a/src/lib/arch/aarch64/Instruction_address.cc
+++ b/src/lib/arch/aarch64/Instruction_address.cc
@@ -91,8 +91,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({{sourceValues_[2].get<uint64_t>(), 8}});
       break;
     }
-    case Opcode::AArch64_LD1_MXIPXX_V_B:  // ld1b {zatv.b[ws, #imm]}, pg/z,
-                                          //  [<xn|sp>{, xm}]
+    case Opcode::AArch64_LD1_MXIPXX_V_B:  // ld1b {zatv.b[ws, #imm]}, pg/z,
+                                          //  [<xn|sp>{, xm}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_LD1_MXIPXX_H_B: {  // ld1b {zath.b[ws, #imm]}, pg/z,
                                             //  [<xn|sp>{, xm}]
                                             // SME
@@ -104,8 +106,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({(n + m), static_cast<uint16_t>(VL_bits / 8)});
       break;
     }
-    case Opcode::AArch64_LD1_MXIPXX_V_D:  // ld1d {zatv.d[ws, #imm]}, pg/z,
-                                          //  [<xn|sp>{, xm, lsl #3}]
+    case Opcode::AArch64_LD1_MXIPXX_V_D:  // ld1d {zatv.d[ws, #imm]}, pg/z,
+                                          //  [<xn|sp>{, xm, lsl #3}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_LD1_MXIPXX_H_D: {  // ld1d {zath.d[ws, #imm]}, pg/z,
                                             //  [<xn|sp>{, xm, lsl #3}]
                                             // SME
@@ -117,8 +121,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({(n + m), static_cast<uint16_t>(VL_bits / 8)});
       break;
     }
-    case Opcode::AArch64_LD1_MXIPXX_V_H:  // ld1h {zatv.h[ws, #imm]}, pg/z,
-                                          //  [<xn|sp>{, xm, lsl #1}]
+    case Opcode::AArch64_LD1_MXIPXX_V_H:  // ld1h {zatv.h[ws, #imm]}, pg/z,
+                                          //  [<xn|sp>{, xm, lsl #1}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_LD1_MXIPXX_H_H: {  // ld1h {zath.h[ws, #imm]}, pg/z,
                                             //  [<xn|sp>{, xm, lsl #1}]
                                             // SME
@@ -130,8 +136,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({(n + m), static_cast<uint16_t>(VL_bits / 8)});
       break;
     }
-    case Opcode::AArch64_LD1_MXIPXX_V_Q:  // ld1q {zatv.q[ws]}, pg/z,
-                                          //  [<xn|sp>{, xm, lsl #4}]
+    case Opcode::AArch64_LD1_MXIPXX_V_Q:  // ld1q {zatv.q[ws]}, pg/z,
+                                          //  [<xn|sp>{, xm, lsl #4}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_LD1_MXIPXX_H_Q: {  // ld1q {zath.q[ws]}, pg/z,
                                             //  [<xn|sp>{, xm, lsl #4}]
                                             // SME
@@ -143,8 +151,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({(n + m), static_cast<uint16_t>(VL_bits / 8)});
       break;
     }
-    case Opcode::AArch64_LD1_MXIPXX_V_S:  // ld1w {zatv.s[ws, #imm]}, pg/z,
-                                          //  [<xn|sp>{, xm, LSL #2}]
+    case Opcode::AArch64_LD1_MXIPXX_V_S:  // ld1w {zatv.s[ws, #imm]}, pg/z,
+                                          //  [<xn|sp>{, xm, LSL #2}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_LD1_MXIPXX_H_S: {  // ld1w {zath.s[ws, #imm]}, pg/z,
                                             //  [<xn|sp>{, xm, LSL #2}]
                                             // SME
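The hunks above and below insert explicit `[[fallthrough]];` between case labels now that -Werror makes every warning fatal. The C++17 attribute documents deliberate fall-through so compilers that diagnose it (e.g. via -Wimplicit-fallthrough) stay quiet. A minimal, generic illustration of the attribute, unrelated to SimEng's opcodes:

```cpp
#include <cstdio>

// [[fallthrough]] marks intentional fall-through from one case body into
// the next; without it, a fallthrough warning would fail the build under
// -Werror on compilers that enable the diagnostic.
void describe(int width) {
  switch (width) {
    case 8:
      std::puts("byte-sized");
      [[fallthrough]];  // Deliberate: byte widths are also integral widths.
    case 16:
    case 32:
      std::puts("integral width");
      break;
    default:
      std::puts("other");
      break;
  }
}
```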
@@ -551,19 +561,32 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({{sourceValues_[0].get<uint64_t>() + offset, 8}});
       break;
     }
-    case Opcode::AArch64_LDRBui:   // ldr bt, [xn, #imm]
-    case Opcode::AArch64_LDRBpre:  // ldr bt, [xn, #imm]!
-    case Opcode::AArch64_LDRDui:   // ldr dt, [xn, #imm]
-    case Opcode::AArch64_LDRDpre:  // ldr dt, [xn, #imm]!
-    case Opcode::AArch64_LDRHui:   // ldr ht, [xn, #imm]
-    case Opcode::AArch64_LDRHpre:  // ldr ht, [xn, #imm]!
-    case Opcode::AArch64_LDRQui:   // ldr qt, [xn, #imm]
-    case Opcode::AArch64_LDRQpre:  // ldr qt, [xn, #imm]!
-    case Opcode::AArch64_LDRSui:   // ldr st, [xn, #imm]
-    case Opcode::AArch64_LDRSpre:  // ldr st, [xn, #imm]!
-    case Opcode::AArch64_LDRWui:   // ldr wt, [xn, #imm]
-    case Opcode::AArch64_LDRWpre:  // ldr wt, [xn, #imm]!
-    case Opcode::AArch64_LDRXui:   // ldr xt, [xn, #imm]
+    case Opcode::AArch64_LDRBui:  // ldr bt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDRBpre:  // ldr bt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_LDRDui:  // ldr dt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDRDpre:  // ldr dt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_LDRHui:  // ldr ht, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDRHpre:  // ldr ht, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_LDRQui:  // ldr qt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDRQpre:  // ldr qt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_LDRSui:  // ldr st, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDRSpre:  // ldr st, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_LDRWui:  // ldr wt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDRWpre:  // ldr wt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_LDRXui:  // ldr xt, [xn, #imm]
+      [[fallthrough]];
    case Opcode::AArch64_LDRXpre: {  // ldr xt, [xn, #imm]!
       std::vector addresses;
       generateContiguousAddresses(
@@ -572,12 +595,18 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(addresses);
       break;
     }
-    case Opcode::AArch64_LDRBpost:  // ldr bt, [xn], #imm
-    case Opcode::AArch64_LDRDpost:  // ldr dt, [xn], #imm
-    case Opcode::AArch64_LDRHpost:  // ldr ht, [xn], #imm
-    case Opcode::AArch64_LDRQpost:  // ldr qt, [xn], #imm
-    case Opcode::AArch64_LDRSpost:  // ldr st, [xn], #imm
-    case Opcode::AArch64_LDRWpost:  // ldr wt, [xn], #imm
+    case Opcode::AArch64_LDRBpost:  // ldr bt, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDRDpost:  // ldr dt, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDRHpost:  // ldr ht, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDRQpost:  // ldr qt, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDRSpost:  // ldr st, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDRWpost:  // ldr wt, [xn], #imm
+      [[fallthrough]];
     case Opcode::AArch64_LDRXpost: {  // ldr xt, [xn], #imm
       std::vector addresses;
       generateContiguousAddresses(sourceValues_[0].get<uint64_t>(), 1,
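All of these LDR/LDP variants funnel into `generateContiguousAddresses(...)`, whose full signature is not shown in this diff. A hypothetical stand-in that captures the idea — `AccessTarget` is an assumed type, not SimEng's `MemoryAccessTarget` declaration:

```cpp
#include <cstdint>
#include <vector>

// Assumed shape of a memory access request: a byte address plus a size.
struct AccessTarget {
  uint64_t address;
  uint16_t size;
};

// Hypothetical equivalent of the generateContiguousAddresses(...) helper:
// emit `count` back-to-back accesses of `size` bytes starting at `base`.
std::vector<AccessTarget> contiguousAddresses(uint64_t base, unsigned count,
                                              uint16_t size) {
  std::vector<AccessTarget> targets;
  targets.reserve(count);
  for (unsigned i = 0; i < count; i++)
    targets.push_back({base + static_cast<uint64_t>(i) * size, size});
  return targets;
}

// e.g. `ldp xt1, xt2, [xn, #imm]` would request
// contiguousAddresses(xn + imm, 2, 8): two adjacent 8-byte loads.
```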
@@ -695,15 +724,24 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({{base, 4}, {base + 4, 4}});
       break;
     }
-    case Opcode::AArch64_LDPDi:    // ldp dt1, dt2, [xn, #imm]
-    case Opcode::AArch64_LDPDpre:  // ldp dt1, dt2, [xn, #imm!]
-    case Opcode::AArch64_LDPQi:    // ldp qt1, qt2, [xn, #imm]
-    case Opcode::AArch64_LDPQpre:  // ldp qt1, qt2, [xn, #imm!]
-    case Opcode::AArch64_LDPSi:    // ldp st1, st2, [xn, #imm]
-    case Opcode::AArch64_LDPSpre:  // ldp st1, st2, [xn, #imm!]
-    case Opcode::AArch64_LDPWi:    // ldp wt1, wt2, [xn, #imm]
-    case Opcode::AArch64_LDPWpre:  // ldp wt1, wt2, [xn, #imm!]
-    case Opcode::AArch64_LDPXi:    // ldp xt1, xt2, [xn, #imm]
+    case Opcode::AArch64_LDPDi:  // ldp dt1, dt2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPDpre:  // ldp dt1, dt2, [xn, #imm!]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPQi:  // ldp qt1, qt2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPQpre:  // ldp qt1, qt2, [xn, #imm!]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPSi:  // ldp st1, st2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPSpre:  // ldp st1, st2, [xn, #imm!]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPWi:  // ldp wt1, wt2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPWpre:  // ldp wt1, wt2, [xn, #imm!]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPXi:  // ldp xt1, xt2, [xn, #imm]
+      [[fallthrough]];
     case Opcode::AArch64_LDPXpre: {  // ldp xt1, xt2, [xn, #imm!]
       std::vector addresses;
       generateContiguousAddresses(
@@ -712,10 +750,14 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(addresses);
       break;
     }
-    case Opcode::AArch64_LDPDpost:  // ldp dt1, dt2, [xn], #imm
-    case Opcode::AArch64_LDPQpost:  // ldp qt1, qt2, [xn], #imm
-    case Opcode::AArch64_LDPSpost:  // ldp st1, st2, [xn], #imm
-    case Opcode::AArch64_LDPWpost:  // ldp wt1, wt2, [xn], #imm
+    case Opcode::AArch64_LDPDpost:  // ldp dt1, dt2, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDPQpost:  // ldp qt1, qt2, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDPSpost:  // ldp st1, st2, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDPWpost:  // ldp wt1, wt2, [xn], #imm
+      [[fallthrough]];
     case Opcode::AArch64_LDPXpost: {  // ldp xt1, xt2, [xn], #imm
       std::vector addresses;
       generateContiguousAddresses(sourceValues_[0].get<uint64_t>(), 2,
@@ -1008,8 +1050,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(std::move(addresses));
       break;
     }
-    case Opcode::AArch64_ST1_MXIPXX_H_B:  // st1b {zath.b[ws, #imm]}, pg,
-                                          //  [<xn|sp>{, xm}]
+    case Opcode::AArch64_ST1_MXIPXX_H_B:  // st1b {zath.b[ws, #imm]}, pg,
+                                          //  [<xn|sp>{, xm}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_ST1_MXIPXX_V_B: {  // st1b {zatv.b[ws, #imm]}, pg,
                                             //  [<xn|sp>{, xm}]
                                             // SME
@@ -1029,8 +1073,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(std::move(addresses));
       break;
     }
-    case Opcode::AArch64_ST1_MXIPXX_H_D:  // st1d {zath.d[ws, #imm]}, pg,
-                                          //  [<xn|sp>{, xm, lsl #3}]
+    case Opcode::AArch64_ST1_MXIPXX_H_D:  // st1d {zath.d[ws, #imm]}, pg,
+                                          //  [<xn|sp>{, xm, lsl #3}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_ST1_MXIPXX_V_D: {  // st1d {zatv.d[ws, #imm]}, pg,
                                             //  [<xn|sp>{, xm, lsl #3}]
                                             // SME
@@ -1050,8 +1096,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(std::move(addresses));
       break;
     }
-    case Opcode::AArch64_ST1_MXIPXX_H_H:  // st1h {zath.h[ws, #imm]}, pg,
-                                          //  [<xn|sp>{, xm, lsl #1}]
+    case Opcode::AArch64_ST1_MXIPXX_H_H:  // st1h {zath.h[ws, #imm]}, pg,
+                                          //  [<xn|sp>{, xm, lsl #1}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_ST1_MXIPXX_V_H: {  // st1h {zatv.h[ws, #imm]}, pg,
                                             //  [<xn|sp>{, xm, lsl #1}]
                                             // SME
@@ -1073,6 +1121,8 @@ span Instruction::generateAddresses() {
     }
     case Opcode::AArch64_ST1_MXIPXX_H_Q:  // st1q {zath.q[ws]}, pg, [<xn|sp>{,
                                           //  xm, lsl #4}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_ST1_MXIPXX_V_Q: {  // st1q {zatv.q[ws]}, pg,
                                             //  [<xn|sp>{, xm, lsl #4}]
                                             // SME
@@ -1092,8 +1142,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(std::move(addresses));
       break;
     }
-    case Opcode::AArch64_ST1_MXIPXX_H_S:  // st1w {zath.s[ws, #imm]}, pg/z,
-                                          //  [<xn|sp>{, xm, LSL #2}]
+    case Opcode::AArch64_ST1_MXIPXX_H_S:  // st1w {zath.s[ws, #imm]}, pg/z,
+                                          //  [<xn|sp>{, xm, LSL #2}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_ST1_MXIPXX_V_S: {  // st1w {zatv.s[ws, #imm]}, pg/z,
                                             //  [<xn|sp>{, xm, LSL #2}]
                                             // SME
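The SME ld1/st1 slice accesses above compute a base of `xn` plus the optional `xm` offset (the `(n + m)` seen earlier) and touch one ZA slice, which spans VL bits, i.e. VL/8 bytes. A compile-time restatement of that sizing, assuming only what the hunks show:

```cpp
#include <cstdint>

// One ZA slice covers the full vector length: VL/8 bytes. A 512-bit
// implementation therefore reads or writes 64 bytes per slice access.
constexpr uint16_t sliceBytes(uint16_t vlBits) {
  return static_cast<uint16_t>(vlBits / 8);
}

static_assert(sliceBytes(512) == 64);
static_assert(sliceBytes(2048) == 256);  // Architectural maximum SVL.
```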
@@ -1471,15 +1523,24 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({{sourceValues_[1].get<uint64_t>(), 8}});
       break;
     }
-    case Opcode::AArch64_STPDi:    // stp dt1, dt2, [xn, #imm]
-    case Opcode::AArch64_STPDpre:  // stp dt1, dt2, [xn, #imm]!
-    case Opcode::AArch64_STPQi:    // stp qt1, qt2, [xn, #imm]
-    case Opcode::AArch64_STPQpre:  // stp qt1, qt2, [xn, #imm]!
-    case Opcode::AArch64_STPSi:    // stp st1, st2, [xn, #imm]
-    case Opcode::AArch64_STPSpre:  // stp st1, st2, [xn, #imm]!
-    case Opcode::AArch64_STPWi:    // stp wt1, wt2, [xn, #imm]
-    case Opcode::AArch64_STPWpre:  // stp wt1, wt2, [xn, #imm]!
-    case Opcode::AArch64_STPXi:    // stp xt1, xt2, [xn, #imm]
+    case Opcode::AArch64_STPDi:  // stp dt1, dt2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STPDpre:  // stp dt1, dt2, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STPQi:  // stp qt1, qt2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STPQpre:  // stp qt1, qt2, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STPSi:  // stp st1, st2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STPSpre:  // stp st1, st2, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STPWi:  // stp wt1, wt2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STPWpre:  // stp wt1, wt2, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STPXi:  // stp xt1, xt2, [xn, #imm]
+      [[fallthrough]];
     case Opcode::AArch64_STPXpre: {  // stp xt1, xt2, [xn, #imm]!
       std::vector addresses;
       generateContiguousAddresses(
@@ -1488,10 +1549,14 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(addresses);
       break;
     }
-    case Opcode::AArch64_STPDpost:  // stp dt1, dt2, [xn], #imm
-    case Opcode::AArch64_STPQpost:  // stp qt1, qt2, [xn], #imm
-    case Opcode::AArch64_STPSpost:  // stp st1, st2, [xn], #imm
-    case Opcode::AArch64_STPWpost:  // stp wt1, wt2, [xn], #imm
+    case Opcode::AArch64_STPDpost:  // stp dt1, dt2, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STPQpost:  // stp qt1, qt2, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STPSpost:  // stp st1, st2, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STPWpost:  // stp wt1, wt2, [xn], #imm
+      [[fallthrough]];
     case Opcode::AArch64_STPXpost: {  // stp xt1, xt2, [xn], #imm
       std::vector addresses;
       generateContiguousAddresses(sourceValues_[2].get<uint64_t>(), 2,
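The ui/pre variants above generate addresses from `xn + imm`, while the post variants use `xn` directly; the `#imm` only affects the written-back base. A small sketch of the two addressing modes, independent of SimEng's types:

```cpp
#include <cstdint>

// Pre-index ([xn, #imm]!) accesses xn+imm and writes that address back to xn;
// post-index ([xn], #imm) accesses xn, then writes back xn+imm.
struct IndexedAccess {
  uint64_t accessAddress;
  uint64_t writebackValue;
};

constexpr IndexedAccess preIndex(uint64_t xn, int64_t imm) {
  return {xn + imm, xn + imm};
}
constexpr IndexedAccess postIndex(uint64_t xn, int64_t imm) {
  return {xn, xn + imm};
}

static_assert(preIndex(0x1000, 16).accessAddress == 0x1010);
static_assert(postIndex(0x1000, 16).accessAddress == 0x1000);
static_assert(postIndex(0x1000, 16).writebackValue == 0x1010);
```

This is why the pre/ui cases share one body (offset applied to the base) and the post cases share another (base used as-is).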
@@ -1541,19 +1606,32 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({{sourceValues_[1].get<uint64_t>() + offset, 8}});
       break;
     }
-    case Opcode::AArch64_STRBui:   // str bt, [xn, #imm]
-    case Opcode::AArch64_STRBpre:  // str bt, [xn, #imm]!
-    case Opcode::AArch64_STRDui:   // str dt, [xn, #imm]
-    case Opcode::AArch64_STRDpre:  // str dt, [xn, #imm]!
-    case Opcode::AArch64_STRHui:   // str ht, [xn, #imm]
-    case Opcode::AArch64_STRHpre:  // str ht, [xn, #imm]!
-    case Opcode::AArch64_STRQui:   // str qt, [xn, #imm]
-    case Opcode::AArch64_STRQpre:  // str qt, [xn, #imm]!
-    case Opcode::AArch64_STRSui:   // str st, [xn, #imm]
-    case Opcode::AArch64_STRSpre:  // str st, [xn, #imm]!
-    case Opcode::AArch64_STRWui:   // str wt, [xn, #imm]
-    case Opcode::AArch64_STRWpre:  // str wt, [xn, #imm]!
-    case Opcode::AArch64_STRXui:   // str xt, [xn, #imm]
+    case Opcode::AArch64_STRBui:  // str bt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STRBpre:  // str bt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STRDui:  // str dt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STRDpre:  // str dt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STRHui:  // str ht, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STRHpre:  // str ht, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STRQui:  // str qt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STRQpre:  // str qt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STRSui:  // str st, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STRSpre:  // str st, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STRWui:  // str wt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STRWpre:  // str wt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STRXui:  // str xt, [xn, #imm]
+      [[fallthrough]];
     case Opcode::AArch64_STRXpre: {  // str xt, [xn, #imm]!
       std::vector addresses;
       generateContiguousAddresses(
@@ -1562,12 +1640,18 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(addresses);
       break;
     }
-    case Opcode::AArch64_STRBpost:  // str bt, [xn], #imm
-    case Opcode::AArch64_STRDpost:  // str dt, [xn], #imm
-    case Opcode::AArch64_STRHpost:  // str ht, [xn], #imm
-    case Opcode::AArch64_STRQpost:  // str qt, [xn], #imm
-    case Opcode::AArch64_STRSpost:  // str st, [xn], #imm
-    case Opcode::AArch64_STRWpost:  // str wt, [xn], #imm
+    case Opcode::AArch64_STRBpost:  // str bt, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STRDpost:  // str dt, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STRHpost:  // str ht, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STRQpost:  // str qt, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STRSpost:  // str st, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STRWpost:  // str wt, [xn], #imm
+      [[fallthrough]];
     case Opcode::AArch64_STRXpost: {  // str xt, [xn], #imm
       std::vector addresses;
       generateContiguousAddresses(sourceValues_[1].get<uint64_t>(), 1,
diff --git a/src/lib/arch/aarch64/Instruction_decode.cc b/src/lib/arch/aarch64/Instruction_decode.cc
index 585e75df01..38915d3186 100644
--- a/src/lib/arch/aarch64/Instruction_decode.cc
+++ b/src/lib/arch/aarch64/Instruction_decode.cc
@@ -733,7 +733,7 @@ void Instruction::decode() {
     } else if (isInstruction(InsnType::isShift))
       group += 2;
     else
-      group += 3;  // Default is {Data type}_SIMPLE_ARTH
+      group += 3;  // Default is {Data type}_SIMPLE_ARTH_NOSHIFT
     instructionGroup_ = group;
   }
 
diff --git a/src/lib/arch/aarch64/Instruction_execute.cc b/src/lib/arch/aarch64/Instruction_execute.cc
index 7e2478b06a..8f222c11ac 100644
--- a/src/lib/arch/aarch64/Instruction_execute.cc
+++ b/src/lib/arch/aarch64/Instruction_execute.cc
@@ -69,7 +69,7 @@ void Instruction::execute() {
   // 0th bit of SVCR register determines if streaming-mode is enabled.
   const bool SMenabled = architecture_.isStreamingModeEnabled();
   // 1st bit of SVCR register determines if ZA register is enabled.
-  const bool ZAenabled = architecture_.isZA_RegisterEnabled();
+  const bool ZAenabled = architecture_.isZARegisterEnabled();
   // When streaming mode is enabled, the architectural vector length goes from
   // SVE's VL to SME's SVL.
   const uint16_t VL_bits = SMenabled ? architecture_.getStreamingVectorLength()
                                      : architecture_.getVectorLength();
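The `VL_bits` selection above is the whole of the streaming-mode length rule: one effective vector length per executed instruction, switched on the SM bit. Restated in isolation (a sketch; the real values come from the Architecture object):

```cpp
#include <cstdint>

// In streaming mode the effective vector length is the streaming vector
// length (SVL); otherwise it is SVE's configured VL.
uint16_t effectiveVectorLength(bool smEnabled, uint16_t vlBits,
                               uint16_t svlBits) {
  return smEnabled ? svlBits : vlBits;
}
// e.g. with VL=512 and SVL=2048, toggling SM quadruples every per-slice
// access size and element count downstream of this one decision.
```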
@@ -125,8 +125,9 @@ void Instruction::execute() {
         const uint64_t* zaRow = sourceValues_[row].getAsVector<uint64_t>();
         uint64_t out[32] = {0};
         std::memcpy(out, zaRow, rowCount * sizeof(uint64_t));
-        // Slice element is active IFF:
-        // - Element in 1st source pred corresponding to horiz. slice is TRUE
+        // Slice element is active IFF all of the following conditions hold:
+        // - Element in 1st source pred corresponding to horizontal
+        //   slice is TRUE
         // - Corresponding element in 2nd source pred is TRUE
         const uint64_t shifted_active_pn = 1ull << ((row % 8) * 8);
         if (pn[row / 8] & shifted_active_pn) {
@@ -158,8 +159,9 @@ void Instruction::execute() {
         const uint32_t* zaRow = sourceValues_[row].getAsVector<uint32_t>();
         uint32_t out[64] = {0};
         std::memcpy(out, zaRow, rowCount * sizeof(uint32_t));
-        // Slice element is active IFF:
-        // - Element in 1st source pred corresponding to horiz. slice is TRUE
+        // Slice element is active IFF all of the following conditions hold:
+        // - Element in 1st source pred corresponding to horizontal
+        //   slice is TRUE
         // - Corresponding element in 2nd source pred is TRUE
         const uint64_t shifted_active_pn = 1ull << ((row % 16) * 4);
         if (pn[row / 16] & shifted_active_pn) {
@@ -191,9 +193,10 @@ void Instruction::execute() {
         const uint64_t* zaRow = sourceValues_[row].getAsVector<uint64_t>();
         uint64_t out[32] = {0};
         std::memcpy(out, zaRow, rowCount * sizeof(uint64_t));
-        // Slice element is active IFF:
+        // Slice element is active IFF all of the following conditions hold:
         // - Corresponding element in 1st source pred is TRUE
-        // - Element in 2nd source pred corresponding to vert. slice is TRUE
+        // - Element in 2nd source pred corresponding to vertical
+        //   slice is TRUE
         const uint64_t shifted_active_pn = 1ull << ((row % 8) * 8);
         if (pn[row / 8] & shifted_active_pn) {
           // Corresponding slice element is active (i.e. all elements in row).
@@ -227,9 +230,10 @@ void Instruction::execute() {
         const uint32_t* zaRow = sourceValues_[row].getAsVector<uint32_t>();
         uint32_t out[64] = {0};
         std::memcpy(out, zaRow, rowCount * sizeof(uint32_t));
-        // Slice element is active IFF:
+        // Slice element is active IFF all of the following conditions hold:
         // - Corresponding element in 1st source pred is TRUE
-        // - Element in 2nd source pred corresponding to vert. slice is TRUE
+        // - Element in 2nd source pred corresponding to vertical
+        //   slice is TRUE
         const uint64_t shifted_active_pn = 1ull << ((row % 16) * 4);
         if (pn[row / 16] & shifted_active_pn) {
           // Corresponding slice element is active (i.e. all elements in row).
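The recurring `1ull << ((row % 8) * 8)` and `1ull << ((row % 16) * 4)` expressions follow SVE's predicate layout: one predicate bit per byte of vector data, packed into 64-bit chunks, so the governing bit of element i sits `i * elementBytes` bits into the register. A generalised form of the test the hunks repeat per element width:

```cpp
#include <cstdint>

// SVE predicate layout: one bit per byte of vector data. For elements of
// `bytes` width, element i is governed by bit ((i % perChunk) * bytes) of
// 64-bit predicate chunk pred[i / perChunk], where perChunk = 64 / bytes.
bool elementActive(const uint64_t* pred, uint64_t i, uint64_t bytes) {
  const uint64_t perChunk = 64 / bytes;         // Elements per uint64_t chunk.
  const uint64_t bit = (i % perChunk) * bytes;  // Governing bit position.
  return pred[i / perChunk] & (1ull << bit);
}

// e.g. for 64-bit elements, elementActive(pn, row, 8) reproduces
// `pn[row / 8] & (1ull << ((row % 8) * 8))` from the code above.
```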
@@ -3177,11 +3181,12 @@ void Instruction::execute() {
       const uint8_t* zn = sourceValues_[rowCount + 2].getAsVector<uint8_t>();
 
       for (uint16_t i = 0; i < rowCount; i++) {
-        uint8_t* row =
-            const_cast<uint8_t*>(sourceValues_[i].getAsVector<uint8_t>());
+        const uint8_t* row = sourceValues_[i].getAsVector<uint8_t>();
+        uint8_t out[256] = {0};
+        memcpy(out, row, rowCount * sizeof(uint8_t));
         uint64_t shifted_active = 1ull << (i % 64);
-        if (pg[i / 64] & shifted_active) row[sliceNum] = zn[i];
-        results_[i] = {(char*)row, 256};
+        if (pg[i / 64] & shifted_active) out[sliceNum] = zn[i];
+        results_[i] = {out, 256};
       }
       break;
     }
@@ -3203,11 +3208,12 @@ void Instruction::execute() {
           sourceValues_[rowCount + 2].getAsVector<uint64_t>();
 
       for (uint16_t i = 0; i < rowCount; i++) {
-        uint64_t* row =
-            const_cast<uint64_t*>(sourceValues_[i].getAsVector<uint64_t>());
+        const uint64_t* row = sourceValues_[i].getAsVector<uint64_t>();
+        uint64_t out[32] = {0};
+        memcpy(out, row, rowCount * sizeof(uint64_t));
         uint64_t shifted_active = 1ull << ((i % 8) * 8);
-        if (pg[i / 8] & shifted_active) row[sliceNum] = zn[i];
-        results_[i] = {(char*)row, 256};
+        if (pg[i / 8] & shifted_active) out[sliceNum] = zn[i];
+        results_[i] = {out, 256};
       }
       break;
     }
@@ -3229,11 +3235,12 @@ void Instruction::execute() {
           sourceValues_[rowCount + 2].getAsVector<uint16_t>();
 
       for (uint16_t i = 0; i < rowCount; i++) {
-        uint16_t* row =
-            const_cast<uint16_t*>(sourceValues_[i].getAsVector<uint16_t>());
+        const uint16_t* row = sourceValues_[i].getAsVector<uint16_t>();
+        uint16_t out[128] = {0};
+        memcpy(out, row, rowCount * sizeof(uint16_t));
         uint64_t shifted_active = 1ull << ((i % 32) * 2);
-        if (pg[i / 32] & shifted_active) row[sliceNum] = zn[i];
-        results_[i] = {(char*)row, 256};
+        if (pg[i / 32] & shifted_active) out[sliceNum] = zn[i];
+        results_[i] = {out, 256};
       }
       break;
     }
@@ -3254,16 +3261,18 @@ void Instruction::execute() {
 
       for (uint16_t i = 0; i < rowCount; i++) {
         // Use uint64_t in place of 128-bit
-        uint64_t* row =
-            const_cast<uint64_t*>(sourceValues_[i].getAsVector<uint64_t>());
+        const uint64_t* row = sourceValues_[i].getAsVector<uint64_t>();
+        uint64_t out[32] = {0};
+        // *2 in memcpy as we need 128-bit elements but are using uint64_t
+        memcpy(out, row, rowCount * sizeof(uint64_t) * 2);
         // For 128-bit there are 16-bit for each active element
         uint64_t shifted_active = 1ull << ((i % 4) * 16);
         if (pg[i / 4] & shifted_active) {
           // Need to move two consecutive 64-bit elements
-          row[2 * sliceNum] = zn[2 * i];
-          row[2 * sliceNum + 1] = zn[2 * i + 1];
+          out[2 * sliceNum] = zn[2 * i];
+          out[2 * sliceNum + 1] = zn[2 * i + 1];
         }
-        results_[i] = {(char*)row, 256};
+        results_[i] = {out, 256};
       }
       break;
     }
@@ -3285,11 +3294,12 @@ void Instruction::execute() {
           sourceValues_[rowCount + 2].getAsVector<uint32_t>();
 
      for (uint16_t i = 0; i < rowCount; i++) {
-        uint32_t* row =
-            const_cast<uint32_t*>(sourceValues_[i].getAsVector<uint32_t>());
+        const uint32_t* row = sourceValues_[i].getAsVector<uint32_t>();
+        uint32_t out[64] = {0};
+        memcpy(out, row, rowCount * sizeof(uint32_t));
         uint64_t shifted_active = 1ull << ((i % 16) * 4);
-        if (pg[i / 16] & shifted_active) row[sliceNum] = zn[i];
-        results_[i] = {(char*)row, 256};
+        if (pg[i / 16] & shifted_active) out[sliceNum] = zn[i];
+        results_[i] = {out, 256};
       }
       break;
     }
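Every hunk in this block replaces the same anti-pattern: `const_cast`ing a source operand and mutating it in place. The rewrite copies the ZA row into a scratch buffer, applies the predicated update there, and publishes the buffer as the result, leaving source operands immutable. A distilled sketch of the pattern — `Result` stands in for SimEng's RegisterValue, and rowCount <= 32 is assumed as in the 64-bit cases above:

```cpp
#include <cstdint>
#include <cstring>

// Stand-in for a 256-byte register result; not SimEng's RegisterValue.
struct Result {
  char bytes[256];
};

// Copy-on-write update of one ZA row: never alias or mutate the source.
Result updatedRow(const uint64_t* row, uint16_t rowCount, uint16_t sliceNum,
                  uint64_t value, bool active) {
  uint64_t out[32] = {0};                              // 32 * 8 = 256 bytes.
  std::memcpy(out, row, rowCount * sizeof(uint64_t));  // Copy, don't alias.
  if (active) out[sliceNum] = value;                   // Predicated update.
  Result r;
  std::memcpy(r.bytes, out, sizeof(out));
  return r;
}
```

Mutating through the old `const_cast` wrote results into operands other in-flight instructions might still read, which is precisely what the flush/replay machinery cannot tolerate.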
@@ -3507,15 +3517,14 @@ void Instruction::execute() {
       const uint8_t* data = memoryData_[0].getAsVector<uint8_t>();
 
       for (int i = 0; i < partition_num; i++) {
-        uint8_t* row =
-            const_cast<uint8_t*>(sourceValues_[i].getAsVector<uint8_t>());
+        const uint8_t* row = sourceValues_[i].getAsVector<uint8_t>();
+        uint8_t out[256] = {0};
+        memcpy(out, row, partition_num * sizeof(uint8_t));
         uint64_t shifted_active = 1ull << (i % 64);
         if (pg[i / 64] & shifted_active) {
-          row[sliceNum] = data[i];
-        } else {
-          row[sliceNum] = 0;
+          out[sliceNum] = data[i];
         }
-        results_[i] = RegisterValue(reinterpret_cast<char*>(row), 256);
+        results_[i] = RegisterValue(out, 256);
       }
       break;
     }
@@ -3535,15 +3544,14 @@ void Instruction::execute() {
       const uint64_t* data = memoryData_[0].getAsVector<uint64_t>();
 
       for (int i = 0; i < partition_num; i++) {
-        uint64_t* row =
-            const_cast<uint64_t*>(sourceValues_[i].getAsVector<uint64_t>());
+        const uint64_t* row = sourceValues_[i].getAsVector<uint64_t>();
+        uint64_t out[32] = {0};
+        memcpy(out, row, partition_num * sizeof(uint64_t));
         uint64_t shifted_active = 1ull << ((i % 8) * 8);
         if (pg[i / 8] & shifted_active) {
-          row[sliceNum] = data[i];
-        } else {
-          row[sliceNum] = 0;
+          out[sliceNum] = data[i];
         }
-        results_[i] = RegisterValue(reinterpret_cast<char*>(row), 256);
+        results_[i] = RegisterValue(out, 256);
       }
       break;
     }
@@ -3563,15 +3571,14 @@ void Instruction::execute() {
       const uint16_t* data = memoryData_[0].getAsVector<uint16_t>();
 
       for (int i = 0; i < partition_num; i++) {
-        uint16_t* row =
-            const_cast<uint16_t*>(sourceValues_[i].getAsVector<uint16_t>());
+        const uint16_t* row = sourceValues_[i].getAsVector<uint16_t>();
+        uint16_t out[128] = {0};
+        memcpy(out, row, partition_num * sizeof(uint16_t));
         uint64_t shifted_active = 1ull << ((i % 32) * 2);
         if (pg[i / 32] & shifted_active) {
-          row[sliceNum] = data[i];
-        } else {
-          row[sliceNum] = 0;
+          out[sliceNum] = data[i];
         }
-        results_[i] = RegisterValue(reinterpret_cast<char*>(row), 256);
+        results_[i] = RegisterValue(out, 256);
       }
       break;
     }
@@ -3592,20 +3599,18 @@ void Instruction::execute() {
 
       for (int i = 0; i < partition_num; i++) {
         // Using uint64_t as no 128-bit data type
-        uint64_t* row =
-            const_cast<uint64_t*>(sourceValues_[i].getAsVector<uint64_t>());
+        const uint64_t* row = sourceValues_[i].getAsVector<uint64_t>();
+        uint64_t out[32] = {0};
+        // *2 in memcpy as we need 128-bit elements but are using uint64_t
+        memcpy(out, row, partition_num * sizeof(uint64_t) * 2);
         // For 128-bit there are 16-bit for each active element
         uint64_t shifted_active = 1ull << ((i % 4) * 16);
         if (pg[i / 4] & shifted_active) {
           // As using uint64_t need to modify 2 elements
-          row[2 * sliceNum] = data[2 * i];
-          row[2 * sliceNum + 1] = data[2 * i + 1];
-        } else {
-          // As using uint64_t need to modify 2 elements
-          row[2 * sliceNum] = 0;
-          row[2 * sliceNum + 1] = 0;
+          out[2 * sliceNum] = data[2 * i];
+          out[2 * sliceNum + 1] = data[2 * i + 1];
         }
-        results_[i] = RegisterValue(reinterpret_cast<char*>(row), 256);
+        results_[i] = RegisterValue(out, 256);
       }
       break;
     }
@@ -3625,15 +3630,14 @@ void Instruction::execute() {
       const uint32_t* data = memoryData_[0].getAsVector<uint32_t>();
 
       for (int i = 0; i < partition_num; i++) {
-        uint32_t* row =
-            const_cast<uint32_t*>(sourceValues_[i].getAsVector<uint32_t>());
+        const uint32_t* row = sourceValues_[i].getAsVector<uint32_t>();
+        uint32_t out[64] = {0};
+        memcpy(out, row, partition_num * sizeof(uint32_t));
         uint64_t shifted_active = 1ull << ((i % 16) * 4);
         if (pg[i / 16] & shifted_active) {
-          row[sliceNum] = data[i];
-        } else {
-          row[sliceNum] = 0;
+          out[sliceNum] = data[i];
        }
-        results_[i] = RegisterValue(reinterpret_cast<char*>(row), 256);
+        results_[i] = RegisterValue(out, 256);
      }
      break;
    }
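Note that these hunks publish stack buffers via `results_[i] = RegisterValue(out, 256);`, so they are only correct because the register-value type deep-copies the bytes it is handed. A minimal copying value type that makes the lifetime assumption explicit — this is an illustrative class, not SimEng's RegisterValue implementation:

```cpp
#include <cstring>

// A value type that owns its bytes. Constructing it from a stack buffer is
// safe because the bytes are copied before the buffer goes out of scope.
// Assumes size <= 256, as in the 256-byte ZA rows above.
class OwnedValue {
 public:
  OwnedValue(const void* data, unsigned size) : size_(size) {
    std::memcpy(bytes_, data, size);  // Deep copy: valid after `data` dies.
  }

  const char* data() const { return bytes_; }
  unsigned size() const { return size_; }

 private:
  char bytes_[256];
  unsigned size_;
};
```

If the type merely retained the pointer, every `results_[i]` here would dangle as soon as the loop iteration ended.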
diff --git a/src/lib/arch/aarch64/MicroDecoder.cc b/src/lib/arch/aarch64/MicroDecoder.cc
index 0fc3233f4c..3376f34d61 100644
--- a/src/lib/arch/aarch64/MicroDecoder.cc
+++ b/src/lib/arch/aarch64/MicroDecoder.cc
@@ -620,7 +620,7 @@ uint8_t MicroDecoder::decode(const Architecture& architecture, uint32_t word,
         // Check if SVE or Predicate instructions need their group updating due
         // to SVE Streaming Mode activeness being different from when the
         // instruction was first decoded.
-        if (cachedUops[uop].checkStreamingGroup()) {
+        if (cachedUops[uop].checkStreamingGroupAndUpdate()) {
          // If the instruction's group has changed then update its execution
           // info. The newly set group is most likely to be the most accurate,
           // as an incorrect group allocation is only achieved when an
diff --git a/test/unit/aarch64/ArchitectureTest.cc b/test/unit/aarch64/ArchitectureTest.cc
index f9ed6cb678..8c6d06f52c 100644
--- a/test/unit/aarch64/ArchitectureTest.cc
+++ b/test/unit/aarch64/ArchitectureTest.cc
@@ -286,19 +286,19 @@ TEST_F(AArch64ArchitectureTest, get_set_SVCRVal) {
 
 TEST_F(AArch64ArchitectureTest, isSM_ZA_enabled) {
   EXPECT_FALSE(arch->isStreamingModeEnabled());
-  EXPECT_FALSE(arch->isZA_RegisterEnabled());
+  EXPECT_FALSE(arch->isZARegisterEnabled());
   arch->setSVCRval(1);
   EXPECT_TRUE(arch->isStreamingModeEnabled());
-  EXPECT_FALSE(arch->isZA_RegisterEnabled());
+  EXPECT_FALSE(arch->isZARegisterEnabled());
   arch->setSVCRval(2);
   EXPECT_FALSE(arch->isStreamingModeEnabled());
-  EXPECT_TRUE(arch->isZA_RegisterEnabled());
+  EXPECT_TRUE(arch->isZARegisterEnabled());
   arch->setSVCRval(3);
   EXPECT_TRUE(arch->isStreamingModeEnabled());
-  EXPECT_TRUE(arch->isZA_RegisterEnabled());
+  EXPECT_TRUE(arch->isZARegisterEnabled());
   arch->setSVCRval(0);
   EXPECT_FALSE(arch->isStreamingModeEnabled());
-  EXPECT_FALSE(arch->isZA_RegisterEnabled());
+  EXPECT_FALSE(arch->isZARegisterEnabled());
 }
 
 }  // namespace aarch64
diff --git a/test/unit/aarch64/InstructionTest.cc b/test/unit/aarch64/InstructionTest.cc
index 8c0077c45c..95366d96e7 100644
--- a/test/unit/aarch64/InstructionTest.cc
+++ b/test/unit/aarch64/InstructionTest.cc
@@ -642,7 +642,7 @@ TEST_F(AArch64InstructionTest, setters) {
   EXPECT_TRUE(insn.isWaitingCommit());
 }
 
-TEST_F(AArch64InstructionTest, checkStreamingGroup) {
+TEST_F(AArch64InstructionTest, checkStreamingGroupAndUpdate) {
   EXPECT_FALSE(arch.isStreamingModeEnabled());
   // Insn is `fdivr z1.s, p0/m, z1.s, z0.s`
   Instruction SVE_insn = Instruction(arch, *fdivMetadata.get(), MicroOpInfo());
@@ -655,42 +655,42 @@ TEST_F(AArch64InstructionTest, checkStreamingGroup) {
   Instruction PRED_insn = Instruction(arch, *pselMetadata.get(), MicroOpInfo());
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::PREDICATE);
 
-  // Without changing SVE Streaming Mode, calling checkStreamingGroup should
-  // have no effect
+  // Without changing SVE Streaming Mode, calling checkStreamingGroupAndUpdate
+  // should have no effect
   EXPECT_FALSE(arch.isStreamingModeEnabled());
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::PREDICATE);
-  EXPECT_FALSE(SVE_insn.checkStreamingGroup());
-  EXPECT_FALSE(nonSVE_insn.checkStreamingGroup());
-  EXPECT_FALSE(PRED_insn.checkStreamingGroup());
+  EXPECT_FALSE(SVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_FALSE(nonSVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_FALSE(PRED_insn.checkStreamingGroupAndUpdate());
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::PREDICATE);
 
-  // Updating SVE Streaming Mode should mean calling checkStreamingGroup changes
-  // SVE and PRED groups
+  // Updating SVE Streaming Mode should mean calling
+  // checkStreamingGroupAndUpdate changes SVE and PRED groups
   arch.setSVCRval(3);
   EXPECT_TRUE(arch.isStreamingModeEnabled());
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::PREDICATE);
-  EXPECT_TRUE(SVE_insn.checkStreamingGroup());
-  EXPECT_FALSE(nonSVE_insn.checkStreamingGroup());
-  EXPECT_TRUE(PRED_insn.checkStreamingGroup());
+  EXPECT_TRUE(SVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_FALSE(nonSVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_TRUE(PRED_insn.checkStreamingGroupAndUpdate());
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::STREAMING_SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::STREAMING_PREDICATE);
 
-  // Calling checkStreamingGroup again should have no effect on SVE and PRED
-  // groups, and should return false as a result
+  // Calling checkStreamingGroupAndUpdate again should have no effect on SVE
+  // and PRED groups, and should return false as a result
   EXPECT_TRUE(arch.isStreamingModeEnabled());
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::STREAMING_SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::STREAMING_PREDICATE);
-  EXPECT_FALSE(SVE_insn.checkStreamingGroup());
-  EXPECT_FALSE(nonSVE_insn.checkStreamingGroup());
-  EXPECT_FALSE(PRED_insn.checkStreamingGroup());
+  EXPECT_FALSE(SVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_FALSE(nonSVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_FALSE(PRED_insn.checkStreamingGroupAndUpdate());
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::STREAMING_SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::STREAMING_PREDICATE);
@@ -702,9 +702,9 @@ TEST_F(AArch64InstructionTest, checkStreamingGroup) {
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::STREAMING_SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::STREAMING_PREDICATE);
-  EXPECT_TRUE(SVE_insn.checkStreamingGroup());
-  EXPECT_FALSE(nonSVE_insn.checkStreamingGroup());
-  EXPECT_TRUE(PRED_insn.checkStreamingGroup());
+  EXPECT_TRUE(SVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_FALSE(nonSVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_TRUE(PRED_insn.checkStreamingGroupAndUpdate());
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::PREDICATE);
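The behaviour this test pins down, restated as a sketch: when streaming mode toggles, SVE groups map to their STREAMING_SVE counterparts (and PREDICATE to STREAMING_PREDICATE) and back, while unrelated groups such as BRANCH are untouched. The enum values below are illustrative only, not SimEng's actual group IDs:

```cpp
#include <cstdint>

enum Group : uint16_t {
  SVE_DIV_OR_SQRT,
  STREAMING_SVE_DIV_OR_SQRT,
  PREDICATE,
  STREAMING_PREDICATE,
  BRANCH
};

// Return the group appropriate to the current streaming-mode state; callers
// can compare against the old group to learn whether an update occurred,
// mirroring checkStreamingGroupAndUpdate()'s boolean return.
Group remapForStreamingMode(Group g, bool smEnabled) {
  switch (g) {
    case SVE_DIV_OR_SQRT:
    case STREAMING_SVE_DIV_OR_SQRT:
      return smEnabled ? STREAMING_SVE_DIV_OR_SQRT : SVE_DIV_OR_SQRT;
    case PREDICATE:
    case STREAMING_PREDICATE:
      return smEnabled ? STREAMING_PREDICATE : PREDICATE;
    default:
      return g;  // e.g. BRANCH never changes with streaming mode.
  }
}
```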