diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0e6f7e637e..542049f2f8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -72,7 +72,7 @@ set(CMAKE_MACOSX_RPATH 1)
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
 # Create variable to enable additional compiler warnings for SimEng targets only
-set(SIMENG_COMPILE_OPTIONS -Wall -pedantic) #-Wextra
+set(SIMENG_COMPILE_OPTIONS -Wall -pedantic -Werror) #-Wextra
 
 # Disable RTTI for all targets
 add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fno-rtti>)
diff --git a/configs/a64fx_SME.yaml b/configs/a64fx_SME.yaml
index b9159b8848..859fe2283a 100644
--- a/configs/a64fx_SME.yaml
+++ b/configs/a64fx_SME.yaml
@@ -62,12 +62,7 @@ Ports:
     Instruction-Group-Support:
     - INT_SIMPLE
     - INT_MUL
-    - STORE_DATA_INT
-    - STORE_DATA_SCALAR
-    - STORE_DATA_VECTOR
-    - STORE_DATA_SVE
-    - STORE_DATA_STREAMING_SVE
-    - STORE_DATA_SME
+    - STORE_DATA
   3:
     Portname: FLB
     Instruction-Group-Support:
@@ -83,36 +78,16 @@ Ports:
   5:
     Portname: EAGA
     Instruction-Group-Support:
-    - LOAD_INT
-    - LOAD_SCALAR
-    - LOAD_VECTOR
-    - LOAD_SVE
-    - LOAD_STREAMING_SVE
-    - LOAD_SME
-    - STORE_ADDRESS_INT
-    - STORE_ADDRESS_SCALAR
-    - STORE_ADDRESS_VECTOR
-    - STORE_ADDRESS_SVE
-    - STORE_ADDRESS_STREAMING_SVE
-    - STORE_ADDRESS_SME
+    - LOAD
+    - STORE_ADDRESS
     - INT_SIMPLE_ARTH_NOSHIFT
     - INT_SIMPLE_LOGICAL_NOSHIFT
     - INT_SIMPLE_CMP
   6:
     Portname: EAGB
     Instruction-Group-Support:
-    - LOAD_INT
-    - LOAD_SCALAR
-    - LOAD_VECTOR
-    - LOAD_SVE
-    - LOAD_STREAMING_SVE
-    - LOAD_SME
-    - STORE_ADDRESS_INT
-    - STORE_ADDRESS_SCALAR
-    - STORE_ADDRESS_VECTOR
-    - STORE_ADDRESS_SVE
-    - STORE_ADDRESS_STREAMING_SVE
-    - STORE_ADDRESS_SME
+    - LOAD
+    - STORE_ADDRESS
     - INT_SIMPLE_ARTH_NOSHIFT
     - INT_SIMPLE_LOGICAL_NOSHIFT
     - INT_SIMPLE_CMP
diff --git a/docs/sphinx/assets/instruction_groups_AArch64.png b/docs/sphinx/assets/instruction_groups_AArch64.png
index ba3319435f..f877fd7795 100644
Binary files a/docs/sphinx/assets/instruction_groups_AArch64.png and b/docs/sphinx/assets/instruction_groups_AArch64.png differ
diff --git a/src/include/simeng/Register.hh b/src/include/simeng/Register.hh
index 5758d8e67b..0152813268 100644
--- a/src/include/simeng/Register.hh
+++ b/src/include/simeng/Register.hh
@@ -1,6 +1,5 @@
 #pragma once
 
 #include
-#include
 
 namespace simeng {
diff --git a/src/include/simeng/arch/aarch64/Architecture.hh b/src/include/simeng/arch/aarch64/Architecture.hh
index 1041bf85fe..a654fc897a 100644
--- a/src/include/simeng/arch/aarch64/Architecture.hh
+++ b/src/include/simeng/arch/aarch64/Architecture.hh
@@ -74,7 +74,7 @@ class Architecture : public arch::Architecture {
   bool isStreamingModeEnabled() const;
 
   /** Returns if the SME ZA Register is enabled. */
-  bool isZA_RegisterEnabled() const;
+  bool isZARegisterEnabled() const;
 
   /** Update the value of SVCRval_. */
   void setSVCRval(const uint64_t newVal) const;
diff --git a/src/include/simeng/arch/aarch64/Instruction.hh b/src/include/simeng/arch/aarch64/Instruction.hh
index 94941b552d..44263432a1 100644
--- a/src/include/simeng/arch/aarch64/Instruction.hh
+++ b/src/include/simeng/arch/aarch64/Instruction.hh
@@ -374,7 +374,7 @@ class Instruction : public simeng::Instruction {
    * this instruction was first decoded, and updates the instruction group
    * accordingly if required.
    * Returns TRUE if the group was updated, FALSE otherwise.
    */
-  bool checkStreamingGroup();
+  bool checkStreamingGroupAndUpdate();
 
  private:
  /** Process the instruction's metadata to determine source/destination
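The a64fx_SME.yaml hunks above replace the six STORE_DATA_* (and LOAD_*/STORE_ADDRESS_*) leaf groups with the single parent groups introduced by this change. As a minimal sketch of how a config parser could expand such a parent group into its leaves — the helper name `expandGroup` and the string-based table are hypothetical, not SimEng's API:

```cpp
#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical helper: expand a "parent" group named in a port's
// Instruction-Group-Support list into the leaf groups the simulator
// actually assigns to instructions.
std::vector<std::string> expandGroup(const std::string& group) {
  // Illustrative subset of the hierarchy; the full mapping lives in SimEng.
  static const std::unordered_map<std::string, std::vector<std::string>>
      parentGroups = {
          {"STORE_DATA",
           {"STORE_DATA_INT", "STORE_DATA_SCALAR", "STORE_DATA_VECTOR",
            "STORE_DATA_SVE", "STORE_DATA_STREAMING_SVE", "STORE_DATA_SME"}},
          {"LOAD",
           {"LOAD_INT", "LOAD_SCALAR", "LOAD_VECTOR", "LOAD_SVE",
            "LOAD_STREAMING_SVE", "LOAD_SME"}}};
  auto it = parentGroups.find(group);
  if (it == parentGroups.end()) return {group};  // Already a leaf group.
  return it->second;
}
```

With such an expansion, listing `STORE_DATA` on port EXD is equivalent to the six entries the hunk removes, which is why the config shrinks without changing behaviour.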
diff --git a/src/include/simeng/arch/aarch64/InstructionGroups.hh b/src/include/simeng/arch/aarch64/InstructionGroups.hh
index 3c28712537..6c58ff4976 100644
--- a/src/include/simeng/arch/aarch64/InstructionGroups.hh
+++ b/src/include/simeng/arch/aarch64/InstructionGroups.hh
@@ -4,7 +4,33 @@ namespace simeng {
 namespace arch {
 namespace aarch64 {
 
-/** The IDs of the instruction groups for AArch64 instructions. */
+/** The IDs of the instruction groups for AArch64 instructions.
+ * Each new group must contain 14 entries to ensure correct group assignment
+ * and general functionality.
+ * Their order must be as follows:
+ *  - BASE
+ *  - BASE_SIMPLE
+ *  - BASE_SIMPLE_ARTH
+ *  - BASE_SIMPLE_ARTH_NOSHIFT
+ *  - BASE_SIMPLE_LOGICAL
+ *  - BASE_SIMPLE_LOGICAL_NOSHIFT
+ *  - BASE_SIMPLE_CMP
+ *  - BASE_SIMPLE_CVT
+ *  - BASE_MUL
+ *  - BASE_DIV_OR_SQRT
+ *  - LOAD_BASE
+ *  - STORE_ADDRESS_BASE
+ *  - STORE_DATA_BASE
+ *  - STORE_BASE
+ *
+ * An exception to the above is "Parent" groups, which do not require the
+ * LOAD_* or STORE_* groups.
+ * "Parent" groups allow for easier grouping of similar groups that may have
+ * identical execution latencies, ports, etc. For example, FP is the parent
+ * group of SCALAR and VECTOR.
+ * In simulation, an instruction's allocated group will never be a "Parent"
+ * group; they are only used to simplify config file creation and management.
+ */
 namespace InstructionGroups {
 const uint16_t INT = 0;
 const uint16_t INT_SIMPLE = 1;
diff --git a/src/lib/arch/aarch64/Architecture.cc b/src/lib/arch/aarch64/Architecture.cc
index 9b8a20dd6d..fea53d7767 100644
--- a/src/lib/arch/aarch64/Architecture.cc
+++ b/src/lib/arch/aarch64/Architecture.cc
@@ -193,7 +193,7 @@ uint8_t Architecture::predecode(const uint8_t* ptr, uint16_t bytesAvailable,
     // Check if SVE or Predicate instructions need their group updating due to
     // SVE Streaming Mode activeness being different from when the instruction
     // was first decoded.
-    if (cachedInsn.checkStreamingGroup()) {
+    if (cachedInsn.checkStreamingGroupAndUpdate()) {
       // If the instruction's group has changed then update its execution info.
       // The newly set group is most likely to be the most accurate, as an
       // incorrect group allocation is only achieved when an exception/flush is
@@ -299,7 +299,7 @@ void Architecture::setSVCRval(const uint64_t newVal) const {
 bool Architecture::isStreamingModeEnabled() const { return SVCRval_ & 1; }
 
 // 1st bit of SVCR register determines if ZA register is enabled.
-bool Architecture::isZA_RegisterEnabled() const { return SVCRval_ & 2; }
+bool Architecture::isZARegisterEnabled() const { return SVCRval_ & 2; }
 
 }  // namespace aarch64
 }  // namespace arch
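The two accessors above decode SVCR as a plain bit-field: bit 0 gates SVE streaming mode, bit 1 gates the SME ZA register, so SVCR=3 enables both. A standalone restatement (not SimEng code, just the same two expressions checked at compile time):

```cpp
#include <cstdint>

// Bit 0 of SVCR: SVE streaming mode; bit 1: SME ZA register.
constexpr bool streamingModeEnabled(uint64_t svcr) { return svcr & 1; }
constexpr bool zaRegisterEnabled(uint64_t svcr) { return svcr & 2; }

// The four states exercised by ArchitectureTest's isSM_ZA_enabled test.
static_assert(!streamingModeEnabled(0) && !zaRegisterEnabled(0));
static_assert(streamingModeEnabled(1) && !zaRegisterEnabled(1));
static_assert(!streamingModeEnabled(2) && zaRegisterEnabled(2));
static_assert(streamingModeEnabled(3) && zaRegisterEnabled(3));
```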
diff --git a/src/lib/arch/aarch64/Instruction.cc b/src/lib/arch/aarch64/Instruction.cc
index 967a72b7ba..2811c72625 100644
--- a/src/lib/arch/aarch64/Instruction.cc
+++ b/src/lib/arch/aarch64/Instruction.cc
@@ -164,7 +164,7 @@ const Architecture& Instruction::getArchitecture() const {
 
 InstructionException Instruction::getException() const { return exception_; }
 
-bool Instruction::checkStreamingGroup() {
+bool Instruction::checkStreamingGroupAndUpdate() {
   // Only instruction groups that depend on SVE Streaming Mode are SVE and
   // PREDICATE
   const uint16_t currentGroup = instructionGroup_;
diff --git a/src/lib/arch/aarch64/Instruction_address.cc b/src/lib/arch/aarch64/Instruction_address.cc
index ef5f73f15c..f3026b7b50 100644
--- a/src/lib/arch/aarch64/Instruction_address.cc
+++ b/src/lib/arch/aarch64/Instruction_address.cc
@@ -91,8 +91,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({{sourceValues_[2].get<uint64_t>(), 8}});
       break;
     }
-    case Opcode::AArch64_LD1_MXIPXX_V_B:  // ld1b {zatv.b[ws, #imm]}, pg/z,
-                                          //  [<xn|sp>{, xm}]
+    case Opcode::AArch64_LD1_MXIPXX_V_B:  // ld1b {zatv.b[ws, #imm]}, pg/z,
+                                          //  [<xn|sp>{, xm}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_LD1_MXIPXX_H_B: {  // ld1b {zath.b[ws, #imm]}, pg/z,
                                             //  [<xn|sp>{, xm}]
                                             // SME
@@ -104,8 +106,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({(n + m), static_cast<uint16_t>(VL_bits / 8)});
       break;
     }
-    case Opcode::AArch64_LD1_MXIPXX_V_D:  // ld1d {zatv.d[ws, #imm]}, pg/z,
-                                          //  [<xn|sp>{, xm, lsl #3}]
+    case Opcode::AArch64_LD1_MXIPXX_V_D:  // ld1d {zatv.d[ws, #imm]}, pg/z,
+                                          //  [<xn|sp>{, xm, lsl #3}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_LD1_MXIPXX_H_D: {  // ld1d {zath.d[ws, #imm]}, pg/z,
                                             //  [<xn|sp>{, xm, lsl #3}]
                                             // SME
@@ -117,8 +121,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({(n + m), static_cast<uint16_t>(VL_bits / 8)});
       break;
     }
-    case Opcode::AArch64_LD1_MXIPXX_V_H:  // ld1h {zatv.h[ws, #imm]}, pg/z,
-                                          //  [<xn|sp>{, xm, lsl #1}]
+    case Opcode::AArch64_LD1_MXIPXX_V_H:  // ld1h {zatv.h[ws, #imm]}, pg/z,
+                                          //  [<xn|sp>{, xm, lsl #1}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_LD1_MXIPXX_H_H: {  // ld1h {zath.h[ws, #imm]}, pg/z,
                                             //  [<xn|sp>{, xm, lsl #1}]
                                             // SME
@@ -130,8 +136,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({(n + m), static_cast<uint16_t>(VL_bits / 8)});
       break;
     }
-    case Opcode::AArch64_LD1_MXIPXX_V_Q:  // ld1q {zatv.q[ws]}, pg/z,
-                                          //  [<xn|sp>{, xm, lsl #4}]
+    case Opcode::AArch64_LD1_MXIPXX_V_Q:  // ld1q {zatv.q[ws]}, pg/z,
+                                          //  [<xn|sp>{, xm, lsl #4}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_LD1_MXIPXX_H_Q: {  // ld1q {zath.q[ws]}, pg/z,
                                             //  [<xn|sp>{, xm, lsl #4}]
                                             // SME
@@ -143,8 +151,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({(n + m), static_cast<uint16_t>(VL_bits / 8)});
       break;
     }
-    case Opcode::AArch64_LD1_MXIPXX_V_S:  // ld1w {zatv.s[ws, #imm]}, pg/z,
-                                          //  [<xn|sp>{, xm, LSL #2}]
+    case Opcode::AArch64_LD1_MXIPXX_V_S:  // ld1w {zatv.s[ws, #imm]}, pg/z,
+                                          //  [<xn|sp>{, xm, LSL #2}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_LD1_MXIPXX_H_S: {  // ld1w {zath.s[ws, #imm]}, pg/z,
                                             //  [<xn|sp>{, xm, LSL #2}]
                                             // SME
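The hunks above and below insert explicit `[[fallthrough]];` between case labels now that -Werror makes every warning fatal. The C++17 attribute documents deliberate fall-through so compilers that diagnose it (e.g. via -Wimplicit-fallthrough) stay quiet. A minimal, generic illustration of the attribute, unrelated to SimEng's opcodes:

```cpp
#include <cstdio>

// [[fallthrough]] marks intentional fall-through from one case body into
// the next; without it, a fallthrough warning would fail the build under
// -Werror on compilers that enable the diagnostic.
void describe(int width) {
  switch (width) {
    case 8:
      std::puts("byte-sized");
      [[fallthrough]];  // Deliberate: byte widths are also integral widths.
    case 16:
    case 32:
      std::puts("integral width");
      break;
    default:
      std::puts("other");
      break;
  }
}
```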
@@ -551,19 +561,32 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({{sourceValues_[0].get<uint64_t>() + offset, 8}});
       break;
     }
-    case Opcode::AArch64_LDRBui:   // ldr bt, [xn, #imm]
-    case Opcode::AArch64_LDRBpre:  // ldr bt, [xn, #imm]!
-    case Opcode::AArch64_LDRDui:   // ldr dt, [xn, #imm]
-    case Opcode::AArch64_LDRDpre:  // ldr dt, [xn, #imm]!
-    case Opcode::AArch64_LDRHui:   // ldr ht, [xn, #imm]
-    case Opcode::AArch64_LDRHpre:  // ldr ht, [xn, #imm]!
-    case Opcode::AArch64_LDRQui:   // ldr qt, [xn, #imm]
-    case Opcode::AArch64_LDRQpre:  // ldr qt, [xn, #imm]!
-    case Opcode::AArch64_LDRSui:   // ldr st, [xn, #imm]
-    case Opcode::AArch64_LDRSpre:  // ldr st, [xn, #imm]!
-    case Opcode::AArch64_LDRWui:   // ldr wt, [xn, #imm]
-    case Opcode::AArch64_LDRWpre:  // ldr wt, [xn, #imm]!
-    case Opcode::AArch64_LDRXui:   // ldr xt, [xn, #imm]
+    case Opcode::AArch64_LDRBui:  // ldr bt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDRBpre:  // ldr bt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_LDRDui:  // ldr dt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDRDpre:  // ldr dt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_LDRHui:  // ldr ht, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDRHpre:  // ldr ht, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_LDRQui:  // ldr qt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDRQpre:  // ldr qt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_LDRSui:  // ldr st, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDRSpre:  // ldr st, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_LDRWui:  // ldr wt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDRWpre:  // ldr wt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_LDRXui:  // ldr xt, [xn, #imm]
+      [[fallthrough]];
    case Opcode::AArch64_LDRXpre: {  // ldr xt, [xn, #imm]!
       std::vector addresses;
       generateContiguousAddresses(
@@ -572,12 +595,18 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(addresses);
       break;
     }
-    case Opcode::AArch64_LDRBpost:  // ldr bt, [xn], #imm
-    case Opcode::AArch64_LDRDpost:  // ldr dt, [xn], #imm
-    case Opcode::AArch64_LDRHpost:  // ldr ht, [xn], #imm
-    case Opcode::AArch64_LDRQpost:  // ldr qt, [xn], #imm
-    case Opcode::AArch64_LDRSpost:  // ldr st, [xn], #imm
-    case Opcode::AArch64_LDRWpost:  // ldr wt, [xn], #imm
+    case Opcode::AArch64_LDRBpost:  // ldr bt, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDRDpost:  // ldr dt, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDRHpost:  // ldr ht, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDRQpost:  // ldr qt, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDRSpost:  // ldr st, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDRWpost:  // ldr wt, [xn], #imm
+      [[fallthrough]];
     case Opcode::AArch64_LDRXpost: {  // ldr xt, [xn], #imm
       std::vector addresses;
       generateContiguousAddresses(sourceValues_[0].get<uint64_t>(), 1,
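All of these LDR/LDP variants funnel into `generateContiguousAddresses(...)`, whose full signature is not shown in this diff. A hypothetical stand-in that captures the idea — `AccessTarget` is an assumed type, not SimEng's `MemoryAccessTarget` declaration:

```cpp
#include <cstdint>
#include <vector>

// Assumed shape of a memory access request: a byte address plus a size.
struct AccessTarget {
  uint64_t address;
  uint16_t size;
};

// Hypothetical equivalent of the generateContiguousAddresses(...) helper:
// emit `count` back-to-back accesses of `size` bytes starting at `base`.
std::vector<AccessTarget> contiguousAddresses(uint64_t base, unsigned count,
                                              uint16_t size) {
  std::vector<AccessTarget> targets;
  targets.reserve(count);
  for (unsigned i = 0; i < count; i++)
    targets.push_back({base + static_cast<uint64_t>(i) * size, size});
  return targets;
}

// e.g. `ldp xt1, xt2, [xn, #imm]` would request
// contiguousAddresses(xn + imm, 2, 8): two adjacent 8-byte loads.
```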
@@ -695,15 +724,24 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({{base, 4}, {base + 4, 4}});
       break;
     }
-    case Opcode::AArch64_LDPDi:    // ldp dt1, dt2, [xn, #imm]
-    case Opcode::AArch64_LDPDpre:  // ldp dt1, dt2, [xn, #imm!]
-    case Opcode::AArch64_LDPQi:    // ldp qt1, qt2, [xn, #imm]
-    case Opcode::AArch64_LDPQpre:  // ldp qt1, qt2, [xn, #imm!]
-    case Opcode::AArch64_LDPSi:    // ldp st1, st2, [xn, #imm]
-    case Opcode::AArch64_LDPSpre:  // ldp st1, st2, [xn, #imm!]
-    case Opcode::AArch64_LDPWi:    // ldp wt1, wt2, [xn, #imm]
-    case Opcode::AArch64_LDPWpre:  // ldp wt1, wt2, [xn, #imm!]
-    case Opcode::AArch64_LDPXi:    // ldp xt1, xt2, [xn, #imm]
+    case Opcode::AArch64_LDPDi:  // ldp dt1, dt2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPDpre:  // ldp dt1, dt2, [xn, #imm!]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPQi:  // ldp qt1, qt2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPQpre:  // ldp qt1, qt2, [xn, #imm!]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPSi:  // ldp st1, st2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPSpre:  // ldp st1, st2, [xn, #imm!]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPWi:  // ldp wt1, wt2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPWpre:  // ldp wt1, wt2, [xn, #imm!]
+      [[fallthrough]];
+    case Opcode::AArch64_LDPXi:  // ldp xt1, xt2, [xn, #imm]
+      [[fallthrough]];
     case Opcode::AArch64_LDPXpre: {  // ldp xt1, xt2, [xn, #imm!]
       std::vector addresses;
       generateContiguousAddresses(
@@ -712,10 +750,14 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(addresses);
       break;
     }
-    case Opcode::AArch64_LDPDpost:  // ldp dt1, dt2, [xn], #imm
-    case Opcode::AArch64_LDPQpost:  // ldp qt1, qt2, [xn], #imm
-    case Opcode::AArch64_LDPSpost:  // ldp st1, st2, [xn], #imm
-    case Opcode::AArch64_LDPWpost:  // ldp wt1, wt2, [xn], #imm
+    case Opcode::AArch64_LDPDpost:  // ldp dt1, dt2, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDPQpost:  // ldp qt1, qt2, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDPSpost:  // ldp st1, st2, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_LDPWpost:  // ldp wt1, wt2, [xn], #imm
+      [[fallthrough]];
     case Opcode::AArch64_LDPXpost: {  // ldp xt1, xt2, [xn], #imm
       std::vector addresses;
       generateContiguousAddresses(sourceValues_[0].get<uint64_t>(), 2,
@@ -1008,8 +1050,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(std::move(addresses));
       break;
     }
-    case Opcode::AArch64_ST1_MXIPXX_H_B:  // st1b {zath.b[ws, #imm]}, pg,
-                                          //  [<xn|sp>{, xm}]
+    case Opcode::AArch64_ST1_MXIPXX_H_B:  // st1b {zath.b[ws, #imm]}, pg,
+                                          //  [<xn|sp>{, xm}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_ST1_MXIPXX_V_B: {  // st1b {zatv.b[ws, #imm]}, pg,
                                             //  [<xn|sp>{, xm}]
                                             // SME
@@ -1029,8 +1073,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(std::move(addresses));
       break;
     }
-    case Opcode::AArch64_ST1_MXIPXX_H_D:  // st1d {zath.d[ws, #imm]}, pg,
-                                          //  [<xn|sp>{, xm, lsl #3}]
+    case Opcode::AArch64_ST1_MXIPXX_H_D:  // st1d {zath.d[ws, #imm]}, pg,
+                                          //  [<xn|sp>{, xm, lsl #3}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_ST1_MXIPXX_V_D: {  // st1d {zatv.d[ws, #imm]}, pg,
                                             //  [<xn|sp>{, xm, lsl #3}]
                                             // SME
@@ -1050,8 +1096,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(std::move(addresses));
       break;
     }
-    case Opcode::AArch64_ST1_MXIPXX_H_H:  // st1h {zath.h[ws, #imm]}, pg,
-                                          //  [<xn|sp>{, xm, lsl #1}]
+    case Opcode::AArch64_ST1_MXIPXX_H_H:  // st1h {zath.h[ws, #imm]}, pg,
+                                          //  [<xn|sp>{, xm, lsl #1}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_ST1_MXIPXX_V_H: {  // st1h {zatv.h[ws, #imm]}, pg,
                                             //  [<xn|sp>{, xm, lsl #1}]
                                             // SME
@@ -1073,6 +1121,8 @@ span Instruction::generateAddresses() {
     }
     case Opcode::AArch64_ST1_MXIPXX_H_Q:  // st1q {zath.q[ws]}, pg, [<xn|sp>{,
                                           //  xm, lsl #4}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_ST1_MXIPXX_V_Q: {  // st1q {zatv.q[ws]}, pg,
                                             //  [<xn|sp>{, xm, lsl #4}]
                                             // SME
@@ -1092,8 +1142,10 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(std::move(addresses));
       break;
     }
-    case Opcode::AArch64_ST1_MXIPXX_H_S:  // st1w {zath.s[ws, #imm]}, pg/z,
-                                          //  [<xn|sp>{, xm, LSL #2}]
+    case Opcode::AArch64_ST1_MXIPXX_H_S:  // st1w {zath.s[ws, #imm]}, pg/z,
+                                          //  [<xn|sp>{, xm, LSL #2}]
+                                          // SME
+      [[fallthrough]];
     case Opcode::AArch64_ST1_MXIPXX_V_S: {  // st1w {zatv.s[ws, #imm]}, pg/z,
                                             //  [<xn|sp>{, xm, LSL #2}]
                                             // SME
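The SME ld1/st1 slice accesses above compute a base of `xn` plus the optional `xm` offset (the `(n + m)` seen earlier) and touch one ZA slice, which spans VL bits, i.e. VL/8 bytes. A compile-time restatement of that sizing, assuming only what the hunks show:

```cpp
#include <cstdint>

// One ZA slice covers the full vector length: VL/8 bytes. A 512-bit
// implementation therefore reads or writes 64 bytes per slice access.
constexpr uint16_t sliceBytes(uint16_t vlBits) {
  return static_cast<uint16_t>(vlBits / 8);
}

static_assert(sliceBytes(512) == 64);
static_assert(sliceBytes(2048) == 256);  // Architectural maximum SVL.
```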
@@ -1471,15 +1523,24 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({{sourceValues_[1].get<uint64_t>(), 8}});
       break;
     }
-    case Opcode::AArch64_STPDi:    // stp dt1, dt2, [xn, #imm]
-    case Opcode::AArch64_STPDpre:  // stp dt1, dt2, [xn, #imm]!
-    case Opcode::AArch64_STPQi:    // stp qt1, qt2, [xn, #imm]
-    case Opcode::AArch64_STPQpre:  // stp qt1, qt2, [xn, #imm]!
-    case Opcode::AArch64_STPSi:    // stp st1, st2, [xn, #imm]
-    case Opcode::AArch64_STPSpre:  // stp st1, st2, [xn, #imm]!
-    case Opcode::AArch64_STPWi:    // stp wt1, wt2, [xn, #imm]
-    case Opcode::AArch64_STPWpre:  // stp wt1, wt2, [xn, #imm]!
-    case Opcode::AArch64_STPXi:    // stp xt1, xt2, [xn, #imm]
+    case Opcode::AArch64_STPDi:  // stp dt1, dt2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STPDpre:  // stp dt1, dt2, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STPQi:  // stp qt1, qt2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STPQpre:  // stp qt1, qt2, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STPSi:  // stp st1, st2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STPSpre:  // stp st1, st2, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STPWi:  // stp wt1, wt2, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STPWpre:  // stp wt1, wt2, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STPXi:  // stp xt1, xt2, [xn, #imm]
+      [[fallthrough]];
     case Opcode::AArch64_STPXpre: {  // stp xt1, xt2, [xn, #imm]!
       std::vector addresses;
       generateContiguousAddresses(
@@ -1488,10 +1549,14 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(addresses);
       break;
     }
-    case Opcode::AArch64_STPDpost:  // stp dt1, dt2, [xn], #imm
-    case Opcode::AArch64_STPQpost:  // stp qt1, qt2, [xn], #imm
-    case Opcode::AArch64_STPSpost:  // stp st1, st2, [xn], #imm
-    case Opcode::AArch64_STPWpost:  // stp wt1, wt2, [xn], #imm
+    case Opcode::AArch64_STPDpost:  // stp dt1, dt2, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STPQpost:  // stp qt1, qt2, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STPSpost:  // stp st1, st2, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STPWpost:  // stp wt1, wt2, [xn], #imm
+      [[fallthrough]];
     case Opcode::AArch64_STPXpost: {  // stp xt1, xt2, [xn], #imm
       std::vector addresses;
       generateContiguousAddresses(sourceValues_[2].get<uint64_t>(), 2,
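The ui/pre variants above generate addresses from `xn + imm`, while the post variants use `xn` directly; the `#imm` only affects the written-back base. A small sketch of the two addressing modes, independent of SimEng's types:

```cpp
#include <cstdint>

// Pre-index ([xn, #imm]!) accesses xn+imm and writes that address back to xn;
// post-index ([xn], #imm) accesses xn, then writes back xn+imm.
struct IndexedAccess {
  uint64_t accessAddress;
  uint64_t writebackValue;
};

constexpr IndexedAccess preIndex(uint64_t xn, int64_t imm) {
  return {xn + imm, xn + imm};
}
constexpr IndexedAccess postIndex(uint64_t xn, int64_t imm) {
  return {xn, xn + imm};
}

static_assert(preIndex(0x1000, 16).accessAddress == 0x1010);
static_assert(postIndex(0x1000, 16).accessAddress == 0x1000);
static_assert(postIndex(0x1000, 16).writebackValue == 0x1010);
```

This is why the pre/ui cases share one body (offset applied to the base) and the post cases share another (base used as-is).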
@@ -1541,19 +1606,32 @@ span Instruction::generateAddresses() {
       setMemoryAddresses({{sourceValues_[1].get<uint64_t>() + offset, 8}});
       break;
     }
-    case Opcode::AArch64_STRBui:   // str bt, [xn, #imm]
-    case Opcode::AArch64_STRBpre:  // str bt, [xn, #imm]!
-    case Opcode::AArch64_STRDui:   // str dt, [xn, #imm]
-    case Opcode::AArch64_STRDpre:  // str dt, [xn, #imm]!
-    case Opcode::AArch64_STRHui:   // str ht, [xn, #imm]
-    case Opcode::AArch64_STRHpre:  // str ht, [xn, #imm]!
-    case Opcode::AArch64_STRQui:   // str qt, [xn, #imm]
-    case Opcode::AArch64_STRQpre:  // str qt, [xn, #imm]!
-    case Opcode::AArch64_STRSui:   // str st, [xn, #imm]
-    case Opcode::AArch64_STRSpre:  // str st, [xn, #imm]!
-    case Opcode::AArch64_STRWui:   // str wt, [xn, #imm]
-    case Opcode::AArch64_STRWpre:  // str wt, [xn, #imm]!
-    case Opcode::AArch64_STRXui:   // str xt, [xn, #imm]
+    case Opcode::AArch64_STRBui:  // str bt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STRBpre:  // str bt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STRDui:  // str dt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STRDpre:  // str dt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STRHui:  // str ht, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STRHpre:  // str ht, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STRQui:  // str qt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STRQpre:  // str qt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STRSui:  // str st, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STRSpre:  // str st, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STRWui:  // str wt, [xn, #imm]
+      [[fallthrough]];
+    case Opcode::AArch64_STRWpre:  // str wt, [xn, #imm]!
+      [[fallthrough]];
+    case Opcode::AArch64_STRXui:  // str xt, [xn, #imm]
+      [[fallthrough]];
     case Opcode::AArch64_STRXpre: {  // str xt, [xn, #imm]!
       std::vector addresses;
       generateContiguousAddresses(
@@ -1562,12 +1640,18 @@ span Instruction::generateAddresses() {
       setMemoryAddresses(addresses);
       break;
     }
-    case Opcode::AArch64_STRBpost:  // str bt, [xn], #imm
-    case Opcode::AArch64_STRDpost:  // str dt, [xn], #imm
-    case Opcode::AArch64_STRHpost:  // str ht, [xn], #imm
-    case Opcode::AArch64_STRQpost:  // str qt, [xn], #imm
-    case Opcode::AArch64_STRSpost:  // str st, [xn], #imm
-    case Opcode::AArch64_STRWpost:  // str wt, [xn], #imm
+    case Opcode::AArch64_STRBpost:  // str bt, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STRDpost:  // str dt, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STRHpost:  // str ht, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STRQpost:  // str qt, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STRSpost:  // str st, [xn], #imm
+      [[fallthrough]];
+    case Opcode::AArch64_STRWpost:  // str wt, [xn], #imm
+      [[fallthrough]];
     case Opcode::AArch64_STRXpost: {  // str xt, [xn], #imm
       std::vector addresses;
       generateContiguousAddresses(sourceValues_[1].get<uint64_t>(), 1,
diff --git a/src/lib/arch/aarch64/Instruction_decode.cc b/src/lib/arch/aarch64/Instruction_decode.cc
index 585e75df01..38915d3186 100644
--- a/src/lib/arch/aarch64/Instruction_decode.cc
+++ b/src/lib/arch/aarch64/Instruction_decode.cc
@@ -733,7 +733,7 @@ void Instruction::decode() {
     } else if (isInstruction(InsnType::isShift))
       group += 2;
     else
-      group += 3;  // Default is {Data type}_SIMPLE_ARTH
+      group += 3;  // Default is {Data type}_SIMPLE_ARTH_NOSHIFT
     instructionGroup_ = group;
   }
 
diff --git a/src/lib/arch/aarch64/Instruction_execute.cc b/src/lib/arch/aarch64/Instruction_execute.cc
index 7e2478b06a..8f222c11ac 100644
--- a/src/lib/arch/aarch64/Instruction_execute.cc
+++ b/src/lib/arch/aarch64/Instruction_execute.cc
@@ -69,7 +69,7 @@ void Instruction::execute() {
   // 0th bit of SVCR register determines if streaming-mode is enabled.
   const bool SMenabled = architecture_.isStreamingModeEnabled();
   // 1st bit of SVCR register determines if ZA register is enabled.
-  const bool ZAenabled = architecture_.isZA_RegisterEnabled();
+  const bool ZAenabled = architecture_.isZARegisterEnabled();
   // When streaming mode is enabled, the architectural vector length goes from
   // SVE's VL to SME's SVL.
   const uint16_t VL_bits = SMenabled ? architecture_.getStreamingVectorLength()
                                      : architecture_.getVectorLength();
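The `VL_bits` selection above is the whole of the streaming-mode length rule: one effective vector length per executed instruction, switched on the SM bit. Restated in isolation (a sketch; the real values come from the Architecture object):

```cpp
#include <cstdint>

// In streaming mode the effective vector length is the streaming vector
// length (SVL); otherwise it is SVE's configured VL.
uint16_t effectiveVectorLength(bool smEnabled, uint16_t vlBits,
                               uint16_t svlBits) {
  return smEnabled ? svlBits : vlBits;
}
// e.g. with VL=512 and SVL=2048, toggling SM quadruples every per-slice
// access size and element count downstream of this one decision.
```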
@@ -125,8 +125,9 @@ void Instruction::execute() {
         const uint64_t* zaRow = sourceValues_[row].getAsVector<uint64_t>();
         uint64_t out[32] = {0};
         std::memcpy(out, zaRow, rowCount * sizeof(uint64_t));
-        // Slice element is active IFF:
-        // - Element in 1st source pred corresponding to horiz. slice is TRUE
+        // Slice element is active IFF all of the following conditions hold:
+        // - Element in 1st source pred corresponding to horizontal
+        //   slice is TRUE
         // - Corresponding element in 2nd source pred is TRUE
         const uint64_t shifted_active_pn = 1ull << ((row % 8) * 8);
         if (pn[row / 8] & shifted_active_pn) {
@@ -158,8 +159,9 @@ void Instruction::execute() {
         const uint32_t* zaRow = sourceValues_[row].getAsVector<uint32_t>();
         uint32_t out[64] = {0};
         std::memcpy(out, zaRow, rowCount * sizeof(uint32_t));
-        // Slice element is active IFF:
-        // - Element in 1st source pred corresponding to horiz. slice is TRUE
+        // Slice element is active IFF all of the following conditions hold:
+        // - Element in 1st source pred corresponding to horizontal
+        //   slice is TRUE
         // - Corresponding element in 2nd source pred is TRUE
         const uint64_t shifted_active_pn = 1ull << ((row % 16) * 4);
         if (pn[row / 16] & shifted_active_pn) {
@@ -191,9 +193,10 @@ void Instruction::execute() {
         const uint64_t* zaRow = sourceValues_[row].getAsVector<uint64_t>();
         uint64_t out[32] = {0};
         std::memcpy(out, zaRow, rowCount * sizeof(uint64_t));
-        // Slice element is active IFF:
+        // Slice element is active IFF all of the following conditions hold:
         // - Corresponding element in 1st source pred is TRUE
-        // - Element in 2nd source pred corresponding to vert. slice is TRUE
+        // - Element in 2nd source pred corresponding to vertical
+        //   slice is TRUE
         const uint64_t shifted_active_pn = 1ull << ((row % 8) * 8);
         if (pn[row / 8] & shifted_active_pn) {
           // Corresponding slice element is active (i.e. all elements in row).
@@ -227,9 +230,10 @@ void Instruction::execute() {
         const uint32_t* zaRow = sourceValues_[row].getAsVector<uint32_t>();
         uint32_t out[64] = {0};
         std::memcpy(out, zaRow, rowCount * sizeof(uint32_t));
-        // Slice element is active IFF:
+        // Slice element is active IFF all of the following conditions hold:
         // - Corresponding element in 1st source pred is TRUE
-        // - Element in 2nd source pred corresponding to vert. slice is TRUE
+        // - Element in 2nd source pred corresponding to vertical
+        //   slice is TRUE
         const uint64_t shifted_active_pn = 1ull << ((row % 16) * 4);
         if (pn[row / 16] & shifted_active_pn) {
           // Corresponding slice element is active (i.e. all elements in row).
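The recurring `1ull << ((row % 8) * 8)` and `1ull << ((row % 16) * 4)` expressions follow SVE's predicate layout: one predicate bit per byte of vector data, packed into 64-bit chunks, so the governing bit of element i sits `i * elementBytes` bits into the register. A generalised form of the test the hunks repeat per element width:

```cpp
#include <cstdint>

// SVE predicate layout: one bit per byte of vector data. For elements of
// `bytes` width, element i is governed by bit ((i % perChunk) * bytes) of
// 64-bit predicate chunk pred[i / perChunk], where perChunk = 64 / bytes.
bool elementActive(const uint64_t* pred, uint64_t i, uint64_t bytes) {
  const uint64_t perChunk = 64 / bytes;         // Elements per uint64_t chunk.
  const uint64_t bit = (i % perChunk) * bytes;  // Governing bit position.
  return pred[i / perChunk] & (1ull << bit);
}

// e.g. for 64-bit elements, elementActive(pn, row, 8) reproduces
// `pn[row / 8] & (1ull << ((row % 8) * 8))` from the code above.
```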
@@ -3177,11 +3181,12 @@ void Instruction::execute() {
       const uint8_t* zn = sourceValues_[rowCount + 2].getAsVector<uint8_t>();
 
       for (uint16_t i = 0; i < rowCount; i++) {
-        uint8_t* row =
-            const_cast<uint8_t*>(sourceValues_[i].getAsVector<uint8_t>());
+        const uint8_t* row = sourceValues_[i].getAsVector<uint8_t>();
+        uint8_t out[256] = {0};
+        memcpy(out, row, rowCount * sizeof(uint8_t));
         uint64_t shifted_active = 1ull << (i % 64);
-        if (pg[i / 64] & shifted_active) row[sliceNum] = zn[i];
-        results_[i] = {(char*)row, 256};
+        if (pg[i / 64] & shifted_active) out[sliceNum] = zn[i];
+        results_[i] = {out, 256};
       }
       break;
     }
@@ -3203,11 +3208,12 @@ void Instruction::execute() {
           sourceValues_[rowCount + 2].getAsVector<uint64_t>();
 
       for (uint16_t i = 0; i < rowCount; i++) {
-        uint64_t* row =
-            const_cast<uint64_t*>(sourceValues_[i].getAsVector<uint64_t>());
+        const uint64_t* row = sourceValues_[i].getAsVector<uint64_t>();
+        uint64_t out[32] = {0};
+        memcpy(out, row, rowCount * sizeof(uint64_t));
         uint64_t shifted_active = 1ull << ((i % 8) * 8);
-        if (pg[i / 8] & shifted_active) row[sliceNum] = zn[i];
-        results_[i] = {(char*)row, 256};
+        if (pg[i / 8] & shifted_active) out[sliceNum] = zn[i];
+        results_[i] = {out, 256};
       }
       break;
     }
@@ -3229,11 +3235,12 @@ void Instruction::execute() {
           sourceValues_[rowCount + 2].getAsVector<uint16_t>();
 
       for (uint16_t i = 0; i < rowCount; i++) {
-        uint16_t* row =
-            const_cast<uint16_t*>(sourceValues_[i].getAsVector<uint16_t>());
+        const uint16_t* row = sourceValues_[i].getAsVector<uint16_t>();
+        uint16_t out[128] = {0};
+        memcpy(out, row, rowCount * sizeof(uint16_t));
         uint64_t shifted_active = 1ull << ((i % 32) * 2);
-        if (pg[i / 32] & shifted_active) row[sliceNum] = zn[i];
-        results_[i] = {(char*)row, 256};
+        if (pg[i / 32] & shifted_active) out[sliceNum] = zn[i];
+        results_[i] = {out, 256};
       }
       break;
     }
@@ -3254,16 +3261,18 @@ void Instruction::execute() {
 
       for (uint16_t i = 0; i < rowCount; i++) {
         // Use uint64_t in place of 128-bit
-        uint64_t* row =
-            const_cast<uint64_t*>(sourceValues_[i].getAsVector<uint64_t>());
+        const uint64_t* row = sourceValues_[i].getAsVector<uint64_t>();
+        uint64_t out[32] = {0};
+        // *2 in memcpy as we need 128-bit elements but are using uint64_t
+        memcpy(out, row, rowCount * sizeof(uint64_t) * 2);
         // For 128-bit there are 16-bit for each active element
         uint64_t shifted_active = 1ull << ((i % 4) * 16);
         if (pg[i / 4] & shifted_active) {
           // Need to move two consecutive 64-bit elements
-          row[2 * sliceNum] = zn[2 * i];
-          row[2 * sliceNum + 1] = zn[2 * i + 1];
+          out[2 * sliceNum] = zn[2 * i];
+          out[2 * sliceNum + 1] = zn[2 * i + 1];
         }
-        results_[i] = {(char*)row, 256};
+        results_[i] = {out, 256};
       }
       break;
     }
@@ -3285,11 +3294,12 @@ void Instruction::execute() {
           sourceValues_[rowCount + 2].getAsVector<uint32_t>();
 
      for (uint16_t i = 0; i < rowCount; i++) {
-        uint32_t* row =
-            const_cast<uint32_t*>(sourceValues_[i].getAsVector<uint32_t>());
+        const uint32_t* row = sourceValues_[i].getAsVector<uint32_t>();
+        uint32_t out[64] = {0};
+        memcpy(out, row, rowCount * sizeof(uint32_t));
         uint64_t shifted_active = 1ull << ((i % 16) * 4);
-        if (pg[i / 16] & shifted_active) row[sliceNum] = zn[i];
-        results_[i] = {(char*)row, 256};
+        if (pg[i / 16] & shifted_active) out[sliceNum] = zn[i];
+        results_[i] = {out, 256};
       }
       break;
     }
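Every hunk in this block replaces the same anti-pattern: `const_cast`ing a source operand and mutating it in place. The rewrite copies the ZA row into a scratch buffer, applies the predicated update there, and publishes the buffer as the result, leaving source operands immutable. A distilled sketch of the pattern — `Result` stands in for SimEng's RegisterValue, and rowCount <= 32 is assumed as in the 64-bit cases above:

```cpp
#include <cstdint>
#include <cstring>

// Stand-in for a 256-byte register result; not SimEng's RegisterValue.
struct Result {
  char bytes[256];
};

// Copy-on-write update of one ZA row: never alias or mutate the source.
Result updatedRow(const uint64_t* row, uint16_t rowCount, uint16_t sliceNum,
                  uint64_t value, bool active) {
  uint64_t out[32] = {0};                              // 32 * 8 = 256 bytes.
  std::memcpy(out, row, rowCount * sizeof(uint64_t));  // Copy, don't alias.
  if (active) out[sliceNum] = value;                   // Predicated update.
  Result r;
  std::memcpy(r.bytes, out, sizeof(out));
  return r;
}
```

Mutating through the old `const_cast` wrote results into operands other in-flight instructions might still read, which is precisely what the flush/replay machinery cannot tolerate.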
@@ -3507,15 +3517,14 @@ void Instruction::execute() {
       const uint8_t* data = memoryData_[0].getAsVector<uint8_t>();
 
       for (int i = 0; i < partition_num; i++) {
-        uint8_t* row =
-            const_cast<uint8_t*>(sourceValues_[i].getAsVector<uint8_t>());
+        const uint8_t* row = sourceValues_[i].getAsVector<uint8_t>();
+        uint8_t out[256] = {0};
+        memcpy(out, row, partition_num * sizeof(uint8_t));
         uint64_t shifted_active = 1ull << (i % 64);
         if (pg[i / 64] & shifted_active) {
-          row[sliceNum] = data[i];
-        } else {
-          row[sliceNum] = 0;
+          out[sliceNum] = data[i];
         }
-        results_[i] = RegisterValue(reinterpret_cast<char*>(row), 256);
+        results_[i] = RegisterValue(out, 256);
       }
       break;
     }
@@ -3535,15 +3544,14 @@ void Instruction::execute() {
       const uint64_t* data = memoryData_[0].getAsVector<uint64_t>();
 
       for (int i = 0; i < partition_num; i++) {
-        uint64_t* row =
-            const_cast<uint64_t*>(sourceValues_[i].getAsVector<uint64_t>());
+        const uint64_t* row = sourceValues_[i].getAsVector<uint64_t>();
+        uint64_t out[32] = {0};
+        memcpy(out, row, partition_num * sizeof(uint64_t));
         uint64_t shifted_active = 1ull << ((i % 8) * 8);
         if (pg[i / 8] & shifted_active) {
-          row[sliceNum] = data[i];
-        } else {
-          row[sliceNum] = 0;
+          out[sliceNum] = data[i];
         }
-        results_[i] = RegisterValue(reinterpret_cast<char*>(row), 256);
+        results_[i] = RegisterValue(out, 256);
       }
       break;
     }
@@ -3563,15 +3571,14 @@ void Instruction::execute() {
       const uint16_t* data = memoryData_[0].getAsVector<uint16_t>();
 
       for (int i = 0; i < partition_num; i++) {
-        uint16_t* row =
-            const_cast<uint16_t*>(sourceValues_[i].getAsVector<uint16_t>());
+        const uint16_t* row = sourceValues_[i].getAsVector<uint16_t>();
+        uint16_t out[128] = {0};
+        memcpy(out, row, partition_num * sizeof(uint16_t));
         uint64_t shifted_active = 1ull << ((i % 32) * 2);
         if (pg[i / 32] & shifted_active) {
-          row[sliceNum] = data[i];
-        } else {
-          row[sliceNum] = 0;
+          out[sliceNum] = data[i];
         }
-        results_[i] = RegisterValue(reinterpret_cast<char*>(row), 256);
+        results_[i] = RegisterValue(out, 256);
       }
       break;
     }
@@ -3592,20 +3599,18 @@ void Instruction::execute() {
 
       for (int i = 0; i < partition_num; i++) {
         // Using uint64_t as no 128-bit data type
-        uint64_t* row =
-            const_cast<uint64_t*>(sourceValues_[i].getAsVector<uint64_t>());
+        const uint64_t* row = sourceValues_[i].getAsVector<uint64_t>();
+        uint64_t out[32] = {0};
+        // *2 in memcpy as we need 128-bit elements but are using uint64_t
+        memcpy(out, row, partition_num * sizeof(uint64_t) * 2);
         // For 128-bit there are 16-bit for each active element
         uint64_t shifted_active = 1ull << ((i % 4) * 16);
         if (pg[i / 4] & shifted_active) {
           // As using uint64_t need to modify 2 elements
-          row[2 * sliceNum] = data[2 * i];
-          row[2 * sliceNum + 1] = data[2 * i + 1];
-        } else {
-          // As using uint64_t need to modify 2 elements
-          row[2 * sliceNum] = 0;
-          row[2 * sliceNum + 1] = 0;
+          out[2 * sliceNum] = data[2 * i];
+          out[2 * sliceNum + 1] = data[2 * i + 1];
         }
-        results_[i] = RegisterValue(reinterpret_cast<char*>(row), 256);
+        results_[i] = RegisterValue(out, 256);
       }
       break;
     }
@@ -3625,15 +3630,14 @@ void Instruction::execute() {
       const uint32_t* data = memoryData_[0].getAsVector<uint32_t>();
 
       for (int i = 0; i < partition_num; i++) {
-        uint32_t* row =
-            const_cast<uint32_t*>(sourceValues_[i].getAsVector<uint32_t>());
+        const uint32_t* row = sourceValues_[i].getAsVector<uint32_t>();
+        uint32_t out[64] = {0};
+        memcpy(out, row, partition_num * sizeof(uint32_t));
         uint64_t shifted_active = 1ull << ((i % 16) * 4);
         if (pg[i / 16] & shifted_active) {
-          row[sliceNum] = data[i];
-        } else {
-          row[sliceNum] = 0;
+          out[sliceNum] = data[i];
        }
-        results_[i] = RegisterValue(reinterpret_cast<char*>(row), 256);
+        results_[i] = RegisterValue(out, 256);
      }
      break;
    }
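Note that these hunks publish stack buffers via `results_[i] = RegisterValue(out, 256);`, so they are only correct because the register-value type deep-copies the bytes it is handed. A minimal copying value type that makes the lifetime assumption explicit — this is an illustrative class, not SimEng's RegisterValue implementation:

```cpp
#include <cstring>

// A value type that owns its bytes. Constructing it from a stack buffer is
// safe because the bytes are copied before the buffer goes out of scope.
// Assumes size <= 256, as in the 256-byte ZA rows above.
class OwnedValue {
 public:
  OwnedValue(const void* data, unsigned size) : size_(size) {
    std::memcpy(bytes_, data, size);  // Deep copy: valid after `data` dies.
  }

  const char* data() const { return bytes_; }
  unsigned size() const { return size_; }

 private:
  char bytes_[256];
  unsigned size_;
};
```

If the type merely retained the pointer, every `results_[i]` here would dangle as soon as the loop iteration ended.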
diff --git a/src/lib/arch/aarch64/MicroDecoder.cc b/src/lib/arch/aarch64/MicroDecoder.cc
index 0fc3233f4c..3376f34d61 100644
--- a/src/lib/arch/aarch64/MicroDecoder.cc
+++ b/src/lib/arch/aarch64/MicroDecoder.cc
@@ -620,7 +620,7 @@ uint8_t MicroDecoder::decode(const Architecture& architecture, uint32_t word,
         // Check if SVE or Predicate instructions need their group updating due
         // to SVE Streaming Mode activeness being different from when the
         // instruction was first decoded.
-        if (cachedUops[uop].checkStreamingGroup()) {
+        if (cachedUops[uop].checkStreamingGroupAndUpdate()) {
          // If the instruction's group has changed then update its execution
           // info. The newly set group is most likely to be the most accurate,
           // as an incorrect group allocation is only achieved when an
diff --git a/test/unit/aarch64/ArchitectureTest.cc b/test/unit/aarch64/ArchitectureTest.cc
index f9ed6cb678..8c6d06f52c 100644
--- a/test/unit/aarch64/ArchitectureTest.cc
+++ b/test/unit/aarch64/ArchitectureTest.cc
@@ -286,19 +286,19 @@ TEST_F(AArch64ArchitectureTest, get_set_SVCRVal) {
 
 TEST_F(AArch64ArchitectureTest, isSM_ZA_enabled) {
   EXPECT_FALSE(arch->isStreamingModeEnabled());
-  EXPECT_FALSE(arch->isZA_RegisterEnabled());
+  EXPECT_FALSE(arch->isZARegisterEnabled());
   arch->setSVCRval(1);
   EXPECT_TRUE(arch->isStreamingModeEnabled());
-  EXPECT_FALSE(arch->isZA_RegisterEnabled());
+  EXPECT_FALSE(arch->isZARegisterEnabled());
   arch->setSVCRval(2);
   EXPECT_FALSE(arch->isStreamingModeEnabled());
-  EXPECT_TRUE(arch->isZA_RegisterEnabled());
+  EXPECT_TRUE(arch->isZARegisterEnabled());
   arch->setSVCRval(3);
   EXPECT_TRUE(arch->isStreamingModeEnabled());
-  EXPECT_TRUE(arch->isZA_RegisterEnabled());
+  EXPECT_TRUE(arch->isZARegisterEnabled());
   arch->setSVCRval(0);
   EXPECT_FALSE(arch->isStreamingModeEnabled());
-  EXPECT_FALSE(arch->isZA_RegisterEnabled());
+  EXPECT_FALSE(arch->isZARegisterEnabled());
 }
 
 }  // namespace aarch64
diff --git a/test/unit/aarch64/InstructionTest.cc b/test/unit/aarch64/InstructionTest.cc
index 8c0077c45c..95366d96e7 100644
--- a/test/unit/aarch64/InstructionTest.cc
+++ b/test/unit/aarch64/InstructionTest.cc
@@ -642,7 +642,7 @@ TEST_F(AArch64InstructionTest, setters) {
   EXPECT_TRUE(insn.isWaitingCommit());
 }
 
-TEST_F(AArch64InstructionTest, checkStreamingGroup) {
+TEST_F(AArch64InstructionTest, checkStreamingGroupAndUpdate) {
   EXPECT_FALSE(arch.isStreamingModeEnabled());
   // Insn is `fdivr z1.s, p0/m, z1.s, z0.s`
   Instruction SVE_insn = Instruction(arch, *fdivMetadata.get(), MicroOpInfo());
@@ -655,42 +655,42 @@ TEST_F(AArch64InstructionTest, checkStreamingGroup) {
   Instruction PRED_insn = Instruction(arch, *pselMetadata.get(), MicroOpInfo());
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::PREDICATE);
 
-  // Without changing SVE Streaming Mode, calling checkStreamingGroup should
-  // have no effect
+  // Without changing SVE Streaming Mode, calling checkStreamingGroupAndUpdate
+  // should have no effect
   EXPECT_FALSE(arch.isStreamingModeEnabled());
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::PREDICATE);
-  EXPECT_FALSE(SVE_insn.checkStreamingGroup());
-  EXPECT_FALSE(nonSVE_insn.checkStreamingGroup());
-  EXPECT_FALSE(PRED_insn.checkStreamingGroup());
+  EXPECT_FALSE(SVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_FALSE(nonSVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_FALSE(PRED_insn.checkStreamingGroupAndUpdate());
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::PREDICATE);
 
-  // Updating SVE Streaming Mode should mean calling checkStreamingGroup changes
-  // SVE and PRED groups
+  // Updating SVE Streaming Mode should mean calling
+  // checkStreamingGroupAndUpdate changes SVE and PRED groups
   arch.setSVCRval(3);
   EXPECT_TRUE(arch.isStreamingModeEnabled());
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::PREDICATE);
-  EXPECT_TRUE(SVE_insn.checkStreamingGroup());
-  EXPECT_FALSE(nonSVE_insn.checkStreamingGroup());
-  EXPECT_TRUE(PRED_insn.checkStreamingGroup());
+  EXPECT_TRUE(SVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_FALSE(nonSVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_TRUE(PRED_insn.checkStreamingGroupAndUpdate());
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::STREAMING_SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::STREAMING_PREDICATE);
 
-  // Calling checkStreamingGroup again should have no effect on SVE and PRED
-  // groups, and should return false as a result
+  // Calling checkStreamingGroupAndUpdate again should have no effect on SVE
+  // and PRED groups, and should return false as a result
   EXPECT_TRUE(arch.isStreamingModeEnabled());
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::STREAMING_SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::STREAMING_PREDICATE);
-  EXPECT_FALSE(SVE_insn.checkStreamingGroup());
-  EXPECT_FALSE(nonSVE_insn.checkStreamingGroup());
-  EXPECT_FALSE(PRED_insn.checkStreamingGroup());
+  EXPECT_FALSE(SVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_FALSE(nonSVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_FALSE(PRED_insn.checkStreamingGroupAndUpdate());
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::STREAMING_SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::STREAMING_PREDICATE);
@@ -702,9 +702,9 @@ TEST_F(AArch64InstructionTest, checkStreamingGroup) {
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::STREAMING_SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::STREAMING_PREDICATE);
-  EXPECT_TRUE(SVE_insn.checkStreamingGroup());
-  EXPECT_FALSE(nonSVE_insn.checkStreamingGroup());
-  EXPECT_TRUE(PRED_insn.checkStreamingGroup());
+  EXPECT_TRUE(SVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_FALSE(nonSVE_insn.checkStreamingGroupAndUpdate());
+  EXPECT_TRUE(PRED_insn.checkStreamingGroupAndUpdate());
   EXPECT_EQ(SVE_insn.getGroup(), InstructionGroups::SVE_DIV_OR_SQRT);
   EXPECT_EQ(nonSVE_insn.getGroup(), InstructionGroups::BRANCH);
   EXPECT_EQ(PRED_insn.getGroup(), InstructionGroups::PREDICATE);
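The behaviour this test pins down, restated as a sketch: when streaming mode toggles, SVE groups map to their STREAMING_SVE counterparts (and PREDICATE to STREAMING_PREDICATE) and back, while unrelated groups such as BRANCH are untouched. The enum values below are illustrative only, not SimEng's actual group IDs:

```cpp
#include <cstdint>

enum Group : uint16_t {
  SVE_DIV_OR_SQRT,
  STREAMING_SVE_DIV_OR_SQRT,
  PREDICATE,
  STREAMING_PREDICATE,
  BRANCH
};

// Return the group appropriate to the current streaming-mode state; callers
// can compare against the old group to learn whether an update occurred,
// mirroring checkStreamingGroupAndUpdate()'s boolean return.
Group remapForStreamingMode(Group g, bool smEnabled) {
  switch (g) {
    case SVE_DIV_OR_SQRT:
    case STREAMING_SVE_DIV_OR_SQRT:
      return smEnabled ? STREAMING_SVE_DIV_OR_SQRT : SVE_DIV_OR_SQRT;
    case PREDICATE:
    case STREAMING_PREDICATE:
      return smEnabled ? STREAMING_PREDICATE : PREDICATE;
    default:
      return g;  // e.g. BRANCH never changes with streaming mode.
  }
}
```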