diff --git a/riscv/insns/vghsh_vv.h b/riscv/insns/vghsh_vv.h
index bcbfe74f33..a2fc09bfe2 100644
--- a/riscv/insns/vghsh_vv.h
+++ b/riscv/insns/vghsh_vv.h
@@ -6,6 +6,10 @@ require_zvkg;
 require(P.VU.vsew == 32);
 require_egw_fits(128);
 
+require_vd_align_lmul;
+require_vs2_align_lmul;
+require_vs1_align_lmul;
+
 VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP(
   {},
   {
diff --git a/riscv/insns/vgmul_vv.h b/riscv/insns/vgmul_vv.h
index 820b396e04..863b12f56c 100644
--- a/riscv/insns/vgmul_vv.h
+++ b/riscv/insns/vgmul_vv.h
@@ -6,6 +6,9 @@ require_zvkg;
 require(P.VU.vsew == 32);
 require_egw_fits(128);
 
+require_vd_align_lmul;
+require_vs2_align_lmul;
+
 VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP(
   {},
   {
diff --git a/riscv/insns/vsha2ch_vv.h b/riscv/insns/vsha2ch_vv.h
index 34c6e05fbc..5fa41f86de 100644
--- a/riscv/insns/vsha2ch_vv.h
+++ b/riscv/insns/vsha2ch_vv.h
@@ -2,7 +2,6 @@
 
 #include "zvknh_ext_macros.h"
 
-// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2.
 require_vsha2_common_constraints;
 
 switch (P.VU.vsew) {
diff --git a/riscv/insns/vsha2cl_vv.h b/riscv/insns/vsha2cl_vv.h
index 4a1df0904b..5c0bdc3d3d 100644
--- a/riscv/insns/vsha2cl_vv.h
+++ b/riscv/insns/vsha2cl_vv.h
@@ -2,7 +2,6 @@
 
 #include "zvknh_ext_macros.h"
 
-// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2.
 require_vsha2_common_constraints;
 
 switch (P.VU.vsew) {
diff --git a/riscv/insns/vsha2ms_vv.h b/riscv/insns/vsha2ms_vv.h
index 8f1ca085ae..bf53e1184e 100644
--- a/riscv/insns/vsha2ms_vv.h
+++ b/riscv/insns/vsha2ms_vv.h
@@ -2,7 +2,6 @@
 
 #include "zvknh_ext_macros.h"
 
-// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2.
 require_vsha2_common_constraints;
 
 switch (P.VU.vsew) {
diff --git a/riscv/insns/vsm3me_vv.h b/riscv/insns/vsm3me_vv.h
index dd6cb523f2..1421b62ddd 100644
--- a/riscv/insns/vsm3me_vv.h
+++ b/riscv/insns/vsm3me_vv.h
@@ -13,6 +13,7 @@
   (ZVKSH_P1((M16) ^ (M9) ^ ZVK_ROL32((M3), 15)) ^ ZVK_ROL32((M13), 7) ^ (M6))
 
 require_vsm3_constraints;
+require_vs1_align_lmul;
 
 VI_ZVK_VD_VS1_VS2_EGU32x8_NOVM_LOOP(
   {},
diff --git a/riscv/insns/vsm4k_vi.h b/riscv/insns/vsm4k_vi.h
index 8f52e68199..08229661f4 100644
--- a/riscv/insns/vsm4k_vi.h
+++ b/riscv/insns/vsm4k_vi.h
@@ -16,6 +16,9 @@ static constexpr uint32_t zvksed_ck[32] = {
 
 require_vsm4_constraints;
 
+require_vd_align_lmul;
+require_vs2_align_lmul;
+
 VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP(
   {},
   // The following statements will be executed before the first execution
diff --git a/riscv/insns/vsm4r_vs.h b/riscv/insns/vsm4r_vs.h
index 44011eb544..d2b90f3784 100644
--- a/riscv/insns/vsm4r_vs.h
+++ b/riscv/insns/vsm4r_vs.h
@@ -4,7 +4,10 @@
 
 require_vsm4_constraints;
 // No overlap of vd and vs2.
-require(insn.rd() != insn.rs2());
+require_no_overlap_eglmul(insn.rd(), insn.rs2());
+// vd and vs2 are LMUL (resp. EGW / VLEN) aligned
+require_vd_align_lmul;
+require_vs2_align_eglmul(128);
 
 VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
   {},
diff --git a/riscv/insns/vsm4r_vv.h b/riscv/insns/vsm4r_vv.h
index 9a18cecee0..c07daf0a2a 100644
--- a/riscv/insns/vsm4r_vv.h
+++ b/riscv/insns/vsm4r_vv.h
@@ -4,6 +4,9 @@
 
 require_vsm4_constraints;
 
+require_vd_align_lmul;
+require_vs2_align_lmul;
+
 VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP(
   {},
   {
diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h
index f094629835..8f30c3522b 100644
--- a/riscv/zvk_ext_macros.h
+++ b/riscv/zvk_ext_macros.h
@@ -86,6 +86,41 @@
 // (LMUL * VLEN) <= EGW
 #define require_egw_fits(EGW) require((EGW) <= (P.VU.VLEN * P.VU.vflmul))
 
+// Ensures that a register index is aligned with LMUL (checked only if LMUL > 1)
+#define require_vreg_align_lmul(VREG_NUM) \
+  do { \
+    if (P.VU.vflmul > 1) { require_align(VREG_NUM, P.VU.vflmul); } \
+  } while (0)
+
+// Ensures that a register index is aligned to EMUL
+// evaluated as EGW / VLEN.
+// The check is only enabled if this value is greater
+// than one (no index alignment check required for fractional EMUL)
+#define require_vreg_align_eglmul(EGW, VREG_NUM) \
+  do { \
+    float vfeglmul = (EGW) / P.VU.VLEN; \
+    if (vfeglmul > 1) { \
+      require_align(VREG_NUM, vfeglmul); \
+    } \
+  } while (0)
+
+#define require_vd_align_lmul require_vreg_align_lmul(insn.rd())
+#define require_vs2_align_lmul require_vreg_align_lmul(insn.rs2())
+#define require_vs1_align_lmul require_vreg_align_lmul(insn.rs1())
+#define require_vs2_align_eglmul(EGW) require_vreg_align_eglmul(EGW, insn.rs2())
+
+// Ensures that rs2 and rd do not overlap, assuming rd encodes an LMUL wide
+// vector register group and rs2 encodes a vs2_EMUL=ceil(EGW / VLEN) vector register
+// group.
+// Assumption: LMUL >= vs2_EMUL which is enforced independently through require_egw_fits.
+#define require_no_overlap_eglmul(vd, vs2) \
+  do { \
+    int vd_emul = P.VU.vflmul < 1.f ? 1 : (int) P.VU.vflmul; \
+    int aligned_vd = (vd) / vd_emul; \
+    int aligned_vs2 = (vs2) / vd_emul; \
+    require(aligned_vd != aligned_vs2); \
+  } while (0)
+
 // Checks that the vector unit state (vtype and vl) can be interpreted
 // as element groups with EEW=32, EGS=4 (four 32-bits elements per group),
 // for an effective element group width of EGW=128 bits.
diff --git a/riscv/zvkned_ext_macros.h b/riscv/zvkned_ext_macros.h
index db705c71e5..d17b416358 100644
--- a/riscv/zvkned_ext_macros.h
+++ b/riscv/zvkned_ext_macros.h
@@ -10,6 +10,8 @@
 // - Zvkned is enabled
 // - EGW (128) <= LMUL * VLEN
 // - vd and vs2 cannot overlap
+// - vd is LMUL aligned
+// - vs2 is ceil(EGW / VLEN) aligned
 //
 // The constraint that vstart and vl are both EGS (4) aligned
 // is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros.
@@ -18,13 +20,17 @@
     require_zvkned; \
     require(P.VU.vsew == 32); \
     require_egw_fits(128); \
-    require(insn.rd() != insn.rs2()); \
+    require_no_overlap_eglmul(insn.rd(), insn.rs2()); \
+    require_vd_align_lmul; \
+    require_vs2_align_eglmul(128); \
   } while (false)
 
 // vaes*.vv instruction constraints. Those are the same as the .vs ones,
 // except for the overlap constraint that is not present for .vv variants.
 // - Zvkned is enabled
 // - EGW (128) <= LMUL * VLEN
+// - vd is LMUL aligned
+// - vs2 is LMUL aligned
 //
 // The constraint that vstart and vl are both EGS (4) aligned
 // is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros.
@@ -33,6 +39,8 @@
     require_zvkned; \
     require(P.VU.vsew == 32); \
     require_egw_fits(128); \
+    require_vd_align_lmul; \
+    require_vs2_align_lmul; \
   } while (false)
 
 // vaeskf*.vi instruction constraints. Those are the same as the .vv ones.
@@ -41,6 +49,8 @@
     require_zvkned; \
     require(P.VU.vsew == 32); \
     require_egw_fits(128); \
+    require_vd_align_lmul; \
+    require_vs2_align_lmul; \
   } while (false)
 
 #define VAES_XTIME(A) (((A) << 1) ^ (((A) & 0x80) ? 0x1b : 0))
diff --git a/riscv/zvknh_ext_macros.h b/riscv/zvknh_ext_macros.h
index b50818bdae..89686ff6db 100644
--- a/riscv/zvknh_ext_macros.h
+++ b/riscv/zvknh_ext_macros.h
@@ -9,6 +9,9 @@
 // Constraints common to all vsha* instructions, across all VSEW:
 // - VSEW is 32 (SHA-256) or 64 (SHA-512)
 // - No overlap of vd with vs1 or vs2.
+// - vd is LMUL aligned
+// - vs1 is LMUL aligned
+// - vs2 is LMUL aligned
 //
 // The constraint that vstart and vl are both EGS (4) aligned
 // is checked in the VI_..._EGU32x4_..._LOOP and VI_..._EGU64x4_..._LOOP
@@ -18,6 +21,9 @@
     require(P.VU.vsew == 32 || P.VU.vsew == 64); \
     require(insn.rd() != insn.rs1()); \
     require(insn.rd() != insn.rs2()); \
+    require_vd_align_lmul; \
+    require_vs2_align_lmul; \
+    require_vs1_align_lmul; \
   } while (false)
 
 // Constraints on vsha2 instructions that must be verified when VSEW==32.
diff --git a/riscv/zvksh_ext_macros.h b/riscv/zvksh_ext_macros.h
index 71c5a09149..3f4f250d64 100644
--- a/riscv/zvksh_ext_macros.h
+++ b/riscv/zvksh_ext_macros.h
@@ -11,6 +11,8 @@
 // - VSEW == 32
 // - EGW (256) <= LMUL * VLEN
 // - No overlap of vd and vs2.
+// - vd is LMUL aligned
+// - vs2 is LMUL aligned
 //
 // The constraint that vstart and vl are both EGS (8) aligned
 // is checked in the VI_ZVK_..._EGU32x8_..._LOOP macros.
@@ -20,6 +22,8 @@
     require(P.VU.vsew == 32); \
     require_egw_fits(256); \
     require(insn.rd() != insn.rs2()); \
+    require_vd_align_lmul; \
+    require_vs2_align_lmul; \
   } while (false)
 
 #define FF1(X, Y, Z) ((X) ^ (Y) ^ (Z))