Skip to content

Commit

Permalink
Kyber ASM ARMv7E-M/ARMv7-M: added assembly code
Browse files Browse the repository at this point in the history
Improved performance by reimplementing kyber_ntt, kyber_invntt,
kyber_basemul_mont, kyber_basemul_mont_add, and kyber_rej_uniform_c
in assembly.
Replaced WOLFSSL_SP_NO_UMAAL with WOLFSSL_ARM_ARCH_7M.
  • Loading branch information
SparkiDev committed Oct 3, 2024
1 parent ac788ec commit d204798
Show file tree
Hide file tree
Showing 10 changed files with 7,843 additions and 48 deletions.
9 changes: 9 additions & 0 deletions src/include.am
Original file line number Diff line number Diff line change
Expand Up @@ -1192,6 +1192,15 @@ endif
if BUILD_WC_KYBER
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber_poly.c
if BUILD_ARMASM
if BUILD_ARM_THUMB
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-kyber-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-kyber-asm.S
endif !BUILD_ARMASM_INLINE
endif BUILD_ARM_THUMB
endif BUILD_ARMASM
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber_asm.S
Expand Down
24 changes: 12 additions & 12 deletions wolfcrypt/src/port/arm/thumb2-curve25519.S
Original file line number Diff line number Diff line change
Expand Up @@ -1511,7 +1511,7 @@ fe_cmov_table:
#endif /* WC_NO_CACHE_RESISTANT */
#endif /* HAVE_ED25519_MAKE_KEY || HAVE_ED25519_SIGN */
#endif /* HAVE_ED25519 */
#ifdef WOLFSSL_SP_NO_UMAAL
#ifdef WOLFSSL_ARM_ARCH_7M
.text
.align 4
.globl fe_mul_op
Expand Down Expand Up @@ -2023,7 +2023,7 @@ fe_mul_op:
POP {pc}
/* Cycle Count = 239 */
.size fe_mul_op,.-fe_mul_op
#endif /* WOLFSSL_SP_NO_UMAAL */
#endif /* WOLFSSL_ARM_ARCH_7M */
.text
.align 4
.globl fe_mul
Expand All @@ -2034,7 +2034,7 @@ fe_mul:
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 24 */
.size fe_mul,.-fe_mul
#ifdef WOLFSSL_SP_NO_UMAAL
#ifdef WOLFSSL_ARM_ARCH_7M
.text
.align 4
.globl fe_sq_op
Expand Down Expand Up @@ -2425,7 +2425,7 @@ fe_sq_op:
POP {pc}
/* Cycle Count = 179 */
.size fe_sq_op,.-fe_sq_op
#endif /* WOLFSSL_SP_NO_UMAAL */
#endif /* WOLFSSL_ARM_ARCH_7M */
.text
.align 4
.globl fe_sq
Expand All @@ -2437,7 +2437,7 @@ fe_sq:
/* Cycle Count = 24 */
.size fe_sq,.-fe_sq
#ifdef HAVE_CURVE25519
#ifdef WOLFSSL_SP_NO_UMAAL
#ifdef WOLFSSL_ARM_ARCH_7M
.text
.align 4
.globl fe_mul121666
Expand Down Expand Up @@ -2524,7 +2524,7 @@ fe_mul121666:
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 69 */
.size fe_mul121666,.-fe_mul121666
#endif /* WOLFSSL_SP_NO_UMAAL */
#endif /* WOLFSSL_ARM_ARCH_7M */
#ifndef WC_NO_CACHE_RESISTANT
.text
.align 4
Expand Down Expand Up @@ -3466,7 +3466,7 @@ L_fe_invert8:
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 292 */
.size fe_invert,.-fe_invert
#ifdef WOLFSSL_SP_NO_UMAAL
#ifdef WOLFSSL_ARM_ARCH_7M
.text
.align 4
.globl fe_sq2
Expand Down Expand Up @@ -3925,7 +3925,7 @@ fe_sq2:
POP {pc}
/* Cycle Count = 213 */
.size fe_sq2,.-fe_sq2
#endif /* WOLFSSL_SP_NO_UMAAL */
#endif /* WOLFSSL_ARM_ARCH_7M */
.text
.align 4
.globl fe_pow22523
Expand Down Expand Up @@ -4535,7 +4535,7 @@ ge_sub:
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 138 */
.size ge_sub,.-ge_sub
#ifdef WOLFSSL_SP_NO_UMAAL
#ifdef WOLFSSL_ARM_ARCH_7M
.text
.align 4
.globl sc_reduce
Expand Down Expand Up @@ -5258,9 +5258,9 @@ sc_reduce:
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 502 */
.size sc_reduce,.-sc_reduce
#endif /* WOLFSSL_SP_NO_UMAAL */
#endif /* WOLFSSL_ARM_ARCH_7M */
#ifdef HAVE_ED25519_SIGN
#ifdef WOLFSSL_SP_NO_UMAAL
#ifdef WOLFSSL_ARM_ARCH_7M
.text
.align 4
.globl sc_muladd
Expand Down Expand Up @@ -6470,7 +6470,7 @@ sc_muladd:
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 752 */
.size sc_muladd,.-sc_muladd
#endif /* WOLFSSL_SP_NO_UMAAL */
#endif /* WOLFSSL_ARM_ARCH_7M */
#endif /* HAVE_ED25519_SIGN */
#endif /* HAVE_ED25519 */

Expand Down
24 changes: 12 additions & 12 deletions wolfcrypt/src/port/arm/thumb2-curve25519_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -1667,7 +1667,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
#endif /* WC_NO_CACHE_RESISTANT */
#endif /* HAVE_ED25519_MAKE_KEY || HAVE_ED25519_SIGN */
#endif /* HAVE_ED25519 */
#ifdef WOLFSSL_SP_NO_UMAAL
#ifdef WOLFSSL_ARM_ARCH_7M
void fe_mul_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_mul_op()
Expand Down Expand Up @@ -2193,7 +2193,7 @@ void fe_mul_op()
);
}

#endif /* WOLFSSL_SP_NO_UMAAL */
#endif /* WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_mul(fe r_p, const fe a_p, const fe b_p)
#else
Expand All @@ -2214,7 +2214,7 @@ void fe_mul(fe r, const fe a, const fe b)
);
}

#ifdef WOLFSSL_SP_NO_UMAAL
#ifdef WOLFSSL_ARM_ARCH_7M
void fe_sq_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_sq_op()
Expand Down Expand Up @@ -2619,7 +2619,7 @@ void fe_sq_op()
);
}

#endif /* WOLFSSL_SP_NO_UMAAL */
#endif /* WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_sq(fe r_p, const fe a_p)
#else
Expand All @@ -2640,7 +2640,7 @@ void fe_sq(fe r, const fe a)
}

#ifdef HAVE_CURVE25519
#ifdef WOLFSSL_SP_NO_UMAAL
#ifdef WOLFSSL_ARM_ARCH_7M
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_mul121666(fe r_p, fe a_p)
#else
Expand Down Expand Up @@ -2745,7 +2745,7 @@ void fe_mul121666(fe r, fe a)
);
}

#endif /* WOLFSSL_SP_NO_UMAAL */
#endif /* WOLFSSL_ARM_ARCH_7M */
#ifndef WC_NO_CACHE_RESISTANT
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
Expand Down Expand Up @@ -3907,7 +3907,7 @@ void fe_invert(fe r, const fe a)
);
}

#ifdef WOLFSSL_SP_NO_UMAAL
#ifdef WOLFSSL_ARM_ARCH_7M
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_sq2(fe r_p, const fe a_p)
#else
Expand Down Expand Up @@ -4384,7 +4384,7 @@ void fe_sq2(fe r, const fe a)
);
}

#endif /* WOLFSSL_SP_NO_UMAAL */
#endif /* WOLFSSL_ARM_ARCH_7M */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_pow22523(fe r_p, const fe a_p)
#else
Expand Down Expand Up @@ -5126,7 +5126,7 @@ void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
);
}

#ifdef WOLFSSL_SP_NO_UMAAL
#ifdef WOLFSSL_ARM_ARCH_7M
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void sc_reduce(byte* s_p)
#else
Expand Down Expand Up @@ -5865,9 +5865,9 @@ void sc_reduce(byte* s)
);
}

#endif /* WOLFSSL_SP_NO_UMAAL */
#endif /* WOLFSSL_ARM_ARCH_7M */
#ifdef HAVE_ED25519_SIGN
#ifdef WOLFSSL_SP_NO_UMAAL
#ifdef WOLFSSL_ARM_ARCH_7M
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
#else
Expand Down Expand Up @@ -7099,7 +7099,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
);
}

#endif /* WOLFSSL_SP_NO_UMAAL */
#endif /* WOLFSSL_ARM_ARCH_7M */
#endif /* HAVE_ED25519_SIGN */
#endif /* HAVE_ED25519 */

Expand Down
Loading

0 comments on commit d204798

Please sign in to comment.