diff --git a/library/bignum.c b/library/bignum.c index 288f85932984..6f634b5d1717 100644 --- a/library/bignum.c +++ b/library/bignum.c @@ -38,6 +38,7 @@ #if defined(MBEDTLS_BIGNUM_C) #include "mbedtls/bignum.h" +#include "bignum_internal.h" #include "bn_mul.h" #include "mbedtls/platform_util.h" #include "mbedtls/error.h" @@ -1369,53 +1370,29 @@ int mbedtls_mpi_sub_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint return( mbedtls_mpi_sub_mpi( X, A, &B ) ); } -/** Helper for mbedtls_mpi multiplication. - * - * Add \p b * \p s to \p d. - * - * \param i The number of limbs of \p s. - * \param[in] s A bignum to multiply, of size \p i. - * It may overlap with \p d, but only if - * \p d <= \p s. - * Its leading limb must not be \c 0. - * \param[in,out] d The bignum to add to. - * It must be sufficiently large to store the - * result of the multiplication. This means - * \p i + 1 limbs if \p d[\p i - 1] started as 0 and \p b - * is not known a priori. - * \param b A scalar to multiply. - */ -static -#if defined(__APPLE__) && defined(__arm__) -/* - * Apple LLVM version 4.2 (clang-425.0.24) (based on LLVM 3.2svn) - * appears to need this to prevent bad ARM code generation at -O3. 
- */ -__attribute__ ((noinline)) -#endif -void mpi_mul_hlp( size_t i, - const mbedtls_mpi_uint *s, - mbedtls_mpi_uint *d, - mbedtls_mpi_uint b ) +/* Multiply-accumulate helper: adds b * s onto d in place and returns the final carry. */ mbedtls_mpi_uint mbedtls_mpi_core_mla( mbedtls_mpi_uint *d, size_t d_len, + const mbedtls_mpi_uint *s, size_t s_len, + mbedtls_mpi_uint b ) { - mbedtls_mpi_uint c = 0, t = 0; + mbedtls_mpi_uint c = 0; /* carry */ + size_t excess_len = d_len - s_len; /* number of limbs of d beyond the s_len limbs of s; this is an unchecked size_t subtraction, so it wraps around if d_len < s_len */ #if defined(MULADDC_HUIT) - for( ; i >= 8; i -= 8 ) + for( ; s_len >= 8; s_len -= 8 ) { MULADDC_INIT MULADDC_HUIT MULADDC_STOP } - for( ; i > 0; i-- ) + for( ; s_len > 0; s_len-- ) { MULADDC_INIT MULADDC_CORE MULADDC_STOP } #else /* MULADDC_HUIT */ - for( ; i >= 16; i -= 16 ) + for( ; s_len >= 16; s_len -= 16 ) { MULADDC_INIT MULADDC_CORE MULADDC_CORE @@ -1430,7 +1407,7 @@ void mpi_mul_hlp( size_t i, MULADDC_STOP } - for( ; i >= 8; i -= 8 ) + for( ; s_len >= 8; s_len -= 8 ) { MULADDC_INIT MULADDC_CORE MULADDC_CORE @@ -1441,7 +1418,7 @@ void mpi_mul_hlp( size_t i, MULADDC_STOP } - for( ; i > 0; i-- ) + for( ; s_len > 0; s_len-- ) { MULADDC_INIT MULADDC_CORE @@ -1449,12 +1426,12 @@ void mpi_mul_hlp( size_t i, } #endif /* MULADDC_HUIT */ - t++; - - while( c != 0 ) + /* Add the pending carry into each of the excess_len remaining limbs of d, always running the full count rather than stopping once the carry is zero. NOTE(review): this assumes the MULADDC_* macros left d pointing just past the first s_len limbs - they are defined in bn_mul.h, confirm. */ while( excess_len-- ) { *d += c; c = ( *d < c ); d++; } + + return( c ); /* carry remaining once all limbs of d have been processed */ } /* @@ -1490,8 +1467,14 @@ int mbedtls_mpi_mul_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i + j ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) ); - for( ; j > 0; j-- ) - mpi_mul_hlp( i, A->p, X->p + j - 1, B->p[j - 1] ); + for( size_t k = 0; k < j; k++ ) + { + /* We know that there cannot be any carry-out since we're + * iterating from bottom to top. */ + (void) mbedtls_mpi_core_mla( X->p + k, i + 1, + A->p, i, + B->p[k] ); + } /* If the result is 0, we don't shortcut the operation, which reduces * but does not eliminate side channels leaking the zero-ness.
We do @@ -1517,19 +1500,15 @@ int mbedtls_mpi_mul_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_uint MPI_VALIDATE_RET( X != NULL ); MPI_VALIDATE_RET( A != NULL ); - /* mpi_mul_hlp can't deal with a leading 0. */ size_t n = A->n; while( n > 0 && A->p[n - 1] == 0 ) --n; - /* The general method below doesn't work if n==0 or b==0. By chance - * calculating the result is trivial in those cases. */ + /* The general method below doesn't work if b==0: it multiplies by b - 1 (presumably that wraps around when b is 0 - confirm mbedtls_mpi_uint is unsigned). The same shortcut to a zero result is taken when A has no non-zero limb (n == 0). */ if( b == 0 || n == 0 ) - { return( mbedtls_mpi_lset( X, 0 ) ); - } - /* Calculate A*b as A + A*(b-1) to take advantage of mpi_mul_hlp */ + /* Calculate A*b as A + A*(b-1) to take advantage of mbedtls_mpi_core_mla: A is first copied into X, then A*(b-1) is accumulated onto the X->n limbs of X. The carry returned by mbedtls_mpi_core_mla is not checked in this function. */ int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; /* In general, A * b requires 1 limb more than b. If * A->p[n - 1] * b / b == A->p[n - 1], then A * b fits in the same @@ -1538,10 +1517,13 @@ int mbedtls_mpi_mul_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_uint * making the call to grow() unconditional causes slightly fewer * calls to calloc() in ECP code, presumably because it reuses the * same mpi for a while and this way the mpi is more likely to directly - * grow to its final size. */ + * grow to its final size. + * + * Note that calculating A*b as 0 + A*b doesn't work as-is because + * A,X can be the same. */ MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, n + 1 ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, A ) ); - mpi_mul_hlp( n, A->p, X->p, b - 1 ); + mbedtls_mpi_core_mla( X->p, X->n, A->p, n, b - 1 ); cleanup: return( ret ); @@ -1907,8 +1889,8 @@ static void mpi_montg_init( mbedtls_mpi_uint *mm, const mbedtls_mpi *N ) * \param mm The value calculated by `mpi_montg_init(&mm, N)`. * This is -N^-1 mod 2^ciL. * \param[in,out] T A bignum for temporary storage. - * It must be at least twice the limb size of N plus 2 - * (T->n >= 2 * (N->n + 1)). + * It must be at least twice the limb size of N plus 1 + * (T->n >= 2 * N->n + 1). * Its initial content is unused and * its final content is indeterminate.
* Note that unlike the usual convention in the library @@ -1917,8 +1899,8 @@ static void mpi_montg_init( mbedtls_mpi_uint *mm, const mbedtls_mpi *N ) static void mpi_montmul( mbedtls_mpi *A, const mbedtls_mpi *B, const mbedtls_mpi *N, mbedtls_mpi_uint mm, const mbedtls_mpi *T ) { - size_t i, n, m; - mbedtls_mpi_uint u0, u1, *d; + size_t n, m; + mbedtls_mpi_uint *d; memset( T->p, 0, T->n * ciL ); @@ -1926,18 +1908,23 @@ static void mpi_montmul( mbedtls_mpi *A, const mbedtls_mpi *B, const mbedtls_mpi n = N->n; m = ( B->n < n ) ? B->n : n; - for( i = 0; i < n; i++ ) + for( size_t i = 0; i < n; i++ ) { + mbedtls_mpi_uint u0, u1; + /* * T = (T + u0*B + u1*N) / 2^biL */ u0 = A->p[i]; u1 = ( d[0] + u0 * B->p[0] ) * mm; - mpi_mul_hlp( m, B->p, d, u0 ); - mpi_mul_hlp( n, N->p, d, u1 ); - - d++; d[n + 1] = 0; + /* Accumulate u0*B (m limbs) and then u1*N (n limbs) onto the (n + 2)-limb window starting at d; both returned carries are deliberately discarded. */ (void) mbedtls_mpi_core_mla( d, n + 2, + B->p, m, + u0 ); + (void) mbedtls_mpi_core_mla( d, n + 2, + N->p, n, + u1 ); + d++; /* advance the window by one limb for the next iteration */ } /* At this point, d is either the desired result or the desired result diff --git a/library/bignum_internal.h b/library/bignum_internal.h new file mode 100644 index 000000000000..8677dcf1f01f --- /dev/null +++ b/library/bignum_internal.h @@ -0,0 +1,50 @@ +/** + * Internal bignum functions + * + * Copyright The Mbed TLS Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef MBEDTLS_BIGNUM_INTERNAL_H +#define MBEDTLS_BIGNUM_INTERNAL_H + +#include "common.h" + +#if defined(MBEDTLS_BIGNUM_C) +#include "mbedtls/bignum.h" +#endif + +/** Perform a known-size multiply accumulate operation + * + * Add \p b * \p s to \p d. + * + * \param[in,out] d The pointer to the (little-endian) array + * representing the bignum to accumulate onto. + * \param d_len The number of limbs of \p d. This must be + * at least \p s_len. The implementation in bignum.c + * does not check this: it computes + * \p d_len - \p s_len as a size_t. + * \param[in] s The pointer to the (little-endian) array + * representing the bignum to multiply with. + * This may be the same as \p d. Otherwise, + * it must be disjoint from \p d. + * \param s_len The number of limbs of \p s. + * \param b A scalar to multiply with. + * + * \return The carry at the end of the operation, that is, the carry + * left over once all \p d_len limbs of \p d have been updated. + */ +mbedtls_mpi_uint mbedtls_mpi_core_mla( mbedtls_mpi_uint *d, size_t d_len , + const mbedtls_mpi_uint *s, size_t s_len, + mbedtls_mpi_uint b ); + +#endif /* MBEDTLS_BIGNUM_INTERNAL_H */ diff --git a/library/bn_mul.h b/library/bn_mul.h index b71ddd881a04..aa1183fa5410 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -99,6 +99,7 @@ #if defined(__i386__) && defined(__OPTIMIZE__) #define MULADDC_INIT \ + { mbedtls_mpi_uint t; /* t is declared inside the macro for the asm operands that reference it. NOTE(review): this opening brace is presumably matched by the closing brace added after the asm statement further down - confirm. */ \ asm( \ "movl %%ebx, %0 \n\t" \ "movl %5, %%esi \n\t" \ @@ -190,7 +191,8 @@ : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \ : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \ : "eax", "ebx", "ecx", "edx", "esi", "edi" \ - ); + ); } \ + #else @@ -202,7 +204,7 @@ : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \ : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \ : "eax", "ebx", "ecx", "edx", "esi", "edi" \ - ); + ); } #endif /* SSE2 */ #endif /* i386 */ diff --git a/library/ecp_curves.c b/library/ecp_curves.c index 421a067bbfe7..6b8ff5c7fba4 100644 --- a/library/ecp_curves.c +++ b/library/ecp_curves.c @@ -26,6 +26,7 @@ #include "mbedtls/error.h" #include "bn_mul.h" +#include "bignum_internal.h" #include "ecp_invasive.h" #include @@ -5213,40 +5214,30 @@ static int ecp_mod_p521(
mbedtls_mpi *N ) /* * Fast quasi-reduction modulo p255 = 2^255 - 19 - * Write N as A0 + 2^255 A1, return A0 + 19 * A1 + * Write N as A0 + 2^256 A1, return A0 + 38 * A1 */ static int ecp_mod_p255( mbedtls_mpi *N ) { - int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; - size_t i; - mbedtls_mpi M; - mbedtls_mpi_uint Mp[P255_WIDTH + 2]; + mbedtls_mpi_uint Mp[P255_WIDTH]; - if( N->n < P255_WIDTH ) + /* Helper references for the top part of N, i.e. the limbs from index P255_WIDTH upwards. Both are computed before the size check below, so NT_n wraps around when N->n < P255_WIDTH; NT_n is only read after that check has returned early. NOTE(review): NT_p is likewise formed before the check - confirm that is acceptable when N has fewer than P255_WIDTH limbs. */ + mbedtls_mpi_uint * const NT_p = N->p + P255_WIDTH; + const size_t NT_n = N->n - P255_WIDTH; + if( N->n <= P255_WIDTH ) return( 0 ); - - /* M = A1 */ - M.s = 1; - M.n = N->n - ( P255_WIDTH - 1 ); - if( M.n > P255_WIDTH + 1 ) + /* Mp only has room for P255_WIDTH limbs, so reject a longer top part. */ if( NT_n > P255_WIDTH ) return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA ); - M.p = Mp; - memset( Mp, 0, sizeof Mp ); - memcpy( Mp, N->p + P255_WIDTH - 1, M.n * sizeof( mbedtls_mpi_uint ) ); - MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &M, 255 % ( 8 * sizeof( mbedtls_mpi_uint ) ) ) ); - M.n++; /* Make room for multiplication by 19 */ - /* N = A0 */ - MBEDTLS_MPI_CHK( mbedtls_mpi_set_bit( N, 255, 0 ) ); - for( i = P255_WIDTH; i < N->n; i++ ) - N->p[i] = 0; + /* Split N as A0 + 2^256 A1: copy the top NT_n limbs (A1) into Mp, then zero them in N so that N holds only A0. */ + memcpy( Mp, NT_p, sizeof( mbedtls_mpi_uint ) * NT_n ); + memset( NT_p, 0, sizeof( mbedtls_mpi_uint ) * NT_n ); - /* N = A0 + 19 * A1 */ - MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &M, &M, 19 ) ); - MBEDTLS_MPI_CHK( mbedtls_mpi_add_abs( N, N, &M ) ); + /* N = A0 + 38 * A1, accumulated in place over the low P255_WIDTH + 1 limbs of N. The carry returned by mbedtls_mpi_core_mla is not checked here. */ + mbedtls_mpi_core_mla( N->p, P255_WIDTH + 1, + Mp, NT_n, + 38 ); -cleanup: - return( ret ); + return( 0 ); } #endif /* MBEDTLS_ECP_DP_CURVE25519_ENABLED */