-
Notifications
You must be signed in to change notification settings - Fork 2.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make size of output in mpi_mul_hlp() explicit #5701
Changes from all commits
e7f14a3
defe569
fee261a
74a11a3
e141702
aef9cc4
25bb732
6454993
e9dd9a1
284d778
5d4ceeb
efdc519
99ba4cc
dfcb2d0
53b3c60
808e666
9137b9c
0235f75
2ef0cff
d830feb
bb04cb9
127fcab
da763de
1772e05
0dbf04a
3577131
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,7 @@ | |
#if defined(MBEDTLS_BIGNUM_C) | ||
|
||
#include "mbedtls/bignum.h" | ||
#include "bignum_internal.h" | ||
#include "bn_mul.h" | ||
#include "mbedtls/platform_util.h" | ||
#include "mbedtls/error.h" | ||
|
@@ -1369,53 +1370,29 @@ int mbedtls_mpi_sub_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint | |
return( mbedtls_mpi_sub_mpi( X, A, &B ) ); | ||
} | ||
|
||
/** Helper for mbedtls_mpi multiplication. | ||
* | ||
* Add \p b * \p s to \p d. | ||
* | ||
* \param i The number of limbs of \p s. | ||
* \param[in] s A bignum to multiply, of size \p i. | ||
* It may overlap with \p d, but only if | ||
* \p d <= \p s. | ||
* Its leading limb must not be \c 0. | ||
* \param[in,out] d The bignum to add to. | ||
* It must be sufficiently large to store the | ||
* result of the multiplication. This means | ||
* \p i + 1 limbs if \p d[\p i - 1] started as 0 and \p b | ||
* is not known a priori. | ||
* \param b A scalar to multiply. | ||
*/ | ||
static | ||
#if defined(__APPLE__) && defined(__arm__) | ||
/* | ||
* Apple LLVM version 4.2 (clang-425.0.24) (based on LLVM 3.2svn) | ||
* appears to need this to prevent bad ARM code generation at -O3. | ||
*/ | ||
__attribute__ ((noinline)) | ||
#endif | ||
void mpi_mul_hlp( size_t i, | ||
const mbedtls_mpi_uint *s, | ||
mbedtls_mpi_uint *d, | ||
mbedtls_mpi_uint b ) | ||
mbedtls_mpi_uint mbedtls_mpi_core_mla( mbedtls_mpi_uint *d, size_t d_len, | ||
const mbedtls_mpi_uint *s, size_t s_len, | ||
mbedtls_mpi_uint b ) | ||
{ | ||
mbedtls_mpi_uint c = 0, t = 0; | ||
mbedtls_mpi_uint c = 0; /* carry */ | ||
size_t excess_len = d_len - s_len; | ||
|
||
#if defined(MULADDC_HUIT) | ||
for( ; i >= 8; i -= 8 ) | ||
for( ; s_len >= 8; s_len -= 8 ) | ||
{ | ||
MULADDC_INIT | ||
MULADDC_HUIT | ||
MULADDC_STOP | ||
} | ||
|
||
for( ; i > 0; i-- ) | ||
for( ; s_len > 0; s_len-- ) | ||
{ | ||
MULADDC_INIT | ||
MULADDC_CORE | ||
MULADDC_STOP | ||
} | ||
#else /* MULADDC_HUIT */ | ||
for( ; i >= 16; i -= 16 ) | ||
for( ; s_len >= 16; s_len -= 16 ) | ||
{ | ||
MULADDC_INIT | ||
MULADDC_CORE MULADDC_CORE | ||
|
@@ -1430,7 +1407,7 @@ void mpi_mul_hlp( size_t i, | |
MULADDC_STOP | ||
} | ||
|
||
for( ; i >= 8; i -= 8 ) | ||
for( ; s_len >= 8; s_len -= 8 ) | ||
{ | ||
MULADDC_INIT | ||
MULADDC_CORE MULADDC_CORE | ||
|
@@ -1441,20 +1418,20 @@ void mpi_mul_hlp( size_t i, | |
MULADDC_STOP | ||
} | ||
|
||
for( ; i > 0; i-- ) | ||
for( ; s_len > 0; s_len-- ) | ||
{ | ||
MULADDC_INIT | ||
MULADDC_CORE | ||
MULADDC_STOP | ||
} | ||
#endif /* MULADDC_HUIT */ | ||
|
||
t++; | ||
|
||
while( c != 0 ) | ||
while( excess_len-- ) | ||
{ | ||
*d += c; c = ( *d < c ); d++; | ||
} | ||
|
||
return( c ); | ||
} | ||
|
||
/* | ||
|
@@ -1490,8 +1467,14 @@ int mbedtls_mpi_mul_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi | |
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i + j ) ); | ||
MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) ); | ||
|
||
for( ; j > 0; j-- ) | ||
mpi_mul_hlp( i, A->p, X->p + j - 1, B->p[j - 1] ); | ||
for( size_t k = 0; k < j; k++ ) | ||
{ | ||
/* We know that there cannot be any carry-out since we're | ||
* iterating from bottom to top. */ | ||
(void) mbedtls_mpi_core_mla( X->p + k, i + 1, | ||
A->p, i, | ||
B->p[k] ); | ||
} | ||
|
||
/* If the result is 0, we don't shortcut the operation, which reduces | ||
* but does not eliminate side channels leaking the zero-ness. We do | ||
|
@@ -1517,19 +1500,15 @@ int mbedtls_mpi_mul_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_uint | |
MPI_VALIDATE_RET( X != NULL ); | ||
MPI_VALIDATE_RET( A != NULL ); | ||
|
||
/* mpi_mul_hlp can't deal with a leading 0. */ | ||
size_t n = A->n; | ||
while( n > 0 && A->p[n - 1] == 0 ) | ||
--n; | ||
|
||
/* The general method below doesn't work if n==0 or b==0. By chance | ||
* calculating the result is trivial in those cases. */ | ||
/* The general method below doesn't work if b==0. */ | ||
if( b == 0 || n == 0 ) | ||
{ | ||
return( mbedtls_mpi_lset( X, 0 ) ); | ||
} | ||
|
||
/* Calculate A*b as A + A*(b-1) to take advantage of mpi_mul_hlp */ | ||
/* Calculate A*b as A + A*(b-1) to take advantage of mbedtls_mpi_core_mla */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pre-existing (sorry, as I'm trying to make sure everything is clear in my mind, I'm noticing things that were already there). Another, perhaps more obvious way, would be to compute There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, exactly -- I implemented the approach you suggest and spent an hour or so debugging this, until @gilles-peskine-arm mentioned that input and output can alias. |
||
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; | ||
/* In general, A * b requires 1 limb more than b. If | ||
* A->p[n - 1] * b / b == A->p[n - 1], then A * b fits in the same | ||
|
@@ -1538,10 +1517,13 @@ int mbedtls_mpi_mul_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_uint | |
* making the call to grow() unconditional causes slightly fewer | ||
* calls to calloc() in ECP code, presumably because it reuses the | ||
* same mpi for a while and this way the mpi is more likely to directly | ||
* grow to its final size. */ | ||
* grow to its final size. | ||
* | ||
* Note that calculating A*b as 0 + A*b doesn't work as-is because | ||
* A,X can be the same. */ | ||
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, n + 1 ) ); | ||
MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, A ) ); | ||
mpi_mul_hlp( n, A->p, X->p, b - 1 ); | ||
mbedtls_mpi_core_mla( X->p, X->n, A->p, n, b - 1 ); | ||
|
||
cleanup: | ||
return( ret ); | ||
|
@@ -1907,8 +1889,8 @@ static void mpi_montg_init( mbedtls_mpi_uint *mm, const mbedtls_mpi *N ) | |
* \param mm The value calculated by `mpi_montg_init(&mm, N)`. | ||
* This is -N^-1 mod 2^ciL. | ||
* \param[in,out] T A bignum for temporary storage. | ||
* It must be at least twice the limb size of N plus 2 | ||
* (T->n >= 2 * (N->n + 1)). | ||
* It must be at least twice the limb size of N plus 1 | ||
* (T->n >= 2 * N->n + 1). | ||
* Its initial content is unused and | ||
* its final content is indeterminate. | ||
* Note that unlike the usual convention in the library | ||
|
@@ -1917,27 +1899,32 @@ static void mpi_montg_init( mbedtls_mpi_uint *mm, const mbedtls_mpi *N ) | |
static void mpi_montmul( mbedtls_mpi *A, const mbedtls_mpi *B, const mbedtls_mpi *N, mbedtls_mpi_uint mm, | ||
const mbedtls_mpi *T ) | ||
{ | ||
size_t i, n, m; | ||
mbedtls_mpi_uint u0, u1, *d; | ||
size_t n, m; | ||
mbedtls_mpi_uint *d; | ||
|
||
memset( T->p, 0, T->n * ciL ); | ||
|
||
d = T->p; | ||
n = N->n; | ||
m = ( B->n < n ) ? B->n : n; | ||
|
||
for( i = 0; i < n; i++ ) | ||
for( size_t i = 0; i < n; i++ ) | ||
{ | ||
mbedtls_mpi_uint u0, u1; | ||
|
||
/* | ||
* T = (T + u0*B + u1*N) / 2^biL | ||
*/ | ||
u0 = A->p[i]; | ||
u1 = ( d[0] + u0 * B->p[0] ) * mm; | ||
|
||
mpi_mul_hlp( m, B->p, d, u0 ); | ||
mpi_mul_hlp( n, N->p, d, u1 ); | ||
|
||
d++; d[n + 1] = 0; | ||
(void) mbedtls_mpi_core_mla( d, n + 2, | ||
B->p, m, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pre-existing: we could probably find better names for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm keeping |
||
u0 ); | ||
(void) mbedtls_mpi_core_mla( d, n + 2, | ||
N->p, n, | ||
u1 ); | ||
d++; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pre-existing: I'm not sure if the best place for this is here or in the loop preamble (as it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good |
||
} | ||
|
||
/* At this point, d is either the desired result or the desired result | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The conditional subtraction can be simplified here to borrow = mpi_sub_hlp( n, d, d, N->p );
borrow = ( d[n] < borrow );
(void) mpi_mul_hlp( d, n, N->p, n, borrow );
memcpy( A->p, d, n * ciL ); which would be an example of a call to (@yanesca @mpg we touched on this approach in the chat, so FYI). |
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/** | ||
* Internal bignum functions | ||
* | ||
* Copyright The Mbed TLS Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#ifndef MBEDTLS_BIGNUM_INTERNAL_H | ||
#define MBEDTLS_BIGNUM_INTERNAL_H | ||
|
||
#include "common.h" | ||
|
||
#if defined(MBEDTLS_BIGNUM_C) | ||
#include "mbedtls/bignum.h" | ||
#endif | ||
|
||
/** Perform a known-size multiply accumulate operation | ||
* | ||
* Add \p b * \p s to \p d. | ||
* | ||
* \param[in,out] d The pointer to the (little-endian) array | ||
* representing the bignum to accumulate onto. | ||
* \param d_len The number of limbs of \p d. This must be | ||
* at least \p s_len. | ||
* \param[in] s The pointer to the (little-endian) array | ||
* representing the bignum to multiply with. | ||
* This may be the same as \p d. Otherwise, | ||
* it must be disjoint from \p d. | ||
* \param s_len The number of limbs of \p s. | ||
* \param b A scalar to multiply with. | ||
* | ||
* \return c The carry at the end of the operation. | ||
*/ | ||
mbedtls_mpi_uint mbedtls_mpi_core_mla( mbedtls_mpi_uint *d, size_t d_len , | ||
const mbedtls_mpi_uint *s, size_t s_len, | ||
mbedtls_mpi_uint b ); | ||
|
||
#endif /* MBEDTLS_BIGNUM_INTERNAL_H */ |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
#include "mbedtls/error.h" | ||
|
||
#include "bn_mul.h" | ||
#include "bignum_internal.h" | ||
#include "ecp_invasive.h" | ||
|
||
#include <string.h> | ||
|
@@ -5213,40 +5214,30 @@ static int ecp_mod_p521( mbedtls_mpi *N ) | |
|
||
/* | ||
* Fast quasi-reduction modulo p255 = 2^255 - 19 | ||
* Write N as A0 + 2^255 A1, return A0 + 19 * A1 | ||
* Write N as A0 + 2^256 A1, return A0 + 38 * A1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Out of curiosity: did you check how that affects performance? Also, I think if we wanted we could still reduce a bit more by doing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't remember the details, but yes I did a quick check and it led to a noticable though not huge performance improvement, I think somewhere around 10-20%. |
||
*/ | ||
static int ecp_mod_p255( mbedtls_mpi *N ) | ||
{ | ||
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; | ||
size_t i; | ||
mbedtls_mpi M; | ||
mbedtls_mpi_uint Mp[P255_WIDTH + 2]; | ||
mbedtls_mpi_uint Mp[P255_WIDTH]; | ||
|
||
if( N->n < P255_WIDTH ) | ||
/* Helper references for top part of N */ | ||
mbedtls_mpi_uint * const NT_p = N->p + P255_WIDTH; | ||
const size_t NT_n = N->n - P255_WIDTH; | ||
if( N->n <= P255_WIDTH ) | ||
return( 0 ); | ||
|
||
/* M = A1 */ | ||
M.s = 1; | ||
M.n = N->n - ( P255_WIDTH - 1 ); | ||
if( M.n > P255_WIDTH + 1 ) | ||
if( NT_n > P255_WIDTH ) | ||
return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA ); | ||
M.p = Mp; | ||
memset( Mp, 0, sizeof Mp ); | ||
memcpy( Mp, N->p + P255_WIDTH - 1, M.n * sizeof( mbedtls_mpi_uint ) ); | ||
MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &M, 255 % ( 8 * sizeof( mbedtls_mpi_uint ) ) ) ); | ||
M.n++; /* Make room for multiplication by 19 */ | ||
|
||
/* N = A0 */ | ||
MBEDTLS_MPI_CHK( mbedtls_mpi_set_bit( N, 255, 0 ) ); | ||
for( i = P255_WIDTH; i < N->n; i++ ) | ||
N->p[i] = 0; | ||
/* Split N as N + 2^256 M */ | ||
memcpy( Mp, NT_p, sizeof( mbedtls_mpi_uint ) * NT_n ); | ||
memset( NT_p, 0, sizeof( mbedtls_mpi_uint ) * NT_n ); | ||
|
||
/* N = A0 + 19 * A1 */ | ||
MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &M, &M, 19 ) ); | ||
MBEDTLS_MPI_CHK( mbedtls_mpi_add_abs( N, N, &M ) ); | ||
/* N = A0 + 38 * A1 */ | ||
mbedtls_mpi_core_mla( N->p, P255_WIDTH + 1, | ||
Mp, NT_n, | ||
38 ); | ||
|
||
cleanup: | ||
return( ret ); | ||
return( 0 ); | ||
} | ||
#endif /* MBEDTLS_ECP_DP_CURVE25519_ENABLED */ | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would find it more readable to declare this just before the
while
statement where it's used. (Might even consider making it afor
loop.)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't do that for now because it would require an additional variable -- note that
s_len
is modified in the loops.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
actually, that's not a bad idea. also has the side effect of removing a very long loop if
d_len < s_len
(which would have had other bad consequences anyway, but still...)the loop could actually just go to
while( d_len-- > s_len )
- WDYT?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, right, I missed that. So, let's keep things as they are, then.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On a related note, should we add a comment that the macros are incrementing
d
so thatd + s_len
remains invariant throughout the loops?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I prefer the present version since the loop condition is simpler. Note also that #5706 further reworks this part of the code.