-
Notifications
You must be signed in to change notification settings - Fork 838
/
Copy pathsp_int.c
19897 lines (18865 loc) · 668 KB
/
sp_int.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* sp_int.c
*
* Copyright (C) 2006-2025 wolfSSL Inc.
*
* This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* wolfSSL is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
/* Implementation by Sean Parkinson. */
/*
DESCRIPTION
This library provides single precision (SP) integer math functions.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)
#if (!defined(WOLFSSL_SMALL_STACK) && !defined(SP_ALLOC)) || \
defined(WOLFSSL_SP_NO_MALLOC)
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
!defined(WOLFSSL_SP_NO_DYN_STACK)
#pragma GCC diagnostic push
/* We are statically declaring a variable smaller than sp_int.
* We track available memory in the 'size' field.
* Disable warnings of sp_int being partly outside array bounds of variable.
*/
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
#endif
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
#define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
/* SP Build Options:
* WOLFSSL_HAVE_SP_RSA: Enable SP RSA support
* WOLFSSL_HAVE_SP_DH: Enable SP DH support
* WOLFSSL_HAVE_SP_ECC: Enable SP ECC support
* WOLFSSL_SP_MATH: Use only single precision math and algorithms
* it supports (no fastmath tfm.c or normal integer.c)
* WOLFSSL_SP_MATH_ALL Implementation of all MP functions
* (replacement for tfm.c and integer.c)
* WOLFSSL_SP_SMALL: Use smaller version of code and avoid large
* stack variables
* WOLFSSL_SP_NO_MALLOC: Always use stack, no heap XMALLOC/XFREE allowed
* WOLFSSL_SP_NO_2048: Disable RSA/DH 2048-bit support
* WOLFSSL_SP_NO_3072: Disable RSA/DH 3072-bit support
* WOLFSSL_SP_4096: Enable RSA/DH 4096-bit support
* WOLFSSL_SP_NO_256 Disable ECC 256-bit SECP256R1 support
* WOLFSSL_SP_384 Enable ECC 384-bit SECP384R1 support
* WOLFSSL_SP_521 Enable ECC 521-bit SECP521R1 support
* WOLFSSL_SP_ASM Enable assembly speedups (detect platform)
* WOLFSSL_SP_X86_64_ASM Enable Intel x64 assembly implementation
* WOLFSSL_SP_ARM32_ASM Enable Aarch32 assembly implementation
* WOLFSSL_SP_ARM64_ASM Enable Aarch64 assembly implementation
* WOLFSSL_SP_ARM_CORTEX_M_ASM Enable Cortex-M assembly implementation
* WOLFSSL_SP_ARM_THUMB_ASM Enable ARM Thumb assembly implementation
* (used with -mthumb)
* WOLFSSL_SP_X86_64 Enable Intel x86 64-bit assembly speedups
* WOLFSSL_SP_X86 Enable Intel x86 assembly speedups
* WOLFSSL_SP_ARM64 Enable Aarch64 assembly speedups
* WOLFSSL_SP_ARM32 Enable ARM32 assembly speedups
* WOLFSSL_SP_ARM32_UDIV Enable word divide asm that uses UDIV instr
* WOLFSSL_SP_ARM_THUMB Enable ARM Thumb assembly speedups
* (explicitly uses register 'r7')
* WOLFSSL_SP_PPC64 Enable PPC64 assembly speedups
* WOLFSSL_SP_PPC Enable PPC assembly speedups
* WOLFSSL_SP_MIPS64 Enable MIPS64 assembly speedups
* WOLFSSL_SP_MIPS Enable MIPS assembly speedups
* WOLFSSL_SP_RISCV64 Enable RISCV64 assembly speedups
* WOLFSSL_SP_RISCV32 Enable RISCV32 assembly speedups
* WOLFSSL_SP_S390X Enable S390X assembly speedups
* SP_WORD_SIZE Force 32 or 64 bit mode
* WOLFSSL_SP_NONBLOCK Enables "non blocking" mode for SP math, which
* will return FP_WOULDBLOCK for long operations and function must be
* called again until complete.
* WOLFSSL_SP_FAST_NCT_EXPTMOD Enables the faster non-constant time modular
* exponentiation implementation.
* WOLFSSL_SP_INT_NEGATIVE Enables negative values to be used.
* WOLFSSL_SP_INT_DIGIT_ALIGN Enable when unaligned access of sp_int_digit
* pointer is not allowed.
* WOLFSSL_SP_NO_DYN_STACK Disable use of dynamic stack items.
* Dynamic arrays used when not small stack.
* WOLFSSL_SP_FAST_MODEXP Allow fast mod_exp with small C code
* WOLFSSL_SP_LOW_MEM Use algorithms that use less memory.
*/
/* TODO: WOLFSSL_SP_SMALL is incompatible with clang-12+ -Os. */
#if defined(__clang__) && defined(__clang_major__) && \
(__clang_major__ >= 12) && defined(WOLFSSL_SP_SMALL)
#undef WOLFSSL_SP_SMALL
#endif
#include <wolfssl/wolfcrypt/sp_int.h>
#if defined(WOLFSSL_LINUXKM) && !defined(WOLFSSL_SP_ASM)
/* force off unneeded vector register save/restore. */
#undef SAVE_VECTOR_REGISTERS
#define SAVE_VECTOR_REGISTERS(fail_clause) WC_DO_NOTHING
#undef RESTORE_VECTOR_REGISTERS
#define RESTORE_VECTOR_REGISTERS() WC_DO_NOTHING
#endif
/* DECL_SP_INT: Declare one variable of type 'sp_int'.
 * One of three forms is chosen at compile time:
 *   - small-stack/malloc builds: a NULL pointer, later filled in by
 *     ALLOC_SP_INT via XMALLOC.
 *   - C99 builds with dynamic stack allowed: a stack byte array sized for
 *     just 's' digits, aliased as an sp_int.
 *   - otherwise: a plain full-size sp_int on the stack.
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
!defined(WOLFSSL_SP_NO_MALLOC)
/* Declare a variable that will be assigned a value on XMALLOC. */
#define DECL_SP_INT(n, s) \
sp_int* n = NULL
#else
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
!defined(WOLFSSL_SP_NO_DYN_STACK)
/* Declare a variable on the stack with the required data size.
 * MP_INT_SIZEOF(s) is the byte size of an sp_int holding 's' digits, so
 * the sp_int aliased over n##d is smaller than a full sp_int (see the
 * -Warray-bounds pragma near the top of this file).
 */
#define DECL_SP_INT(n, s) \
byte n##d[MP_INT_SIZEOF(s)]; \
sp_int* (n) = (sp_int*)n##d
#else
/* Declare a variable on the stack (full-size sp_int). */
#define DECL_SP_INT(n, s) \
sp_int n[1]
#endif
#endif
/* ALLOC_SP_INT: Allocate an 'sp_int' of required size. */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
!defined(WOLFSSL_SP_NO_MALLOC)
/* Dynamically allocate just enough data to support size. */
#define ALLOC_SP_INT(n, s, err, h) \
do { \
if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
(err) = MP_VAL; \
} \
if ((err) == MP_OKAY) { \
(n) = (sp_int*)XMALLOC(MP_INT_SIZEOF(s), (h), \
DYNAMIC_TYPE_BIGINT); \
if ((n) == NULL) { \
(err) = MP_MEM; \
} \
} \
} \
while (0)
/* Dynamically allocate just enough data to support size - and set size. */
#define ALLOC_SP_INT_SIZE(n, s, err, h) \
do { \
ALLOC_SP_INT(n, s, err, h); \
if ((err) == MP_OKAY) { \
(n)->size = (sp_size_t)(s); \
} \
} \
while (0)
#else
/* Array declared on stack - check size is valid. */
#define ALLOC_SP_INT(n, s, err, h) \
do { \
if (((err) == MP_OKAY) && ((s) > (int)SP_INT_DIGITS)) { \
(err) = MP_VAL; \
} \
} \
while (0)
/* Array declared on stack - set the size field. */
#define ALLOC_SP_INT_SIZE(n, s, err, h) \
do { \
ALLOC_SP_INT(n, s, err, h); \
if ((err) == MP_OKAY) { \
(n)->size = (sp_size_t)(s); \
} \
} \
while (0)
#endif
/* FREE_SP_INT: Free an 'sp_int' variable. */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
!defined(WOLFSSL_SP_NO_MALLOC)
/* Free dynamically allocated data. */
#define FREE_SP_INT(n, h) \
do { \
if ((n) != NULL) { \
XFREE(n, h, DYNAMIC_TYPE_BIGINT); \
} \
} \
while (0)
#else
/* Nothing to do as declared on stack. */
#define FREE_SP_INT(n, h) WC_DO_NOTHING
#endif
/* Declare the variables backing an array of 'sp_int' pointers that will be
 * assigned values on XMALLOC:
 *   n##d            - single heap allocation that will hold all the sp_ints.
 *   (n)[c]          - array of pointers into that allocation, zeroed here.
 *   n##_dummy_var   - exists so XMEMSET can run in a declaration context;
 *                     ALLOC_DYN_SP_INT_ARRAY casts it to void to suppress
 *                     an unused-variable warning.
 */
#define DECL_DYN_SP_INT_ARRAY(n, s, c) \
sp_int* n##d = NULL; \
sp_int* (n)[c]; \
void *n ## _dummy_var = XMEMSET(n, 0, sizeof(n))
/* DECL_SP_INT_ARRAY: Declare array of 'sp_int'. */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
!defined(WOLFSSL_SP_NO_MALLOC)
/* Declare a variable that will be assigned a value on XMALLOC. */
#define DECL_SP_INT_ARRAY(n, s, c) \
DECL_DYN_SP_INT_ARRAY(n, s, c)
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
!defined(WOLFSSL_SP_NO_DYN_STACK)
/* Declare a variable on the stack with the required data size. */
#define DECL_SP_INT_ARRAY(n, s, c) \
byte n##d[MP_INT_SIZEOF(s) * (c)]; \
sp_int* (n)[c] = { NULL, }
#else
/* Declare a variable on the stack. */
#define DECL_SP_INT_ARRAY(n, s, c) \
sp_int n##d[c]; \
sp_int* (n)[c]
#endif
/* Dynamically allocate just enough data to support multiple sp_ints of the
* required size. Use pointers into data to make up array and set sizes.
*/
#define ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h) \
do { \
(void)n ## _dummy_var; \
if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
(err) = MP_VAL; \
} \
if ((err) == MP_OKAY) { \
n##d = (sp_int*)XMALLOC(MP_INT_SIZEOF(s) * (c), (h), \
DYNAMIC_TYPE_BIGINT); \
if (n##d == NULL) { \
(err) = MP_MEM; \
} \
else { \
int n##ii; \
(n)[0] = n##d; \
(n)[0]->size = (sp_size_t)(s); \
for (n##ii = 1; n##ii < (int)(c); n##ii++) { \
(n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s); \
(n)[n##ii]->size = (sp_size_t)(s); \
} \
} \
} \
} \
while (0)
/* ALLOC_SP_INT_ARRAY: Allocate an array of 'sp_int's of required size. */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
!defined(WOLFSSL_SP_NO_MALLOC)
#define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h)
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
!defined(WOLFSSL_SP_NO_DYN_STACK)
/* Data declared on stack that supports multiple sp_ints of the
* required size. Use pointers into data to make up array and set sizes.
*/
#define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
do { \
if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
(err) = MP_VAL; \
} \
if ((err) == MP_OKAY) { \
int n##ii; \
(n)[0] = (sp_int*)n##d; \
((sp_int_minimal*)(n)[0])->size = (sp_size_t)(s); \
for (n##ii = 1; n##ii < (int)(c); n##ii++) { \
(n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s); \
((sp_int_minimal*)(n)[n##ii])->size = (sp_size_t)(s); \
} \
} \
} \
while (0)
#else
/* Data declared on stack that supports multiple sp_ints of the
* required size. Set into array and set sizes.
*/
#define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
do { \
if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
(err) = MP_VAL; \
} \
if ((err) == MP_OKAY) { \
int n##ii; \
for (n##ii = 0; n##ii < (int)(c); n##ii++) { \
(n)[n##ii] = &n##d[n##ii]; \
(n)[n##ii]->size = (sp_size_t)(s); \
} \
} \
} \
while (0)
#endif
/* Free data variable that was dynamically allocated. */
#define FREE_DYN_SP_INT_ARRAY(n, h) \
do { \
if (n##d != NULL) { \
XFREE(n##d, h, DYNAMIC_TYPE_BIGINT); \
} \
} \
while (0)
/* FREE_SP_INT_ARRAY: Free an array of 'sp_int'. */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
!defined(WOLFSSL_SP_NO_MALLOC)
#define FREE_SP_INT_ARRAY(n, h) \
FREE_DYN_SP_INT_ARRAY(n, h)
#else
/* Nothing to do as data declared on stack. */
#define FREE_SP_INT_ARRAY(n, h) WC_DO_NOTHING
#endif
#ifndef WOLFSSL_NO_ASM
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__ __asm
#define __volatile__ volatile
#endif
#if defined(WOLFSSL_SP_X86_64) && SP_WORD_SIZE == 64
/*
* CPU: x86_64
*/
#ifndef _MSC_VER
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
__asm__ __volatile__ ( \
"movq %[b], %%rax \n\t" \
"mulq %[a] \n\t" \
"movq %%rax, %[l] \n\t" \
"movq %%rdx, %[h] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "m" (va), [b] "m" (vb) \
: "memory", "%rax", "%rdx", "cc" \
)
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"movq %[b], %%rax \n\t" \
"mulq %[a] \n\t" \
"movq $0 , %[o] \n\t" \
"movq %%rax, %[l] \n\t" \
"movq %%rdx, %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
: [a] "m" (va), [b] "m" (vb) \
: "%rax", "%rdx", "cc" \
)
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"movq %[b], %%rax \n\t" \
"mulq %[a] \n\t" \
"addq %%rax, %[l] \n\t" \
"adcq %%rdx, %[h] \n\t" \
"adcq $0 , %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "m" (va), [b] "m" (vb) \
: "%rax", "%rdx", "cc" \
)
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
__asm__ __volatile__ ( \
"movq %[b], %%rax \n\t" \
"mulq %[a] \n\t" \
"addq %%rax, %[l] \n\t" \
"adcq %%rdx, %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "m" (va), [b] "m" (vb) \
: "%rax", "%rdx", "cc" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"movq %[b], %%rax \n\t" \
"mulq %[a] \n\t" \
"addq %%rax, %[l] \n\t" \
"adcq %%rdx, %[h] \n\t" \
"adcq $0 , %[o] \n\t" \
"addq %%rax, %[l] \n\t" \
"adcq %%rdx, %[h] \n\t" \
"adcq $0 , %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "m" (va), [b] "m" (vb) \
: "%rax", "%rdx", "cc" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
* Assumes first add will not overflow vh | vl
*/
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"movq %[b], %%rax \n\t" \
"mulq %[a] \n\t" \
"addq %%rax, %[l] \n\t" \
"adcq %%rdx, %[h] \n\t" \
"addq %%rax, %[l] \n\t" \
"adcq %%rdx, %[h] \n\t" \
"adcq $0 , %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "m" (va), [b] "m" (vb) \
: "%rax", "%rdx", "cc" \
)
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
__asm__ __volatile__ ( \
"movq %[a], %%rax \n\t" \
"mulq %%rax \n\t" \
"movq %%rax, %[l] \n\t" \
"movq %%rdx, %[h] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "m" (va) \
: "memory", "%rax", "%rdx", "cc" \
)
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
__asm__ __volatile__ ( \
"movq %[a], %%rax \n\t" \
"mulq %%rax \n\t" \
"addq %%rax, %[l] \n\t" \
"adcq %%rdx, %[h] \n\t" \
"adcq $0 , %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "m" (va) \
: "%rax", "%rdx", "cc" \
)
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
__asm__ __volatile__ ( \
"movq %[a], %%rax \n\t" \
"mulq %%rax \n\t" \
"addq %%rax, %[l] \n\t" \
"adcq %%rdx, %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "m" (va) \
: "%rax", "%rdx", "cc" \
)
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
__asm__ __volatile__ ( \
"addq %[a], %[l] \n\t" \
"adcq $0 , %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "m" (va) \
: "cc" \
)
/* Add va, variable in a register, into: vh | vl */
#define SP_ASM_ADDC_REG(vl, vh, va) \
__asm__ __volatile__ ( \
"addq %[a], %[l] \n\t" \
"adcq $0 , %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "cc" \
)
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
__asm__ __volatile__ ( \
"subq %[a], %[l] \n\t" \
"sbbq $0 , %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "m" (va) \
: "cc" \
)
/* Sub va from: vh | vl */
#define SP_ASM_SUBB_REG(vl, vh, va) \
__asm__ __volatile__ ( \
"subq %[a], %[l] \n\t" \
"sbbq $0 , %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "cc" \
)
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
__asm__ __volatile__ ( \
"addq %[a], %[l] \n\t" \
"adcq %[b], %[h] \n\t" \
"adcq %[c], %[o] \n\t" \
"addq %[a], %[l] \n\t" \
"adcq %[b], %[h] \n\t" \
"adcq %[c], %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
: "cc" \
)
/* Index of highest bit set. */
#define SP_ASM_HI_BIT_SET_IDX(va, vi) \
__asm__ __volatile__ ( \
"bsr %[a], %[i] \n\t" \
: [i] "=r" (vi) \
: [a] "r" (va) \
: "cc" \
)
#else
#include <intrin.h>
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
vl = _umul128(va, vb, &vh)
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
do { \
vl = _umul128(va, vb, &vh); \
vo = 0; \
} \
while (0)
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
do { \
unsigned __int64 vtl, vth; \
unsigned char c; \
vtl = _umul128(va, vb, &vth); \
c = _addcarry_u64(0, vl, vtl, &vl); \
c = _addcarry_u64(c, vh, vth, &vh); \
_addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
do { \
unsigned __int64 vtl, vth; \
unsigned char c; \
vtl = _umul128(va, vb, &vth); \
c = _addcarry_u64(0, vl, vtl, &vl); \
_addcarry_u64(c, vh, vth, &vh); \
} \
while (0)
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
do { \
unsigned __int64 vtl, vth; \
unsigned char c; \
vtl = _umul128(va, vb, &vth); \
c = _addcarry_u64(0, vl, vtl, &vl); \
c = _addcarry_u64(c, vh, vth, &vh); \
_addcarry_u64(c, vo, 0, &vo); \
c = _addcarry_u64(0, vl, vtl, &vl); \
c = _addcarry_u64(c, vh, vth, &vh); \
_addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
* Assumes first add will not overflow vh | vl
*/
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
do { \
unsigned __int64 vtl, vth; \
unsigned char c; \
vtl = _umul128(va, vb, &vth); \
c = _addcarry_u64(0, vl, vtl, &vl); \
_addcarry_u64(c, vh, vth, &vh); \
c = _addcarry_u64(0, vl, vtl, &vl); \
c = _addcarry_u64(c, vh, vth, &vh); \
_addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
vl = _umul128(va, va, &vh)
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
do { \
unsigned __int64 vtl, vth; \
unsigned char c; \
vtl = _umul128(va, va, &vth); \
c = _addcarry_u64(0, vl, vtl, &vl); \
c = _addcarry_u64(c, vh, vth, &vh); \
_addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
do { \
unsigned __int64 vtl, vth; \
unsigned char c; \
vtl = _umul128(va, va, &vth); \
c = _addcarry_u64(0, vl, vtl, &vl); \
_addcarry_u64(c, vh, vth, &vh); \
} \
while (0)
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
do { \
unsigned char c; \
c = _addcarry_u64(0, vl, va, &vl); \
_addcarry_u64(c, vh, 0, &vh); \
} \
while (0)
/* Add va, variable in a register, into: vh | vl */
#define SP_ASM_ADDC_REG(vl, vh, va) \
do { \
unsigned char c; \
c = _addcarry_u64(0, vl, va, &vl); \
_addcarry_u64(c, vh, 0, &vh); \
} \
while (0)
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
do { \
unsigned char c; \
c = _subborrow_u64(0, vl, va, &vl); \
_subborrow_u64(c, vh, 0, &vh); \
} \
while (0)
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
do { \
unsigned char c; \
c = _addcarry_u64(0, vl, va, &vl); \
c = _addcarry_u64(c, vh, vb, &vh); \
_addcarry_u64(c, vo, vc, &vo); \
c = _addcarry_u64(0, vl, va, &vl); \
c = _addcarry_u64(c, vh, vb, &vh); \
_addcarry_u64(c, vo, vc, &vo); \
} \
while (0)
/* Index of highest bit set. */
#define SP_ASM_HI_BIT_SET_IDX(va, vi) \
do { \
unsigned long idx; \
_BitScanReverse64(&idx, va); \
vi = idx; \
} \
while (0)
#endif
#if !defined(WOLFSSL_SP_DIV_WORD_HALF) && (!defined(_MSC_VER) || \
_MSC_VER >= 1920)
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using divq instruction on Intel x64.
 *
 * NOTE(review): the hardware divq requires the quotient to fit in one
 * 64-bit digit - callers are assumed to ensure hi < d, otherwise divq
 * raises a divide fault. TODO confirm all callers guarantee this.
 *
 * @param [in] hi SP integer digit. High digit of the dividend.
 * @param [in] lo SP integer digit. Lower digit of the dividend.
 * @param [in] d SP integer digit. Number to divide by.
 * @return The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
sp_int_digit d)
{
#ifndef _MSC_VER
/* divq divides rdx:rax by the operand and leaves the quotient in rax.
 * "+a" (lo) binds lo to rax (and returns the quotient through it);
 * "d" (hi) binds hi to rdx.
 * NOTE(review): divq also writes the remainder to rdx, which is declared
 * only as an input operand here - verify the compiler never assumes rdx
 * still holds 'hi' afterwards.
 */
__asm__ __volatile__ (
"divq %2"
: "+a" (lo)
: "d" (hi), "r" (d)
: "cc"
);
return lo;
#elif defined(_MSC_VER) && _MSC_VER >= 1920
/* MSVC 2019+: _udiv128 divides the 128-bit value hi:lo by d and returns
 * the quotient. The remainder out-parameter is passed as NULL since it is
 * not needed - NOTE(review): confirm the intrinsic accepts NULL here.
 */
return _udiv128(hi, lo, d, NULL);
#endif
}
#define SP_ASM_DIV_WORD
#endif
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_X86_64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_X86) && SP_WORD_SIZE == 32
/*
* CPU: x86
*/
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
__asm__ __volatile__ ( \
"movl %[b], %%eax \n\t" \
"mull %[a] \n\t" \
"movl %%eax, %[l] \n\t" \
"movl %%edx, %[h] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "m" (va), [b] "m" (vb) \
: "memory", "eax", "edx", "cc" \
)
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"movl %[b], %%eax \n\t" \
"mull %[a] \n\t" \
"movl $0 , %[o] \n\t" \
"movl %%eax, %[l] \n\t" \
"movl %%edx, %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
: [a] "m" (va), [b] "m" (vb) \
: "eax", "edx", "cc" \
)
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"movl %[b], %%eax \n\t" \
"mull %[a] \n\t" \
"addl %%eax, %[l] \n\t" \
"adcl %%edx, %[h] \n\t" \
"adcl $0 , %[o] \n\t" \
: [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "eax", "edx", "cc" \
)
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
__asm__ __volatile__ ( \
"movl %[b], %%eax \n\t" \
"mull %[a] \n\t" \
"addl %%eax, %[l] \n\t" \
"adcl %%edx, %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "m" (va), [b] "m" (vb) \
: "eax", "edx", "cc" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"movl %[b], %%eax \n\t" \
"mull %[a] \n\t" \
"addl %%eax, %[l] \n\t" \
"adcl %%edx, %[h] \n\t" \
"adcl $0 , %[o] \n\t" \
"addl %%eax, %[l] \n\t" \
"adcl %%edx, %[h] \n\t" \
"adcl $0 , %[o] \n\t" \
: [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "eax", "edx", "cc" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
* Assumes first add will not overflow vh | vl
*/
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"movl %[b], %%eax \n\t" \
"mull %[a] \n\t" \
"addl %%eax, %[l] \n\t" \
"adcl %%edx, %[h] \n\t" \
"addl %%eax, %[l] \n\t" \
"adcl %%edx, %[h] \n\t" \
"adcl $0 , %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "m" (va), [b] "m" (vb) \
: "eax", "edx", "cc" \
)
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
__asm__ __volatile__ ( \
"movl %[a], %%eax \n\t" \
"mull %%eax \n\t" \
"movl %%eax, %[l] \n\t" \
"movl %%edx, %[h] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "m" (va) \
: "memory", "eax", "edx", "cc" \
)
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
__asm__ __volatile__ ( \
"movl %[a], %%eax \n\t" \
"mull %%eax \n\t" \
"addl %%eax, %[l] \n\t" \
"adcl %%edx, %[h] \n\t" \
"adcl $0 , %[o] \n\t" \
: [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
: [a] "m" (va) \
: "eax", "edx", "cc" \
)
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
__asm__ __volatile__ ( \
"movl %[a], %%eax \n\t" \
"mull %%eax \n\t" \
"addl %%eax, %[l] \n\t" \
"adcl %%edx, %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "m" (va) \
: "eax", "edx", "cc" \
)
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
__asm__ __volatile__ ( \
"addl %[a], %[l] \n\t" \
"adcl $0 , %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "m" (va) \
: "cc" \
)
/* Add va, variable in a register, into: vh | vl */
#define SP_ASM_ADDC_REG(vl, vh, va) \
__asm__ __volatile__ ( \
"addl %[a], %[l] \n\t" \
"adcl $0 , %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "cc" \
)
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
__asm__ __volatile__ ( \
"subl %[a], %[l] \n\t" \
"sbbl $0 , %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "m" (va) \
: "cc" \
)
/* Sub va from: vh | vl */
#define SP_ASM_SUBB_REG(vl, vh, va) \
__asm__ __volatile__ ( \
"subl %[a], %[l] \n\t" \
"sbbl $0 , %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "cc" \
)
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
__asm__ __volatile__ ( \
"addl %[a], %[l] \n\t" \
"adcl %[b], %[h] \n\t" \
"adcl %[c], %[o] \n\t" \
"addl %[a], %[l] \n\t" \
"adcl %[b], %[h] \n\t" \
"adcl %[c], %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
: "cc" \
)
/* Index of highest bit set. */
#define SP_ASM_HI_BIT_SET_IDX(va, vi) \
__asm__ __volatile__ ( \
"bsr %[a], %[i] \n\t" \
: [i] "=r" (vi) \
: [a] "r" (va) \
: "cc" \
)
#ifndef WOLFSSL_SP_DIV_WORD_HALF
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using divl instruction on Intel x86 (32-bit).
 *
 * @param [in] hi SP integer digit. High digit of the dividend.
 * @param [in] lo SP integer digit. Lower digit of the dividend.
 * @param [in] d SP integer digit. Number to divide by.
 * @return The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
sp_int_digit d)
{
/* divl divides edx:eax by the operand and leaves the quotient in eax.
 * "+a" (lo) binds lo to eax (and returns the quotient through it);
 * "d" (hi) binds hi to edx.
 * NOTE(review): callers are assumed to ensure hi < d so the quotient
 * fits in 32 bits; divl faults otherwise. divl also writes the remainder
 * to edx, which is declared only as an input operand - TODO confirm.
 */
__asm__ __volatile__ (
"divl %2"
: "+a" (lo)
: "d" (hi), "r" (d)
: "cc"
);
return lo;
}
#define SP_ASM_DIV_WORD
#endif
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_X86 && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_ARM64) && SP_WORD_SIZE == 64
/*
* CPU: Aarch64
*/
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
__asm__ __volatile__ ( \
"mul %[l], %[a], %[b] \n\t" \
"umulh %[h], %[a], %[b] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "r" (va), [b] "r" (vb) \
: "memory", "cc" \
)
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"mul x8, %[a], %[b] \n\t" \
"umulh %[h], %[a], %[b] \n\t" \
"mov %[l], x8 \n\t" \
"mov %[o], xzr \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "x8" \
)
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"mul x8, %[a], %[b] \n\t" \
"umulh x9, %[a], %[b] \n\t" \
"adds %[l], %[l], x8 \n\t" \
"adcs %[h], %[h], x9 \n\t" \
"adc %[o], %[o], xzr \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "x8", "x9", "cc" \
)
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
__asm__ __volatile__ ( \
"mul x8, %[a], %[b] \n\t" \
"umulh x9, %[a], %[b] \n\t" \
"adds %[l], %[l], x8 \n\t" \
"adc %[h], %[h], x9 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va), [b] "r" (vb) \
: "x8", "x9", "cc" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"mul x8, %[a], %[b] \n\t" \
"umulh x9, %[a], %[b] \n\t" \
"adds %[l], %[l], x8 \n\t" \
"adcs %[h], %[h], x9 \n\t" \
"adc %[o], %[o], xzr \n\t" \
"adds %[l], %[l], x8 \n\t" \
"adcs %[h], %[h], x9 \n\t" \
"adc %[o], %[o], xzr \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "x8", "x9", "cc" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
* Assumes first add will not overflow vh | vl
*/
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"mul x8, %[a], %[b] \n\t" \
"umulh x9, %[a], %[b] \n\t" \
"adds %[l], %[l], x8 \n\t" \
"adc %[h], %[h], x9 \n\t" \
"adds %[l], %[l], x8 \n\t" \
"adcs %[h], %[h], x9 \n\t" \
"adc %[o], %[o], xzr \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "x8", "x9", "cc" \
)
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
__asm__ __volatile__ ( \
"mul %[l], %[a], %[a] \n\t" \
"umulh %[h], %[a], %[a] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "r" (va) \
: "memory" \
)
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
__asm__ __volatile__ ( \
"mul x8, %[a], %[a] \n\t" \
"umulh x9, %[a], %[a] \n\t" \
"adds %[l], %[l], x8 \n\t" \
"adcs %[h], %[h], x9 \n\t" \
"adc %[o], %[o], xzr \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va) \
: "x8", "x9", "cc" \
)
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
__asm__ __volatile__ ( \
"mul x8, %[a], %[a] \n\t" \
"umulh x9, %[a], %[a] \n\t" \
"adds %[l], %[l], x8 \n\t" \