/* fastrandombytes.c */
/* Adapted from Intel® Advanced Encryption Standard (Intel® AES) Instructions Set - Rev 3.01
* https://software.intel.com/sites/default/files/article/165683/aes-wp-2012-09-22-v01.pdf
*/
#include "fastrandombytes.h"
#include <string.h>
#include <x86intrin.h>
/* Expanded AES-256 key schedule (15 round keys); filled in by
 * fastrandombytes_setseed() and read by AES_ctr_round(). */
static __m128i round_key[15];
/* Counter block that AES_ctr_round() encrypts to produce output. Reset to
 * zero by fastrandombytes_setseed() and advanced after every block. */
static __m128i iv;
/* Increment added to iv after each generated block.
 * NOTE(review): assumes the GCC/Clang definition of __m128i as a vector of
 * two 64-bit integers, so this puts 1 in the low lane and 0 in the high
 * lane -- confirm before building with another compiler. */
static const __m128i ONE = { 1, 0 };
/*
 * Key-schedule helper used for the even-indexed round keys.
 *
 * temp1: in/out. On return each 32-bit word holds the XOR of itself with all
 *        lower-indexed words of the input (a running XOR across the four
 *        words), further XORed with the broadcast word taken from temp2.
 * temp2: in/out. Overwritten with its own highest 32-bit word replicated
 *        into all four lanes.
 */
static inline void KEY_256_ASSIST_1(__m128i *temp1, __m128i *temp2) {
    __m128i shifted;
    int step;

    /* Replicate the top 32-bit word of temp2 across the register. */
    *temp2 = _mm_shuffle_epi32(*temp2, 0xff);

    /* Fold in the value shifted left by 4, 8 and 12 bytes. */
    shifted = *temp1;
    for (step = 0; step < 3; step++) {
        shifted = _mm_slli_si128(shifted, 0x4);
        *temp1 = _mm_xor_si128(*temp1, shifted);
    }

    *temp1 = _mm_xor_si128(*temp1, *temp2);
}
static inline void KEY_256_ASSIST_2(__m128i *temp1, __m128i *temp3) {
__m128i temp2, temp4;
temp4 = _mm_aeskeygenassist_si128(*temp1, 0x0);
temp2 = _mm_shuffle_epi32(temp4, 0xaa);
temp4 = _mm_slli_si128(*temp3, 0x4);
*temp3 = _mm_xor_si128(*temp3, temp4);
temp4 = _mm_slli_si128(temp4, 0x4);
*temp3 = _mm_xor_si128(*temp3, temp4);
temp4 = _mm_slli_si128(temp4, 0x4);
*temp3 = _mm_xor_si128(*temp3, temp4);
*temp3 = _mm_xor_si128(*temp3, temp2);
}
/* round_key <-- aes256_key_expansion(randomness), iv <-- 0 */
void fastrandombytes_setseed(const unsigned char *randomness) {
__m128i temp1, temp2, temp3;
temp1 = _mm_loadu_si128((__m128i *) randomness);
temp3 = _mm_loadu_si128((__m128i *) (randomness + 16));
round_key[0] = temp1;
round_key[1] = temp3;
temp2 = _mm_aeskeygenassist_si128(temp3, 0x01);
KEY_256_ASSIST_1(&temp1, &temp2);
round_key[2] = temp1;
KEY_256_ASSIST_2(&temp1, &temp3);
round_key[3] = temp3;
temp2 = _mm_aeskeygenassist_si128(temp3, 0x02);
KEY_256_ASSIST_1(&temp1, &temp2);
round_key[4] = temp1;
KEY_256_ASSIST_2(&temp1, &temp3);
round_key[5] = temp3;
temp2 = _mm_aeskeygenassist_si128(temp3, 0x04);
KEY_256_ASSIST_1(&temp1, &temp2);
round_key[6] = temp1;
KEY_256_ASSIST_2(&temp1, &temp3);
round_key[7] = temp3;
temp2 = _mm_aeskeygenassist_si128(temp3, 0x08);
KEY_256_ASSIST_1(&temp1, &temp2);
round_key[8] = temp1;
KEY_256_ASSIST_2(&temp1, &temp3);
round_key[9] = temp3;
temp2 = _mm_aeskeygenassist_si128(temp3, 0x10);
KEY_256_ASSIST_1(&temp1, &temp2);
round_key[10] = temp1;
KEY_256_ASSIST_2(&temp1, &temp3);
round_key[11] = temp3;
temp2 = _mm_aeskeygenassist_si128(temp3, 0x20);
KEY_256_ASSIST_1(&temp1, &temp2);
round_key[12] = temp1;
KEY_256_ASSIST_2(&temp1, &temp3);
round_key[13] = temp3;
temp2 = _mm_aeskeygenassist_si128(temp3, 0x40);
KEY_256_ASSIST_1(&temp1, &temp2);
round_key[14] = temp1;
iv = _mm_setzero_si128();
}
/*
 * Produce one 16-byte block of output and advance the counter.
 *
 * Encrypts the current counter block iv under the expanded key in round_key
 * (whitening with round_key[0], 13 full rounds with round_key[1..13], final
 * round with round_key[14]) and stores the result at out.
 *
 * out: destination for exactly 16 bytes; written with an unaligned store, so
 *      no particular alignment is required.
 *
 * Side effects: iv is incremented by ONE.
 *
 * The increment is a 64-bit lane add. A 32-bit lane add only ever changes the
 * low 32 bits of the counter, so it returns to its starting value after 2^32
 * blocks (64 GiB) and the output stream repeats from there. With the 64-bit
 * add the first 2^32 blocks after seeding are unchanged and the stream keeps
 * going instead of cycling.
 */
static inline void AES_ctr_round(unsigned char *out) {
    __m128i state;
    int r;

    state = _mm_xor_si128(iv, round_key[0]);
    for (r = 1; r < 14; r++) {
        state = _mm_aesenc_si128(state, round_key[r]);
    }
    state = _mm_aesenclast_si128(state, round_key[14]);

    _mm_storeu_si128((__m128i *) out, state);

    iv = _mm_add_epi64(iv, ONE);
}
/*
 * Fill a buffer with generator output.
 *
 * r <-- aes256_ctr(round_key, iv, rlen)
 *
 * r:    destination buffer of at least rlen bytes.
 * rlen: number of bytes to write.
 *
 * Whole 16-byte blocks are generated directly into r. A trailing partial
 * block is generated into a scratch buffer and only the needed prefix is
 * copied out; the unused bytes of that block are discarded.
 */
void fastrandombytes(unsigned char *r, unsigned long long rlen) {
    unsigned char *dst = r;
    unsigned long long remaining = rlen;

    while (remaining >= 16) {
        AES_ctr_round(dst);
        dst += 16;
        remaining -= 16;
    }

    if (remaining != 0) {
        unsigned char tail[16];

        AES_ctr_round(tail);
        memcpy(dst, tail, (size_t) remaining);
    }
}