/root/bitcoin/src/crypto/ctaes/ctaes.c
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************* |
2 | | * Copyright (c) 2016 Pieter Wuille * |
3 | | * Distributed under the MIT software license, see the accompanying * |
4 | | * file COPYING or http://www.opensource.org/licenses/mit-license.php.* |
5 | | **********************************************************************/ |
6 | | |
7 | | /* Constant time, unoptimized, concise, plain C, AES implementation |
8 | | * Based On: |
9 | | * Emilia Kasper and Peter Schwabe, Faster and Timing-Attack Resistant AES-GCM |
10 | | * http://www.iacr.org/archive/ches2009/57470001/57470001.pdf |
11 | | * But using 8 16-bit integers representing a single AES state rather than 8 128-bit |
12 | | * integers representing 8 AES states. |
13 | | */ |
14 | | |
15 | | #include "ctaes.h" |
16 | | |
17 | | /* Slice variable slice_i contains the i'th bit of the 16 state variables in this order: |
18 | | * 0 1 2 3 |
19 | | * 4 5 6 7 |
20 | | * 8 9 10 11 |
21 | | * 12 13 14 15 |
22 | | */ |
23 | | |
24 | | /** Convert a byte to sliced form, storing it corresponding to given row and column in s */ |
25 | 0 | static void LoadByte(AES_state* s, unsigned char byte, int r, int c) { |
26 | 0 | int i; |
27 | 0 | for (i = 0; i < 8; i++) { |
28 | 0 | s->slice[i] |= (byte & 1) << (r * 4 + c); |
29 | 0 | byte >>= 1; |
30 | 0 | } |
31 | 0 | } |
32 | | |
33 | | /** Load 16 bytes of data into 8 sliced integers */ |
34 | 0 | static void LoadBytes(AES_state *s, const unsigned char* data16) { |
35 | 0 | int c; |
36 | 0 | for (c = 0; c < 4; c++) { |
37 | 0 | int r; |
38 | 0 | for (r = 0; r < 4; r++) { |
39 | 0 | LoadByte(s, *(data16++), r, c); |
40 | 0 | } |
41 | 0 | } |
42 | 0 | } |
43 | | |
44 | | /** Convert 8 sliced integers into 16 bytes of data */ |
45 | 0 | static void SaveBytes(unsigned char* data16, const AES_state *s) { |
46 | 0 | int c; |
47 | 0 | for (c = 0; c < 4; c++) { |
48 | 0 | int r; |
49 | 0 | for (r = 0; r < 4; r++) { |
50 | 0 | int b; |
51 | 0 | uint8_t v = 0; |
52 | 0 | for (b = 0; b < 8; b++) { |
53 | 0 | v |= ((s->slice[b] >> (r * 4 + c)) & 1) << b; |
54 | 0 | } |
55 | 0 | *(data16++) = v; |
56 | 0 | } |
57 | 0 | } |
58 | 0 | } |
59 | | |
/* S-box implementation based on the gate logic from:
 * Joan Boyar and Rene Peralta, A depth-16 circuit for the AES S-box.
 * https://eprint.iacr.org/2011/332.pdf
 */
/** Apply the AES S-box (inv=0) or inverse S-box (inv=1) to all 16 state bytes.
 *
 *  Constant time: implemented purely with boolean operations on the 8 bit
 *  planes — no data-dependent branches or table lookups. The `inv` branch is
 *  key-independent (it depends only on encrypt-vs-decrypt mode).
 *
 *  Structure: a linear input layer, a shared non-linear middle layer (the
 *  GF(2^8) inversion from the Boyar-Peralta circuit), and a linear output
 *  layer; the two linear layers differ between the forward and inverse case.
 */
static void SubBytes(AES_state *s, int inv) {
    /* Load the bit slices (note the reversed index order: U0 is the MSB plane) */
    uint16_t U0 = s->slice[7], U1 = s->slice[6], U2 = s->slice[5], U3 = s->slice[4];
    uint16_t U4 = s->slice[3], U5 = s->slice[2], U6 = s->slice[1], U7 = s->slice[0];

    uint16_t T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16;
    uint16_t T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, D;
    uint16_t M1, M6, M11, M13, M15, M20, M21, M22, M23, M25, M37, M38, M39, M40;
    uint16_t M41, M42, M43, M44, M45, M46, M47, M48, M49, M50, M51, M52, M53, M54;
    uint16_t M55, M56, M57, M58, M59, M60, M61, M62, M63;

    if (inv) {
        uint16_t R5, R13, R17, R18, R19;
        /* Undo linear postprocessing (inverse of the forward output layer,
         * including the S-box's affine constant, hence the ~ negations) */
        T23 = U0 ^ U3;
        T22 = ~(U1 ^ U3);
        T2 = ~(U0 ^ U1);
        T1 = U3 ^ U4;
        T24 = ~(U4 ^ U7);
        R5 = U6 ^ U7;
        T8 = ~(U1 ^ T23);
        T19 = T22 ^ R5;
        T9 = ~(U7 ^ T1);
        T10 = T2 ^ T24;
        T13 = T2 ^ R5;
        T3 = T1 ^ R5;
        T25 = ~(U2 ^ T1);
        R13 = U1 ^ U6;
        T17 = ~(U2 ^ T19);
        T20 = T24 ^ R13;
        T4 = U4 ^ T8;
        R17 = ~(U2 ^ U5);
        R18 = ~(U5 ^ U6);
        R19 = ~(U2 ^ U4);
        D = U0 ^ R17;
        T6 = T22 ^ R17;
        T16 = R13 ^ R19;
        T27 = T1 ^ R18;
        T15 = T10 ^ T27;
        T14 = T10 ^ R18;
        T26 = T3 ^ T16;
    } else {
        /* Linear preprocessing. */
        T1 = U0 ^ U3;
        T2 = U0 ^ U5;
        T3 = U0 ^ U6;
        T4 = U3 ^ U5;
        T5 = U4 ^ U6;
        T6 = T1 ^ T5;
        T7 = U1 ^ U2;
        T8 = U7 ^ T6;
        T9 = U7 ^ T7;
        T10 = T6 ^ T7;
        T11 = U1 ^ U5;
        T12 = U2 ^ U5;
        T13 = T3 ^ T4;
        T14 = T6 ^ T11;
        T15 = T5 ^ T11;
        T16 = T5 ^ T12;
        T17 = T9 ^ T16;
        T18 = U3 ^ U7;
        T19 = T7 ^ T18;
        T20 = T1 ^ T19;
        T21 = U6 ^ U7;
        T22 = T7 ^ T21;
        T23 = T2 ^ T22;
        T24 = T2 ^ T10;
        T25 = T20 ^ T17;
        T26 = T3 ^ T16;
        T27 = T1 ^ T12;
        D = U7;
    }

    /* Non-linear transformation (shared between the forward and backward case) */
    M1 = T13 & T6;
    M6 = T3 & T16;
    M11 = T1 & T15;
    M13 = (T4 & T27) ^ M11;
    M15 = (T2 & T10) ^ M11;
    M20 = T14 ^ M1 ^ (T23 & T8) ^ M13;
    M21 = (T19 & D) ^ M1 ^ T24 ^ M15;
    M22 = T26 ^ M6 ^ (T22 & T9) ^ M13;
    M23 = (T20 & T17) ^ M6 ^ M15 ^ T25;
    M25 = M22 & M20;
    M37 = M21 ^ ((M20 ^ M21) & (M23 ^ M25));
    M38 = M20 ^ M25 ^ (M21 | (M20 & M23));
    M39 = M23 ^ ((M22 ^ M23) & (M21 ^ M25));
    M40 = M22 ^ M25 ^ (M23 | (M21 & M22));
    M41 = M38 ^ M40;
    M42 = M37 ^ M39;
    M43 = M37 ^ M38;
    M44 = M39 ^ M40;
    M45 = M42 ^ M41;
    M46 = M44 & T6;
    M47 = M40 & T8;
    M48 = M39 & D;
    M49 = M43 & T16;
    M50 = M38 & T9;
    M51 = M37 & T17;
    M52 = M42 & T15;
    M53 = M45 & T27;
    M54 = M41 & T10;
    M55 = M44 & T13;
    M56 = M40 & T23;
    M57 = M39 & T19;
    M58 = M43 & T3;
    M59 = M38 & T22;
    M60 = M37 & T20;
    M61 = M42 & T1;
    M62 = M45 & T4;
    M63 = M41 & T2;

    if (inv){
        /* Undo linear preprocessing (slices written back in reversed order,
         * mirroring how U0..U7 were loaded) */
        uint16_t P0 = M52 ^ M61;
        uint16_t P1 = M58 ^ M59;
        uint16_t P2 = M54 ^ M62;
        uint16_t P3 = M47 ^ M50;
        uint16_t P4 = M48 ^ M56;
        uint16_t P5 = M46 ^ M51;
        uint16_t P6 = M49 ^ M60;
        uint16_t P7 = P0 ^ P1;
        uint16_t P8 = M50 ^ M53;
        uint16_t P9 = M55 ^ M63;
        uint16_t P10 = M57 ^ P4;
        uint16_t P11 = P0 ^ P3;
        uint16_t P12 = M46 ^ M48;
        uint16_t P13 = M49 ^ M51;
        uint16_t P14 = M49 ^ M62;
        uint16_t P15 = M54 ^ M59;
        uint16_t P16 = M57 ^ M61;
        uint16_t P17 = M58 ^ P2;
        uint16_t P18 = M63 ^ P5;
        uint16_t P19 = P2 ^ P3;
        uint16_t P20 = P4 ^ P6;
        uint16_t P22 = P2 ^ P7;
        uint16_t P23 = P7 ^ P8;
        uint16_t P24 = P5 ^ P7;
        uint16_t P25 = P6 ^ P10;
        uint16_t P26 = P9 ^ P11;
        uint16_t P27 = P10 ^ P18;
        uint16_t P28 = P11 ^ P25;
        uint16_t P29 = P15 ^ P20;
        s->slice[7] = P13 ^ P22;
        s->slice[6] = P26 ^ P29;
        s->slice[5] = P17 ^ P28;
        s->slice[4] = P12 ^ P22;
        s->slice[3] = P23 ^ P27;
        s->slice[2] = P19 ^ P24;
        s->slice[1] = P14 ^ P23;
        s->slice[0] = P9 ^ P16;
    } else {
        /* Linear postprocessing (the ~ negations implement the affine
         * constant 0x63 of the forward S-box) */
        uint16_t L0 = M61 ^ M62;
        uint16_t L1 = M50 ^ M56;
        uint16_t L2 = M46 ^ M48;
        uint16_t L3 = M47 ^ M55;
        uint16_t L4 = M54 ^ M58;
        uint16_t L5 = M49 ^ M61;
        uint16_t L6 = M62 ^ L5;
        uint16_t L7 = M46 ^ L3;
        uint16_t L8 = M51 ^ M59;
        uint16_t L9 = M52 ^ M53;
        uint16_t L10 = M53 ^ L4;
        uint16_t L11 = M60 ^ L2;
        uint16_t L12 = M48 ^ M51;
        uint16_t L13 = M50 ^ L0;
        uint16_t L14 = M52 ^ M61;
        uint16_t L15 = M55 ^ L1;
        uint16_t L16 = M56 ^ L0;
        uint16_t L17 = M57 ^ L1;
        uint16_t L18 = M58 ^ L8;
        uint16_t L19 = M63 ^ L4;
        uint16_t L20 = L0 ^ L1;
        uint16_t L21 = L1 ^ L7;
        uint16_t L22 = L3 ^ L12;
        uint16_t L23 = L18 ^ L2;
        uint16_t L24 = L15 ^ L9;
        uint16_t L25 = L6 ^ L10;
        uint16_t L26 = L7 ^ L9;
        uint16_t L27 = L8 ^ L10;
        uint16_t L28 = L11 ^ L14;
        uint16_t L29 = L11 ^ L17;
        s->slice[7] = L6 ^ L24;
        s->slice[6] = ~(L16 ^ L26);
        s->slice[5] = ~(L19 ^ L28);
        s->slice[4] = L6 ^ L21;
        s->slice[3] = L20 ^ L22;
        s->slice[2] = L25 ^ L29;
        s->slice[1] = ~(L13 ^ L27);
        s->slice[0] = ~(L6 ^ L23);
    }
}
257 | | |
/* Mask covering bits [from, to) of a 16-bit slice. Requires 0 <= from < to <= 16. */
#define BIT_RANGE(from,to) (((1 << ((to) - (from))) - 1) << (from))

/* Select bits [from, to) of x and move them left/right by `shift` positions;
 * used by (Inv)ShiftRows to relocate cells within a row of the sliced state. */
#define BIT_RANGE_LEFT(x,from,to,shift) (((x) & BIT_RANGE((from), (to))) << (shift))
#define BIT_RANGE_RIGHT(x,from,to,shift) (((x) & BIT_RANGE((from), (to))) >> (shift))
262 | | |
/** AES ShiftRows: cyclically rotate row r of the state left by r positions.
 *
 *  Row r occupies bits [4*r, 4*r+4) of each 16-bit slice, so each rotation is
 *  expressed as a pair of masked shifts; the same movement is applied to all
 *  8 bit planes. Constant time: pure mask/shift arithmetic, no branches.
 */
static void ShiftRows(AES_state* s) {
    int i;
    for (i = 0; i < 8; i++) {
        uint16_t v = s->slice[i];
        s->slice[i] =
            (v & BIT_RANGE(0, 4)) |                                       /* row 0: unchanged */
            BIT_RANGE_LEFT(v, 4, 5, 3) | BIT_RANGE_RIGHT(v, 5, 8, 1) |    /* row 1: rotate by 1 */
            BIT_RANGE_LEFT(v, 8, 10, 2) | BIT_RANGE_RIGHT(v, 10, 12, 2) | /* row 2: rotate by 2 */
            BIT_RANGE_LEFT(v, 12, 15, 1) | BIT_RANGE_RIGHT(v, 15, 16, 3); /* row 3: rotate by 3 */
    }
}
274 | | |
/** Inverse of ShiftRows: cyclically rotate row r of the state right by r positions.
 *
 *  Same masked-shift technique as ShiftRows, with the shift amounts mirrored
 *  so that InvShiftRows(ShiftRows(s)) == s. Constant time.
 */
static void InvShiftRows(AES_state* s) {
    int i;
    for (i = 0; i < 8; i++) {
        uint16_t v = s->slice[i];
        s->slice[i] =
            (v & BIT_RANGE(0, 4)) |                                       /* row 0: unchanged */
            BIT_RANGE_LEFT(v, 4, 7, 1) | BIT_RANGE_RIGHT(v, 7, 8, 3) |    /* row 1: rotate back by 1 */
            BIT_RANGE_LEFT(v, 8, 10, 2) | BIT_RANGE_RIGHT(v, 10, 12, 2) | /* row 2: rotate back by 2 */
            BIT_RANGE_LEFT(v, 12, 13, 3) | BIT_RANGE_RIGHT(v, 13, 16, 1); /* row 3: rotate back by 3 */
    }
}
286 | | |
287 | 0 | #define ROT(x,b) (((x) >> ((b) * 4)) | ((x) << ((4-(b)) * 4))) |
288 | | |
/** AES MixColumns (inv=0) or InvMixColumns (inv=1), applied in-place to s.
 *  Constant time: built from rotations and XORs only; the `inv` branch is
 *  key-independent (it depends only on encrypt-vs-decrypt mode). */
static void MixColumns(AES_state* s, int inv) {
    /* The MixColumns transform treats the bytes of the columns of the state as
     * coefficients of a 3rd degree polynomial over GF(2^8) and multiplies them
     * by the fixed polynomial a(x) = {03}x^3 + {01}x^2 + {01}x + {02}, modulo
     * x^4 + {01}.
     *
     * In the inverse transform, we multiply by the inverse of a(x),
     * a^-1(x) = {0b}x^3 + {0d}x^2 + {09}x + {0e}. This is equal to
     * a(x) * ({04}x^2 + {05}), so we can reuse the forward transform's code
     * (found in OpenSSL's bsaes-x86_64.pl, attributed to Jussi Kivilinna)
     *
     * In the bitsliced representation, a multiplication of every column by x
     * mod x^4 + 1 is simply a right rotation.
     */

    /* Shared for both directions is a multiplication by a(x), which can be
     * rewritten as (x^3 + x^2 + x) + {02}*(x^3 + {01}).
     *
     * First compute s into the s? variables, (x^3 + {01}) * s into the s?_01
     * variables and (x^3 + x^2 + x)*s into the s?_123 variables.
     */
    uint16_t s0 = s->slice[0], s1 = s->slice[1], s2 = s->slice[2], s3 = s->slice[3];
    uint16_t s4 = s->slice[4], s5 = s->slice[5], s6 = s->slice[6], s7 = s->slice[7];
    uint16_t s0_01 = s0 ^ ROT(s0, 1), s0_123 = ROT(s0_01, 1) ^ ROT(s0, 3);
    uint16_t s1_01 = s1 ^ ROT(s1, 1), s1_123 = ROT(s1_01, 1) ^ ROT(s1, 3);
    uint16_t s2_01 = s2 ^ ROT(s2, 1), s2_123 = ROT(s2_01, 1) ^ ROT(s2, 3);
    uint16_t s3_01 = s3 ^ ROT(s3, 1), s3_123 = ROT(s3_01, 1) ^ ROT(s3, 3);
    uint16_t s4_01 = s4 ^ ROT(s4, 1), s4_123 = ROT(s4_01, 1) ^ ROT(s4, 3);
    uint16_t s5_01 = s5 ^ ROT(s5, 1), s5_123 = ROT(s5_01, 1) ^ ROT(s5, 3);
    uint16_t s6_01 = s6 ^ ROT(s6, 1), s6_123 = ROT(s6_01, 1) ^ ROT(s6, 3);
    uint16_t s7_01 = s7 ^ ROT(s7, 1), s7_123 = ROT(s7_01, 1) ^ ROT(s7, 3);
    /* Now compute s = s?_123 + {02} * s?_01 (multiplying by {02} shifts bit
     * planes up by one, feeding the top plane back per x^8 = x^4+x^3+x+1). */
    s->slice[0] = s7_01 ^ s0_123;
    s->slice[1] = s7_01 ^ s0_01 ^ s1_123;
    s->slice[2] = s1_01 ^ s2_123;
    s->slice[3] = s7_01 ^ s2_01 ^ s3_123;
    s->slice[4] = s7_01 ^ s3_01 ^ s4_123;
    s->slice[5] = s4_01 ^ s5_123;
    s->slice[6] = s5_01 ^ s6_123;
    s->slice[7] = s6_01 ^ s7_123;
    if (inv) {
        /* In the reverse direction, we further need to multiply by
         * {04}x^2 + {05}, which can be written as {04} * (x^2 + {01}) + {01}.
         *
         * First compute (x^2 + {01}) * s into the t?_02 variables: */
        uint16_t t0_02 = s->slice[0] ^ ROT(s->slice[0], 2);
        uint16_t t1_02 = s->slice[1] ^ ROT(s->slice[1], 2);
        uint16_t t2_02 = s->slice[2] ^ ROT(s->slice[2], 2);
        uint16_t t3_02 = s->slice[3] ^ ROT(s->slice[3], 2);
        uint16_t t4_02 = s->slice[4] ^ ROT(s->slice[4], 2);
        uint16_t t5_02 = s->slice[5] ^ ROT(s->slice[5], 2);
        uint16_t t6_02 = s->slice[6] ^ ROT(s->slice[6], 2);
        uint16_t t7_02 = s->slice[7] ^ ROT(s->slice[7], 2);
        /* And then update s += {04} * t?_02 ({04} shifts bit planes up by two,
         * again reducing modulo x^8 + x^4 + x^3 + x + 1). */
        s->slice[0] ^= t6_02;
        s->slice[1] ^= t6_02 ^ t7_02;
        s->slice[2] ^= t0_02 ^ t7_02;
        s->slice[3] ^= t1_02 ^ t6_02;
        s->slice[4] ^= t2_02 ^ t6_02 ^ t7_02;
        s->slice[5] ^= t3_02 ^ t7_02;
        s->slice[6] ^= t4_02;
        s->slice[7] ^= t5_02;
    }
}
353 | | |
354 | 0 | static void AddRoundKey(AES_state* s, const AES_state* round) { |
355 | 0 | int b; |
356 | 0 | for (b = 0; b < 8; b++) { |
357 | 0 | s->slice[b] ^= round->slice[b]; |
358 | 0 | } |
359 | 0 | } |
360 | | |
361 | | /** column_0(s) = column_c(a) */ |
362 | 0 | static void GetOneColumn(AES_state* s, const AES_state* a, int c) { |
363 | 0 | int b; |
364 | 0 | for (b = 0; b < 8; b++) { |
365 | 0 | s->slice[b] = (a->slice[b] >> c) & 0x1111; |
366 | 0 | } |
367 | 0 | } |
368 | | |
369 | | /** column_c1(r) |= (column_0(s) ^= column_c2(a)) */ |
370 | 0 | static void KeySetupColumnMix(AES_state* s, AES_state* r, const AES_state* a, int c1, int c2) { |
371 | 0 | int b; |
372 | 0 | for (b = 0; b < 8; b++) { |
373 | 0 | r->slice[b] |= ((s->slice[b] ^= ((a->slice[b] >> c2) & 0x1111)) & 0x1111) << c1; |
374 | 0 | } |
375 | 0 | } |
376 | | |
377 | | /** Rotate the rows in s one position upwards, and xor in r */ |
378 | 0 | static void KeySetupTransform(AES_state* s, const AES_state* r) { |
379 | 0 | int b; |
380 | 0 | for (b = 0; b < 8; b++) { |
381 | 0 | s->slice[b] = ((s->slice[b] >> 4) | (s->slice[b] << 12)) ^ r->slice[b]; |
382 | 0 | } |
383 | 0 | } |
384 | | |
385 | | /* Multiply the cells in s by x, as polynomials over GF(2) mod x^8 + x^4 + x^3 + x + 1 */ |
386 | 0 | static void MultX(AES_state* s) { |
387 | 0 | uint16_t top = s->slice[7]; |
388 | 0 | s->slice[7] = s->slice[6]; |
389 | 0 | s->slice[6] = s->slice[5]; |
390 | 0 | s->slice[5] = s->slice[4]; |
391 | 0 | s->slice[4] = s->slice[3] ^ top; |
392 | 0 | s->slice[3] = s->slice[2] ^ top; |
393 | 0 | s->slice[2] = s->slice[1]; |
394 | 0 | s->slice[1] = s->slice[0] ^ top; |
395 | 0 | s->slice[0] = top; |
396 | 0 | } |
397 | | |
/** Expand the cipher key into the key schedule.
 *
 * rounds must be a pointer to an array of size nrounds + 1.
 * key must be a pointer to 4 * nkeywords bytes.
 *
 * AES128 uses nkeywords = 4, nrounds = 10
 * AES192 uses nkeywords = 6, nrounds = 12
 * AES256 uses nkeywords = 8, nrounds = 14
 */
static void AES_setup(AES_state* rounds, const uint8_t* key, int nkeywords, int nrounds)
{
    int i;

    /* The one-byte round constant */
    AES_state rcon = {{1,0,0,0,0,0,0,0}};
    /* The number of the word being generated, modulo nkeywords */
    int pos = 0;
    /* The column representing the word currently being processed */
    AES_state column;

    /* Clear the whole schedule first; LoadByte/KeySetupColumnMix OR bits in. */
    for (i = 0; i < nrounds + 1; i++) {
        int b;
        for (b = 0; b < 8; b++) {
            rounds[i].slice[b] = 0;
        }
    }

    /* The first nkeywords round columns are just taken from the key directly. */
    for (i = 0; i < nkeywords; i++) {
        int r;
        for (r = 0; r < 4; r++) {
            LoadByte(&rounds[i >> 2], *(key++), r, i & 3);
        }
    }

    /* Seed the accumulator with the last key word loaded. */
    GetOneColumn(&column, &rounds[(nkeywords - 1) >> 2], (nkeywords - 1) & 3);

    for (i = nkeywords; i < 4 * (nrounds + 1); i++) {
        /* Transform column */
        if (pos == 0) {
            /* First word of each nkeywords-sized group: apply the S-box,
             * rotate the word, XOR in rcon, and advance rcon (multiply by x). */
            SubBytes(&column, 0);
            KeySetupTransform(&column, &rcon);
            MultX(&rcon);
        } else if (nkeywords > 6 && pos == 4) {
            /* AES-256 only: an extra S-box pass on the middle word of each group. */
            SubBytes(&column, 0);
        }
        if (++pos == nkeywords) pos = 0;
        /* New word i = transformed accumulator XOR word (i - nkeywords);
         * store it into column (i & 3) of round state (i >> 2). */
        KeySetupColumnMix(&column, &rounds[i >> 2], &rounds[(i - nkeywords) >> 2], i & 3, (i - nkeywords) & 3);
    }
}
448 | | |
449 | 0 | static void AES_encrypt(const AES_state* rounds, int nrounds, unsigned char* cipher16, const unsigned char* plain16) { |
450 | 0 | AES_state s = {{0}}; |
451 | 0 | int round; |
452 | |
|
453 | 0 | LoadBytes(&s, plain16); |
454 | 0 | AddRoundKey(&s, rounds++); |
455 | |
|
456 | 0 | for (round = 1; round < nrounds; round++) { |
457 | 0 | SubBytes(&s, 0); |
458 | 0 | ShiftRows(&s); |
459 | 0 | MixColumns(&s, 0); |
460 | 0 | AddRoundKey(&s, rounds++); |
461 | 0 | } |
462 | |
|
463 | 0 | SubBytes(&s, 0); |
464 | 0 | ShiftRows(&s); |
465 | 0 | AddRoundKey(&s, rounds); |
466 | |
|
467 | 0 | SaveBytes(cipher16, &s); |
468 | 0 | } |
469 | | |
470 | 0 | static void AES_decrypt(const AES_state* rounds, int nrounds, unsigned char* plain16, const unsigned char* cipher16) { |
471 | | /* Most AES decryption implementations use the alternate scheme |
472 | | * (the Equivalent Inverse Cipher), which allows for more code reuse between |
473 | | * the encryption and decryption code, but requires separate setup for both. |
474 | | */ |
475 | 0 | AES_state s = {{0}}; |
476 | 0 | int round; |
477 | |
|
478 | 0 | rounds += nrounds; |
479 | |
|
480 | 0 | LoadBytes(&s, cipher16); |
481 | 0 | AddRoundKey(&s, rounds--); |
482 | |
|
483 | 0 | for (round = 1; round < nrounds; round++) { |
484 | 0 | InvShiftRows(&s); |
485 | 0 | SubBytes(&s, 1); |
486 | 0 | AddRoundKey(&s, rounds--); |
487 | 0 | MixColumns(&s, 1); |
488 | 0 | } |
489 | |
|
490 | 0 | InvShiftRows(&s); |
491 | 0 | SubBytes(&s, 1); |
492 | 0 | AddRoundKey(&s, rounds); |
493 | |
|
494 | 0 | SaveBytes(plain16, &s); |
495 | 0 | } |
496 | | |
497 | 0 | void AES128_init(AES128_ctx* ctx, const unsigned char* key16) { |
498 | 0 | AES_setup(ctx->rk, key16, 4, 10); |
499 | 0 | } |
500 | | |
501 | 0 | void AES128_encrypt(const AES128_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) { |
502 | 0 | while (blocks--) { |
503 | 0 | AES_encrypt(ctx->rk, 10, cipher16, plain16); |
504 | 0 | cipher16 += 16; |
505 | 0 | plain16 += 16; |
506 | 0 | } |
507 | 0 | } |
508 | | |
509 | 0 | void AES128_decrypt(const AES128_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) { |
510 | 0 | while (blocks--) { |
511 | 0 | AES_decrypt(ctx->rk, 10, plain16, cipher16); |
512 | 0 | cipher16 += 16; |
513 | 0 | plain16 += 16; |
514 | 0 | } |
515 | 0 | } |
516 | | |
517 | 0 | void AES192_init(AES192_ctx* ctx, const unsigned char* key24) { |
518 | 0 | AES_setup(ctx->rk, key24, 6, 12); |
519 | 0 | } |
520 | | |
521 | 0 | void AES192_encrypt(const AES192_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) { |
522 | 0 | while (blocks--) { |
523 | 0 | AES_encrypt(ctx->rk, 12, cipher16, plain16); |
524 | 0 | cipher16 += 16; |
525 | 0 | plain16 += 16; |
526 | 0 | } |
527 | |
|
528 | 0 | } |
529 | | |
530 | 0 | void AES192_decrypt(const AES192_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) { |
531 | 0 | while (blocks--) { |
532 | 0 | AES_decrypt(ctx->rk, 12, plain16, cipher16); |
533 | 0 | cipher16 += 16; |
534 | 0 | plain16 += 16; |
535 | 0 | } |
536 | 0 | } |
537 | | |
538 | 0 | void AES256_init(AES256_ctx* ctx, const unsigned char* key32) { |
539 | 0 | AES_setup(ctx->rk, key32, 8, 14); |
540 | 0 | } |
541 | | |
542 | 0 | void AES256_encrypt(const AES256_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) { |
543 | 0 | while (blocks--) { |
544 | 0 | AES_encrypt(ctx->rk, 14, cipher16, plain16); |
545 | 0 | cipher16 += 16; |
546 | 0 | plain16 += 16; |
547 | 0 | } |
548 | 0 | } |
549 | | |
550 | 0 | void AES256_decrypt(const AES256_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) { |
551 | 0 | while (blocks--) { |
552 | 0 | AES_decrypt(ctx->rk, 14, plain16, cipher16); |
553 | 0 | cipher16 += 16; |
554 | 0 | plain16 += 16; |
555 | 0 | } |
556 | 0 | } |