/root/bitcoin/src/crypto/ctaes/ctaes.c
| Line | Count | Source | 
| 1 |  |  /********************************************************************* | 
| 2 |  |  * Copyright (c) 2016 Pieter Wuille                                   * | 
| 3 |  |  * Distributed under the MIT software license, see the accompanying   * | 
| 4 |  |  * file COPYING or http://www.opensource.org/licenses/mit-license.php.* | 
| 5 |  |  **********************************************************************/ | 
| 6 |  |  | 
| 7 |  | /* Constant time, unoptimized, concise, plain C, AES implementation | 
| 8 |  |  * Based On: | 
| 9 |  |  *   Emilia Kasper and Peter Schwabe, Faster and Timing-Attack Resistant AES-GCM | 
| 10 |  |  *   http://www.iacr.org/archive/ches2009/57470001/57470001.pdf | 
| 11 |  |  * But using 8 16-bit integers representing a single AES state rather than 8 128-bit | 
| 12 |  |  * integers representing 8 AES states. | 
| 13 |  |  */ | 
| 14 |  |  | 
| 15 |  | #include "ctaes.h" | 
| 16 |  |  | 
| 17 |  | /* Slice variable slice_i contains the i'th bit of the 16 state variables in this order: | 
| 18 |  |  *  0  1  2  3 | 
| 19 |  |  *  4  5  6  7 | 
| 20 |  |  *  8  9 10 11 | 
| 21 |  |  * 12 13 14 15 | 
| 22 |  |  */ | 
| 23 |  |  | 
| 24 |  | /** Convert a byte to sliced form, storing it corresponding to given row and column in s */ | 
| 25 | 0 | static void LoadByte(AES_state* s, unsigned char byte, int r, int c) { | 
| 26 | 0 |     int i; | 
| 27 | 0 |     for (i = 0; i < 8; i++) { | 
| 28 | 0 |         s->slice[i] |= (byte & 1) << (r * 4 + c); | 
| 29 | 0 |         byte >>= 1; | 
| 30 | 0 |     } | 
| 31 | 0 | } | 
| 32 |  |  | 
| 33 |  | /** Load 16 bytes of data into 8 sliced integers */ | 
| 34 | 0 | static void LoadBytes(AES_state *s, const unsigned char* data16) { | 
| 35 | 0 |     int c; | 
| 36 | 0 |     for (c = 0; c < 4; c++) { | 
| 37 | 0 |         int r; | 
| 38 | 0 |         for (r = 0; r < 4; r++) { | 
| 39 | 0 |             LoadByte(s, *(data16++), r, c); | 
| 40 | 0 |         } | 
| 41 | 0 |     } | 
| 42 | 0 | } | 
| 43 |  |  | 
| 44 |  | /** Convert 8 sliced integers into 16 bytes of data */ | 
| 45 | 0 | static void SaveBytes(unsigned char* data16, const AES_state *s) { | 
| 46 | 0 |     int c; | 
| 47 | 0 |     for (c = 0; c < 4; c++) { | 
| 48 | 0 |         int r; | 
| 49 | 0 |         for (r = 0; r < 4; r++) { | 
| 50 | 0 |             int b; | 
| 51 | 0 |             uint8_t v = 0; | 
| 52 | 0 |             for (b = 0; b < 8; b++) { | 
| 53 | 0 |                 v |= ((s->slice[b] >> (r * 4 + c)) & 1) << b; | 
| 54 | 0 |             } | 
| 55 | 0 |             *(data16++) = v; | 
| 56 | 0 |         } | 
| 57 | 0 |     } | 
| 58 | 0 | } | 
| 59 |  |  | 
| 60 |  | /* S-box implementation based on the gate logic from: | 
| 61 |  |  *   Joan Boyar and Rene Peralta, A depth-16 circuit for the AES S-box. | 
| 62 |  |  *   https://eprint.iacr.org/2011/332.pdf | 
| 63 |  | */ | 
| 64 | 0 | static void SubBytes(AES_state *s, int inv) { | 
| 65 |  |     /* Load the bit slices */ | 
| 66 | 0 |     uint16_t U0 = s->slice[7], U1 = s->slice[6], U2 = s->slice[5], U3 = s->slice[4]; | 
| 67 | 0 |     uint16_t U4 = s->slice[3], U5 = s->slice[2], U6 = s->slice[1], U7 = s->slice[0]; | 
| 68 |  | 
 | 
| 69 | 0 |     uint16_t T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16; | 
| 70 | 0 |     uint16_t T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, D; | 
| 71 | 0 |     uint16_t M1, M6, M11, M13, M15, M20, M21, M22, M23, M25, M37, M38, M39, M40; | 
| 72 | 0 |     uint16_t M41, M42, M43, M44, M45, M46, M47, M48, M49, M50, M51, M52, M53, M54; | 
| 73 | 0 |     uint16_t M55, M56, M57, M58, M59, M60, M61, M62, M63; | 
| 74 |  | 
 | 
| 75 | 0 |     if (inv) { | 
| 76 | 0 |         uint16_t R5, R13, R17, R18, R19; | 
| 77 |  |         /* Undo linear postprocessing */ | 
| 78 | 0 |         T23 = U0 ^ U3; | 
| 79 | 0 |         T22 = ~(U1 ^ U3); | 
| 80 | 0 |         T2 = ~(U0 ^ U1); | 
| 81 | 0 |         T1 = U3 ^ U4; | 
| 82 | 0 |         T24 = ~(U4 ^ U7); | 
| 83 | 0 |         R5 = U6 ^ U7; | 
| 84 | 0 |         T8 = ~(U1 ^ T23); | 
| 85 | 0 |         T19 = T22 ^ R5; | 
| 86 | 0 |         T9 = ~(U7 ^ T1); | 
| 87 | 0 |         T10 = T2 ^ T24; | 
| 88 | 0 |         T13 = T2 ^ R5; | 
| 89 | 0 |         T3 = T1 ^ R5; | 
| 90 | 0 |         T25 = ~(U2 ^ T1); | 
| 91 | 0 |         R13 = U1 ^ U6; | 
| 92 | 0 |         T17 = ~(U2 ^ T19); | 
| 93 | 0 |         T20 = T24 ^ R13; | 
| 94 | 0 |         T4 = U4 ^ T8; | 
| 95 | 0 |         R17 = ~(U2 ^ U5); | 
| 96 | 0 |         R18 = ~(U5 ^ U6); | 
| 97 | 0 |         R19 = ~(U2 ^ U4); | 
| 98 | 0 |         D = U0 ^ R17; | 
| 99 | 0 |         T6 = T22 ^ R17; | 
| 100 | 0 |         T16 = R13 ^ R19; | 
| 101 | 0 |         T27 = T1 ^ R18; | 
| 102 | 0 |         T15 = T10 ^ T27; | 
| 103 | 0 |         T14 = T10 ^ R18; | 
| 104 | 0 |         T26 = T3 ^ T16; | 
| 105 | 0 |     } else { | 
| 106 |  |         /* Linear preprocessing. */ | 
| 107 | 0 |         T1 = U0 ^ U3; | 
| 108 | 0 |         T2 = U0 ^ U5; | 
| 109 | 0 |         T3 = U0 ^ U6; | 
| 110 | 0 |         T4 = U3 ^ U5; | 
| 111 | 0 |         T5 = U4 ^ U6; | 
| 112 | 0 |         T6 = T1 ^ T5; | 
| 113 | 0 |         T7 = U1 ^ U2; | 
| 114 | 0 |         T8 = U7 ^ T6; | 
| 115 | 0 |         T9 = U7 ^ T7; | 
| 116 | 0 |         T10 = T6 ^ T7; | 
| 117 | 0 |         T11 = U1 ^ U5; | 
| 118 | 0 |         T12 = U2 ^ U5; | 
| 119 | 0 |         T13 = T3 ^ T4; | 
| 120 | 0 |         T14 = T6 ^ T11; | 
| 121 | 0 |         T15 = T5 ^ T11; | 
| 122 | 0 |         T16 = T5 ^ T12; | 
| 123 | 0 |         T17 = T9 ^ T16; | 
| 124 | 0 |         T18 = U3 ^ U7; | 
| 125 | 0 |         T19 = T7 ^ T18; | 
| 126 | 0 |         T20 = T1 ^ T19; | 
| 127 | 0 |         T21 = U6 ^ U7; | 
| 128 | 0 |         T22 = T7 ^ T21; | 
| 129 | 0 |         T23 = T2 ^ T22; | 
| 130 | 0 |         T24 = T2 ^ T10; | 
| 131 | 0 |         T25 = T20 ^ T17; | 
| 132 | 0 |         T26 = T3 ^ T16; | 
| 133 | 0 |         T27 = T1 ^ T12; | 
| 134 | 0 |         D = U7; | 
| 135 | 0 |     } | 
| 136 |  |  | 
| 137 |  |     /* Non-linear transformation (shared between the forward and backward case) */ | 
| 138 | 0 |     M1 = T13 & T6; | 
| 139 | 0 |     M6 = T3 & T16; | 
| 140 | 0 |     M11 = T1 & T15; | 
| 141 | 0 |     M13 = (T4 & T27) ^ M11; | 
| 142 | 0 |     M15 = (T2 & T10) ^ M11; | 
| 143 | 0 |     M20 = T14 ^ M1 ^ (T23 & T8) ^ M13; | 
| 144 | 0 |     M21 = (T19 & D) ^ M1 ^ T24 ^ M15; | 
| 145 | 0 |     M22 = T26 ^ M6 ^ (T22 & T9) ^ M13; | 
| 146 | 0 |     M23 = (T20 & T17) ^ M6 ^ M15 ^ T25; | 
| 147 | 0 |     M25 = M22 & M20; | 
| 148 | 0 |     M37 = M21 ^ ((M20 ^ M21) & (M23 ^ M25)); | 
| 149 | 0 |     M38 = M20 ^ M25 ^ (M21 | (M20 & M23)); | 
| 150 | 0 |     M39 = M23 ^ ((M22 ^ M23) & (M21 ^ M25)); | 
| 151 | 0 |     M40 = M22 ^ M25 ^ (M23 | (M21 & M22)); | 
| 152 | 0 |     M41 = M38 ^ M40; | 
| 153 | 0 |     M42 = M37 ^ M39; | 
| 154 | 0 |     M43 = M37 ^ M38; | 
| 155 | 0 |     M44 = M39 ^ M40; | 
| 156 | 0 |     M45 = M42 ^ M41; | 
| 157 | 0 |     M46 = M44 & T6; | 
| 158 | 0 |     M47 = M40 & T8; | 
| 159 | 0 |     M48 = M39 & D; | 
| 160 | 0 |     M49 = M43 & T16; | 
| 161 | 0 |     M50 = M38 & T9; | 
| 162 | 0 |     M51 = M37 & T17; | 
| 163 | 0 |     M52 = M42 & T15; | 
| 164 | 0 |     M53 = M45 & T27; | 
| 165 | 0 |     M54 = M41 & T10; | 
| 166 | 0 |     M55 = M44 & T13; | 
| 167 | 0 |     M56 = M40 & T23; | 
| 168 | 0 |     M57 = M39 & T19; | 
| 169 | 0 |     M58 = M43 & T3; | 
| 170 | 0 |     M59 = M38 & T22; | 
| 171 | 0 |     M60 = M37 & T20; | 
| 172 | 0 |     M61 = M42 & T1; | 
| 173 | 0 |     M62 = M45 & T4; | 
| 174 | 0 |     M63 = M41 & T2; | 
| 175 |  | 
 | 
| 176 | 0 |     if (inv){ | 
| 177 |  |         /* Undo linear preprocessing */ | 
| 178 | 0 |         uint16_t P0 = M52 ^ M61; | 
| 179 | 0 |         uint16_t P1 = M58 ^ M59; | 
| 180 | 0 |         uint16_t P2 = M54 ^ M62; | 
| 181 | 0 |         uint16_t P3 = M47 ^ M50; | 
| 182 | 0 |         uint16_t P4 = M48 ^ M56; | 
| 183 | 0 |         uint16_t P5 = M46 ^ M51; | 
| 184 | 0 |         uint16_t P6 = M49 ^ M60; | 
| 185 | 0 |         uint16_t P7 = P0 ^ P1; | 
| 186 | 0 |         uint16_t P8 = M50 ^ M53; | 
| 187 | 0 |         uint16_t P9 = M55 ^ M63; | 
| 188 | 0 |         uint16_t P10 = M57 ^ P4; | 
| 189 | 0 |         uint16_t P11 = P0 ^ P3; | 
| 190 | 0 |         uint16_t P12 = M46 ^ M48; | 
| 191 | 0 |         uint16_t P13 = M49 ^ M51; | 
| 192 | 0 |         uint16_t P14 = M49 ^ M62; | 
| 193 | 0 |         uint16_t P15 = M54 ^ M59; | 
| 194 | 0 |         uint16_t P16 = M57 ^ M61; | 
| 195 | 0 |         uint16_t P17 = M58 ^ P2; | 
| 196 | 0 |         uint16_t P18 = M63 ^ P5; | 
| 197 | 0 |         uint16_t P19 = P2 ^ P3; | 
| 198 | 0 |         uint16_t P20 = P4 ^ P6; | 
| 199 | 0 |         uint16_t P22 = P2 ^ P7; | 
| 200 | 0 |         uint16_t P23 = P7 ^ P8; | 
| 201 | 0 |         uint16_t P24 = P5 ^ P7; | 
| 202 | 0 |         uint16_t P25 = P6 ^ P10; | 
| 203 | 0 |         uint16_t P26 = P9 ^ P11; | 
| 204 | 0 |         uint16_t P27 = P10 ^ P18; | 
| 205 | 0 |         uint16_t P28 = P11 ^ P25; | 
| 206 | 0 |         uint16_t P29 = P15 ^ P20; | 
| 207 | 0 |         s->slice[7] = P13 ^ P22; | 
| 208 | 0 |         s->slice[6] = P26 ^ P29; | 
| 209 | 0 |         s->slice[5] = P17 ^ P28; | 
| 210 | 0 |         s->slice[4] = P12 ^ P22; | 
| 211 | 0 |         s->slice[3] = P23 ^ P27; | 
| 212 | 0 |         s->slice[2] = P19 ^ P24; | 
| 213 | 0 |         s->slice[1] = P14 ^ P23; | 
| 214 | 0 |         s->slice[0] = P9 ^ P16; | 
| 215 | 0 |     } else { | 
| 216 |  |         /* Linear postprocessing */ | 
| 217 | 0 |         uint16_t L0 = M61 ^ M62; | 
| 218 | 0 |         uint16_t L1 = M50 ^ M56; | 
| 219 | 0 |         uint16_t L2 = M46 ^ M48; | 
| 220 | 0 |         uint16_t L3 = M47 ^ M55; | 
| 221 | 0 |         uint16_t L4 = M54 ^ M58; | 
| 222 | 0 |         uint16_t L5 = M49 ^ M61; | 
| 223 | 0 |         uint16_t L6 = M62 ^ L5; | 
| 224 | 0 |         uint16_t L7 = M46 ^ L3; | 
| 225 | 0 |         uint16_t L8 = M51 ^ M59; | 
| 226 | 0 |         uint16_t L9 = M52 ^ M53; | 
| 227 | 0 |         uint16_t L10 = M53 ^ L4; | 
| 228 | 0 |         uint16_t L11 = M60 ^ L2; | 
| 229 | 0 |         uint16_t L12 = M48 ^ M51; | 
| 230 | 0 |         uint16_t L13 = M50 ^ L0; | 
| 231 | 0 |         uint16_t L14 = M52 ^ M61; | 
| 232 | 0 |         uint16_t L15 = M55 ^ L1; | 
| 233 | 0 |         uint16_t L16 = M56 ^ L0; | 
| 234 | 0 |         uint16_t L17 = M57 ^ L1; | 
| 235 | 0 |         uint16_t L18 = M58 ^ L8; | 
| 236 | 0 |         uint16_t L19 = M63 ^ L4; | 
| 237 | 0 |         uint16_t L20 = L0 ^ L1; | 
| 238 | 0 |         uint16_t L21 = L1 ^ L7; | 
| 239 | 0 |         uint16_t L22 = L3 ^ L12; | 
| 240 | 0 |         uint16_t L23 = L18 ^ L2; | 
| 241 | 0 |         uint16_t L24 = L15 ^ L9; | 
| 242 | 0 |         uint16_t L25 = L6 ^ L10; | 
| 243 | 0 |         uint16_t L26 = L7 ^ L9; | 
| 244 | 0 |         uint16_t L27 = L8 ^ L10; | 
| 245 | 0 |         uint16_t L28 = L11 ^ L14; | 
| 246 | 0 |         uint16_t L29 = L11 ^ L17; | 
| 247 | 0 |         s->slice[7] = L6 ^ L24; | 
| 248 | 0 |         s->slice[6] = ~(L16 ^ L26); | 
| 249 | 0 |         s->slice[5] = ~(L19 ^ L28); | 
| 250 | 0 |         s->slice[4] = L6 ^ L21; | 
| 251 | 0 |         s->slice[3] = L20 ^ L22; | 
| 252 | 0 |         s->slice[2] = L25 ^ L29; | 
| 253 | 0 |         s->slice[1] = ~(L13 ^ L27); | 
| 254 | 0 |         s->slice[0] = ~(L6 ^ L23); | 
| 255 | 0 |     } | 
| 256 | 0 | } | 
| 257 |  |  | 
/* Mask with the n lowest bits set. */
#define BIT_RANGE_LOW(n) ((1 << (n)) - 1)
/* Mask with bits from (inclusive) up to to (exclusive) set. */
#define BIT_RANGE(from,to) (BIT_RANGE_LOW((to) - (from)) << (from))

/* Keep only bits [from, to) of x and move them left / right by shift positions. */
#define BIT_RANGE_LEFT(x,from,to,shift) (((x) & BIT_RANGE((from), (to))) << (shift))
#define BIT_RANGE_RIGHT(x,from,to,shift) (((x) & BIT_RANGE((from), (to))) >> (shift))
| 262 |  |  | 
| 263 | 0 | static void ShiftRows(AES_state* s) { | 
| 264 | 0 |     int i; | 
| 265 | 0 |     for (i = 0; i < 8; i++) { | 
| 266 | 0 |         uint16_t v = s->slice[i]; | 
| 267 | 0 |         s->slice[i] = | 
| 268 | 0 |             (v & BIT_RANGE(0, 4)) | | 
| 269 | 0 |             BIT_RANGE_LEFT(v, 4, 5, 3) | BIT_RANGE_RIGHT(v, 5, 8, 1) | | 
| 270 | 0 |             BIT_RANGE_LEFT(v, 8, 10, 2) | BIT_RANGE_RIGHT(v, 10, 12, 2) | | 
| 271 | 0 |             BIT_RANGE_LEFT(v, 12, 15, 1) | BIT_RANGE_RIGHT(v, 15, 16, 3); | 
| 272 | 0 |     } | 
| 273 | 0 | } | 
| 274 |  |  | 
| 275 | 0 | static void InvShiftRows(AES_state* s) { | 
| 276 | 0 |     int i; | 
| 277 | 0 |     for (i = 0; i < 8; i++) { | 
| 278 | 0 |         uint16_t v = s->slice[i]; | 
| 279 | 0 |         s->slice[i] = | 
| 280 | 0 |             (v & BIT_RANGE(0, 4)) | | 
| 281 | 0 |             BIT_RANGE_LEFT(v, 4, 7, 1) | BIT_RANGE_RIGHT(v, 7, 8, 3) | | 
| 282 | 0 |             BIT_RANGE_LEFT(v, 8, 10, 2) | BIT_RANGE_RIGHT(v, 10, 12, 2) | | 
| 283 | 0 |             BIT_RANGE_LEFT(v, 12, 13, 3) | BIT_RANGE_RIGHT(v, 13, 16, 1); | 
| 284 | 0 |     } | 
| 285 | 0 | } | 
| 286 |  |  | 
/* Rotate a 16-bit slice right by b groups of 4 bits. The result is only
 * meaningful once it has been truncated back to 16 bits. */
#define ROT(x,b) (((x) >> (4 * (b))) | ((x) << (16 - 4 * (b))))
| 288 |  |  | 
| 289 | 0 | static void MixColumns(AES_state* s, int inv) { | 
| 290 |  |     /* The MixColumns transform treats the bytes of the columns of the state as | 
| 291 |  |      * coefficients of a 3rd degree polynomial over GF(2^8) and multiplies them | 
| 292 |  |      * by the fixed polynomial a(x) = {03}x^3 + {01}x^2 + {01}x + {02}, modulo | 
| 293 |  |      * x^4 + {01}. | 
| 294 |  |      * | 
| 295 |  |      * In the inverse transform, we multiply by the inverse of a(x), | 
| 296 |  |      * a^-1(x) = {0b}x^3 + {0d}x^2 + {09}x + {0e}. This is equal to | 
| 297 |  |      * a(x) * ({04}x^2 + {05}), so we can reuse the forward transform's code | 
| 298 |  |      * (found in OpenSSL's bsaes-x86_64.pl, attributed to Jussi Kivilinna) | 
| 299 |  |      * | 
| 300 |  |      * In the bitsliced representation, a multiplication of every column by x | 
| 301 |  |      * mod x^4 + 1 is simply a right rotation. | 
| 302 |  |      */ | 
| 303 |  |  | 
| 304 |  |     /* Shared for both directions is a multiplication by a(x), which can be | 
| 305 |  |      * rewritten as (x^3 + x^2 + x) + {02}*(x^3 + {01}). | 
| 306 |  |      * | 
| 307 |  |      * First compute s into the s? variables, (x^3 + {01}) * s into the s?_01 | 
| 308 |  |      * variables and (x^3 + x^2 + x)*s into the s?_123 variables. | 
| 309 |  |      */ | 
| 310 | 0 |     uint16_t s0 = s->slice[0], s1 = s->slice[1], s2 = s->slice[2], s3 = s->slice[3]; | 
| 311 | 0 |     uint16_t s4 = s->slice[4], s5 = s->slice[5], s6 = s->slice[6], s7 = s->slice[7]; | 
| 312 | 0 |     uint16_t s0_01 = s0 ^ ROT(s0, 1), s0_123 = ROT(s0_01, 1) ^ ROT(s0, 3); | 
| 313 | 0 |     uint16_t s1_01 = s1 ^ ROT(s1, 1), s1_123 = ROT(s1_01, 1) ^ ROT(s1, 3); | 
| 314 | 0 |     uint16_t s2_01 = s2 ^ ROT(s2, 1), s2_123 = ROT(s2_01, 1) ^ ROT(s2, 3); | 
| 315 | 0 |     uint16_t s3_01 = s3 ^ ROT(s3, 1), s3_123 = ROT(s3_01, 1) ^ ROT(s3, 3); | 
| 316 | 0 |     uint16_t s4_01 = s4 ^ ROT(s4, 1), s4_123 = ROT(s4_01, 1) ^ ROT(s4, 3); | 
| 317 | 0 |     uint16_t s5_01 = s5 ^ ROT(s5, 1), s5_123 = ROT(s5_01, 1) ^ ROT(s5, 3); | 
| 318 | 0 |     uint16_t s6_01 = s6 ^ ROT(s6, 1), s6_123 = ROT(s6_01, 1) ^ ROT(s6, 3); | 
| 319 | 0 |     uint16_t s7_01 = s7 ^ ROT(s7, 1), s7_123 = ROT(s7_01, 1) ^ ROT(s7, 3); | 
| 320 |  |     /* Now compute s = s?_123 + {02} * s?_01. */ | 
| 321 | 0 |     s->slice[0] = s7_01 ^ s0_123; | 
| 322 | 0 |     s->slice[1] = s7_01 ^ s0_01 ^ s1_123; | 
| 323 | 0 |     s->slice[2] = s1_01 ^ s2_123; | 
| 324 | 0 |     s->slice[3] = s7_01 ^ s2_01 ^ s3_123; | 
| 325 | 0 |     s->slice[4] = s7_01 ^ s3_01 ^ s4_123; | 
| 326 | 0 |     s->slice[5] = s4_01 ^ s5_123; | 
| 327 | 0 |     s->slice[6] = s5_01 ^ s6_123; | 
| 328 | 0 |     s->slice[7] = s6_01 ^ s7_123; | 
| 329 | 0 |     if (inv) { | 
| 330 |  |         /* In the reverse direction, we further need to multiply by | 
| 331 |  |          * {04}x^2 + {05}, which can be written as {04} * (x^2 + {01}) + {01}. | 
| 332 |  |          * | 
| 333 |  |          * First compute (x^2 + {01}) * s into the t?_02 variables: */ | 
| 334 | 0 |         uint16_t t0_02 = s->slice[0] ^ ROT(s->slice[0], 2); | 
| 335 | 0 |         uint16_t t1_02 = s->slice[1] ^ ROT(s->slice[1], 2); | 
| 336 | 0 |         uint16_t t2_02 = s->slice[2] ^ ROT(s->slice[2], 2); | 
| 337 | 0 |         uint16_t t3_02 = s->slice[3] ^ ROT(s->slice[3], 2); | 
| 338 | 0 |         uint16_t t4_02 = s->slice[4] ^ ROT(s->slice[4], 2); | 
| 339 | 0 |         uint16_t t5_02 = s->slice[5] ^ ROT(s->slice[5], 2); | 
| 340 | 0 |         uint16_t t6_02 = s->slice[6] ^ ROT(s->slice[6], 2); | 
| 341 | 0 |         uint16_t t7_02 = s->slice[7] ^ ROT(s->slice[7], 2); | 
| 342 |  |         /* And then update s += {04} * t?_02 */ | 
| 343 | 0 |         s->slice[0] ^= t6_02; | 
| 344 | 0 |         s->slice[1] ^= t6_02 ^ t7_02; | 
| 345 | 0 |         s->slice[2] ^= t0_02 ^ t7_02; | 
| 346 | 0 |         s->slice[3] ^= t1_02 ^ t6_02; | 
| 347 | 0 |         s->slice[4] ^= t2_02 ^ t6_02 ^ t7_02; | 
| 348 | 0 |         s->slice[5] ^= t3_02 ^ t7_02; | 
| 349 | 0 |         s->slice[6] ^= t4_02; | 
| 350 | 0 |         s->slice[7] ^= t5_02; | 
| 351 | 0 |     } | 
| 352 | 0 | } | 
| 353 |  |  | 
| 354 | 0 | static void AddRoundKey(AES_state* s, const AES_state* round) { | 
| 355 | 0 |     int b; | 
| 356 | 0 |     for (b = 0; b < 8; b++) { | 
| 357 | 0 |         s->slice[b] ^= round->slice[b]; | 
| 358 | 0 |     } | 
| 359 | 0 | } | 
| 360 |  |  | 
| 361 |  | /** column_0(s) = column_c(a) */ | 
| 362 | 0 | static void GetOneColumn(AES_state* s, const AES_state* a, int c) { | 
| 363 | 0 |     int b; | 
| 364 | 0 |     for (b = 0; b < 8; b++) { | 
| 365 | 0 |         s->slice[b] = (a->slice[b] >> c) & 0x1111; | 
| 366 | 0 |     } | 
| 367 | 0 | } | 
| 368 |  |  | 
| 369 |  | /** column_c1(r) |= (column_0(s) ^= column_c2(a)) */ | 
| 370 | 0 | static void KeySetupColumnMix(AES_state* s, AES_state* r, const AES_state* a, int c1, int c2) { | 
| 371 | 0 |     int b; | 
| 372 | 0 |     for (b = 0; b < 8; b++) { | 
| 373 | 0 |         r->slice[b] |= ((s->slice[b] ^= ((a->slice[b] >> c2) & 0x1111)) & 0x1111) << c1; | 
| 374 | 0 |     } | 
| 375 | 0 | } | 
| 376 |  |  | 
| 377 |  | /** Rotate the rows in s one position upwards, and xor in r */ | 
| 378 | 0 | static void KeySetupTransform(AES_state* s, const AES_state* r) { | 
| 379 | 0 |     int b; | 
| 380 | 0 |     for (b = 0; b < 8; b++) { | 
| 381 | 0 |         s->slice[b] = ((s->slice[b] >> 4) | (s->slice[b] << 12)) ^ r->slice[b]; | 
| 382 | 0 |     } | 
| 383 | 0 | } | 
| 384 |  |  | 
| 385 |  | /* Multiply the cells in s by x, as polynomials over GF(2) mod x^8 + x^4 + x^3 + x + 1 */ | 
| 386 | 0 | static void MultX(AES_state* s) { | 
| 387 | 0 |     uint16_t top = s->slice[7]; | 
| 388 | 0 |     s->slice[7] = s->slice[6]; | 
| 389 | 0 |     s->slice[6] = s->slice[5]; | 
| 390 | 0 |     s->slice[5] = s->slice[4]; | 
| 391 | 0 |     s->slice[4] = s->slice[3] ^ top; | 
| 392 | 0 |     s->slice[3] = s->slice[2] ^ top; | 
| 393 | 0 |     s->slice[2] = s->slice[1]; | 
| 394 | 0 |     s->slice[1] = s->slice[0] ^ top; | 
| 395 | 0 |     s->slice[0] = top; | 
| 396 | 0 | } | 
| 397 |  |  | 
| 398 |  | /** Expand the cipher key into the key schedule. | 
| 399 |  |  * | 
| 400 |  |  *  state must be a pointer to an array of size nrounds + 1. | 
| 401 |  |  *  key must be a pointer to 4 * nkeywords bytes. | 
| 402 |  |  * | 
| 403 |  |  *  AES128 uses nkeywords = 4, nrounds = 10 | 
| 404 |  |  *  AES192 uses nkeywords = 6, nrounds = 12 | 
| 405 |  |  *  AES256 uses nkeywords = 8, nrounds = 14 | 
| 406 |  |  */ | 
| 407 |  | static void AES_setup(AES_state* rounds, const uint8_t* key, int nkeywords, int nrounds) | 
| 408 | 0 | { | 
| 409 | 0 |     int i; | 
| 410 |  |  | 
| 411 |  |     /* The one-byte round constant */ | 
| 412 | 0 |     AES_state rcon = {{1,0,0,0,0,0,0,0}}; | 
| 413 |  |     /* The number of the word being generated, modulo nkeywords */ | 
| 414 | 0 |     int pos = 0; | 
| 415 |  |     /* The column representing the word currently being processed */ | 
| 416 | 0 |     AES_state column; | 
| 417 |  | 
 | 
| 418 | 0 |     for (i = 0; i < nrounds + 1; i++) { | 
| 419 | 0 |         int b; | 
| 420 | 0 |         for (b = 0; b < 8; b++) { | 
| 421 | 0 |             rounds[i].slice[b] = 0; | 
| 422 | 0 |         } | 
| 423 | 0 |     } | 
| 424 |  |  | 
| 425 |  |     /* The first nkeywords round columns are just taken from the key directly. */ | 
| 426 | 0 |     for (i = 0; i < nkeywords; i++) { | 
| 427 | 0 |         int r; | 
| 428 | 0 |         for (r = 0; r < 4; r++) { | 
| 429 | 0 |             LoadByte(&rounds[i >> 2], *(key++), r, i & 3); | 
| 430 | 0 |         } | 
| 431 | 0 |     } | 
| 432 |  | 
 | 
| 433 | 0 |     GetOneColumn(&column, &rounds[(nkeywords - 1) >> 2], (nkeywords - 1) & 3); | 
| 434 |  | 
 | 
| 435 | 0 |     for (i = nkeywords; i < 4 * (nrounds + 1); i++) { | 
| 436 |  |         /* Transform column */ | 
| 437 | 0 |         if (pos == 0) { | 
| 438 | 0 |             SubBytes(&column, 0); | 
| 439 | 0 |             KeySetupTransform(&column, &rcon); | 
| 440 | 0 |             MultX(&rcon); | 
| 441 | 0 |         } else if (nkeywords > 6 && pos == 4) { | 
| 442 | 0 |             SubBytes(&column, 0); | 
| 443 | 0 |         } | 
| 444 | 0 |         if (++pos == nkeywords) pos = 0; | 
| 445 | 0 |         KeySetupColumnMix(&column, &rounds[i >> 2], &rounds[(i - nkeywords) >> 2], i & 3, (i - nkeywords) & 3); | 
| 446 | 0 |     } | 
| 447 | 0 | } | 
| 448 |  |  | 
| 449 | 0 | static void AES_encrypt(const AES_state* rounds, int nrounds, unsigned char* cipher16, const unsigned char* plain16) { | 
| 450 | 0 |     AES_state s = {{0}}; | 
| 451 | 0 |     int round; | 
| 452 |  | 
 | 
| 453 | 0 |     LoadBytes(&s, plain16); | 
| 454 | 0 |     AddRoundKey(&s, rounds++); | 
| 455 |  | 
 | 
| 456 | 0 |     for (round = 1; round < nrounds; round++) { | 
| 457 | 0 |         SubBytes(&s, 0); | 
| 458 | 0 |         ShiftRows(&s); | 
| 459 | 0 |         MixColumns(&s, 0); | 
| 460 | 0 |         AddRoundKey(&s, rounds++); | 
| 461 | 0 |     } | 
| 462 |  | 
 | 
| 463 | 0 |     SubBytes(&s, 0); | 
| 464 | 0 |     ShiftRows(&s); | 
| 465 | 0 |     AddRoundKey(&s, rounds); | 
| 466 |  | 
 | 
| 467 | 0 |     SaveBytes(cipher16, &s); | 
| 468 | 0 | } | 
| 469 |  |  | 
| 470 | 0 | static void AES_decrypt(const AES_state* rounds, int nrounds, unsigned char* plain16, const unsigned char* cipher16) { | 
| 471 |  |     /* Most AES decryption implementations use the alternate scheme | 
| 472 |  |      * (the Equivalent Inverse Cipher), which allows for more code reuse between | 
| 473 |  |      * the encryption and decryption code, but requires separate setup for both. | 
| 474 |  |      */ | 
| 475 | 0 |     AES_state s = {{0}}; | 
| 476 | 0 |     int round; | 
| 477 |  | 
 | 
| 478 | 0 |     rounds += nrounds; | 
| 479 |  | 
 | 
| 480 | 0 |     LoadBytes(&s, cipher16); | 
| 481 | 0 |     AddRoundKey(&s, rounds--); | 
| 482 |  | 
 | 
| 483 | 0 |     for (round = 1; round < nrounds; round++) { | 
| 484 | 0 |         InvShiftRows(&s); | 
| 485 | 0 |         SubBytes(&s, 1); | 
| 486 | 0 |         AddRoundKey(&s, rounds--); | 
| 487 | 0 |         MixColumns(&s, 1); | 
| 488 | 0 |     } | 
| 489 |  | 
 | 
| 490 | 0 |     InvShiftRows(&s); | 
| 491 | 0 |     SubBytes(&s, 1); | 
| 492 | 0 |     AddRoundKey(&s, rounds); | 
| 493 |  | 
 | 
| 494 | 0 |     SaveBytes(plain16, &s); | 
| 495 | 0 | } | 
| 496 |  |  | 
| 497 | 0 | void AES128_init(AES128_ctx* ctx, const unsigned char* key16) { | 
| 498 | 0 |     AES_setup(ctx->rk, key16, 4, 10); | 
| 499 | 0 | } | 
| 500 |  |  | 
| 501 | 0 | void AES128_encrypt(const AES128_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) { | 
| 502 | 0 |     while (blocks--) { | 
| 503 | 0 |         AES_encrypt(ctx->rk, 10, cipher16, plain16); | 
| 504 | 0 |         cipher16 += 16; | 
| 505 | 0 |         plain16 += 16; | 
| 506 | 0 |     } | 
| 507 | 0 | } | 
| 508 |  |  | 
| 509 | 0 | void AES128_decrypt(const AES128_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) { | 
| 510 | 0 |     while (blocks--) { | 
| 511 | 0 |         AES_decrypt(ctx->rk, 10, plain16, cipher16); | 
| 512 | 0 |         cipher16 += 16; | 
| 513 | 0 |         plain16 += 16; | 
| 514 | 0 |     } | 
| 515 | 0 | } | 
| 516 |  |  | 
| 517 | 0 | void AES192_init(AES192_ctx* ctx, const unsigned char* key24) { | 
| 518 | 0 |     AES_setup(ctx->rk, key24, 6, 12); | 
| 519 | 0 | } | 
| 520 |  |  | 
| 521 | 0 | void AES192_encrypt(const AES192_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) { | 
| 522 | 0 |     while (blocks--) { | 
| 523 | 0 |         AES_encrypt(ctx->rk, 12, cipher16, plain16); | 
| 524 | 0 |         cipher16 += 16; | 
| 525 | 0 |         plain16 += 16; | 
| 526 | 0 |     } | 
| 527 |  | 
 | 
| 528 | 0 | } | 
| 529 |  |  | 
| 530 | 0 | void AES192_decrypt(const AES192_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) { | 
| 531 | 0 |     while (blocks--) { | 
| 532 | 0 |         AES_decrypt(ctx->rk, 12, plain16, cipher16); | 
| 533 | 0 |         cipher16 += 16; | 
| 534 | 0 |         plain16 += 16; | 
| 535 | 0 |     } | 
| 536 | 0 | } | 
| 537 |  |  | 
| 538 | 0 | void AES256_init(AES256_ctx* ctx, const unsigned char* key32) { | 
| 539 | 0 |     AES_setup(ctx->rk, key32, 8, 14); | 
| 540 | 0 | } | 
| 541 |  |  | 
| 542 | 0 | void AES256_encrypt(const AES256_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) { | 
| 543 | 0 |     while (blocks--) { | 
| 544 | 0 |         AES_encrypt(ctx->rk, 14, cipher16, plain16); | 
| 545 | 0 |         cipher16 += 16; | 
| 546 | 0 |         plain16 += 16; | 
| 547 | 0 |     } | 
| 548 | 0 | } | 
| 549 |  |  | 
| 550 | 0 | void AES256_decrypt(const AES256_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) { | 
| 551 | 0 |     while (blocks--) { | 
| 552 | 0 |         AES_decrypt(ctx->rk, 14, plain16, cipher16); | 
| 553 | 0 |         cipher16 += 16; | 
| 554 | 0 |         plain16 += 16; | 
| 555 | 0 |     } | 
| 556 | 0 | } |