/root/bitcoin/src/crypto/ctaes/ctaes.c
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************* |
2 | | * Copyright (c) 2016 Pieter Wuille * |
3 | | * Distributed under the MIT software license, see the accompanying * |
4 | | * file COPYING or http://www.opensource.org/licenses/mit-license.php.* |
5 | | **********************************************************************/ |
6 | | |
7 | | /* Constant time, unoptimized, concise, plain C, AES implementation |
8 | | * Based On: |
9 | | * Emilia Kasper and Peter Schwabe, Faster and Timing-Attack Resistant AES-GCM |
10 | | * http://www.iacr.org/archive/ches2009/57470001/57470001.pdf |
11 | | * But using 8 16-bit integers representing a single AES state rather than 8 128-bit |
12 | | * integers representing 8 AES states. |
13 | | */ |
14 | | |
15 | | #include "ctaes.h" |
16 | | |
17 | | /* Slice variable slice_i contains the i'th bit of the 16 state variables in this order: |
18 | | * 0 1 2 3 |
19 | | * 4 5 6 7 |
20 | | * 8 9 10 11 |
21 | | * 12 13 14 15 |
22 | | */ |
23 | | |
24 | | /** Convert a byte to sliced form, storing it corresponding to given row and column in s */ |
25 | 0 | static void LoadByte(AES_state* s, unsigned char byte, int r, int c) { |
26 | 0 | int i; |
27 | 0 | for (i = 0; i < 8; i++) { |
28 | 0 | s->slice[i] |= (byte & 1) << (r * 4 + c); |
29 | 0 | byte >>= 1; |
30 | 0 | } |
31 | 0 | } |
32 | | |
33 | | /** Load 16 bytes of data into 8 sliced integers */ |
34 | 0 | static void LoadBytes(AES_state *s, const unsigned char* data16) { |
35 | 0 | int c; |
36 | 0 | for (c = 0; c < 4; c++) { |
37 | 0 | int r; |
38 | 0 | for (r = 0; r < 4; r++) { |
39 | 0 | LoadByte(s, *(data16++), r, c); |
40 | 0 | } |
41 | 0 | } |
42 | 0 | } |
43 | | |
44 | | /** Convert 8 sliced integers into 16 bytes of data */ |
45 | 0 | static void SaveBytes(unsigned char* data16, const AES_state *s) { |
46 | 0 | int c; |
47 | 0 | for (c = 0; c < 4; c++) { |
48 | 0 | int r; |
49 | 0 | for (r = 0; r < 4; r++) { |
50 | 0 | int b; |
51 | 0 | uint8_t v = 0; |
52 | 0 | for (b = 0; b < 8; b++) { |
53 | 0 | v |= ((s->slice[b] >> (r * 4 + c)) & 1) << b; |
54 | 0 | } |
55 | 0 | *(data16++) = v; |
56 | 0 | } |
57 | 0 | } |
58 | 0 | } |
59 | | |
/* S-box implementation based on the gate logic from:
 * Joan Boyar and Rene Peralta, A depth-16 circuit for the AES S-box.
 * https://eprint.iacr.org/2011/332.pdf
 */
/** Apply the AES S-box (inv=0) or inverse S-box (inv=1) to all 16 state bytes.
 *
 *  Constant time: implemented purely with boolean operations on the 8 bit
 *  planes — no data-dependent branches or table lookups. The `inv` branch is
 *  key-independent (it depends only on encrypt-vs-decrypt mode).
 *
 *  Structure: a linear input layer, a shared non-linear middle layer (the
 *  GF(2^8) inversion from the Boyar-Peralta circuit), and a linear output
 *  layer; the two linear layers differ between the forward and inverse case.
 */
static void SubBytes(AES_state *s, int inv) {
    /* Load the bit slices (note the reversed index order: U0 is the MSB plane) */
    uint16_t U0 = s->slice[7], U1 = s->slice[6], U2 = s->slice[5], U3 = s->slice[4];
    uint16_t U4 = s->slice[3], U5 = s->slice[2], U6 = s->slice[1], U7 = s->slice[0];

    uint16_t T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16;
    uint16_t T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, D;
    uint16_t M1, M6, M11, M13, M15, M20, M21, M22, M23, M25, M37, M38, M39, M40;
    uint16_t M41, M42, M43, M44, M45, M46, M47, M48, M49, M50, M51, M52, M53, M54;
    uint16_t M55, M56, M57, M58, M59, M60, M61, M62, M63;

    if (inv) {
        uint16_t R5, R13, R17, R18, R19;
        /* Undo linear postprocessing (inverse of the forward output layer,
         * including the S-box's affine constant, hence the ~ negations) */
        T23 = U0 ^ U3;
        T22 = ~(U1 ^ U3);
        T2 = ~(U0 ^ U1);
        T1 = U3 ^ U4;
        T24 = ~(U4 ^ U7);
        R5 = U6 ^ U7;
        T8 = ~(U1 ^ T23);
        T19 = T22 ^ R5;
        T9 = ~(U7 ^ T1);
        T10 = T2 ^ T24;
        T13 = T2 ^ R5;
        T3 = T1 ^ R5;
        T25 = ~(U2 ^ T1);
        R13 = U1 ^ U6;
        T17 = ~(U2 ^ T19);
        T20 = T24 ^ R13;
        T4 = U4 ^ T8;
        R17 = ~(U2 ^ U5);
        R18 = ~(U5 ^ U6);
        R19 = ~(U2 ^ U4);
        D = U0 ^ R17;
        T6 = T22 ^ R17;
        T16 = R13 ^ R19;
        T27 = T1 ^ R18;
        T15 = T10 ^ T27;
        T14 = T10 ^ R18;
        T26 = T3 ^ T16;
    } else {
        /* Linear preprocessing. */
        T1 = U0 ^ U3;
        T2 = U0 ^ U5;
        T3 = U0 ^ U6;
        T4 = U3 ^ U5;
        T5 = U4 ^ U6;
        T6 = T1 ^ T5;
        T7 = U1 ^ U2;
        T8 = U7 ^ T6;
        T9 = U7 ^ T7;
        T10 = T6 ^ T7;
        T11 = U1 ^ U5;
        T12 = U2 ^ U5;
        T13 = T3 ^ T4;
        T14 = T6 ^ T11;
        T15 = T5 ^ T11;
        T16 = T5 ^ T12;
        T17 = T9 ^ T16;
        T18 = U3 ^ U7;
        T19 = T7 ^ T18;
        T20 = T1 ^ T19;
        T21 = U6 ^ U7;
        T22 = T7 ^ T21;
        T23 = T2 ^ T22;
        T24 = T2 ^ T10;
        T25 = T20 ^ T17;
        T26 = T3 ^ T16;
        T27 = T1 ^ T12;
        D = U7;
    }

    /* Non-linear transformation (shared between the forward and backward case) */
    M1 = T13 & T6;
    M6 = T3 & T16;
    M11 = T1 & T15;
    M13 = (T4 & T27) ^ M11;
    M15 = (T2 & T10) ^ M11;
    M20 = T14 ^ M1 ^ (T23 & T8) ^ M13;
    M21 = (T19 & D) ^ M1 ^ T24 ^ M15;
    M22 = T26 ^ M6 ^ (T22 & T9) ^ M13;
    M23 = (T20 & T17) ^ M6 ^ M15 ^ T25;
    M25 = M22 & M20;
    M37 = M21 ^ ((M20 ^ M21) & (M23 ^ M25));
    M38 = M20 ^ M25 ^ (M21 | (M20 & M23));
    M39 = M23 ^ ((M22 ^ M23) & (M21 ^ M25));
    M40 = M22 ^ M25 ^ (M23 | (M21 & M22));
    M41 = M38 ^ M40;
    M42 = M37 ^ M39;
    M43 = M37 ^ M38;
    M44 = M39 ^ M40;
    M45 = M42 ^ M41;
    M46 = M44 & T6;
    M47 = M40 & T8;
    M48 = M39 & D;
    M49 = M43 & T16;
    M50 = M38 & T9;
    M51 = M37 & T17;
    M52 = M42 & T15;
    M53 = M45 & T27;
    M54 = M41 & T10;
    M55 = M44 & T13;
    M56 = M40 & T23;
    M57 = M39 & T19;
    M58 = M43 & T3;
    M59 = M38 & T22;
    M60 = M37 & T20;
    M61 = M42 & T1;
    M62 = M45 & T4;
    M63 = M41 & T2;

    if (inv){
        /* Undo linear preprocessing (slices written back in reversed order,
         * mirroring how U0..U7 were loaded) */
        uint16_t P0 = M52 ^ M61;
        uint16_t P1 = M58 ^ M59;
        uint16_t P2 = M54 ^ M62;
        uint16_t P3 = M47 ^ M50;
        uint16_t P4 = M48 ^ M56;
        uint16_t P5 = M46 ^ M51;
        uint16_t P6 = M49 ^ M60;
        uint16_t P7 = P0 ^ P1;
        uint16_t P8 = M50 ^ M53;
        uint16_t P9 = M55 ^ M63;
        uint16_t P10 = M57 ^ P4;
        uint16_t P11 = P0 ^ P3;
        uint16_t P12 = M46 ^ M48;
        uint16_t P13 = M49 ^ M51;
        uint16_t P14 = M49 ^ M62;
        uint16_t P15 = M54 ^ M59;
        uint16_t P16 = M57 ^ M61;
        uint16_t P17 = M58 ^ P2;
        uint16_t P18 = M63 ^ P5;
        uint16_t P19 = P2 ^ P3;
        uint16_t P20 = P4 ^ P6;
        uint16_t P22 = P2 ^ P7;
        uint16_t P23 = P7 ^ P8;
        uint16_t P24 = P5 ^ P7;
        uint16_t P25 = P6 ^ P10;
        uint16_t P26 = P9 ^ P11;
        uint16_t P27 = P10 ^ P18;
        uint16_t P28 = P11 ^ P25;
        uint16_t P29 = P15 ^ P20;
        s->slice[7] = P13 ^ P22;
        s->slice[6] = P26 ^ P29;
        s->slice[5] = P17 ^ P28;
        s->slice[4] = P12 ^ P22;
        s->slice[3] = P23 ^ P27;
        s->slice[2] = P19 ^ P24;
        s->slice[1] = P14 ^ P23;
        s->slice[0] = P9 ^ P16;
    } else {
        /* Linear postprocessing (the ~ negations implement the affine
         * constant 0x63 of the forward S-box) */
        uint16_t L0 = M61 ^ M62;
        uint16_t L1 = M50 ^ M56;
        uint16_t L2 = M46 ^ M48;
        uint16_t L3 = M47 ^ M55;
        uint16_t L4 = M54 ^ M58;
        uint16_t L5 = M49 ^ M61;
        uint16_t L6 = M62 ^ L5;
        uint16_t L7 = M46 ^ L3;
        uint16_t L8 = M51 ^ M59;
        uint16_t L9 = M52 ^ M53;
        uint16_t L10 = M53 ^ L4;
        uint16_t L11 = M60 ^ L2;
        uint16_t L12 = M48 ^ M51;
        uint16_t L13 = M50 ^ L0;
        uint16_t L14 = M52 ^ M61;
        uint16_t L15 = M55 ^ L1;
        uint16_t L16 = M56 ^ L0;
        uint16_t L17 = M57 ^ L1;
        uint16_t L18 = M58 ^ L8;
        uint16_t L19 = M63 ^ L4;
        uint16_t L20 = L0 ^ L1;
        uint16_t L21 = L1 ^ L7;
        uint16_t L22 = L3 ^ L12;
        uint16_t L23 = L18 ^ L2;
        uint16_t L24 = L15 ^ L9;
        uint16_t L25 = L6 ^ L10;
        uint16_t L26 = L7 ^ L9;
        uint16_t L27 = L8 ^ L10;
        uint16_t L28 = L11 ^ L14;
        uint16_t L29 = L11 ^ L17;
        s->slice[7] = L6 ^ L24;
        s->slice[6] = ~(L16 ^ L26);
        s->slice[5] = ~(L19 ^ L28);
        s->slice[4] = L6 ^ L21;
        s->slice[3] = L20 ^ L22;
        s->slice[2] = L25 ^ L29;
        s->slice[1] = ~(L13 ^ L27);
        s->slice[0] = ~(L6 ^ L23);
    }
}
257 | | |
/* Mask covering bits [from, to) of a 16-bit slice. Requires 0 <= from < to <= 16. */
#define BIT_RANGE(from,to) (((1 << ((to) - (from))) - 1) << (from))

/* Select bits [from, to) of x and move them left/right by `shift` positions;
 * used by (Inv)ShiftRows to relocate cells within a row of the sliced state. */
#define BIT_RANGE_LEFT(x,from,to,shift) (((x) & BIT_RANGE((from), (to))) << (shift))
#define BIT_RANGE_RIGHT(x,from,to,shift) (((x) & BIT_RANGE((from), (to))) >> (shift))
262 | | |
/** AES ShiftRows: cyclically rotate row r of the state left by r positions.
 *
 *  Row r occupies bits [4*r, 4*r+4) of each 16-bit slice, so each rotation is
 *  expressed as a pair of masked shifts; the same movement is applied to all
 *  8 bit planes. Constant time: pure mask/shift arithmetic, no branches.
 */
static void ShiftRows(AES_state* s) {
    int i;
    for (i = 0; i < 8; i++) {
        uint16_t v = s->slice[i];
        s->slice[i] =
            (v & BIT_RANGE(0, 4)) |                                       /* row 0: unchanged */
            BIT_RANGE_LEFT(v, 4, 5, 3) | BIT_RANGE_RIGHT(v, 5, 8, 1) |    /* row 1: rotate by 1 */
            BIT_RANGE_LEFT(v, 8, 10, 2) | BIT_RANGE_RIGHT(v, 10, 12, 2) | /* row 2: rotate by 2 */
            BIT_RANGE_LEFT(v, 12, 15, 1) | BIT_RANGE_RIGHT(v, 15, 16, 3); /* row 3: rotate by 3 */
    }
}
274 | | |
/** Inverse of ShiftRows: cyclically rotate row r of the state right by r positions.
 *
 *  Same masked-shift technique as ShiftRows, with the shift amounts mirrored
 *  so that InvShiftRows(ShiftRows(s)) == s. Constant time.
 */
static void InvShiftRows(AES_state* s) {
    int i;
    for (i = 0; i < 8; i++) {
        uint16_t v = s->slice[i];
        s->slice[i] =
            (v & BIT_RANGE(0, 4)) |                                       /* row 0: unchanged */
            BIT_RANGE_LEFT(v, 4, 7, 1) | BIT_RANGE_RIGHT(v, 7, 8, 3) |    /* row 1: rotate back by 1 */
            BIT_RANGE_LEFT(v, 8, 10, 2) | BIT_RANGE_RIGHT(v, 10, 12, 2) | /* row 2: rotate back by 2 */
            BIT_RANGE_LEFT(v, 12, 13, 3) | BIT_RANGE_RIGHT(v, 13, 16, 1); /* row 3: rotate back by 3 */
    }
}
286 | | |
287 | 0 | #define ROT(x,b) (((x) >> ((b) * 4)) | ((x) << ((4-(b)) * 4))) |
288 | | |
/** AES MixColumns (inv=0) or InvMixColumns (inv=1), applied in-place to s.
 *  Constant time: built from rotations and XORs only; the `inv` branch is
 *  key-independent (it depends only on encrypt-vs-decrypt mode). */
static void MixColumns(AES_state* s, int inv) {
    /* The MixColumns transform treats the bytes of the columns of the state as
     * coefficients of a 3rd degree polynomial over GF(2^8) and multiplies them
     * by the fixed polynomial a(x) = {03}x^3 + {01}x^2 + {01}x + {02}, modulo
     * x^4 + {01}.
     *
     * In the inverse transform, we multiply by the inverse of a(x),
     * a^-1(x) = {0b}x^3 + {0d}x^2 + {09}x + {0e}. This is equal to
     * a(x) * ({04}x^2 + {05}), so we can reuse the forward transform's code
     * (found in OpenSSL's bsaes-x86_64.pl, attributed to Jussi Kivilinna)
     *
     * In the bitsliced representation, a multiplication of every column by x
     * mod x^4 + 1 is simply a right rotation.
     */

    /* Shared for both directions is a multiplication by a(x), which can be
     * rewritten as (x^3 + x^2 + x) + {02}*(x^3 + {01}).
     *
     * First compute s into the s? variables, (x^3 + {01}) * s into the s?_01
     * variables and (x^3 + x^2 + x)*s into the s?_123 variables.
     */
    uint16_t s0 = s->slice[0], s1 = s->slice[1], s2 = s->slice[2], s3 = s->slice[3];
    uint16_t s4 = s->slice[4], s5 = s->slice[5], s6 = s->slice[6], s7 = s->slice[7];
    uint16_t s0_01 = s0 ^ ROT(s0, 1), s0_123 = ROT(s0_01, 1) ^ ROT(s0, 3);
    uint16_t s1_01 = s1 ^ ROT(s1, 1), s1_123 = ROT(s1_01, 1) ^ ROT(s1, 3);
    uint16_t s2_01 = s2 ^ ROT(s2, 1), s2_123 = ROT(s2_01, 1) ^ ROT(s2, 3);
    uint16_t s3_01 = s3 ^ ROT(s3, 1), s3_123 = ROT(s3_01, 1) ^ ROT(s3, 3);
    uint16_t s4_01 = s4 ^ ROT(s4, 1), s4_123 = ROT(s4_01, 1) ^ ROT(s4, 3);
    uint16_t s5_01 = s5 ^ ROT(s5, 1), s5_123 = ROT(s5_01, 1) ^ ROT(s5, 3);
    uint16_t s6_01 = s6 ^ ROT(s6, 1), s6_123 = ROT(s6_01, 1) ^ ROT(s6, 3);
    uint16_t s7_01 = s7 ^ ROT(s7, 1), s7_123 = ROT(s7_01, 1) ^ ROT(s7, 3);
    /* Now compute s = s?_123 + {02} * s?_01 (multiplying by {02} shifts bit
     * planes up by one, feeding the top plane back per x^8 = x^4+x^3+x+1). */
    s->slice[0] = s7_01 ^ s0_123;
    s->slice[1] = s7_01 ^ s0_01 ^ s1_123;
    s->slice[2] = s1_01 ^ s2_123;
    s->slice[3] = s7_01 ^ s2_01 ^ s3_123;
    s->slice[4] = s7_01 ^ s3_01 ^ s4_123;
    s->slice[5] = s4_01 ^ s5_123;
    s->slice[6] = s5_01 ^ s6_123;
    s->slice[7] = s6_01 ^ s7_123;
    if (inv) {
        /* In the reverse direction, we further need to multiply by
         * {04}x^2 + {05}, which can be written as {04} * (x^2 + {01}) + {01}.
         *
         * First compute (x^2 + {01}) * s into the t?_02 variables: */
        uint16_t t0_02 = s->slice[0] ^ ROT(s->slice[0], 2);
        uint16_t t1_02 = s->slice[1] ^ ROT(s->slice[1], 2);
        uint16_t t2_02 = s->slice[2] ^ ROT(s->slice[2], 2);
        uint16_t t3_02 = s->slice[3] ^ ROT(s->slice[3], 2);
        uint16_t t4_02 = s->slice[4] ^ ROT(s->slice[4], 2);
        uint16_t t5_02 = s->slice[5] ^ ROT(s->slice[5], 2);
        uint16_t t6_02 = s->slice[6] ^ ROT(s->slice[6], 2);
        uint16_t t7_02 = s->slice[7] ^ ROT(s->slice[7], 2);
        /* And then update s += {04} * t?_02 ({04} shifts bit planes up by two,
         * again reducing modulo x^8 + x^4 + x^3 + x + 1). */
        s->slice[0] ^= t6_02;
        s->slice[1] ^= t6_02 ^ t7_02;
        s->slice[2] ^= t0_02 ^ t7_02;
        s->slice[3] ^= t1_02 ^ t6_02;
        s->slice[4] ^= t2_02 ^ t6_02 ^ t7_02;
        s->slice[5] ^= t3_02 ^ t7_02;
        s->slice[6] ^= t4_02;
        s->slice[7] ^= t5_02;
    }
}
353 | | |
354 | 0 | static void AddRoundKey(AES_state* s, const AES_state* round) { |
355 | 0 | int b; |
356 | 0 | for (b = 0; b < 8; b++) { |
357 | 0 | s->slice[b] ^= round->slice[b]; |
358 | 0 | } |
359 | 0 | } |
360 | | |
361 | | /** column_0(s) = column_c(a) */ |
362 | 0 | static void GetOneColumn(AES_state* s, const AES_state* a, int c) { |
363 | 0 | int b; |
364 | 0 | for (b = 0; b < 8; b++) { |
365 | 0 | s->slice[b] = (a->slice[b] >> c) & 0x1111; |
366 | 0 | } |
367 | 0 | } |
368 | | |
369 | | /** column_c1(r) |= (column_0(s) ^= column_c2(a)) */ |
370 | 0 | static void KeySetupColumnMix(AES_state* s, AES_state* r, const AES_state* a, int c1, int c2) { |
371 | 0 | int b; |
372 | 0 | for (b = 0; b < 8; b++) { |
373 | 0 | r->slice[b] |= ((s->slice[b] ^= ((a->slice[b] >> c2) & 0x1111)) & 0x1111) << c1; |
374 | 0 | } |
375 | 0 | } |
376 | | |
377 | | /** Rotate the rows in s one position upwards, and xor in r */ |
378 | 0 | static void KeySetupTransform(AES_state* s, const AES_state* r) { |
379 | 0 | int b; |
380 | 0 | for (b = 0; b < 8; b++) { |
381 | 0 | s->slice[b] = ((s->slice[b] >> 4) | (s->slice[b] << 12)) ^ r->slice[b]; |
382 | 0 | } |
383 | 0 | } |
384 | | |
385 | | /* Multiply the cells in s by x, as polynomials over GF(2) mod x^8 + x^4 + x^3 + x + 1 */ |
386 | 0 | static void MultX(AES_state* s) { |
387 | 0 | uint16_t top = s->slice[7]; |
388 | 0 | s->slice[7] = s->slice[6]; |
389 | 0 | s->slice[6] = s->slice[5]; |
390 | 0 | s->slice[5] = s->slice[4]; |
391 | 0 | s->slice[4] = s->slice[3] ^ top; |
392 | 0 | s->slice[3] = s->slice[2] ^ top; |
393 | 0 | s->slice[2] = s->slice[1]; |
394 | 0 | s->slice[1] = s->slice[0] ^ top; |
395 | 0 | s->slice[0] = top; |
396 | 0 | } |
397 | | |
/** Expand the cipher key into the key schedule.
 *
 * rounds must be a pointer to an array of size nrounds + 1.
 * key must be a pointer to 4 * nkeywords bytes.
 *
 * AES128 uses nkeywords = 4, nrounds = 10
 * AES192 uses nkeywords = 6, nrounds = 12
 * AES256 uses nkeywords = 8, nrounds = 14
 */
static void AES_setup(AES_state* rounds, const uint8_t* key, int nkeywords, int nrounds)
{
    int i;

    /* The one-byte round constant */
    AES_state rcon = {{1,0,0,0,0,0,0,0}};
    /* The number of the word being generated, modulo nkeywords */
    int pos = 0;
    /* The column representing the word currently being processed */
    AES_state column;

    /* Clear the whole schedule first; LoadByte/KeySetupColumnMix OR bits in. */
    for (i = 0; i < nrounds + 1; i++) {
        int b;
        for (b = 0; b < 8; b++) {
            rounds[i].slice[b] = 0;
        }
    }

    /* The first nkeywords round columns are just taken from the key directly. */
    for (i = 0; i < nkeywords; i++) {
        int r;
        for (r = 0; r < 4; r++) {
            LoadByte(&rounds[i >> 2], *(key++), r, i & 3);
        }
    }

    /* Seed the accumulator with the last key word loaded. */
    GetOneColumn(&column, &rounds[(nkeywords - 1) >> 2], (nkeywords - 1) & 3);

    for (i = nkeywords; i < 4 * (nrounds + 1); i++) {
        /* Transform column */
        if (pos == 0) {
            /* First word of each nkeywords-sized group: apply the S-box,
             * rotate the word, XOR in rcon, and advance rcon (multiply by x). */
            SubBytes(&column, 0);
            KeySetupTransform(&column, &rcon);
            MultX(&rcon);
        } else if (nkeywords > 6 && pos == 4) {
            /* AES-256 only: an extra S-box pass on the middle word of each group. */
            SubBytes(&column, 0);
        }
        if (++pos == nkeywords) pos = 0;
        /* New word i = transformed accumulator XOR word (i - nkeywords);
         * store it into column (i & 3) of round state (i >> 2). */
        KeySetupColumnMix(&column, &rounds[i >> 2], &rounds[(i - nkeywords) >> 2], i & 3, (i - nkeywords) & 3);
    }
}
448 | | |
449 | 0 | static void AES_encrypt(const AES_state* rounds, int nrounds, unsigned char* cipher16, const unsigned char* plain16) { |
450 | 0 | AES_state s = {{0}}; |
451 | 0 | int round; |
452 | |
|
453 | 0 | LoadBytes(&s, plain16); |
454 | 0 | AddRoundKey(&s, rounds++); |
455 | |
|
456 | 0 | for (round = 1; round < nrounds; round++) { |
457 | 0 | SubBytes(&s, 0); |
458 | 0 | ShiftRows(&s); |
459 | 0 | MixColumns(&s, 0); |
460 | 0 | AddRoundKey(&s, rounds++); |
461 | 0 | } |
462 | |
|
463 | 0 | SubBytes(&s, 0); |
464 | 0 | ShiftRows(&s); |
465 | 0 | AddRoundKey(&s, rounds); |
466 | |
|
467 | 0 | SaveBytes(cipher16, &s); |
468 | 0 | } |
469 | | |
470 | 0 | static void AES_decrypt(const AES_state* rounds, int nrounds, unsigned char* plain16, const unsigned char* cipher16) { |
471 | | /* Most AES decryption implementations use the alternate scheme |
472 | | * (the Equivalent Inverse Cipher), which allows for more code reuse between |
473 | | * the encryption and decryption code, but requires separate setup for both. |
474 | | */ |
475 | 0 | AES_state s = {{0}}; |
476 | 0 | int round; |
477 | |
|
478 | 0 | rounds += nrounds; |
479 | |
|
480 | 0 | LoadBytes(&s, cipher16); |
481 | 0 | AddRoundKey(&s, rounds--); |
482 | |
|
483 | 0 | for (round = 1; round < nrounds; round++) { |
484 | 0 | InvShiftRows(&s); |
485 | 0 | SubBytes(&s, 1); |
486 | 0 | AddRoundKey(&s, rounds--); |
487 | 0 | MixColumns(&s, 1); |
488 | 0 | } |
489 | |
|
490 | 0 | InvShiftRows(&s); |
491 | 0 | SubBytes(&s, 1); |
492 | 0 | AddRoundKey(&s, rounds); |
493 | |
|
494 | 0 | SaveBytes(plain16, &s); |
495 | 0 | } |
496 | | |
497 | 0 | void AES128_init(AES128_ctx* ctx, const unsigned char* key16) { |
498 | 0 | AES_setup(ctx->rk, key16, 4, 10); |
499 | 0 | } |
500 | | |
501 | 0 | void AES128_encrypt(const AES128_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) { |
502 | 0 | while (blocks--) { |
503 | 0 | AES_encrypt(ctx->rk, 10, cipher16, plain16); |
504 | 0 | cipher16 += 16; |
505 | 0 | plain16 += 16; |
506 | 0 | } |
507 | 0 | } |
508 | | |
509 | 0 | void AES128_decrypt(const AES128_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) { |
510 | 0 | while (blocks--) { |
511 | 0 | AES_decrypt(ctx->rk, 10, plain16, cipher16); |
512 | 0 | cipher16 += 16; |
513 | 0 | plain16 += 16; |
514 | 0 | } |
515 | 0 | } |
516 | | |
517 | 0 | void AES192_init(AES192_ctx* ctx, const unsigned char* key24) { |
518 | 0 | AES_setup(ctx->rk, key24, 6, 12); |
519 | 0 | } |
520 | | |
521 | 0 | void AES192_encrypt(const AES192_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) { |
522 | 0 | while (blocks--) { |
523 | 0 | AES_encrypt(ctx->rk, 12, cipher16, plain16); |
524 | 0 | cipher16 += 16; |
525 | 0 | plain16 += 16; |
526 | 0 | } |
527 | |
|
528 | 0 | } |
529 | | |
530 | 0 | void AES192_decrypt(const AES192_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) { |
531 | 0 | while (blocks--) { |
532 | 0 | AES_decrypt(ctx->rk, 12, plain16, cipher16); |
533 | 0 | cipher16 += 16; |
534 | 0 | plain16 += 16; |
535 | 0 | } |
536 | 0 | } |
537 | | |
538 | 0 | void AES256_init(AES256_ctx* ctx, const unsigned char* key32) { |
539 | 0 | AES_setup(ctx->rk, key32, 8, 14); |
540 | 0 | } |
541 | | |
542 | 0 | void AES256_encrypt(const AES256_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) { |
543 | 0 | while (blocks--) { |
544 | 0 | AES_encrypt(ctx->rk, 14, cipher16, plain16); |
545 | 0 | cipher16 += 16; |
546 | 0 | plain16 += 16; |
547 | 0 | } |
548 | 0 | } |
549 | | |
550 | 0 | void AES256_decrypt(const AES256_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) { |
551 | 0 | while (blocks--) { |
552 | 0 | AES_decrypt(ctx->rk, 14, plain16, cipher16); |
553 | 0 | cipher16 += 16; |
554 | 0 | plain16 += 16; |
555 | 0 | } |
556 | 0 | } |