/*
 * Copyright 2006-2020 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the OpenSSL license (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */
| 9 | |
| 10 | #include "internal/cryptlib.h" |
| 11 | |
| 12 | #include <openssl/aes.h> |
| 13 | #include "aes_local.h" |
| 14 | |
| 15 | /* XXX: probably some better way to do this */ |
| 16 | #if defined(__i386__) || defined(__x86_64__) |
| 17 | # define UNALIGNED_MEMOPS_ARE_FAST 1 |
| 18 | #else |
| 19 | # define UNALIGNED_MEMOPS_ARE_FAST 0 |
| 20 | #endif |
| 21 | |
| 22 | #define N_WORDS (AES_BLOCK_SIZE / sizeof(unsigned long)) |
| 23 | typedef struct { |
| 24 | unsigned long data[N_WORDS]; |
| 25 | #if defined(__GNUC__) && UNALIGNED_MEMOPS_ARE_FAST |
| 26 | } aes_block_t __attribute((__aligned__(1))); |
| 27 | #else |
| 28 | } aes_block_t; |
| 29 | #endif |
| 30 | |
| 31 | #if UNALIGNED_MEMOPS_ARE_FAST |
| 32 | # define load_block(d, s) (d) = *(const aes_block_t *)(s) |
| 33 | # define store_block(d, s) *(aes_block_t *)(d) = (s) |
| 34 | #else |
| 35 | # define load_block(d, s) memcpy((d).data, (s), AES_BLOCK_SIZE) |
| 36 | # define store_block(d, s) memcpy((d), (s).data, AES_BLOCK_SIZE) |
| 37 | #endif |
| 38 | |
| 39 | /* N.B. The IV for this mode is _twice_ the block size */ |
| 40 | |
| 41 | void AES_ige_encrypt(const unsigned char *in, unsigned char *out, |
| 42 | size_t length, const AES_KEY *key, |
| 43 | unsigned char *ivec, const int enc) |
| 44 | { |
| 45 | size_t n; |
| 46 | size_t len = length; |
| 47 | |
| 48 | if (length == 0) |
| 49 | return; |
| 50 | |
| 51 | OPENSSL_assert(in && out && key && ivec); |
| 52 | OPENSSL_assert((AES_ENCRYPT == enc) || (AES_DECRYPT == enc)); |
| 53 | OPENSSL_assert((length % AES_BLOCK_SIZE) == 0); |
| 54 | |
| 55 | len = length / AES_BLOCK_SIZE; |
| 56 | |
| 57 | if (AES_ENCRYPT == enc) { |
| 58 | if (in != out && |
| 59 | (UNALIGNED_MEMOPS_ARE_FAST |
| 60 | || ((size_t)in | (size_t)out | (size_t)ivec) % sizeof(long) == |
| 61 | 0)) { |
| 62 | aes_block_t *ivp = (aes_block_t *) ivec; |
| 63 | aes_block_t *iv2p = (aes_block_t *) (ivec + AES_BLOCK_SIZE); |
| 64 | |
| 65 | while (len) { |
| 66 | aes_block_t *inp = (aes_block_t *) in; |
| 67 | aes_block_t *outp = (aes_block_t *) out; |
| 68 | |
| 69 | for (n = 0; n < N_WORDS; ++n) |
| 70 | outp->data[n] = inp->data[n] ^ ivp->data[n]; |
| 71 | AES_encrypt((unsigned char *)outp->data, |
| 72 | (unsigned char *)outp->data, key); |
| 73 | for (n = 0; n < N_WORDS; ++n) |
| 74 | outp->data[n] ^= iv2p->data[n]; |
| 75 | ivp = outp; |
| 76 | iv2p = inp; |
| 77 | --len; |
| 78 | in += AES_BLOCK_SIZE; |
| 79 | out += AES_BLOCK_SIZE; |
| 80 | } |
| 81 | memcpy(ivec, ivp->data, AES_BLOCK_SIZE); |
| 82 | memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE); |
| 83 | } else { |
| 84 | aes_block_t tmp, tmp2; |
| 85 | aes_block_t iv; |
| 86 | aes_block_t iv2; |
| 87 | |
| 88 | load_block(iv, ivec); |
| 89 | load_block(iv2, ivec + AES_BLOCK_SIZE); |
| 90 | |
| 91 | while (len) { |
| 92 | load_block(tmp, in); |
| 93 | for (n = 0; n < N_WORDS; ++n) |
| 94 | tmp2.data[n] = tmp.data[n] ^ iv.data[n]; |
| 95 | AES_encrypt((unsigned char *)tmp2.data, |
| 96 | (unsigned char *)tmp2.data, key); |
| 97 | for (n = 0; n < N_WORDS; ++n) |
| 98 | tmp2.data[n] ^= iv2.data[n]; |
| 99 | store_block(out, tmp2); |
| 100 | iv = tmp2; |
| 101 | iv2 = tmp; |
| 102 | --len; |
| 103 | in += AES_BLOCK_SIZE; |
| 104 | out += AES_BLOCK_SIZE; |
| 105 | } |
| 106 | memcpy(ivec, iv.data, AES_BLOCK_SIZE); |
| 107 | memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE); |
| 108 | } |
| 109 | } else { |
| 110 | if (in != out && |
| 111 | (UNALIGNED_MEMOPS_ARE_FAST |
| 112 | || ((size_t)in | (size_t)out | (size_t)ivec) % sizeof(long) == |
| 113 | 0)) { |
| 114 | aes_block_t *ivp = (aes_block_t *) ivec; |
| 115 | aes_block_t *iv2p = (aes_block_t *) (ivec + AES_BLOCK_SIZE); |
| 116 | |
| 117 | while (len) { |
| 118 | aes_block_t tmp; |
| 119 | aes_block_t *inp = (aes_block_t *) in; |
| 120 | aes_block_t *outp = (aes_block_t *) out; |
| 121 | |
| 122 | for (n = 0; n < N_WORDS; ++n) |
| 123 | tmp.data[n] = inp->data[n] ^ iv2p->data[n]; |
| 124 | AES_decrypt((unsigned char *)tmp.data, |
| 125 | (unsigned char *)outp->data, key); |
| 126 | for (n = 0; n < N_WORDS; ++n) |
| 127 | outp->data[n] ^= ivp->data[n]; |
| 128 | ivp = inp; |
| 129 | iv2p = outp; |
| 130 | --len; |
| 131 | in += AES_BLOCK_SIZE; |
| 132 | out += AES_BLOCK_SIZE; |
| 133 | } |
| 134 | memcpy(ivec, ivp->data, AES_BLOCK_SIZE); |
| 135 | memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE); |
| 136 | } else { |
| 137 | aes_block_t tmp, tmp2; |
| 138 | aes_block_t iv; |
| 139 | aes_block_t iv2; |
| 140 | |
| 141 | load_block(iv, ivec); |
| 142 | load_block(iv2, ivec + AES_BLOCK_SIZE); |
| 143 | |
| 144 | while (len) { |
| 145 | load_block(tmp, in); |
| 146 | tmp2 = tmp; |
| 147 | for (n = 0; n < N_WORDS; ++n) |
| 148 | tmp.data[n] ^= iv2.data[n]; |
| 149 | AES_decrypt((unsigned char *)tmp.data, |
| 150 | (unsigned char *)tmp.data, key); |
| 151 | for (n = 0; n < N_WORDS; ++n) |
| 152 | tmp.data[n] ^= iv.data[n]; |
| 153 | store_block(out, tmp); |
| 154 | iv = tmp2; |
| 155 | iv2 = tmp; |
| 156 | --len; |
| 157 | in += AES_BLOCK_SIZE; |
| 158 | out += AES_BLOCK_SIZE; |
| 159 | } |
| 160 | memcpy(ivec, iv.data, AES_BLOCK_SIZE); |
| 161 | memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE); |
| 162 | } |
| 163 | } |
| 164 | } |
| 165 | |
| 166 | /* |
| 167 | * Note that its effectively impossible to do biIGE in anything other |
| 168 | * than a single pass, so no provision is made for chaining. |
| 169 | */ |
| 170 | |
| 171 | /* N.B. The IV for this mode is _four times_ the block size */ |
| 172 | |
| 173 | void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out, |
| 174 | size_t length, const AES_KEY *key, |
| 175 | const AES_KEY *key2, const unsigned char *ivec, |
| 176 | const int enc) |
| 177 | { |
| 178 | size_t n; |
| 179 | size_t len = length; |
| 180 | unsigned char tmp[AES_BLOCK_SIZE]; |
| 181 | unsigned char tmp2[AES_BLOCK_SIZE]; |
| 182 | unsigned char tmp3[AES_BLOCK_SIZE]; |
| 183 | unsigned char prev[AES_BLOCK_SIZE]; |
| 184 | const unsigned char *iv; |
| 185 | const unsigned char *iv2; |
| 186 | |
| 187 | OPENSSL_assert(in && out && key && ivec); |
| 188 | OPENSSL_assert((AES_ENCRYPT == enc) || (AES_DECRYPT == enc)); |
| 189 | OPENSSL_assert((length % AES_BLOCK_SIZE) == 0); |
| 190 | |
| 191 | if (AES_ENCRYPT == enc) { |
| 192 | /* |
| 193 | * XXX: Do a separate case for when in != out (strictly should check |
| 194 | * for overlap, too) |
| 195 | */ |
| 196 | |
| 197 | /* First the forward pass */ |
| 198 | iv = ivec; |
| 199 | iv2 = ivec + AES_BLOCK_SIZE; |
| 200 | while (len >= AES_BLOCK_SIZE) { |
| 201 | for (n = 0; n < AES_BLOCK_SIZE; ++n) |
| 202 | out[n] = in[n] ^ iv[n]; |
| 203 | AES_encrypt(out, out, key); |
| 204 | for (n = 0; n < AES_BLOCK_SIZE; ++n) |
| 205 | out[n] ^= iv2[n]; |
| 206 | iv = out; |
| 207 | memcpy(prev, in, AES_BLOCK_SIZE); |
| 208 | iv2 = prev; |
| 209 | len -= AES_BLOCK_SIZE; |
| 210 | in += AES_BLOCK_SIZE; |
| 211 | out += AES_BLOCK_SIZE; |
| 212 | } |
| 213 | |
| 214 | /* And now backwards */ |
| 215 | iv = ivec + AES_BLOCK_SIZE * 2; |
| 216 | iv2 = ivec + AES_BLOCK_SIZE * 3; |
| 217 | len = length; |
| 218 | while (len >= AES_BLOCK_SIZE) { |
| 219 | out -= AES_BLOCK_SIZE; |
| 220 | /* |
| 221 | * XXX: reduce copies by alternating between buffers |
| 222 | */ |
| 223 | memcpy(tmp, out, AES_BLOCK_SIZE); |
| 224 | for (n = 0; n < AES_BLOCK_SIZE; ++n) |
| 225 | out[n] ^= iv[n]; |
| 226 | /* |
| 227 | * hexdump(stdout, "out ^ iv", out, AES_BLOCK_SIZE); |
| 228 | */ |
| 229 | AES_encrypt(out, out, key); |
| 230 | /* |
| 231 | * hexdump(stdout,"enc", out, AES_BLOCK_SIZE); |
| 232 | */ |
| 233 | /* |
| 234 | * hexdump(stdout,"iv2", iv2, AES_BLOCK_SIZE); |
| 235 | */ |
| 236 | for (n = 0; n < AES_BLOCK_SIZE; ++n) |
| 237 | out[n] ^= iv2[n]; |
| 238 | /* |
| 239 | * hexdump(stdout,"out", out, AES_BLOCK_SIZE); |
| 240 | */ |
| 241 | iv = out; |
| 242 | memcpy(prev, tmp, AES_BLOCK_SIZE); |
| 243 | iv2 = prev; |
| 244 | len -= AES_BLOCK_SIZE; |
| 245 | } |
| 246 | } else { |
| 247 | /* First backwards */ |
| 248 | iv = ivec + AES_BLOCK_SIZE * 2; |
| 249 | iv2 = ivec + AES_BLOCK_SIZE * 3; |
| 250 | in += length; |
| 251 | out += length; |
| 252 | while (len >= AES_BLOCK_SIZE) { |
| 253 | in -= AES_BLOCK_SIZE; |
| 254 | out -= AES_BLOCK_SIZE; |
| 255 | memcpy(tmp, in, AES_BLOCK_SIZE); |
| 256 | memcpy(tmp2, in, AES_BLOCK_SIZE); |
| 257 | for (n = 0; n < AES_BLOCK_SIZE; ++n) |
| 258 | tmp[n] ^= iv2[n]; |
| 259 | AES_decrypt(tmp, out, key); |
| 260 | for (n = 0; n < AES_BLOCK_SIZE; ++n) |
| 261 | out[n] ^= iv[n]; |
| 262 | memcpy(tmp3, tmp2, AES_BLOCK_SIZE); |
| 263 | iv = tmp3; |
| 264 | iv2 = out; |
| 265 | len -= AES_BLOCK_SIZE; |
| 266 | } |
| 267 | |
| 268 | /* And now forwards */ |
| 269 | iv = ivec; |
| 270 | iv2 = ivec + AES_BLOCK_SIZE; |
| 271 | len = length; |
| 272 | while (len >= AES_BLOCK_SIZE) { |
| 273 | memcpy(tmp, out, AES_BLOCK_SIZE); |
| 274 | memcpy(tmp2, out, AES_BLOCK_SIZE); |
| 275 | for (n = 0; n < AES_BLOCK_SIZE; ++n) |
| 276 | tmp[n] ^= iv2[n]; |
| 277 | AES_decrypt(tmp, out, key); |
| 278 | for (n = 0; n < AES_BLOCK_SIZE; ++n) |
| 279 | out[n] ^= iv[n]; |
| 280 | memcpy(tmp3, tmp2, AES_BLOCK_SIZE); |
| 281 | iv = tmp3; |
| 282 | iv2 = out; |
| 283 | len -= AES_BLOCK_SIZE; |
| 284 | in += AES_BLOCK_SIZE; |
| 285 | out += AES_BLOCK_SIZE; |
| 286 | } |
| 287 | } |
| 288 | } |