| xj | b04a402 | 2021-11-25 15:01:52 +0800 | [diff] [blame] | 1 | /* | 
|  | 2 | * AES-NI + SSE2 implementation of AEGIS-256 | 
|  | 3 | * | 
|  | 4 | * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> | 
|  | 5 | * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. | 
|  | 6 | * | 
|  | 7 | * This program is free software; you can redistribute it and/or modify it | 
|  | 8 | * under the terms of the GNU General Public License version 2 as published | 
|  | 9 | * by the Free Software Foundation. | 
|  | 10 | */ | 
|  | 11 |  | 
|  | 12 | #include <linux/linkage.h> | 
|  | 13 | #include <asm/frame.h> | 
|  | 14 |  | 
|  | 15 | #define STATE0	%xmm0 | 
|  | 16 | #define STATE1	%xmm1 | 
|  | 17 | #define STATE2	%xmm2 | 
|  | 18 | #define STATE3	%xmm3 | 
|  | 19 | #define STATE4	%xmm4 | 
|  | 20 | #define STATE5	%xmm5 | 
|  | 21 | #define MSG	%xmm6 | 
|  | 22 | #define T0	%xmm7 | 
|  | 23 | #define T1	%xmm8 | 
|  | 24 | #define T2	%xmm9 | 
|  | 25 | #define T3	%xmm10 | 
|  | 26 |  | 
|  | 27 | #define STATEP	%rdi | 
|  | 28 | #define LEN	%rsi | 
|  | 29 | #define SRC	%rdx | 
|  | 30 | #define DST	%rcx | 
|  | 31 |  | 
|  | 32 | .section .rodata.cst16.aegis256_const, "aM", @progbits, 32 | 
|  | 33 | .align 16 | 
|  | 34 | .Laegis256_const_0: | 
|  | 35 | .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d | 
|  | 36 | .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 | 
|  | 37 | .Laegis256_const_1: | 
|  | 38 | .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 | 
|  | 39 | .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd | 
|  | 40 |  | 
|  | 41 | .section .rodata.cst16.aegis256_counter, "aM", @progbits, 16 | 
|  | 42 | .align 16 | 
|  | 43 | .Laegis256_counter: | 
|  | 44 | .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 | 
|  | 45 | .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f | 
|  | 46 |  | 
|  | 47 | .text | 
|  | 48 |  | 
|  | 49 | /* | 
|  | 50 | * __load_partial: internal ABI | 
|  | 51 | * input: | 
|  | 52 | *   LEN - bytes | 
|  | 53 | *   SRC - src | 
|  | 54 | * output: | 
|  | 55 | *   MSG  - message block | 
|  | 56 | * changed: | 
|  | 57 | *   T0 | 
|  | 58 | *   %r8 | 
|  | 59 | *   %r9 | 
|  | 60 | */ | 
|  | 61 | __load_partial: | 
|  | 62 | xor %r9d, %r9d | 
|  | 63 | pxor MSG, MSG | 
|  | 64 |  | 
|  | 65 | mov LEN, %r8 | 
|  | 66 | and $0x1, %r8 | 
|  | 67 | jz .Lld_partial_1 | 
|  | 68 |  | 
|  | 69 | mov LEN, %r8 | 
|  | 70 | and $0x1E, %r8 | 
|  | 71 | add SRC, %r8 | 
|  | 72 | mov (%r8), %r9b | 
|  | 73 |  | 
|  | 74 | .Lld_partial_1: | 
|  | 75 | mov LEN, %r8 | 
|  | 76 | and $0x2, %r8 | 
|  | 77 | jz .Lld_partial_2 | 
|  | 78 |  | 
|  | 79 | mov LEN, %r8 | 
|  | 80 | and $0x1C, %r8 | 
|  | 81 | add SRC, %r8 | 
|  | 82 | shl $0x10, %r9 | 
|  | 83 | mov (%r8), %r9w | 
|  | 84 |  | 
|  | 85 | .Lld_partial_2: | 
|  | 86 | mov LEN, %r8 | 
|  | 87 | and $0x4, %r8 | 
|  | 88 | jz .Lld_partial_4 | 
|  | 89 |  | 
|  | 90 | mov LEN, %r8 | 
|  | 91 | and $0x18, %r8 | 
|  | 92 | add SRC, %r8 | 
|  | 93 | shl $32, %r9 | 
|  | 94 | mov (%r8), %r8d | 
|  | 95 | xor %r8, %r9 | 
|  | 96 |  | 
|  | 97 | .Lld_partial_4: | 
|  | 98 | movq %r9, MSG | 
|  | 99 |  | 
|  | 100 | mov LEN, %r8 | 
|  | 101 | and $0x8, %r8 | 
|  | 102 | jz .Lld_partial_8 | 
|  | 103 |  | 
|  | 104 | mov LEN, %r8 | 
|  | 105 | and $0x10, %r8 | 
|  | 106 | add SRC, %r8 | 
|  | 107 | pslldq $8, MSG | 
|  | 108 | movq (%r8), T0 | 
|  | 109 | pxor T0, MSG | 
|  | 110 |  | 
|  | 111 | .Lld_partial_8: | 
|  | 112 | ret | 
|  | 113 | ENDPROC(__load_partial) | 
|  | 114 |  | 
|  | 115 | /* | 
|  | 116 | * __store_partial: internal ABI | 
|  | 117 | * input: | 
|  | 118 | *   LEN - bytes | 
|  | 119 | *   DST - dst | 
|  | 120 | * output: | 
|  | 121 | *   T0   - message block | 
|  | 122 | * changed: | 
|  | 123 | *   %r8 | 
|  | 124 | *   %r9 | 
|  | 125 | *   %r10 | 
|  | 126 | */ | 
|  | 127 | __store_partial: | 
|  | 128 | mov LEN, %r8 | 
|  | 129 | mov DST, %r9 | 
|  | 130 |  | 
|  | 131 | movq T0, %r10 | 
|  | 132 |  | 
|  | 133 | cmp $8, %r8 | 
|  | 134 | jl .Lst_partial_8 | 
|  | 135 |  | 
|  | 136 | mov %r10, (%r9) | 
|  | 137 | psrldq $8, T0 | 
|  | 138 | movq T0, %r10 | 
|  | 139 |  | 
|  | 140 | sub $8, %r8 | 
|  | 141 | add $8, %r9 | 
|  | 142 |  | 
|  | 143 | .Lst_partial_8: | 
|  | 144 | cmp $4, %r8 | 
|  | 145 | jl .Lst_partial_4 | 
|  | 146 |  | 
|  | 147 | mov %r10d, (%r9) | 
|  | 148 | shr $32, %r10 | 
|  | 149 |  | 
|  | 150 | sub $4, %r8 | 
|  | 151 | add $4, %r9 | 
|  | 152 |  | 
|  | 153 | .Lst_partial_4: | 
|  | 154 | cmp $2, %r8 | 
|  | 155 | jl .Lst_partial_2 | 
|  | 156 |  | 
|  | 157 | mov %r10w, (%r9) | 
|  | 158 | shr $0x10, %r10 | 
|  | 159 |  | 
|  | 160 | sub $2, %r8 | 
|  | 161 | add $2, %r9 | 
|  | 162 |  | 
|  | 163 | .Lst_partial_2: | 
|  | 164 | cmp $1, %r8 | 
|  | 165 | jl .Lst_partial_1 | 
|  | 166 |  | 
|  | 167 | mov %r10b, (%r9) | 
|  | 168 |  | 
|  | 169 | .Lst_partial_1: | 
|  | 170 | ret | 
|  | 171 | ENDPROC(__store_partial) | 
|  | 172 |  | 
|  | 173 | .macro update | 
|  | 174 | movdqa STATE5, T0 | 
|  | 175 | aesenc STATE0, STATE5 | 
|  | 176 | aesenc STATE1, STATE0 | 
|  | 177 | aesenc STATE2, STATE1 | 
|  | 178 | aesenc STATE3, STATE2 | 
|  | 179 | aesenc STATE4, STATE3 | 
|  | 180 | aesenc T0,     STATE4 | 
|  | 181 | .endm | 
|  | 182 |  | 
|  | 183 | .macro update0 m | 
|  | 184 | update | 
|  | 185 | pxor \m, STATE5 | 
|  | 186 | .endm | 
|  | 187 |  | 
|  | 188 | .macro update1 m | 
|  | 189 | update | 
|  | 190 | pxor \m, STATE4 | 
|  | 191 | .endm | 
|  | 192 |  | 
|  | 193 | .macro update2 m | 
|  | 194 | update | 
|  | 195 | pxor \m, STATE3 | 
|  | 196 | .endm | 
|  | 197 |  | 
|  | 198 | .macro update3 m | 
|  | 199 | update | 
|  | 200 | pxor \m, STATE2 | 
|  | 201 | .endm | 
|  | 202 |  | 
|  | 203 | .macro update4 m | 
|  | 204 | update | 
|  | 205 | pxor \m, STATE1 | 
|  | 206 | .endm | 
|  | 207 |  | 
|  | 208 | .macro update5 m | 
|  | 209 | update | 
|  | 210 | pxor \m, STATE0 | 
|  | 211 | .endm | 
|  | 212 |  | 
|  | 213 | .macro state_load | 
|  | 214 | movdqu 0x00(STATEP), STATE0 | 
|  | 215 | movdqu 0x10(STATEP), STATE1 | 
|  | 216 | movdqu 0x20(STATEP), STATE2 | 
|  | 217 | movdqu 0x30(STATEP), STATE3 | 
|  | 218 | movdqu 0x40(STATEP), STATE4 | 
|  | 219 | movdqu 0x50(STATEP), STATE5 | 
|  | 220 | .endm | 
|  | 221 |  | 
|  | 222 | .macro state_store s0 s1 s2 s3 s4 s5 | 
|  | 223 | movdqu \s5, 0x00(STATEP) | 
|  | 224 | movdqu \s0, 0x10(STATEP) | 
|  | 225 | movdqu \s1, 0x20(STATEP) | 
|  | 226 | movdqu \s2, 0x30(STATEP) | 
|  | 227 | movdqu \s3, 0x40(STATEP) | 
|  | 228 | movdqu \s4, 0x50(STATEP) | 
|  | 229 | .endm | 
|  | 230 |  | 
|  | 231 | .macro state_store0 | 
|  | 232 | state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 | 
|  | 233 | .endm | 
|  | 234 |  | 
|  | 235 | .macro state_store1 | 
|  | 236 | state_store STATE5 STATE0 STATE1 STATE2 STATE3 STATE4 | 
|  | 237 | .endm | 
|  | 238 |  | 
|  | 239 | .macro state_store2 | 
|  | 240 | state_store STATE4 STATE5 STATE0 STATE1 STATE2 STATE3 | 
|  | 241 | .endm | 
|  | 242 |  | 
|  | 243 | .macro state_store3 | 
|  | 244 | state_store STATE3 STATE4 STATE5 STATE0 STATE1 STATE2 | 
|  | 245 | .endm | 
|  | 246 |  | 
|  | 247 | .macro state_store4 | 
|  | 248 | state_store STATE2 STATE3 STATE4 STATE5 STATE0 STATE1 | 
|  | 249 | .endm | 
|  | 250 |  | 
|  | 251 | .macro state_store5 | 
|  | 252 | state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE0 | 
|  | 253 | .endm | 
|  | 254 |  | 
|  | 255 | /* | 
|  | 256 | * void crypto_aegis256_aesni_init(void *state, const void *key, const void *iv); | 
|  | 257 | */ | 
|  | 258 | ENTRY(crypto_aegis256_aesni_init) | 
|  | 259 | FRAME_BEGIN | 
|  | 260 |  | 
|  | 261 | /* load key: */ | 
|  | 262 | movdqa 0x00(%rsi), MSG | 
|  | 263 | movdqa 0x10(%rsi), T1 | 
|  | 264 | movdqa MSG, STATE4 | 
|  | 265 | movdqa T1, STATE5 | 
|  | 266 |  | 
|  | 267 | /* load IV: */ | 
|  | 268 | movdqu 0x00(%rdx), T2 | 
|  | 269 | movdqu 0x10(%rdx), T3 | 
|  | 270 | pxor MSG, T2 | 
|  | 271 | pxor T1, T3 | 
|  | 272 | movdqa T2, STATE0 | 
|  | 273 | movdqa T3, STATE1 | 
|  | 274 |  | 
|  | 275 | /* load the constants: */ | 
|  | 276 | movdqa .Laegis256_const_0, STATE3 | 
|  | 277 | movdqa .Laegis256_const_1, STATE2 | 
|  | 278 | pxor STATE3, STATE4 | 
|  | 279 | pxor STATE2, STATE5 | 
|  | 280 |  | 
|  | 281 | /* update 10 times with IV and KEY: */ | 
|  | 282 | update0 MSG | 
|  | 283 | update1 T1 | 
|  | 284 | update2 T2 | 
|  | 285 | update3 T3 | 
|  | 286 | update4 MSG | 
|  | 287 | update5 T1 | 
|  | 288 | update0 T2 | 
|  | 289 | update1 T3 | 
|  | 290 | update2 MSG | 
|  | 291 | update3 T1 | 
|  | 292 | update4 T2 | 
|  | 293 | update5 T3 | 
|  | 294 | update0 MSG | 
|  | 295 | update1 T1 | 
|  | 296 | update2 T2 | 
|  | 297 | update3 T3 | 
|  | 298 |  | 
|  | 299 | state_store3 | 
|  | 300 |  | 
|  | 301 | FRAME_END | 
|  | 302 | ret | 
|  | 303 | ENDPROC(crypto_aegis256_aesni_init) | 
|  | 304 |  | 
|  | 305 | .macro ad_block a i | 
|  | 306 | movdq\a (\i * 0x10)(SRC), MSG | 
|  | 307 | update\i MSG | 
|  | 308 | sub $0x10, LEN | 
|  | 309 | cmp $0x10, LEN | 
|  | 310 | jl .Lad_out_\i | 
|  | 311 | .endm | 
|  | 312 |  | 
|  | 313 | /* | 
|  | 314 | * void crypto_aegis256_aesni_ad(void *state, unsigned int length, | 
|  | 315 | *                               const void *data); | 
|  | 316 | */ | 
|  | 317 | ENTRY(crypto_aegis256_aesni_ad) | 
|  | 318 | FRAME_BEGIN | 
|  | 319 |  | 
|  | 320 | cmp $0x10, LEN | 
|  | 321 | jb .Lad_out | 
|  | 322 |  | 
|  | 323 | state_load | 
|  | 324 |  | 
|  | 325 | mov  SRC, %r8 | 
|  | 326 | and $0xf, %r8 | 
|  | 327 | jnz .Lad_u_loop | 
|  | 328 |  | 
|  | 329 | .align 8 | 
|  | 330 | .Lad_a_loop: | 
|  | 331 | ad_block a 0 | 
|  | 332 | ad_block a 1 | 
|  | 333 | ad_block a 2 | 
|  | 334 | ad_block a 3 | 
|  | 335 | ad_block a 4 | 
|  | 336 | ad_block a 5 | 
|  | 337 |  | 
|  | 338 | add $0x60, SRC | 
|  | 339 | jmp .Lad_a_loop | 
|  | 340 |  | 
|  | 341 | .align 8 | 
|  | 342 | .Lad_u_loop: | 
|  | 343 | ad_block u 0 | 
|  | 344 | ad_block u 1 | 
|  | 345 | ad_block u 2 | 
|  | 346 | ad_block u 3 | 
|  | 347 | ad_block u 4 | 
|  | 348 | ad_block u 5 | 
|  | 349 |  | 
|  | 350 | add $0x60, SRC | 
|  | 351 | jmp .Lad_u_loop | 
|  | 352 |  | 
|  | 353 | .Lad_out_0: | 
|  | 354 | state_store0 | 
|  | 355 | FRAME_END | 
|  | 356 | ret | 
|  | 357 |  | 
|  | 358 | .Lad_out_1: | 
|  | 359 | state_store1 | 
|  | 360 | FRAME_END | 
|  | 361 | ret | 
|  | 362 |  | 
|  | 363 | .Lad_out_2: | 
|  | 364 | state_store2 | 
|  | 365 | FRAME_END | 
|  | 366 | ret | 
|  | 367 |  | 
|  | 368 | .Lad_out_3: | 
|  | 369 | state_store3 | 
|  | 370 | FRAME_END | 
|  | 371 | ret | 
|  | 372 |  | 
|  | 373 | .Lad_out_4: | 
|  | 374 | state_store4 | 
|  | 375 | FRAME_END | 
|  | 376 | ret | 
|  | 377 |  | 
|  | 378 | .Lad_out_5: | 
|  | 379 | state_store5 | 
|  | 380 | FRAME_END | 
|  | 381 | ret | 
|  | 382 |  | 
|  | 383 | .Lad_out: | 
|  | 384 | FRAME_END | 
|  | 385 | ret | 
|  | 386 | ENDPROC(crypto_aegis256_aesni_ad) | 
|  | 387 |  | 
|  | 388 | .macro crypt m s0 s1 s2 s3 s4 s5 | 
|  | 389 | pxor \s1, \m | 
|  | 390 | pxor \s4, \m | 
|  | 391 | pxor \s5, \m | 
|  | 392 | movdqa \s2, T3 | 
|  | 393 | pand \s3, T3 | 
|  | 394 | pxor T3, \m | 
|  | 395 | .endm | 
|  | 396 |  | 
|  | 397 | .macro crypt0 m | 
|  | 398 | crypt \m STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 | 
|  | 399 | .endm | 
|  | 400 |  | 
|  | 401 | .macro crypt1 m | 
|  | 402 | crypt \m STATE5 STATE0 STATE1 STATE2 STATE3 STATE4 | 
|  | 403 | .endm | 
|  | 404 |  | 
|  | 405 | .macro crypt2 m | 
|  | 406 | crypt \m STATE4 STATE5 STATE0 STATE1 STATE2 STATE3 | 
|  | 407 | .endm | 
|  | 408 |  | 
|  | 409 | .macro crypt3 m | 
|  | 410 | crypt \m STATE3 STATE4 STATE5 STATE0 STATE1 STATE2 | 
|  | 411 | .endm | 
|  | 412 |  | 
|  | 413 | .macro crypt4 m | 
|  | 414 | crypt \m STATE2 STATE3 STATE4 STATE5 STATE0 STATE1 | 
|  | 415 | .endm | 
|  | 416 |  | 
|  | 417 | .macro crypt5 m | 
|  | 418 | crypt \m STATE1 STATE2 STATE3 STATE4 STATE5 STATE0 | 
|  | 419 | .endm | 
|  | 420 |  | 
|  | 421 | .macro encrypt_block a i | 
|  | 422 | movdq\a (\i * 0x10)(SRC), MSG | 
|  | 423 | movdqa MSG, T0 | 
|  | 424 | crypt\i T0 | 
|  | 425 | movdq\a T0, (\i * 0x10)(DST) | 
|  | 426 |  | 
|  | 427 | update\i MSG | 
|  | 428 |  | 
|  | 429 | sub $0x10, LEN | 
|  | 430 | cmp $0x10, LEN | 
|  | 431 | jl .Lenc_out_\i | 
|  | 432 | .endm | 
|  | 433 |  | 
|  | 434 | .macro decrypt_block a i | 
|  | 435 | movdq\a (\i * 0x10)(SRC), MSG | 
|  | 436 | crypt\i MSG | 
|  | 437 | movdq\a MSG, (\i * 0x10)(DST) | 
|  | 438 |  | 
|  | 439 | update\i MSG | 
|  | 440 |  | 
|  | 441 | sub $0x10, LEN | 
|  | 442 | cmp $0x10, LEN | 
|  | 443 | jl .Ldec_out_\i | 
|  | 444 | .endm | 
|  | 445 |  | 
|  | 446 | /* | 
|  | 447 | * void crypto_aegis256_aesni_enc(void *state, unsigned int length, | 
|  | 448 | *                                const void *src, void *dst); | 
|  | 449 | */ | 
|  | 450 | ENTRY(crypto_aegis256_aesni_enc) | 
|  | 451 | FRAME_BEGIN | 
|  | 452 |  | 
|  | 453 | cmp $0x10, LEN | 
|  | 454 | jb .Lenc_out | 
|  | 455 |  | 
|  | 456 | state_load | 
|  | 457 |  | 
|  | 458 | mov  SRC, %r8 | 
|  | 459 | or   DST, %r8 | 
|  | 460 | and $0xf, %r8 | 
|  | 461 | jnz .Lenc_u_loop | 
|  | 462 |  | 
|  | 463 | .align 8 | 
|  | 464 | .Lenc_a_loop: | 
|  | 465 | encrypt_block a 0 | 
|  | 466 | encrypt_block a 1 | 
|  | 467 | encrypt_block a 2 | 
|  | 468 | encrypt_block a 3 | 
|  | 469 | encrypt_block a 4 | 
|  | 470 | encrypt_block a 5 | 
|  | 471 |  | 
|  | 472 | add $0x60, SRC | 
|  | 473 | add $0x60, DST | 
|  | 474 | jmp .Lenc_a_loop | 
|  | 475 |  | 
|  | 476 | .align 8 | 
|  | 477 | .Lenc_u_loop: | 
|  | 478 | encrypt_block u 0 | 
|  | 479 | encrypt_block u 1 | 
|  | 480 | encrypt_block u 2 | 
|  | 481 | encrypt_block u 3 | 
|  | 482 | encrypt_block u 4 | 
|  | 483 | encrypt_block u 5 | 
|  | 484 |  | 
|  | 485 | add $0x60, SRC | 
|  | 486 | add $0x60, DST | 
|  | 487 | jmp .Lenc_u_loop | 
|  | 488 |  | 
|  | 489 | .Lenc_out_0: | 
|  | 490 | state_store0 | 
|  | 491 | FRAME_END | 
|  | 492 | ret | 
|  | 493 |  | 
|  | 494 | .Lenc_out_1: | 
|  | 495 | state_store1 | 
|  | 496 | FRAME_END | 
|  | 497 | ret | 
|  | 498 |  | 
|  | 499 | .Lenc_out_2: | 
|  | 500 | state_store2 | 
|  | 501 | FRAME_END | 
|  | 502 | ret | 
|  | 503 |  | 
|  | 504 | .Lenc_out_3: | 
|  | 505 | state_store3 | 
|  | 506 | FRAME_END | 
|  | 507 | ret | 
|  | 508 |  | 
|  | 509 | .Lenc_out_4: | 
|  | 510 | state_store4 | 
|  | 511 | FRAME_END | 
|  | 512 | ret | 
|  | 513 |  | 
|  | 514 | .Lenc_out_5: | 
|  | 515 | state_store5 | 
|  | 516 | FRAME_END | 
|  | 517 | ret | 
|  | 518 |  | 
|  | 519 | .Lenc_out: | 
|  | 520 | FRAME_END | 
|  | 521 | ret | 
|  | 522 | ENDPROC(crypto_aegis256_aesni_enc) | 
|  | 523 |  | 
|  | 524 | /* | 
|  | 525 | * void crypto_aegis256_aesni_enc_tail(void *state, unsigned int length, | 
|  | 526 | *                                     const void *src, void *dst); | 
|  | 527 | */ | 
|  | 528 | ENTRY(crypto_aegis256_aesni_enc_tail) | 
|  | 529 | FRAME_BEGIN | 
|  | 530 |  | 
|  | 531 | state_load | 
|  | 532 |  | 
|  | 533 | /* encrypt message: */ | 
|  | 534 | call __load_partial | 
|  | 535 |  | 
|  | 536 | movdqa MSG, T0 | 
|  | 537 | crypt0 T0 | 
|  | 538 |  | 
|  | 539 | call __store_partial | 
|  | 540 |  | 
|  | 541 | update0 MSG | 
|  | 542 |  | 
|  | 543 | state_store0 | 
|  | 544 |  | 
|  | 545 | FRAME_END | 
|  | 546 | ret | 
|  | 547 | ENDPROC(crypto_aegis256_aesni_enc_tail) | 
|  | 548 |  | 
|  | 549 | /* | 
|  | 550 | * void crypto_aegis256_aesni_dec(void *state, unsigned int length, | 
|  | 551 | *                                const void *src, void *dst); | 
|  | 552 | */ | 
|  | 553 | ENTRY(crypto_aegis256_aesni_dec) | 
|  | 554 | FRAME_BEGIN | 
|  | 555 |  | 
|  | 556 | cmp $0x10, LEN | 
|  | 557 | jb .Ldec_out | 
|  | 558 |  | 
|  | 559 | state_load | 
|  | 560 |  | 
|  | 561 | mov  SRC, %r8 | 
|  | 562 | or   DST, %r8 | 
|  | 563 | and $0xF, %r8 | 
|  | 564 | jnz .Ldec_u_loop | 
|  | 565 |  | 
|  | 566 | .align 8 | 
|  | 567 | .Ldec_a_loop: | 
|  | 568 | decrypt_block a 0 | 
|  | 569 | decrypt_block a 1 | 
|  | 570 | decrypt_block a 2 | 
|  | 571 | decrypt_block a 3 | 
|  | 572 | decrypt_block a 4 | 
|  | 573 | decrypt_block a 5 | 
|  | 574 |  | 
|  | 575 | add $0x60, SRC | 
|  | 576 | add $0x60, DST | 
|  | 577 | jmp .Ldec_a_loop | 
|  | 578 |  | 
|  | 579 | .align 8 | 
|  | 580 | .Ldec_u_loop: | 
|  | 581 | decrypt_block u 0 | 
|  | 582 | decrypt_block u 1 | 
|  | 583 | decrypt_block u 2 | 
|  | 584 | decrypt_block u 3 | 
|  | 585 | decrypt_block u 4 | 
|  | 586 | decrypt_block u 5 | 
|  | 587 |  | 
|  | 588 | add $0x60, SRC | 
|  | 589 | add $0x60, DST | 
|  | 590 | jmp .Ldec_u_loop | 
|  | 591 |  | 
|  | 592 | .Ldec_out_0: | 
|  | 593 | state_store0 | 
|  | 594 | FRAME_END | 
|  | 595 | ret | 
|  | 596 |  | 
|  | 597 | .Ldec_out_1: | 
|  | 598 | state_store1 | 
|  | 599 | FRAME_END | 
|  | 600 | ret | 
|  | 601 |  | 
|  | 602 | .Ldec_out_2: | 
|  | 603 | state_store2 | 
|  | 604 | FRAME_END | 
|  | 605 | ret | 
|  | 606 |  | 
|  | 607 | .Ldec_out_3: | 
|  | 608 | state_store3 | 
|  | 609 | FRAME_END | 
|  | 610 | ret | 
|  | 611 |  | 
|  | 612 | .Ldec_out_4: | 
|  | 613 | state_store4 | 
|  | 614 | FRAME_END | 
|  | 615 | ret | 
|  | 616 |  | 
|  | 617 | .Ldec_out_5: | 
|  | 618 | state_store5 | 
|  | 619 | FRAME_END | 
|  | 620 | ret | 
|  | 621 |  | 
|  | 622 | .Ldec_out: | 
|  | 623 | FRAME_END | 
|  | 624 | ret | 
|  | 625 | ENDPROC(crypto_aegis256_aesni_dec) | 
|  | 626 |  | 
|  | 627 | /* | 
|  | 628 | * void crypto_aegis256_aesni_dec_tail(void *state, unsigned int length, | 
|  | 629 | *                                     const void *src, void *dst); | 
|  | 630 | */ | 
|  | 631 | ENTRY(crypto_aegis256_aesni_dec_tail) | 
|  | 632 | FRAME_BEGIN | 
|  | 633 |  | 
|  | 634 | state_load | 
|  | 635 |  | 
|  | 636 | /* decrypt message: */ | 
|  | 637 | call __load_partial | 
|  | 638 |  | 
|  | 639 | crypt0 MSG | 
|  | 640 |  | 
|  | 641 | movdqa MSG, T0 | 
|  | 642 | call __store_partial | 
|  | 643 |  | 
|  | 644 | /* mask with byte count: */ | 
|  | 645 | movq LEN, T0 | 
|  | 646 | punpcklbw T0, T0 | 
|  | 647 | punpcklbw T0, T0 | 
|  | 648 | punpcklbw T0, T0 | 
|  | 649 | punpcklbw T0, T0 | 
|  | 650 | movdqa .Laegis256_counter, T1 | 
|  | 651 | pcmpgtb T1, T0 | 
|  | 652 | pand T0, MSG | 
|  | 653 |  | 
|  | 654 | update0 MSG | 
|  | 655 |  | 
|  | 656 | state_store0 | 
|  | 657 |  | 
|  | 658 | FRAME_END | 
|  | 659 | ret | 
|  | 660 | ENDPROC(crypto_aegis256_aesni_dec_tail) | 
|  | 661 |  | 
|  | 662 | /* | 
|  | 663 | * void crypto_aegis256_aesni_final(void *state, void *tag_xor, | 
|  | 664 | *                                  u64 assoclen, u64 cryptlen); | 
|  | 665 | */ | 
|  | 666 | ENTRY(crypto_aegis256_aesni_final) | 
|  | 667 | FRAME_BEGIN | 
|  | 668 |  | 
|  | 669 | state_load | 
|  | 670 |  | 
|  | 671 | /* prepare length block: */ | 
|  | 672 | movq %rdx, MSG | 
|  | 673 | movq %rcx, T0 | 
|  | 674 | pslldq $8, T0 | 
|  | 675 | pxor T0, MSG | 
|  | 676 | psllq $3, MSG /* multiply by 8 (to get bit count) */ | 
|  | 677 |  | 
|  | 678 | pxor STATE3, MSG | 
|  | 679 |  | 
|  | 680 | /* update state: */ | 
|  | 681 | update0 MSG | 
|  | 682 | update1 MSG | 
|  | 683 | update2 MSG | 
|  | 684 | update3 MSG | 
|  | 685 | update4 MSG | 
|  | 686 | update5 MSG | 
|  | 687 | update0 MSG | 
|  | 688 |  | 
|  | 689 | /* xor tag: */ | 
|  | 690 | movdqu (%rsi), MSG | 
|  | 691 |  | 
|  | 692 | pxor STATE0, MSG | 
|  | 693 | pxor STATE1, MSG | 
|  | 694 | pxor STATE2, MSG | 
|  | 695 | pxor STATE3, MSG | 
|  | 696 | pxor STATE4, MSG | 
|  | 697 | pxor STATE5, MSG | 
|  | 698 |  | 
|  | 699 | movdqu MSG, (%rsi) | 
|  | 700 |  | 
|  | 701 | FRAME_END | 
|  | 702 | ret | 
|  | 703 | ENDPROC(crypto_aegis256_aesni_final) |