#! /usr/bin/env perl
# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for ARMv8 AES instructions. The
# module is endian-agnostic in the sense that it supports both big-
# and little-endian cases, and it supports both 32- and 64-bit modes
# of operation. The latter is achieved by limiting the number of
# utilized registers to 16, which implies additional NEON load and
# integer instructions. This has no effect on the mighty Apple A7,
# where results are literally equal to the theoretical estimates
# based on AES instruction latencies and issue rates. On Cortex-A53,
# an in-order execution core, this costs up to 10-15%, which is
# partially compensated by a dedicated code path for the 128-bit
# CBC encrypt case. On Cortex-A57, performance in parallelizable
# modes seems to be limited by the sheer amount of NEON instructions...
#
# Performance in cycles per byte processed with 128-bit key:
#
#		CBC enc		CBC dec		CTR
# Apple A7	2.39		1.20		1.20
# Cortex-A53	1.32		1.29		1.46
# Cortex-A57(*)	1.95		0.85		0.93
# Denver	1.96		0.86		0.80
# Mongoose	1.33		1.20		1.20
# Kryo		1.26		0.94		1.00
#
# (*)	original 3.64/1.34/1.32 results were for r0p0 revision
#	and are still the same even for the updated module;

$flavour = shift;
$output  = shift;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

open OUT,"| \"$^X\" $xlate $flavour $output";
*STDOUT=*OUT;
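
# The script takes the perlasm "flavour" and the output file as its two
# arguments and pipes the generated code through arm-xlate.pl, which
# resolves the mixed 32-/64-bit syntax for the requested target. An
# illustrative invocation (flavour and file names here are assumptions,
# the actual set is whatever arm-xlate.pl and the build system use):
#
#	perl aesv8-armx.pl linux64 aesv8-armx.S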

$prefix="aes_v8";

$code=<<___;
#include "arm_arch.h"

#if __ARM_MAX_ARCH__>=7
.text
___
$code.=".arch	armv8-a+crypto\n"	if ($flavour =~ /64/);
$code.=<<___				if ($flavour !~ /64/);
.arch	armv7-a	// don't confuse not-so-latest binutils with armv8 :-)
.fpu	neon
.code	32
#undef	__thumb2__
___

# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax:
# NEON uses mostly 32-bit mnemonics, integer code mostly 64-bit ones.
# The goal is to maintain both 32- and 64-bit code within a single
# module and transliterate common code to either flavour with regex
# voodoo.
#
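# As an illustrative example of what the post-processors at the bottom
# of this file are expected to do, a source line such as
#
#	vld1.32	{q8},[x3],#16
#
# should come out as "ld1 {v16.4s},[x3],#16" in 64-bit flavour (q8-q15
# are remapped to v16-v23, keeping the callee-saved v8-v15 untouched)
# and as "vld1.32 {q8},[r3]!" in 32-bit flavour.
#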
{{{
my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12");
my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)=
	$flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10));
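# Note that the 32-bit selection above skips q4-q7: those alias d8-d15,
# which are callee-saved under the AAPCS, so avoiding them spares the
# key-setup routine a save/restore sequence.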


$code.=<<___;
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b
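// The three vectors above are, in order: the initial round constant
// (0x01 in every lane), the vtbl index pattern that rotates the last
// key word and splats it across the vector, and 0x1b, which continues
// the rcon sequence (0x1b, 0x36) once left shifts overflow past 0x80.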

.globl	${prefix}_set_encrypt_key
.type	${prefix}_set_encrypt_key,%function
.align	5
${prefix}_set_encrypt_key:
.Lenc_key:
___
$code.=<<___	if ($flavour =~ /64/);
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
___
$code.=<<___;
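	// Validate the arguments: return -1 for a NULL input or output
	// pointer and -2 for a key length other than 128, 192 or 256
	// bits; 0 is returned on success (see .Ldone below).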
	mov	$ptr,#-1
	cmp	$inp,#0
	b.eq	.Lenc_key_abort
	cmp	$out,#0
	b.eq	.Lenc_key_abort
	mov	$ptr,#-2
	cmp	$bits,#128
	b.lt	.Lenc_key_abort
	cmp	$bits,#256
	b.gt	.Lenc_key_abort
	tst	$bits,#0x3f
	b.ne	.Lenc_key_abort

	adr	$ptr,.Lrcon
	cmp	$bits,#192

	veor	$zero,$zero,$zero
	vld1.8	{$in0},[$inp],#16
	mov	$bits,#8		// reuse $bits
	vld1.32	{$rcon,$mask},[$ptr],#32

	b.lt	.Loop128
	b.eq	.L192
	b	.L256

.align	4
.Loop128:
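	// Each iteration produces one round key: vtbl rotates the last
	// key word and splats it to all four lanes, aese with an all-zero
	// round key then applies SubBytes (ShiftRows is a no-op when all
	// four words are equal), and the vext/veor chain implements the
	// word-by-word XOR recurrence of the AES key schedule.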
	vtbl.8	$key,{$in0},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in0},[$out],#16
	aese	$key,$zero
	subs	$bits,$bits,#1

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	vshl.u8	$rcon,$rcon,#1
	veor	$in0,$in0,$key
	b.ne	.Loop128

	vld1.32	{$rcon},[$ptr]

	vtbl.8	$key,{$in0},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in0},[$out],#16
	aese	$key,$zero

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	vshl.u8	$rcon,$rcon,#1
	veor	$in0,$in0,$key

	vtbl.8	$key,{$in0},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in0},[$out],#16
	aese	$key,$zero

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	veor	$in0,$in0,$key
	vst1.32	{$in0},[$out]
	add	$out,$out,#0x50

	mov	$rounds,#10
	b	.Ldone

.align	4
.L192:
	vld1.8	{$in1},[$inp],#8
	vmov.i8	$key,#8			// borrow $key
	vst1.32	{$in0},[$out],#16
	vsub.i8	$mask,$mask,$key	// adjust the mask

.Loop192:
	vtbl.8	$key,{$in1},$mask
	vext.8	$tmp,$zero,$in0,#12
#ifdef __ARMEB__
	vst1.32	{$in1},[$out],#16
	sub	$out,$out,#8
#else
	vst1.32	{$in1},[$out],#8
#endif
	aese	$key,$zero
	subs	$bits,$bits,#1

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp

	vdup.32	$tmp,${in0}[3]
	veor	$tmp,$tmp,$in1
	veor	$key,$key,$rcon
	vext.8	$in1,$zero,$in1,#12
	vshl.u8	$rcon,$rcon,#1
	veor	$in1,$in1,$tmp
	veor	$in0,$in0,$key
	veor	$in1,$in1,$key
	vst1.32	{$in0},[$out],#16
	b.ne	.Loop192

	mov	$rounds,#12
	add	$out,$out,#0x20
	b	.Ldone

.align	4
.L256:
	vld1.8	{$in1},[$inp]
	mov	$bits,#7
	mov	$rounds,#14
	vst1.32	{$in0},[$out],#16

.Loop256:
	vtbl.8	$key,{$in1},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in1},[$out],#16
	aese	$key,$zero
	subs	$bits,$bits,#1

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	vshl.u8	$rcon,$rcon,#1
	veor	$in0,$in0,$key
	vst1.32	{$in0},[$out],#16
	b.eq	.Ldone

	vdup.32	$key,${in0}[3]		// just splat
	vext.8	$tmp,$zero,$in1,#12
	aese	$key,$zero

	veor	$in1,$in1,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in1,$in1,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in1,$in1,$tmp

	veor	$in1,$in1,$key
	b	.Loop256

.Ldone:
	str	$rounds,[$out]
	mov	$ptr,#0

.Lenc_key_abort:
	mov	x0,$ptr			// return value
	`"ldr	x29,[sp],#16"		if ($flavour =~ /64/)`
	ret
.size	${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key

.globl	${prefix}_set_decrypt_key
.type	${prefix}_set_decrypt_key,%function
.align	5
${prefix}_set_decrypt_key:
___
$code.=<<___	if ($flavour =~ /64/);
	.inst	0xd503233f		// paciasp
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
___
$code.=<<___	if ($flavour !~ /64/);
	stmdb	sp!,{r4,lr}
___
$code.=<<___;
	bl	.Lenc_key

	cmp	x0,#0
	b.ne	.Ldec_key_abort

	sub	$out,$out,#240		// restore original $out
	mov	x4,#-16
	add	$inp,$out,x12,lsl#4	// end of key schedule

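	// Reverse the schedule in place: swap round keys end-for-end and
	// apply InvMixColumns (aesimc) to every round key except the
	// first and the last, yielding the equivalent decryption schedule.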
	vld1.32	{v0.16b},[$out]
	vld1.32	{v1.16b},[$inp]
	vst1.32	{v0.16b},[$inp],x4
	vst1.32	{v1.16b},[$out],#16

.Loop_imc:
	vld1.32	{v0.16b},[$out]
	vld1.32	{v1.16b},[$inp]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	vst1.32	{v0.16b},[$inp],x4
	vst1.32	{v1.16b},[$out],#16
	cmp	$inp,$out
	b.hi	.Loop_imc

	vld1.32	{v0.16b},[$out]
	aesimc	v0.16b,v0.16b
	vst1.32	{v0.16b},[$inp]

	eor	x0,x0,x0		// return value
.Ldec_key_abort:
___
$code.=<<___	if ($flavour !~ /64/);
	ldmia	sp!,{r4,pc}
___
$code.=<<___	if ($flavour =~ /64/);
	ldp	x29,x30,[sp],#16
	.inst	0xd50323bf		// autiasp
	ret
___
$code.=<<___;
.size	${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key
___
}}}
{{{
sub gen_block () {
my $dir = shift;
my ($e,$mc) = $dir eq "en" ? ("e","mc") : ("d","imc");
my ($inp,$out,$key)=map("x$_",(0..2));
my $rounds="w3";
my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..3));

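# Emits a one-block ${prefix}_encrypt or ${prefix}_decrypt routine. The
# loop consumes two round keys per iteration (hence the initial
# "rounds-2"), and the epilogue performs the final round without a
# MixColumns step, as the AES specification requires.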
$code.=<<___;
.globl	${prefix}_${dir}crypt
.type	${prefix}_${dir}crypt,%function
.align	5
${prefix}_${dir}crypt:
	ldr	$rounds,[$key,#240]
	vld1.32	{$rndkey0},[$key],#16
	vld1.8	{$inout},[$inp]
	sub	$rounds,$rounds,#2
	vld1.32	{$rndkey1},[$key],#16

.Loop_${dir}c:
	aes$e	$inout,$rndkey0
	aes$mc	$inout,$inout
	vld1.32	{$rndkey0},[$key],#16
	subs	$rounds,$rounds,#2
	aes$e	$inout,$rndkey1
	aes$mc	$inout,$inout
	vld1.32	{$rndkey1},[$key],#16
	b.gt	.Loop_${dir}c

	aes$e	$inout,$rndkey0
	aes$mc	$inout,$inout
	vld1.32	{$rndkey0},[$key]
	aes$e	$inout,$rndkey1
	veor	$inout,$inout,$rndkey0

	vst1.8	{$inout},[$out]
	ret
.size	${prefix}_${dir}crypt,.-${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
{{{
my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5";
my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12");
my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));

my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
my ($key4,$key5,$key6,$key7)=("x6","x12","x14",$key);

### q8-q15	preloaded key schedule

$code.=<<___;
.globl	${prefix}_cbc_encrypt
.type	${prefix}_cbc_encrypt,%function
.align	5
${prefix}_cbc_encrypt:
___
$code.=<<___	if ($flavour =~ /64/);
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
___
$code.=<<___	if ($flavour !~ /64/);
	mov	ip,sp
	stmdb	sp!,{r4-r8,lr}
	vstmdb	sp!,{d8-d15}		@ ABI specification says so
	ldmia	ip,{r4-r5}		@ load remaining args
___
$code.=<<___;
	subs	$len,$len,#16
	mov	$step,#16
	b.lo	.Lcbc_abort
	cclr	$step,eq

	cmp	$enc,#0			// en- or decrypting?
	ldr	$rounds,[$key,#240]
	and	$len,$len,#-16
	vld1.8	{$ivec},[$ivp]
	vld1.8	{$dat},[$inp],$step

	vld1.32	{q8-q9},[$key]		// load key schedule...
	sub	$rounds,$rounds,#6
	add	$key_,$key,x5,lsl#4	// pointer to last 7 round keys
	sub	$rounds,$rounds,#2
	vld1.32	{q10-q11},[$key_],#32
	vld1.32	{q12-q13},[$key_],#32
	vld1.32	{q14-q15},[$key_],#32
	vld1.32	{$rndlast},[$key_]

	add	$key_,$key,#32
	mov	$cnt,$rounds
	b.eq	.Lcbc_dec

	cmp	$rounds,#2
	veor	$dat,$dat,$ivec
	veor	$rndzero_n_last,q8,$rndlast
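	// rndzero_n_last is rndkey[0]^rndkey[last]; XOR-ing it into the
	// next plaintext block lets the loop resume from the state just
	// before the final AddRoundKey, folding CBC chaining and both
	// AddRoundKey steps into a single veor.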
	b.eq	.Lcbc_enc128

	vld1.32	{$in0-$in1},[$key_]
	add	$key_,$key,#16
	add	$key4,$key,#16*4
	add	$key5,$key,#16*5
	aese	$dat,q8
	aesmc	$dat,$dat
	add	$key6,$key,#16*6
	add	$key7,$key,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
	aese	$dat,q8
	aesmc	$dat,$dat
	vst1.8	{$ivec},[$out],#16
.Lenter_cbc_enc:
	aese	$dat,q9
	aesmc	$dat,$dat
	aese	$dat,$in0
	aesmc	$dat,$dat
	vld1.32	{q8},[$key4]
	cmp	$rounds,#4
	aese	$dat,$in1
	aesmc	$dat,$dat
	vld1.32	{q9},[$key5]
	b.eq	.Lcbc_enc192

	aese	$dat,q8
	aesmc	$dat,$dat
	vld1.32	{q8},[$key6]
	aese	$dat,q9
	aesmc	$dat,$dat
	vld1.32	{q9},[$key7]
	nop

.Lcbc_enc192:
	aese	$dat,q8
	aesmc	$dat,$dat
	subs	$len,$len,#16
	aese	$dat,q9
	aesmc	$dat,$dat
	cclr	$step,eq
	aese	$dat,q10
	aesmc	$dat,$dat
	aese	$dat,q11
	aesmc	$dat,$dat
	vld1.8	{q8},[$inp],$step
	aese	$dat,q12
	aesmc	$dat,$dat
	veor	q8,q8,$rndzero_n_last
	aese	$dat,q13
	aesmc	$dat,$dat
	vld1.32	{q9},[$key_]		// re-pre-load rndkey[1]
	aese	$dat,q14
	aesmc	$dat,$dat
	aese	$dat,q15
	veor	$ivec,$dat,$rndlast
	b.hs	.Loop_cbc_enc

	vst1.8	{$ivec},[$out],#16
	b	.Lcbc_done

.align	5
.Lcbc_enc128:
	vld1.32	{$in0-$in1},[$key_]
	aese	$dat,q8
	aesmc	$dat,$dat
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	aese	$dat,q8
	aesmc	$dat,$dat
	vst1.8	{$ivec},[$out],#16
.Lenter_cbc_enc128:
	aese	$dat,q9
	aesmc	$dat,$dat
	subs	$len,$len,#16
	aese	$dat,$in0
	aesmc	$dat,$dat
	cclr	$step,eq
	aese	$dat,$in1
	aesmc	$dat,$dat
	aese	$dat,q10
	aesmc	$dat,$dat
	aese	$dat,q11
	aesmc	$dat,$dat
	vld1.8	{q8},[$inp],$step
	aese	$dat,q12
	aesmc	$dat,$dat
	aese	$dat,q13
	aesmc	$dat,$dat
	aese	$dat,q14
	aesmc	$dat,$dat
	veor	q8,q8,$rndzero_n_last
	aese	$dat,q15
	veor	$ivec,$dat,$rndlast
	b.hs	.Loop_cbc_enc128

	vst1.8	{$ivec},[$out],#16
	b	.Lcbc_done
___
{
my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
$code.=<<___;
.align	5
.Lcbc_dec:
	vld1.8	{$dat2},[$inp],#16
	subs	$len,$len,#32		// bias
	add	$cnt,$rounds,#2
	vorr	$in1,$dat,$dat
	vorr	$dat1,$dat,$dat
	vorr	$in2,$dat2,$dat2
	b.lo	.Lcbc_dec_tail

	vorr	$dat1,$dat2,$dat2
	vld1.8	{$dat2},[$inp],#16
	vorr	$in0,$dat,$dat
	vorr	$in1,$dat1,$dat1
	vorr	$in2,$dat2,$dat2

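	// CBC decryption is parallelizable, so three blocks are kept in
	// flight to hide the aesd/aesimc latency on in-order cores.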
.Loop3x_cbc_dec:
	aesd	$dat0,q8
	aesimc	$dat0,$dat0
	aesd	$dat1,q8
	aesimc	$dat1,$dat1
	aesd	$dat2,q8
	aesimc	$dat2,$dat2
	vld1.32	{q8},[$key_],#16
	subs	$cnt,$cnt,#2
	aesd	$dat0,q9
	aesimc	$dat0,$dat0
	aesd	$dat1,q9
	aesimc	$dat1,$dat1
	aesd	$dat2,q9
	aesimc	$dat2,$dat2
	vld1.32	{q9},[$key_],#16
	b.gt	.Loop3x_cbc_dec

	aesd	$dat0,q8
	aesimc	$dat0,$dat0
	aesd	$dat1,q8
	aesimc	$dat1,$dat1
	aesd	$dat2,q8
	aesimc	$dat2,$dat2
	veor	$tmp0,$ivec,$rndlast
	subs	$len,$len,#0x30
	veor	$tmp1,$in0,$rndlast
	mov.lo	x6,$len			// x6, $cnt, is zero at this point
	aesd	$dat0,q9
	aesimc	$dat0,$dat0
	aesd	$dat1,q9
	aesimc	$dat1,$dat1
	aesd	$dat2,q9
	aesimc	$dat2,$dat2
	veor	$tmp2,$in1,$rndlast
	add	$inp,$inp,x6		// $inp is adjusted in such a way that
					// at exit from the loop $dat1-$dat2
					// are loaded with last "words"
	vorr	$ivec,$in2,$in2
	mov	$key_,$key
	aesd	$dat0,q12
	aesimc	$dat0,$dat0
	aesd	$dat1,q12
	aesimc	$dat1,$dat1
	aesd	$dat2,q12
	aesimc	$dat2,$dat2
	vld1.8	{$in0},[$inp],#16
	aesd	$dat0,q13
	aesimc	$dat0,$dat0
	aesd	$dat1,q13
	aesimc	$dat1,$dat1
	aesd	$dat2,q13
	aesimc	$dat2,$dat2
	vld1.8	{$in1},[$inp],#16
	aesd	$dat0,q14
	aesimc	$dat0,$dat0
	aesd	$dat1,q14
	aesimc	$dat1,$dat1
	aesd	$dat2,q14
	aesimc	$dat2,$dat2
	vld1.8	{$in2},[$inp],#16
	aesd	$dat0,q15
	aesd	$dat1,q15
	aesd	$dat2,q15
	vld1.32	{q8},[$key_],#16	// re-pre-load rndkey[0]
	add	$cnt,$rounds,#2
	veor	$tmp0,$tmp0,$dat0
	veor	$tmp1,$tmp1,$dat1
	veor	$dat2,$dat2,$tmp2
	vld1.32	{q9},[$key_],#16	// re-pre-load rndkey[1]
	vst1.8	{$tmp0},[$out],#16
	vorr	$dat0,$in0,$in0
	vst1.8	{$tmp1},[$out],#16
	vorr	$dat1,$in1,$in1
	vst1.8	{$dat2},[$out],#16
	vorr	$dat2,$in2,$in2
	b.hs	.Loop3x_cbc_dec

	cmn	$len,#0x30
	b.eq	.Lcbc_done
	nop

.Lcbc_dec_tail:
	aesd	$dat1,q8
	aesimc	$dat1,$dat1
	aesd	$dat2,q8
	aesimc	$dat2,$dat2
	vld1.32	{q8},[$key_],#16
	subs	$cnt,$cnt,#2
	aesd	$dat1,q9
	aesimc	$dat1,$dat1
	aesd	$dat2,q9
	aesimc	$dat2,$dat2
	vld1.32	{q9},[$key_],#16
	b.gt	.Lcbc_dec_tail

	aesd	$dat1,q8
	aesimc	$dat1,$dat1
	aesd	$dat2,q8
	aesimc	$dat2,$dat2
	aesd	$dat1,q9
	aesimc	$dat1,$dat1
	aesd	$dat2,q9
	aesimc	$dat2,$dat2
	aesd	$dat1,q12
	aesimc	$dat1,$dat1
	aesd	$dat2,q12
	aesimc	$dat2,$dat2
	cmn	$len,#0x20
	aesd	$dat1,q13
	aesimc	$dat1,$dat1
	aesd	$dat2,q13
	aesimc	$dat2,$dat2
	veor	$tmp1,$ivec,$rndlast
	aesd	$dat1,q14
	aesimc	$dat1,$dat1
	aesd	$dat2,q14
	aesimc	$dat2,$dat2
	veor	$tmp2,$in1,$rndlast
	aesd	$dat1,q15
	aesd	$dat2,q15
	b.eq	.Lcbc_dec_one
	veor	$tmp1,$tmp1,$dat1
	veor	$tmp2,$tmp2,$dat2
	vorr	$ivec,$in2,$in2
	vst1.8	{$tmp1},[$out],#16
	vst1.8	{$tmp2},[$out],#16
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	$tmp1,$tmp1,$dat2
	vorr	$ivec,$in2,$in2
	vst1.8	{$tmp1},[$out],#16

.Lcbc_done:
	vst1.8	{$ivec},[$ivp]
.Lcbc_abort:
___
}
$code.=<<___	if ($flavour !~ /64/);
	vldmia	sp!,{d8-d15}
	ldmia	sp!,{r4-r8,pc}
___
$code.=<<___	if ($flavour =~ /64/);
	ldr	x29,[sp],#16
	ret
___
$code.=<<___;
.size	${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt
___
}}}
{{{
my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4));
my ($rounds,$cnt,$key_)=("w5","w6","x7");
my ($ctr,$tctr0,$tctr1,$tctr2)=map("w$_",(8..10,12));
my $step="x12";		# aliases with $tctr2

my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));

my ($dat,$tmp)=($dat0,$tmp0);

### q8-q15	preloaded key schedule

$code.=<<___;
.globl	${prefix}_ctr32_encrypt_blocks
.type	${prefix}_ctr32_encrypt_blocks,%function
.align	5
${prefix}_ctr32_encrypt_blocks:
___
$code.=<<___	if ($flavour =~ /64/);
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
___
$code.=<<___	if ($flavour !~ /64/);
	mov	ip,sp
	stmdb	sp!,{r4-r10,lr}
	vstmdb	sp!,{d8-d15}		@ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
___
$code.=<<___;
	ldr	$rounds,[$key,#240]

	ldr	$ctr, [$ivp, #12]
#ifdef __ARMEB__
	vld1.8	{$dat0},[$ivp]
#else
	vld1.32	{$dat0},[$ivp]
#endif
	vld1.32	{q8-q9},[$key]		// load key schedule...
	sub	$rounds,$rounds,#4
	mov	$step,#16
	cmp	$len,#2
	add	$key_,$key,x5,lsl#4	// pointer to last 5 round keys
	sub	$rounds,$rounds,#2
	vld1.32	{q12-q13},[$key_],#32
	vld1.32	{q14-q15},[$key_],#32
	vld1.32	{$rndlast},[$key_]
	add	$key_,$key,#32
	mov	$cnt,$rounds
	cclr	$step,lo
#ifndef __ARMEB__
	rev	$ctr, $ctr
#endif
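	// The last word of the IV holds the 32-bit big-endian counter.
	// It is maintained in a general-purpose register and re-inserted
	// into lane 3 of the counter block for each batch of blocks.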
	add	$tctr1, $ctr, #1
	vorr	$ivec,$dat0,$dat0
	rev	$tctr1, $tctr1
	vmov.32	${ivec}[3],$tctr1
	add	$ctr, $ctr, #2
	vorr	$dat1,$ivec,$ivec
	b.ls	.Lctr32_tail
	rev	$tctr2, $ctr
	vmov.32	${ivec}[3],$tctr2
	sub	$len,$len,#3		// bias
	vorr	$dat2,$ivec,$ivec
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
	aese	$dat0,q8
	aesmc	$dat0,$dat0
	aese	$dat1,q8
	aesmc	$dat1,$dat1
	aese	$dat2,q8
	aesmc	$dat2,$dat2
	vld1.32	{q8},[$key_],#16
	subs	$cnt,$cnt,#2
	aese	$dat0,q9
	aesmc	$dat0,$dat0
	aese	$dat1,q9
	aesmc	$dat1,$dat1
	aese	$dat2,q9
	aesmc	$dat2,$dat2
	vld1.32	{q9},[$key_],#16
	b.gt	.Loop3x_ctr32

	aese	$dat0,q8
	aesmc	$tmp0,$dat0
	aese	$dat1,q8
	aesmc	$tmp1,$dat1
	vld1.8	{$in0},[$inp],#16
	add	$tctr0,$ctr,#1
	aese	$dat2,q8
	aesmc	$dat2,$dat2
	vld1.8	{$in1},[$inp],#16
	rev	$tctr0,$tctr0
	aese	$tmp0,q9
	aesmc	$tmp0,$tmp0
	aese	$tmp1,q9
	aesmc	$tmp1,$tmp1
	vld1.8	{$in2},[$inp],#16
	mov	$key_,$key
	aese	$dat2,q9
	aesmc	$tmp2,$dat2
	aese	$tmp0,q12
	aesmc	$tmp0,$tmp0
	aese	$tmp1,q12
	aesmc	$tmp1,$tmp1
	veor	$in0,$in0,$rndlast
	add	$tctr1,$ctr,#2
	aese	$tmp2,q12
	aesmc	$tmp2,$tmp2
	veor	$in1,$in1,$rndlast
	add	$ctr,$ctr,#3
	aese	$tmp0,q13
	aesmc	$tmp0,$tmp0
	aese	$tmp1,q13
	aesmc	$tmp1,$tmp1
	veor	$in2,$in2,$rndlast
	vmov.32	${ivec}[3], $tctr0
	aese	$tmp2,q13
	aesmc	$tmp2,$tmp2
	vorr	$dat0,$ivec,$ivec
	rev	$tctr1,$tctr1
	aese	$tmp0,q14
	aesmc	$tmp0,$tmp0
	vmov.32	${ivec}[3], $tctr1
	rev	$tctr2,$ctr
	aese	$tmp1,q14
	aesmc	$tmp1,$tmp1
	vorr	$dat1,$ivec,$ivec
	vmov.32	${ivec}[3], $tctr2
	aese	$tmp2,q14
	aesmc	$tmp2,$tmp2
	vorr	$dat2,$ivec,$ivec
	subs	$len,$len,#3
	aese	$tmp0,q15
	aese	$tmp1,q15
	aese	$tmp2,q15

	veor	$in0,$in0,$tmp0
	vld1.32	{q8},[$key_],#16	// re-pre-load rndkey[0]
	vst1.8	{$in0},[$out],#16
	veor	$in1,$in1,$tmp1
	mov	$cnt,$rounds
	vst1.8	{$in1},[$out],#16
	veor	$in2,$in2,$tmp2
	vld1.32	{q9},[$key_],#16	// re-pre-load rndkey[1]
	vst1.8	{$in2},[$out],#16
	b.hs	.Loop3x_ctr32

	adds	$len,$len,#3
	b.eq	.Lctr32_done
	cmp	$len,#1
	mov	$step,#16
	cclr	$step,eq

.Lctr32_tail:
	aese	$dat0,q8
	aesmc	$dat0,$dat0
	aese	$dat1,q8
	aesmc	$dat1,$dat1
	vld1.32	{q8},[$key_],#16
	subs	$cnt,$cnt,#2
	aese	$dat0,q9
	aesmc	$dat0,$dat0
	aese	$dat1,q9
	aesmc	$dat1,$dat1
	vld1.32	{q9},[$key_],#16
	b.gt	.Lctr32_tail

	aese	$dat0,q8
	aesmc	$dat0,$dat0
	aese	$dat1,q8
	aesmc	$dat1,$dat1
	aese	$dat0,q9
	aesmc	$dat0,$dat0
	aese	$dat1,q9
	aesmc	$dat1,$dat1
	vld1.8	{$in0},[$inp],$step
	aese	$dat0,q12
	aesmc	$dat0,$dat0
	aese	$dat1,q12
	aesmc	$dat1,$dat1
	vld1.8	{$in1},[$inp]
	aese	$dat0,q13
	aesmc	$dat0,$dat0
	aese	$dat1,q13
	aesmc	$dat1,$dat1
	veor	$in0,$in0,$rndlast
	aese	$dat0,q14
	aesmc	$dat0,$dat0
	aese	$dat1,q14
	aesmc	$dat1,$dat1
	veor	$in1,$in1,$rndlast
	aese	$dat0,q15
	aese	$dat1,q15

	cmp	$len,#1
	veor	$in0,$in0,$dat0
	veor	$in1,$in1,$dat1
	vst1.8	{$in0},[$out],#16
	b.eq	.Lctr32_done
	vst1.8	{$in1},[$out]

.Lctr32_done:
___
$code.=<<___	if ($flavour !~ /64/);
	vldmia	sp!,{d8-d15}
	ldmia	sp!,{r4-r10,pc}
___
$code.=<<___	if ($flavour =~ /64/);
	ldr	x29,[sp],#16
	ret
___
$code.=<<___;
.size	${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks
___
}}}
$code.=<<___;
#endif
___
########################################
if ($flavour =~ /64/) {			######## 64-bit code
    my %opcode = (
	"aesd"	=>	0x4e285800,	"aese"	=>	0x4e284800,
	"aesimc"=>	0x4e287800,	"aesmc"	=>	0x4e286800	);

    local *unaes = sub {
	my ($mnemonic,$arg)=@_;

	$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o	&&
	sprintf ".inst\t0x%08x\t//%s %s",
			$opcode{$mnemonic}|$1|($2<<5),
			$mnemonic,$arg;
    };
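
    # unaes is kept for reference only: the aes* substitution below is
    # commented out, presumably because ".arch armv8-a+crypto" (emitted
    # above for 64-bit flavours) lets the assembler accept the crypto
    # mnemonics natively.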

    foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo;	# old->new registers
	s/@\s/\/\//o;			# old->new style commentary

	#s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or
	s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel	$1$2,$1zr,$1$2,$3/o	or
	s/mov\.([a-z]+)\s+([wx][0-9]+),\s*([wx][0-9]+)/csel	$2,$3,$2,$1/o	or
	s/vmov\.i8/movi/o	or	# fix up legacy mnemonics
	s/vext\.8/ext/o		or
	s/vrev32\.8/rev32/o	or
	s/vtst\.8/cmtst/o	or
	s/vshr/ushr/o		or
	s/^(\s+)v/$1/o		or	# strip off v prefix
	s/\bbx\s+lr\b/ret/o;

	# fix up remaining legacy suffixes
	s/\.[ui]?8//o;
	m/\],#8/o and s/\.16b/\.8b/go;
	s/\.[ui]?32//o and s/\.16b/\.4s/go;
	s/\.[ui]?64//o and s/\.16b/\.2d/go;
	s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;

	print $_,"\n";
    }
} else {				######## 32-bit code
    my %opcode = (
	"aesd"	=>	0xf3b00340,	"aese"	=>	0xf3b00300,
	"aesimc"=>	0xf3b003c0,	"aesmc"	=>	0xf3b00380	);

    local *unaes = sub {
	my ($mnemonic,$arg)=@_;

	if ($arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o) {
	    my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
					 |(($2&7)<<1) |(($2&8)<<2);
	    # emit raw little-endian bytes, since ARMv7 instructions are
	    # always encoded little-endian; the correct solution would be
	    # the .inst directive, but older assemblers don't implement it:-(
| 966 | sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", |
| 967 | $word&0xff,($word>>8)&0xff, |
| 968 | ($word>>16)&0xff,($word>>24)&0xff, |
| 969 | $mnemonic,$arg; |
| 970 | } |
| 971 | }; |
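
    # Illustrative expected output (our example, not from the source):
    # for "aese q0,q1" the base opcode 0xf3b00300 is or-ed with the
    # register fields, giving ".byte 0x02,0x03,0xb0,0xf3 @ aese q0,q1".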

    sub unvtbl {
	my $arg=shift;

	$arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o &&
	sprintf	"vtbl.8	d%d,{q%d},d%d\n\t".
		"vtbl.8	d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1;
    }
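
    # ARMv7 vtbl only writes d registers, so a q-sized vtbl is split in
    # two; e.g. "vtbl.8 q0,{q1},q2" should come out as "vtbl.8 d0,{q1},d4"
    # followed by "vtbl.8 d1,{q1},d5" (example ours).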

    sub unvdup32 {
	my $arg=shift;

	$arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&
	sprintf	"vdup.32	q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;
    }

    sub unvmov32 {
	my $arg=shift;

	$arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o &&
	sprintf	"vmov.32	d%d[%d],%s",2*$1+($2>>1),$2&1,$3;
    }

    foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	s/\b[wx]([0-9]+)\b/r$1/go;		# new->old registers
	s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;	# new->old registers
	s/\/\/\s?/@ /o;				# new->old style commentary

	# fix up remaining new-style suffixes
	s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo	or
	s/\],#[0-9]+/]!/o;

	s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or
	s/cclr\s+([^,]+),\s*([a-z]+)/mov$2	$1,#0/o		or
	s/vtbl\.8\s+(.*)/unvtbl($1)/geo			or
	s/vdup\.32\s+(.*)/unvdup32($1)/geo		or
	s/vmov\.32\s+(.*)/unvmov32($1)/geo		or
	s/^(\s+)b\./$1b/o				or
	s/^(\s+)mov\./$1mov/o				or
	s/^(\s+)ret/$1bx\tlr/o;

	print $_,"\n";
    }
}

close STDOUT or die "error closing STDOUT: $!";