#!/usr/bin/env perl
# Copyright 2017-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# [ABI- and endian-neutral] Keccak-1600 for C64x.
#
# June 2017.
#
# This is a straightforward KECCAK_1X_ALT variant (see sha/keccak1600.c)
# with bit interleaving. 64-bit values are simply split between A- and
# B-files, with A-file holding least significant halves. This works
# out perfectly, because all operations including cross-communications
# [in rotate operations] are always complementary. Performance is
# [incredible for a 32-bit processor] 10.9 cycles per processed byte
# for r=1088, which corresponds to SHA3-256. This is >15x faster than
# compiler-generated KECCAK_1X_ALT code, and >10x faster than other
# variants. On average the processor ends up issuing ~4.5 instructions
# per cycle...
| 29 | |
# Register allocation for the 5x5 Keccak state.
#
# $A[$y][$x] holds a register *number* n; the generated code refers to the
# lane as the pair A<n> / B<n> (one 32-bit half in each register file, see
# the header comment).  Rows start at registers 5, 10, 16, 21 and 26;
# number 15 is skipped because A15/B15 are aliased to FP/SP further down.
my @A = map { my $first = $_; [ $first .. $first + 4 ] } (5, 10, 16, 21, 26);

$A[1][4] = 31;	# B14 is reserved, A14 is used as iota[]

# Exchange the registers assigned to A[3][0] and A[4][1] (27 <-> 21).
# NOTE(review): presumably so that A[3][0]:A[4][0] becomes the adjacent
# pair 27:26, which the Theta step spills/reloads with STDW/LDDW - confirm
# against the C64x register-pair rules.
($A[3][0], $A[4][1]) = ($A[4][1], $A[3][0]);

# Temporaries: C[0..4] are registers 0..4; C[5] and C[6] reuse the
# registers of A[3][0] and A[4][0].
my @C = (0 .. 4, $A[3][0], $A[4][0]);

# Register that carries the pointer into the iotas[] table.
my $iotas = "A14";

# Rho rotation counts, indexed [y][x] like @A.
my @rhotates = (
    [  0,  1, 62, 28, 27 ],
    [ 36, 44,  6, 55, 20 ],
    [  3, 10, 43, 25, 39 ],
    [ 41, 45, 15, 21,  8 ],
    [ 18,  2, 61, 56, 14 ],
);
| 41 | |
# ROL64(src, rot, dst [, prefix])
#
# Append to $code the two ROTL instructions that rotate the 64-bit lane held
# in registers A<src>/B<src> left by <rot> bits into A<dst>/B<dst>.
#
#   src, dst - register numbers (entries of @A or @C)
#   rot      - rotation count; emitted as the literal text "<rot>/2", so the
#              division is left for the assembler to evaluate
#   prefix   - optional text placed in front of the first instruction;
#              callers pass "||" to join the preceding execute packet
#
# With the bit-interleaved representation an odd count makes the halves
# trade register files (A<dst> comes from B<src> rotated by rot/2+1, B<dst>
# from A<src> rotated by rot/2); an even count rotates each half in place
# by rot/2.
sub ROL64 {
    my ($src,$rot,$dst,$p) = @_;

    # The prefix is optional; make the empty default explicit rather than
    # interpolating an undefined value.
    $p = "" unless defined $p;

    if ($rot & 1) {
	$code .= "$p ROTL B$src,$rot/2+1,A$dst\n"
	       . "|| ROTL A$src,$rot/2, B$dst\n";
    } else {
	$code .= "$p ROTL A$src,$rot/2,A$dst\n"
	       . "|| ROTL B$src,$rot/2,B$dst\n";
    }
}
| 57 | |
########################################################################
# Stack frame layout
#
# SP--->+------+------+
#       |      |      |
# +1--->+------+------+<- -9    below 4 slots are used by KeccakF1600_int
#       |      |      |
# +2--->+------+------+<- -8
#       |      |      |
# +3--->+------+------+<- -7
#       |  A2  |  A3  |         A3:A2 are preserved by KeccakF1600_int
# +4--->+------+------+<- -6
#       |  B2  |  B3  |         B3:B2 are preserved by KeccakF1600_int
# +5--->+------+------+<- -5    below is ABI-compliant layout
#       | A10  | A11  |
# +6--->+------+------+<- -4
#       | A12  | A13  |
# +7--->+------+------+<- -3
#       | A14  |  B3  |
# +8--->+------+------+<- -2
#       | B10  | B11  |
# +9--->+------+------+<- -1
#       | B12  | B13  |
#       +------+------+<---FP
#       |     A15     |
#       +------+--
| 85 | $code.=<<___; |
| 86 | .text |
| 87 | |
| 88 | .if .ASSEMBLER_VERSION<7000000 |
| 89 | .asg 0,__TI_EABI__ |
| 90 | .endif |
| 91 | .if __TI_EABI__ |
| 92 | .nocmp |
| 93 | .asg KeccakF1600,_KeccakF1600 |
| 94 | .asg SHA3_absorb,_SHA3_absorb |
| 95 | .asg SHA3_squeeze,_SHA3_squeeze |
| 96 | .endif |
| 97 | |
| 98 | .asg B3,RA |
| 99 | .asg A15,FP |
| 100 | .asg B15,SP |
| 101 | |
| 102 | .align 32 |
| 103 | _KeccakF1600_int: |
| 104 | .asmfunc |
| 105 | STDW A3:A2,*FP[-7] |
| 106 | || STDW B3:B2,*SP[4] |
| 107 | _KeccakF1600_cheat: |
| 108 | .if __TI_EABI__ |
| 109 | ADDKPC _KeccakF1600_int,B0 |
| 110 | || MVKL \$PCR_OFFSET(iotas,_KeccakF1600_int),$iotas |
| 111 | MVKH \$PCR_OFFSET(iotas,_KeccakF1600_int),$iotas |
| 112 | .else |
| 113 | ADDKPC _KeccakF1600_int,B0 |
| 114 | || MVKL (iotas-_KeccakF1600_int),$iotas |
| 115 | MVKH (iotas-_KeccakF1600_int),$iotas |
| 116 | .endif |
| 117 | ADD B0,$iotas,$iotas |
| 118 | loop?: |
| 119 | XOR A$A[0][2],A$A[1][2],A$C[2] ; Theta |
| 120 | || XOR B$A[0][2],B$A[1][2],B$C[2] |
| 121 | || XOR A$A[0][3],A$A[1][3],A$C[3] |
| 122 | || XOR B$A[0][3],B$A[1][3],B$C[3] |
| 123 | || XOR A$A[0][0],A$A[1][0],A$C[0] |
| 124 | || XOR B$A[0][0],B$A[1][0],B$C[0] |
| 125 | XOR A$A[2][2],A$C[2],A$C[2] |
| 126 | || XOR B$A[2][2],B$C[2],B$C[2] |
| 127 | || XOR A$A[2][3],A$C[3],A$C[3] |
| 128 | || XOR B$A[2][3],B$C[3],B$C[3] |
| 129 | || XOR A$A[2][0],A$C[0],A$C[0] |
| 130 | || XOR B$A[2][0],B$C[0],B$C[0] |
| 131 | XOR A$A[3][2],A$C[2],A$C[2] |
| 132 | || XOR B$A[3][2],B$C[2],B$C[2] |
| 133 | || XOR A$A[3][3],A$C[3],A$C[3] |
| 134 | || XOR B$A[3][3],B$C[3],B$C[3] |
| 135 | || XOR A$A[3][0],A$C[0],A$C[0] |
| 136 | || XOR B$A[3][0],B$C[0],B$C[0] |
| 137 | XOR A$A[4][2],A$C[2],A$C[2] |
| 138 | || XOR B$A[4][2],B$C[2],B$C[2] |
| 139 | || XOR A$A[4][3],A$C[3],A$C[3] |
| 140 | || XOR B$A[4][3],B$C[3],B$C[3] |
| 141 | || XOR A$A[4][0],A$C[0],A$C[0] |
| 142 | || XOR B$A[4][0],B$C[0],B$C[0] |
| 143 | XOR A$A[0][4],A$A[1][4],A$C[4] |
| 144 | || XOR B$A[0][4],B$A[1][4],B$C[4] |
| 145 | || XOR A$A[0][1],A$A[1][1],A$C[1] |
| 146 | || XOR B$A[0][1],B$A[1][1],B$C[1] |
| 147 | || STDW A$A[3][0]:A$A[4][0],*SP[1] ; offload some data |
| 148 | STDW B$A[3][0]:B$A[4][0],*SP[2] |
| 149 | || XOR A$A[2][4],A$C[4],A$C[4] |
| 150 | || XOR B$A[2][4],B$C[4],B$C[4] |
| 151 | || XOR A$A[2][1],A$C[1],A$C[1] |
| 152 | || XOR B$A[2][1],B$C[1],B$C[1] |
| 153 | || ROTL B$C[2],1,A$C[5] ; ROL64(C[2],1) |
| 154 | || ROTL A$C[2],0,B$C[5] |
| 155 | XOR A$A[3][4],A$C[4],A$C[4] |
| 156 | || XOR B$A[3][4],B$C[4],B$C[4] |
| 157 | || XOR A$A[3][1],A$C[1],A$C[1] |
| 158 | || XOR B$A[3][1],B$C[1],B$C[1] |
| 159 | || ROTL B$C[3],1,A$C[6] ; ROL64(C[3],1) |
| 160 | || ROTL A$C[3],0,B$C[6] |
| 161 | XOR A$A[4][4],A$C[4],A$C[4] |
| 162 | || XOR B$A[4][4],B$C[4],B$C[4] |
| 163 | || XOR A$A[4][1],A$C[1],A$C[1] |
| 164 | || XOR B$A[4][1],B$C[1],B$C[1] |
| 165 | || XOR A$C[0],A$C[5],A$C[5] ; C[0] ^ ROL64(C[2],1) |
| 166 | || XOR B$C[0],B$C[5],B$C[5] |
| 167 | XOR A$C[5],A$A[0][1],A$A[0][1] |
| 168 | || XOR B$C[5],B$A[0][1],B$A[0][1] |
| 169 | || XOR A$C[5],A$A[1][1],A$A[1][1] |
| 170 | || XOR B$C[5],B$A[1][1],B$A[1][1] |
| 171 | || XOR A$C[5],A$A[2][1],A$A[2][1] |
| 172 | || XOR B$C[5],B$A[2][1],B$A[2][1] |
| 173 | XOR A$C[5],A$A[3][1],A$A[3][1] |
| 174 | || XOR B$C[5],B$A[3][1],B$A[3][1] |
| 175 | || XOR A$C[5],A$A[4][1],A$A[4][1] |
| 176 | || XOR B$C[5],B$A[4][1],B$A[4][1] |
| 177 | || ROTL B$C[4],1,A$C[5] ; ROL64(C[4],1) |
| 178 | || ROTL A$C[4],0,B$C[5] |
| 179 | || XOR A$C[1],A$C[6],A$C[6] ; C[1] ^ ROL64(C[3],1) |
| 180 | || XOR B$C[1],B$C[6],B$C[6] |
| 181 | XOR A$C[6],A$A[0][2],A$A[0][2] |
| 182 | || XOR B$C[6],B$A[0][2],B$A[0][2] |
| 183 | || XOR A$C[6],A$A[1][2],A$A[1][2] |
| 184 | || XOR B$C[6],B$A[1][2],B$A[1][2] |
| 185 | || XOR A$C[6],A$A[2][2],A$A[2][2] |
| 186 | || XOR B$C[6],B$A[2][2],B$A[2][2] |
| 187 | || ROTL B$C[1],1,A$C[1] ; ROL64(C[1],1) |
| 188 | || ROTL A$C[1],0,B$C[1] |
| 189 | XOR A$C[6],A$A[3][2],A$A[3][2] |
| 190 | || XOR B$C[6],B$A[3][2],B$A[3][2] |
| 191 | || XOR A$C[6],A$A[4][2],A$A[4][2] |
| 192 | || XOR B$C[6],B$A[4][2],B$A[4][2] |
| 193 | || ROTL B$C[0],1,A$C[6] ; ROL64(C[0],1) |
| 194 | || ROTL A$C[0],0,B$C[6] |
| 195 | || XOR A$C[5],A$C[2],A$C[2] ; C[2] ^= ROL64(C[4],1) |
| 196 | || XOR B$C[5],B$C[2],B$C[2] |
| 197 | XOR A$C[2],A$A[0][3],A$A[0][3] |
| 198 | || XOR B$C[2],B$A[0][3],B$A[0][3] |
| 199 | || XOR A$C[2],A$A[1][3],A$A[1][3] |
| 200 | || XOR B$C[2],B$A[1][3],B$A[1][3] |
| 201 | || XOR A$C[2],A$A[2][3],A$A[2][3] |
| 202 | || XOR B$C[2],B$A[2][3],B$A[2][3] |
| 203 | XOR A$C[6],A$C[3],A$C[3] ; C[3] ^= ROL64(C[0],1) |
| 204 | || XOR B$C[6],B$C[3],B$C[3] |
| 205 | || LDDW *FP[-9],A$A[3][0]:A$A[4][0] ; restore offloaded data |
| 206 | || LDDW *SP[2],B$A[3][0]:B$A[4][0] |
| 207 | || XOR A$C[2],A$A[3][3],A$A[3][3] |
| 208 | || XOR B$C[2],B$A[3][3],B$A[3][3] |
| 209 | XOR A$C[2],A$A[4][3],A$A[4][3] |
| 210 | || XOR B$C[2],B$A[4][3],B$A[4][3] |
| 211 | || XOR A$C[3],A$A[0][4],A$A[0][4] |
| 212 | || XOR B$C[3],B$A[0][4],B$A[0][4] |
| 213 | || XOR A$C[3],A$A[1][4],A$A[1][4] |
| 214 | || XOR B$C[3],B$A[1][4],B$A[1][4] |
| 215 | XOR A$C[3],A$A[2][4],A$A[2][4] |
| 216 | || XOR B$C[3],B$A[2][4],B$A[2][4] |
| 217 | || XOR A$C[3],A$A[3][4],A$A[3][4] |
| 218 | || XOR B$C[3],B$A[3][4],B$A[3][4] |
| 219 | || XOR A$C[3],A$A[4][4],A$A[4][4] |
| 220 | || XOR B$C[3],B$A[4][4],B$A[4][4] |
| 221 | XOR A$C[1],A$C[4],A$C[4] ; C[4] ^= ROL64(C[1],1) |
| 222 | || XOR B$C[1],B$C[4],B$C[4] |
| 223 | || MV A$A[0][1],A$C[1] ; Rho+Pi, "early start" |
| 224 | || MV B$A[0][1],B$C[1] |
| 225 | ___ |
# Rho+Pi code generation, interleaved with the tail of Theta.
#
# Each ROL64(src, rot, dst) call appends a pair of ROTL instructions that
# write the rotated src lane into dst.  Calls that pass "||" are appended
# to the execute packet emitted just before them; calls without it start
# with an unprefixed instruction line.
#
# First group: row 0, lanes 1..4 receive the rotated diagonal lanes
# A[1][1], A[3][3], A[2][2], A[4][4].  The heredocs in between XOR C[4]
# into column 0 of every row and copy the old A[0][3], A[0][2], A[0][4]
# into C[3], C[2], C[4] before those lanes are overwritten.
| 226 | &ROL64 ($A[1][1],$rhotates[1][1],$A[0][1],"||"); |
| 227 | $code.=<<___; |
| 228 | XOR A$C[4],A$A[0][0],A$A[0][0] |
| 229 | || XOR B$C[4],B$A[0][0],B$A[0][0] |
| 230 | || XOR A$C[4],A$A[1][0],A$A[1][0] |
| 231 | || XOR B$C[4],B$A[1][0],B$A[1][0] |
| 232 | || MV A$A[0][3],A$C[3] |
| 233 | || MV B$A[0][3],B$C[3] |
| 234 | ___ |
| 235 | &ROL64 ($A[3][3],$rhotates[3][3],$A[0][3],"||"); |
| 236 | $code.=<<___; |
| 237 | XOR A$C[4],A$A[2][0],A$A[2][0] |
| 238 | || XOR B$C[4],B$A[2][0],B$A[2][0] |
| 239 | || XOR A$C[4],A$A[3][0],A$A[3][0] |
| 240 | || XOR B$C[4],B$A[3][0],B$A[3][0] |
| 241 | || MV A$A[0][2],A$C[2] |
| 242 | || MV B$A[0][2],B$C[2] |
| 243 | ___ |
| 244 | &ROL64 ($A[2][2],$rhotates[2][2],$A[0][2],"||"); |
| 245 | $code.=<<___; |
| 246 | XOR A$C[4],A$A[4][0],A$A[4][0] |
| 247 | || XOR B$C[4],B$A[4][0],B$A[4][0] |
| 248 | || MV A$A[0][4],A$C[4] |
| 249 | || MV B$A[0][4],B$C[4] |
| 250 | ___ |
| 251 | &ROL64 ($A[4][4],$rhotates[4][4],$A[0][4],"||"); |
| 252 |
# Second group: the destinations are exactly the source lanes of the four
# calls above.  The two LDW lines ride in the same packets and fetch the
# next pair of words from the iotas table into A<C[0]> and B<C[0]>,
# advancing the pointer by two words.
| 253 | &ROL64 ($A[1][4],$rhotates[1][4],$A[1][1]); |
| 254 | $code.=<<___; |
| 255 | || LDW *${iotas}++[2],A$C[0] |
| 256 | ___ |
| 257 | &ROL64 ($A[2][3],$rhotates[2][3],$A[2][2]); |
| 258 | $code.=<<___; |
| 259 | || LDW *${iotas}[-1],B$C[0] |
| 260 | ___ |
| 261 | &ROL64 ($A[3][2],$rhotates[3][2],$A[3][3]); |
| 262 | &ROL64 ($A[4][1],$rhotates[4][1],$A[4][4]); |
| 263 |
# Third group: destinations are the source lanes of the second group.
| 264 | &ROL64 ($A[4][2],$rhotates[4][2],$A[1][4]); |
| 265 | &ROL64 ($A[3][4],$rhotates[3][4],$A[2][3]); |
| 266 | &ROL64 ($A[2][1],$rhotates[2][1],$A[3][2]); |
| 267 | &ROL64 ($A[1][3],$rhotates[1][3],$A[4][1]); |
| 268 |
# Fourth group: destinations are the source lanes of the third group.
| 269 | &ROL64 ($A[2][4],$rhotates[2][4],$A[4][2]); |
| 270 | &ROL64 ($A[4][3],$rhotates[4][3],$A[3][4]); |
| 271 | &ROL64 ($A[1][2],$rhotates[1][2],$A[2][1]); |
| 272 | &ROL64 ($A[3][1],$rhotates[3][1],$A[1][3]); |
| 273 |
# Fifth group: column 0 of rows 1..4 is moved out, into the source lanes
# of the fourth group.
| 274 | &ROL64 ($A[4][0],$rhotates[4][0],$A[2][4]); |
| 275 | &ROL64 ($A[3][0],$rhotates[3][0],$A[4][3]); |
| 276 | &ROL64 ($A[2][0],$rhotates[2][0],$A[1][2]); |
| 277 | &ROL64 ($A[1][0],$rhotates[1][0],$A[3][1]); |
| 278 |
# Last group: column 0 of rows 1..4 is refilled from the saved copies of
# row 0 (C[1], C[4], C[2], and C[3] below).  The C[3] call is deliberately
# issued after the first Chi heredoc instead of here.
| 279 | #&ROL64 ($C[3], $rhotates[0][3],$A[1][0]); # moved below |
| 280 | &ROL64 ($C[1], $rhotates[0][1],$A[2][0]); |
| 281 | &ROL64 ($C[4], $rhotates[0][4],$A[3][0]); |
| 282 | &ROL64 ($C[2], $rhotates[0][2],$A[4][0]); |
| 283 | $code.=<<___; |
| 284 | || ANDN A$A[0][2],A$A[0][1],A$C[4] ; Chi+Iota |
| 285 | || ANDN B$A[0][2],B$A[0][1],B$C[4] |
| 286 | || ANDN A$A[0][3],A$A[0][2],A$C[1] |
| 287 | || ANDN B$A[0][3],B$A[0][2],B$C[1] |
| 288 | || ANDN A$A[0][4],A$A[0][3],A$C[2] |
| 289 | || ANDN B$A[0][4],B$A[0][3],B$C[2] |
| 290 | ___ |
# Deferred from the last group: A[1][0] = ROL64(saved A[0][3]).  The
# heredoc that follows continues this packet and overwrites C[3] there.
| 291 | &ROL64 ($C[3], $rhotates[0][3],$A[1][0]); |
| 292 | $code.=<<___; |
| 293 | || ANDN A$A[0][0],A$A[0][4],A$C[3] |
| 294 | || ANDN B$A[0][0],B$A[0][4],B$C[3] |
| 295 | || XOR A$C[4],A$A[0][0],A$A[0][0] |
| 296 | || XOR B$C[4],B$A[0][0],B$A[0][0] |
| 297 | || ANDN A$A[0][1],A$A[0][0],A$C[4] |
| 298 | || ANDN B$A[0][1],B$A[0][0],B$C[4] |
| 299 | XOR A$C[1],A$A[0][1],A$A[0][1] |
| 300 | || XOR B$C[1],B$A[0][1],B$A[0][1] |
| 301 | || XOR A$C[2],A$A[0][2],A$A[0][2] |
| 302 | || XOR B$C[2],B$A[0][2],B$A[0][2] |
| 303 | || XOR A$C[3],A$A[0][3],A$A[0][3] |
| 304 | || XOR B$C[3],B$A[0][3],B$A[0][3] |
| 305 | XOR A$C[4],A$A[0][4],A$A[0][4] |
| 306 | || XOR B$C[4],B$A[0][4],B$A[0][4] |
| 307 | || XOR A$C[0],A$A[0][0],A$A[0][0] ; A[0][0] ^= iotas[i++]; |
| 308 | || XOR B$C[0],B$A[0][0],B$A[0][0] |
| 309 | || EXTU $iotas,24,24,A0 ; A0 is A$C[0], as we done? |
| 310 | |
| 311 | ANDN A$A[1][2],A$A[1][1],A$C[4] |
| 312 | || ANDN B$A[1][2],B$A[1][1],B$C[4] |
| 313 | || ANDN A$A[1][3],A$A[1][2],A$C[1] |
| 314 | || ANDN B$A[1][3],B$A[1][2],B$C[1] |
| 315 | || ANDN A$A[1][4],A$A[1][3],A$C[2] |
| 316 | || ANDN B$A[1][4],B$A[1][3],B$C[2] |
| 317 | ANDN A$A[1][0],A$A[1][4],A$C[3] |
| 318 | || ANDN B$A[1][0],B$A[1][4],B$C[3] |
| 319 | || XOR A$C[4],A$A[1][0],A$A[1][0] |
| 320 | || XOR B$C[4],B$A[1][0],B$A[1][0] |
| 321 | || ANDN A$A[1][1],A$A[1][0],A$C[4] |
| 322 | || ANDN B$A[1][1],B$A[1][0],B$C[4] |
| 323 | XOR A$C[1],A$A[1][1],A$A[1][1] |
| 324 | || XOR B$C[1],B$A[1][1],B$A[1][1] |
| 325 | || XOR A$C[2],A$A[1][2],A$A[1][2] |
| 326 | || XOR B$C[2],B$A[1][2],B$A[1][2] |
| 327 | || XOR A$C[3],A$A[1][3],A$A[1][3] |
| 328 | || XOR B$C[3],B$A[1][3],B$A[1][3] |
| 329 | XOR A$C[4],A$A[1][4],A$A[1][4] |
| 330 | || XOR B$C[4],B$A[1][4],B$A[1][4] |
| 331 | |
| 332 | || ANDN A$A[2][2],A$A[2][1],A$C[4] |
| 333 | || ANDN B$A[2][2],B$A[2][1],B$C[4] |
| 334 | || ANDN A$A[2][3],A$A[2][2],A$C[1] |
| 335 | || ANDN B$A[2][3],B$A[2][2],B$C[1] |
| 336 | ANDN A$A[2][4],A$A[2][3],A$C[2] |
| 337 | || ANDN B$A[2][4],B$A[2][3],B$C[2] |
| 338 | || ANDN A$A[2][0],A$A[2][4],A$C[3] |
| 339 | || ANDN B$A[2][0],B$A[2][4],B$C[3] |
| 340 | || XOR A$C[4],A$A[2][0],A$A[2][0] |
| 341 | || XOR B$C[4],B$A[2][0],B$A[2][0] |
| 342 | ANDN A$A[2][1],A$A[2][0],A$C[4] |
| 343 | || ANDN B$A[2][1],B$A[2][0],B$C[4] |
| 344 | || XOR A$C[1],A$A[2][1],A$A[2][1] |
| 345 | || XOR B$C[1],B$A[2][1],B$A[2][1] |
| 346 | || XOR A$C[2],A$A[2][2],A$A[2][2] |
| 347 | || XOR B$C[2],B$A[2][2],B$A[2][2] |
| 348 | XOR A$C[3],A$A[2][3],A$A[2][3] |
| 349 | || XOR B$C[3],B$A[2][3],B$A[2][3] |
| 350 | || XOR A$C[4],A$A[2][4],A$A[2][4] |
| 351 | || XOR B$C[4],B$A[2][4],B$A[2][4] |
| 352 | |
| 353 | ANDN A$A[3][2],A$A[3][1],A$C[4] |
| 354 | || ANDN B$A[3][2],B$A[3][1],B$C[4] |
| 355 | || ANDN A$A[3][3],A$A[3][2],A$C[1] |
| 356 | || ANDN B$A[3][3],B$A[3][2],B$C[1] |
| 357 | || ANDN A$A[3][4],A$A[3][3],A$C[2] |
| 358 | || ANDN B$A[3][4],B$A[3][3],B$C[2] |
| 359 | ANDN A$A[3][0],A$A[3][4],A$C[3] |
| 360 | || ANDN B$A[3][0],B$A[3][4],B$C[3] |
| 361 | || XOR A$C[4],A$A[3][0],A$A[3][0] |
| 362 | || XOR B$C[4],B$A[3][0],B$A[3][0] |
| 363 | || ANDN A$A[3][1],A$A[3][0],A$C[4] |
| 364 | || ANDN B$A[3][1],B$A[3][0],B$C[4] |
| 365 | XOR A$C[1],A$A[3][1],A$A[3][1] |
| 366 | || XOR B$C[1],B$A[3][1],B$A[3][1] |
| 367 | || XOR A$C[2],A$A[3][2],A$A[3][2] |
| 368 | || XOR B$C[2],B$A[3][2],B$A[3][2] |
| 369 | || XOR A$C[3],A$A[3][3],A$A[3][3] |
| 370 | ||[A0] BNOP loop? |
| 371 | XOR B$C[3],B$A[3][3],B$A[3][3] |
| 372 | || XOR A$C[4],A$A[3][4],A$A[3][4] |
| 373 | || XOR B$C[4],B$A[3][4],B$A[3][4] |
| 374 | ||[!A0] LDDW *FP[-7],A3:A2 |
| 375 | ||[!A0] LDDW *SP[4], RA:B2 |
| 376 | |
| 377 | ANDN A$A[4][2],A$A[4][1],A$C[4] |
| 378 | || ANDN B$A[4][2],B$A[4][1],B$C[4] |
| 379 | || ANDN A$A[4][3],A$A[4][2],A$C[1] |
| 380 | || ANDN B$A[4][3],B$A[4][2],B$C[1] |
| 381 | || ANDN A$A[4][4],A$A[4][3],A$C[2] |
| 382 | || ANDN B$A[4][4],B$A[4][3],B$C[2] |
| 383 | ANDN A$A[4][0],A$A[4][4],A$C[3] |
| 384 | || ANDN B$A[4][0],B$A[4][4],B$C[3] |
| 385 | || XOR A$C[4],A$A[4][0],A$A[4][0] |
| 386 | || XOR B$C[4],B$A[4][0],B$A[4][0] |
| 387 | || ANDN A$A[4][1],A$A[4][0],A$C[4] |
| 388 | || ANDN B$A[4][1],B$A[4][0],B$C[4] |
| 389 | XOR A$C[1],A$A[4][1],A$A[4][1] |
| 390 | || XOR B$C[1],B$A[4][1],B$A[4][1] |
| 391 | || XOR A$C[2],A$A[4][2],A$A[4][2] |
| 392 | || XOR B$C[2],B$A[4][2],B$A[4][2] |
| 393 | || XOR A$C[3],A$A[4][3],A$A[4][3] |
| 394 | || XOR B$C[3],B$A[4][3],B$A[4][3] |
| 395 | XOR A$C[4],A$A[4][4],A$A[4][4] |
| 396 | || XOR B$C[4],B$A[4][4],B$A[4][4] |
| 397 | ;;===== branch to loop? is taken here |
| 398 | |
| 399 | BNOP RA,5 |
| 400 | .endasmfunc |
| 401 | |
| 402 | .newblock |
| 403 | .global _KeccakF1600 |
| 404 | .align 32 |
| 405 | _KeccakF1600: |
| 406 | .asmfunc stack_usage(80) |
| 407 | STW FP,*SP--(80) ; save frame pointer |
| 408 | || MV SP,FP |
| 409 | STDW B13:B12,*SP[9] |
| 410 | || STDW A13:A12,*FP[-4] |
| 411 | STDW B11:B10,*SP[8] |
| 412 | || STDW A11:A10,*FP[-5] |
| 413 | STW RA, *SP[15] |
| 414 | || STW A14,*FP[-6] |
| 415 | || MV A4,A2 |
| 416 | || ADD 4,A4,B2 |
| 417 | |
| 418 | LDW *A2++[2],A$A[0][0] ; load A[5][5] |
| 419 | || LDW *B2++[2],B$A[0][0] |
| 420 | LDW *A2++[2],A$A[0][1] |
| 421 | || LDW *B2++[2],B$A[0][1] |
| 422 | LDW *A2++[2],A$A[0][2] |
| 423 | || LDW *B2++[2],B$A[0][2] |
| 424 | LDW *A2++[2],A$A[0][3] |
| 425 | || LDW *B2++[2],B$A[0][3] |
| 426 | LDW *A2++[2],A$A[0][4] |
| 427 | || LDW *B2++[2],B$A[0][4] |
| 428 | |
| 429 | LDW *A2++[2],A$A[1][0] |
| 430 | || LDW *B2++[2],B$A[1][0] |
| 431 | LDW *A2++[2],A$A[1][1] |
| 432 | || LDW *B2++[2],B$A[1][1] |
| 433 | LDW *A2++[2],A$A[1][2] |
| 434 | || LDW *B2++[2],B$A[1][2] |
| 435 | LDW *A2++[2],A$A[1][3] |
| 436 | || LDW *B2++[2],B$A[1][3] |
| 437 | LDW *A2++[2],A$A[1][4] |
| 438 | || LDW *B2++[2],B$A[1][4] |
| 439 | |
| 440 | LDW *A2++[2],A$A[2][0] |
| 441 | || LDW *B2++[2],B$A[2][0] |
| 442 | LDW *A2++[2],A$A[2][1] |
| 443 | || LDW *B2++[2],B$A[2][1] |
| 444 | LDW *A2++[2],A$A[2][2] |
| 445 | || LDW *B2++[2],B$A[2][2] |
| 446 | LDW *A2++[2],A$A[2][3] |
| 447 | || LDW *B2++[2],B$A[2][3] |
| 448 | LDW *A2++[2],A$A[2][4] |
| 449 | || LDW *B2++[2],B$A[2][4] |
| 450 | |
| 451 | LDW *A2++[2],A$A[3][0] |
| 452 | || LDW *B2++[2],B$A[3][0] |
| 453 | LDW *A2++[2],A$A[3][1] |
| 454 | || LDW *B2++[2],B$A[3][1] |
| 455 | LDW *A2++[2],A$A[3][2] |
| 456 | || LDW *B2++[2],B$A[3][2] |
| 457 | LDW *A2++[2],A$A[3][3] |
| 458 | || LDW *B2++[2],B$A[3][3] |
| 459 | LDW *A2++[2],A$A[3][4] |
| 460 | || LDW *B2++[2],B$A[3][4] |
| 461 | || BNOP _KeccakF1600_int |
| 462 | |
| 463 | ADDKPC ret?,RA |
| 464 | || LDW *A2++[2],A$A[4][0] |
| 465 | || LDW *B2++[2],B$A[4][0] |
| 466 | LDW *A2++[2],A$A[4][1] |
| 467 | || LDW *B2++[2],B$A[4][1] |
| 468 | LDW *A2++[2],A$A[4][2] |
| 469 | || LDW *B2++[2],B$A[4][2] |
| 470 | LDW *A2++[2],A$A[4][3] |
| 471 | || LDW *B2++[2],B$A[4][3] |
| 472 | LDW *A2,A$A[4][4] |
| 473 | || LDW *B2,B$A[4][4] |
| 474 | || ADDK -192,A2 ; rewind |
| 475 | || ADDK -192,B2 |
| 476 | |
| 477 | .align 16 |
| 478 | ret?: |
| 479 | STW A$A[0][0],*A2++[2] ; store A[5][5] |
| 480 | || STW B$A[0][0],*B2++[2] |
| 481 | STW A$A[0][1],*A2++[2] |
| 482 | || STW B$A[0][1],*B2++[2] |
| 483 | STW A$A[0][2],*A2++[2] |
| 484 | || STW B$A[0][2],*B2++[2] |
| 485 | STW A$A[0][3],*A2++[2] |
| 486 | || STW B$A[0][3],*B2++[2] |
| 487 | STW A$A[0][4],*A2++[2] |
| 488 | || STW B$A[0][4],*B2++[2] |
| 489 | |
| 490 | STW A$A[1][0],*A2++[2] |
| 491 | || STW B$A[1][0],*B2++[2] |
| 492 | STW A$A[1][1],*A2++[2] |
| 493 | || STW B$A[1][1],*B2++[2] |
| 494 | STW A$A[1][2],*A2++[2] |
| 495 | || STW B$A[1][2],*B2++[2] |
| 496 | STW A$A[1][3],*A2++[2] |
| 497 | || STW B$A[1][3],*B2++[2] |
| 498 | STW A$A[1][4],*A2++[2] |
| 499 | || STW B$A[1][4],*B2++[2] |
| 500 | |
| 501 | STW A$A[2][0],*A2++[2] |
| 502 | || STW B$A[2][0],*B2++[2] |
| 503 | STW A$A[2][1],*A2++[2] |
| 504 | || STW B$A[2][1],*B2++[2] |
| 505 | STW A$A[2][2],*A2++[2] |
| 506 | || STW B$A[2][2],*B2++[2] |
| 507 | STW A$A[2][3],*A2++[2] |
| 508 | || STW B$A[2][3],*B2++[2] |
| 509 | STW A$A[2][4],*A2++[2] |
| 510 | || STW B$A[2][4],*B2++[2] |
| 511 | |
| 512 | STW A$A[3][0],*A2++[2] |
| 513 | || STW B$A[3][0],*B2++[2] |
| 514 | STW A$A[3][1],*A2++[2] |
| 515 | || STW B$A[3][1],*B2++[2] |
| 516 | STW A$A[3][2],*A2++[2] |
| 517 | || STW B$A[3][2],*B2++[2] |
| 518 | STW A$A[3][3],*A2++[2] |
| 519 | || STW B$A[3][3],*B2++[2] |
| 520 | STW A$A[3][4],*A2++[2] |
| 521 | || STW B$A[3][4],*B2++[2] |
| 522 | |
| 523 | LDW *SP[15],RA |
| 524 | || LDW *FP[-6],A14 |
| 525 | |
| 526 | STW A$A[4][0],*A2++[2] |
| 527 | || STW B$A[4][0],*B2++[2] |
| 528 | STW A$A[4][1],*A2++[2] |
| 529 | || STW B$A[4][1],*B2++[2] |
| 530 | STW A$A[4][2],*A2++[2] |
| 531 | || STW B$A[4][2],*B2++[2] |
| 532 | STW A$A[4][3],*A2++[2] |
| 533 | || STW B$A[4][3],*B2++[2] |
| 534 | STW A$A[4][4],*A2 |
| 535 | || STW B$A[4][4],*B2 |
| 536 | || ADDK -192,A2 ; rewind |
| 537 | |
| 538 | MV A2,A4 ; return original A4 |
| 539 | || LDDW *SP[8], B11:B10 |
| 540 | || LDDW *FP[-5],A11:A10 |
| 541 | LDDW *SP[9], B13:B12 |
| 542 | || LDDW *FP[-4],A13:A12 |
| 543 | || BNOP RA |
| 544 | LDW *++SP(80),FP ; restore frame pointer |
| 545 | NOP 4 ; wait till FP is committed |
| 546 | .endasmfunc |
| 547 | |
| 548 | .newblock |
| 549 | .asg B2,BSZ |
| 550 | .asg A2,INP |
| 551 | .asg A3,LEN |
| 552 | .global _SHA3_absorb |
| 553 | .align 32 |
| 554 | _SHA3_absorb: |
| 555 | .asmfunc stack_usage(80) |
| 556 | STW FP,*SP--(80) ; save frame pointer |
| 557 | || MV SP,FP |
| 558 | STDW B13:B12,*SP[9] |
| 559 | || STDW A13:A12,*FP[-4] |
| 560 | STDW B11:B10,*SP[8] |
| 561 | || STDW A11:A10,*FP[-5] |
| 562 | STW RA, *SP[15] |
| 563 | || STW A14,*FP[-6] |
| 564 | |
| 565 | STW A4,*SP[1] ; save A[][] |
| 566 | || MV B4,INP ; reassign arguments |
| 567 | || MV A6,LEN |
| 568 | || MV B6,BSZ |
| 569 | || ADD 4,A4,B4 |
| 570 | |
| 571 | LDW *A4++[2],A$A[0][0] ; load A[5][5] |
| 572 | || LDW *B4++[2],B$A[0][0] |
| 573 | LDW *A4++[2],A$A[0][1] |
| 574 | || LDW *B4++[2],B$A[0][1] |
| 575 | LDW *A4++[2],A$A[0][2] |
| 576 | || LDW *B4++[2],B$A[0][2] |
| 577 | LDW *A4++[2],A$A[0][3] |
| 578 | || LDW *B4++[2],B$A[0][3] |
| 579 | LDW *A4++[2],A$A[0][4] |
| 580 | || LDW *B4++[2],B$A[0][4] |
| 581 | |
| 582 | LDW *A4++[2],A$A[1][0] |
| 583 | || LDW *B4++[2],B$A[1][0] |
| 584 | LDW *A4++[2],A$A[1][1] |
| 585 | || LDW *B4++[2],B$A[1][1] |
| 586 | LDW *A4++[2],A$A[1][2] |
| 587 | || LDW *B4++[2],B$A[1][2] |
| 588 | LDW *A4++[2],A$A[1][3] |
| 589 | || LDW *B4++[2],B$A[1][3] |
| 590 | LDW *A4++[2],A$A[1][4] |
| 591 | || LDW *B4++[2],B$A[1][4] |
| 592 | |
| 593 | LDW *A4++[2],A$A[2][0] |
| 594 | || LDW *B4++[2],B$A[2][0] |
| 595 | LDW *A4++[2],A$A[2][1] |
| 596 | || LDW *B4++[2],B$A[2][1] |
| 597 | LDW *A4++[2],A$A[2][2] |
| 598 | || LDW *B4++[2],B$A[2][2] |
| 599 | LDW *A4++[2],A$A[2][3] |
| 600 | || LDW *B4++[2],B$A[2][3] |
| 601 | LDW *A4++[2],A$A[2][4] |
| 602 | || LDW *B4++[2],B$A[2][4] |
| 603 | |
| 604 | LDW *A4++[2],A$A[3][0] |
| 605 | || LDW *B4++[2],B$A[3][0] |
| 606 | LDW *A4++[2],A$A[3][1] |
| 607 | || LDW *B4++[2],B$A[3][1] |
| 608 | LDW *A4++[2],A$A[3][2] |
| 609 | || LDW *B4++[2],B$A[3][2] |
| 610 | LDW *A4++[2],A$A[3][3] |
| 611 | || LDW *B4++[2],B$A[3][3] |
| 612 | LDW *A4++[2],A$A[3][4] |
| 613 | || LDW *B4++[2],B$A[3][4] |
| 614 | |
| 615 | LDW *A4++[2],A$A[4][0] |
| 616 | || LDW *B4++[2],B$A[4][0] |
| 617 | LDW *A4++[2],A$A[4][1] |
| 618 | || LDW *B4++[2],B$A[4][1] |
| 619 | LDW *A4++[2],A$A[4][2] |
| 620 | || LDW *B4++[2],B$A[4][2] |
| 621 | LDW *A4++[2],A$A[4][3] |
| 622 | || LDW *B4++[2],B$A[4][3] |
| 623 | LDW *A4,A$A[4][4] |
| 624 | || LDW *B4,B$A[4][4] |
| 625 | || ADDKPC loop?,RA |
| 626 | STDW RA:BSZ,*SP[4] |
| 627 | |
| 628 | loop?: |
| 629 | CMPLTU LEN,BSZ,A0 ; len < bsz? |
| 630 | || SHRU BSZ,3,BSZ |
| 631 | [A0] BNOP ret? |
| 632 | ||[A0] ZERO BSZ |
| 633 | ||[A0] LDW *SP[1],A2 ; pull A[][] |
| 634 | [BSZ] LDNDW *INP++,A1:A0 |
| 635 | ||[BSZ] SUB LEN,8,LEN |
| 636 | ||[BSZ] SUB BSZ,1,BSZ |
| 637 | NOP 4 |
| 638 | ___ |
# Emit the SHA3_absorb code for the first 24 lanes, A[0][0] .. A[4][3]:
# rows 0..3 contribute five lanes each, row 4 only four.  The 25th lane,
# A[4][4], is emitted by the heredoc that follows this loop.
#
# Every iteration appends the same template with A[$y][$x] substituted:
#   - an optional byte swap of A1:A0 when assembling for big-endian,
#   - a branch to _KeccakF1600_cheat, predicated on BSZ having reached
#     zero, together with a store of LEN:INP to the stack,
#   - a load of the next 8 input bytes into A1:A0 while BSZ is non-zero,
#     with LEN decremented by 8 and BSZ by 1,
#   - DEAL/PACK2/PACKH2 on A0/A1 (presumably the bit de-interleave into
#     the two halves - confirm against the C64x instruction reference),
#     then XOR of A0 into A<lane> and A1 into B<lane>.
# $x and $y are not declared with "my" in this span.
| 639 | for ($y = 0; $y < 5; $y++) { |
| 640 | for ($x = 0; $x < ($y<4 ? 5 : 4); $x++) { |
| 641 | $code.=<<___; |
| 642 | .if .BIG_ENDIAN |
| 643 | SWAP2 A0,A1 |
| 644 | || SWAP2 A1,A0 |
| 645 | SWAP4 A0,A0 |
| 646 | SWAP4 A1,A1 |
| 647 | ||[!BSZ]BNOP _KeccakF1600_cheat |
| 648 | ||[!BSZ]STDW LEN:INP,*SP[3] |
| 649 | || DEAL A0,A0 |
| 650 | .else |
| 651 | [!BSZ]BNOP _KeccakF1600_cheat |
| 652 | ||[!BSZ]STDW LEN:INP,*SP[3] |
| 653 | || DEAL A0,A0 |
| 654 | .endif |
| 655 | [BSZ] LDNDW *INP++,A1:A0 |
| 656 | || DEAL A1,A1 |
| 657 | [BSZ] SUB LEN,8,LEN |
| 658 | ||[BSZ] SUB BSZ,1,BSZ |
| 659 | PACK2 A1,A0,A0 |
| 660 | || PACKH2 A1,A0,A1 |
| 661 | XOR A0,A$A[$y][$x],A$A[$y][$x] |
| 662 | XOR A1,B$A[$y][$x],B$A[$y][$x] |
| 663 | ___ |
| 664 | } |
| 665 | } |
| 666 | $code.=<<___; |
| 667 | .if .BIG_ENDIAN |
| 668 | SWAP2 A0,A1 |
| 669 | || SWAP2 A1,A0 |
| 670 | SWAP4 A0,A0 |
| 671 | SWAP4 A1,A1 |
| 672 | .endif |
| 673 | BNOP _KeccakF1600_cheat |
| 674 | || STDW LEN:INP,*SP[3] |
| 675 | || DEAL A0,A0 |
| 676 | DEAL A1,A1 |
| 677 | NOP |
| 678 | PACK2 A1,A0,A0 |
| 679 | || PACKH2 A1,A0,A1 |
| 680 | XOR A0,A$A[4][4],A$A[4][4] |
| 681 | XOR A1,B$A[4][4],B$A[4][4] |
| 682 | |
| 683 | .align 16 |
| 684 | ret?: |
| 685 | MV LEN,A4 ; return value |
| 686 | || ADD 4,A2,B2 |
| 687 | |
| 688 | STW A$A[0][0],*A2++[2] ; store A[5][5] |
| 689 | || STW B$A[0][0],*B2++[2] |
| 690 | STW A$A[0][1],*A2++[2] |
| 691 | || STW B$A[0][1],*B2++[2] |
| 692 | STW A$A[0][2],*A2++[2] |
| 693 | || STW B$A[0][2],*B2++[2] |
| 694 | STW A$A[0][3],*A2++[2] |
| 695 | || STW B$A[0][3],*B2++[2] |
| 696 | STW A$A[0][4],*A2++[2] |
| 697 | || STW B$A[0][4],*B2++[2] |
| 698 | |
| 699 | STW A$A[1][0],*A2++[2] |
| 700 | || STW B$A[1][0],*B2++[2] |
| 701 | STW A$A[1][1],*A2++[2] |
| 702 | || STW B$A[1][1],*B2++[2] |
| 703 | STW A$A[1][2],*A2++[2] |
| 704 | || STW B$A[1][2],*B2++[2] |
| 705 | STW A$A[1][3],*A2++[2] |
| 706 | || STW B$A[1][3],*B2++[2] |
| 707 | STW A$A[1][4],*A2++[2] |
| 708 | || STW B$A[1][4],*B2++[2] |
| 709 | |
| 710 | STW A$A[2][0],*A2++[2] |
| 711 | || STW B$A[2][0],*B2++[2] |
| 712 | STW A$A[2][1],*A2++[2] |
| 713 | || STW B$A[2][1],*B2++[2] |
| 714 | STW A$A[2][2],*A2++[2] |
| 715 | || STW B$A[2][2],*B2++[2] |
| 716 | STW A$A[2][3],*A2++[2] |
| 717 | || STW B$A[2][3],*B2++[2] |
| 718 | STW A$A[2][4],*A2++[2] |
| 719 | || STW B$A[2][4],*B2++[2] |
| 720 | |
| 721 | LDW *SP[15],RA |
| 722 | || LDW *FP[-6],A14 |
| 723 | |
| 724 | STW A$A[3][0],*A2++[2] |
| 725 | || STW B$A[3][0],*B2++[2] |
| 726 | STW A$A[3][1],*A2++[2] |
| 727 | || STW B$A[3][1],*B2++[2] |
| 728 | STW A$A[3][2],*A2++[2] |
| 729 | || STW B$A[3][2],*B2++[2] |
| 730 | STW A$A[3][3],*A2++[2] |
| 731 | || STW B$A[3][3],*B2++[2] |
| 732 | STW A$A[3][4],*A2++[2] |
| 733 | || STW B$A[3][4],*B2++[2] |
| 734 | |
| 735 | LDDW *SP[8], B11:B10 |
| 736 | || LDDW *FP[-5],A11:A10 |
| 737 | LDDW *SP[9], B13:B12 |
| 738 | || LDDW *FP[-4],A13:A12 |
| 739 | BNOP RA |
| 740 | || LDW *++SP(80),FP ; restore frame pointer |
| 741 | |
| 742 | STW A$A[4][0],*A2++[2] |
| 743 | || STW B$A[4][0],*B2++[2] |
| 744 | STW A$A[4][1],*A2++[2] |
| 745 | || STW B$A[4][1],*B2++[2] |
| 746 | STW A$A[4][2],*A2++[2] |
| 747 | || STW B$A[4][2],*B2++[2] |
| 748 | STW A$A[4][3],*A2++[2] |
| 749 | || STW B$A[4][3],*B2++[2] |
| 750 | STW A$A[4][4],*A2++[2] |
| 751 | || STW B$A[4][4],*B2++[2] |
| 752 | .endasmfunc |
| 753 | |
| 754 | .newblock |
| 755 | .global _SHA3_squeeze |
| 756 | .asg A12,OUT |
| 757 | .asg A13,LEN |
| 758 | .asg A14,BSZ |
| 759 | .align 32 |
| 760 | _SHA3_squeeze: |
| 761 | .asmfunc stack_usage(24) |
| 762 | STW FP,*SP--(24) ; save frame pointer |
| 763 | || MV SP,FP |
| 764 | STW RA, *SP[5] |
| 765 | || STW A14,*FP[-2] |
| 766 | STDW A13:A12,*FP[-2] |
| 767 | || MV B4,OUT ; reassign arguments |
| 768 | MV A6,LEN |
| 769 | || MV B6,BSZ |
| 770 | |
| 771 | loop?: |
| 772 | LDW *SP[5],RA ; reload RA |
| 773 | || SHRU BSZ,3,A1 |
| 774 | || MV A4,A8 |
| 775 | || ADD 4,A4,B8 |
| 776 | block?: |
| 777 | CMPLTU LEN,8,A0 ; len < 8? |
| 778 | [A0] BNOP tail? |
| 779 | LDW *A8++[2],A9 |
| 780 | || LDW *B8++[2],B9 |
| 781 | || SUB LEN,8,LEN ; len -= 8 |
| 782 | MV LEN,A0 |
| 783 | || SUB A1,1,A1 ; bsz-- |
| 784 | || NOP 4 |
| 785 | .if .BIG_ENDIAN |
| 786 | SWAP4 A9,A9 |
| 787 | || SWAP4 B9,B9 |
| 788 | SWAP2 A9,A9 |
| 789 | || SWAP2 B9,B9 |
| 790 | .endif |
| 791 | [!A0] BNOP ret? |
| 792 | ||[!A0] ZERO A1 |
| 793 | PACK2 B9,A9,B7 |
| 794 | ||[A1] BNOP block? |
| 795 | PACKH2 B9,A9,B9 |
| 796 | || SHFL B7,B7 |
| 797 | SHFL B9,B9 |
| 798 | STNW B7,*OUT++ |
| 799 | STNW B9,*OUT++ |
| 800 | NOP |
| 801 | |
| 802 | BNOP _KeccakF1600,4 |
| 803 | ADDKPC loop?,RA |
| 804 | |
| 805 | .align 16 |
| 806 | tail?: |
| 807 | .if .BIG_ENDIAN |
| 808 | SWAP4 A9,A9 |
| 809 | || SWAP4 B9,B9 |
| 810 | SWAP2 A9,A9 |
| 811 | || SWAP2 B9,B9 |
| 812 | .endif |
| 813 | PACK2 B9,A9,B7 |
| 814 | PACKH2 B9,A9,B9 |
| 815 | || SHFL B7,B7 |
| 816 | SHFL B9,B9 |
| 817 | |
| 818 | STB B7,*OUT++ |
| 819 | || SHRU B7,8,B7 |
| 820 | || ADD LEN,7,A0 |
| 821 | [A0] STB B7,*OUT++ |
| 822 | ||[A0] SHRU B7,8,B7 |
| 823 | ||[A0] SUB A0,1,A0 |
| 824 | [A0] STB B7,*OUT++ |
| 825 | ||[A0] SHRU B7,8,B7 |
| 826 | ||[A0] SUB A0,1,A0 |
| 827 | [A0] STB B7,*OUT++ |
| 828 | ||[A0] SUB A0,1,A0 |
| 829 | [A0] STB B9,*OUT++ |
| 830 | ||[A0] SHRU B9,8,B9 |
| 831 | ||[A0] SUB A0,1,A0 |
| 832 | [A0] STB B9,*OUT++ |
| 833 | ||[A0] SHRU B9,8,B9 |
| 834 | ||[A0] SUB A0,1,A0 |
| 835 | [A0] STB B9,*OUT++ |
| 836 | |
| 837 | ret?: |
| 838 | LDDW *FP[-2],A13:A12 |
| 839 | BNOP RA |
| 840 | || LDW *FP[-2],A14 |
| 841 | LDW *++SP(24),FP ; restore frame pointer |
| 842 | NOP 4 ; wait till FP is committed |
| 843 | .endasmfunc |
| 844 | |
| 845 | .if __TI_EABI__ |
| 846 | .sect ".text:sha_asm.const" |
| 847 | .else |
| 848 | .sect ".const:sha_asm" |
| 849 | .endif |
| 850 | .align 256 |
| 851 | .uword 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 |
| 852 | iotas: |
| 853 | .uword 0x00000001, 0x00000000 |
| 854 | .uword 0x00000000, 0x00000089 |
| 855 | .uword 0x00000000, 0x8000008b |
| 856 | .uword 0x00000000, 0x80008080 |
| 857 | .uword 0x00000001, 0x0000008b |
| 858 | .uword 0x00000001, 0x00008000 |
| 859 | .uword 0x00000001, 0x80008088 |
| 860 | .uword 0x00000001, 0x80000082 |
| 861 | .uword 0x00000000, 0x0000000b |
| 862 | .uword 0x00000000, 0x0000000a |
| 863 | .uword 0x00000001, 0x00008082 |
| 864 | .uword 0x00000000, 0x00008003 |
| 865 | .uword 0x00000001, 0x0000808b |
| 866 | .uword 0x00000001, 0x8000000b |
| 867 | .uword 0x00000001, 0x8000008a |
| 868 | .uword 0x00000001, 0x80000081 |
| 869 | .uword 0x00000000, 0x80000081 |
| 870 | .uword 0x00000000, 0x80000008 |
| 871 | .uword 0x00000000, 0x00000083 |
| 872 | .uword 0x00000000, 0x80008003 |
| 873 | .uword 0x00000001, 0x80008088 |
| 874 | .uword 0x00000000, 0x80000088 |
| 875 | .uword 0x00000001, 0x00008000 |
| 876 | .uword 0x00000000, 0x80008082 |
| 877 | |
| 878 | .cstring "Keccak-1600 absorb and squeeze for C64x, CRYPTOGAMS by <appro\@openssl.org>" |
| 879 | .align 4 |
| 880 | ___ |
| 881 | |
# Emit the generated assembly.
#
# The last command-line argument names the output file.  When it is absent
# (or empty) the text is written to the inherited STDOUT instead.
$output = pop;
if (defined $output && length $output) {
    # Three-argument open takes the file name literally (no mode characters
    # are parsed out of it), and a failure is reported here rather than
    # surfacing later as an obscure "error closing STDOUT".
    open STDOUT, '>', $output or die "can't open $output: $!";
}
print $code;
# Buffered write errors only show up at close time, so keep this check.
close STDOUT or die "error closing STDOUT: $!";