| #! /usr/bin/env perl |
| # Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved. |
| # |
| # Licensed under the OpenSSL license (the "License"). You may not use |
| # this file except in compliance with the License. You can obtain a copy |
| # in the file LICENSE in the source distribution or at |
| # https://www.openssl.org/source/license.html |
| |
| # |
| # ==================================================================== |
| # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL |
| # project. The module is, however, dual licensed under OpenSSL and |
| # CRYPTOGAMS licenses depending on where you obtain it. For further |
| # details see http://www.openssl.org/~appro/cryptogams/. |
| # ==================================================================== |
| # |
| # This module implements Poly1305 hash for PowerPC. |
| # |
| # June 2015 |
| # |
| # Numbers are cycles per processed byte with poly1305_blocks alone, |
| # and improvement coefficients relative to gcc-generated code. |
| # |
| # -m32 -m64 |
| # |
| # Freescale e300 14.8/+80% - |
| # PPC74x0 7.60/+60% - |
| # PPC970 7.00/+114% 3.51/+205% |
| # POWER7 3.75/+260% 1.93/+100% |
| # POWER8 - 2.03/+200% |
| # POWER9 - 2.00/+150% |
| # |
| # Do we need a floating-point implementation for PPC? Results presented |
| # in poly1305_ieee754.c are tricky to compare to, because they are for |
| # compiler-generated code. On the other hand, it's known that floating- |
| # point performance can be dominated by FPU latency, which means that |
| # there is a limit even for ideally optimized (and even vectorized) code. |
| # And this limit is estimated to be higher than the above -m64 results. |
| # In other words, a floating-point implementation is meaningful to |
| # consider only in a 32-bit application context. We probably have to |
| # recognize that 32-bit builds are getting less popular on high-end |
| # systems and therefore tend to target embedded ones, which might not |
| # even have an FPU... |
| # |
| # On a side note, Power ISA 2.07 enables a vector base 2^26 implementation, |
| # and POWER8 might have the capacity to break the 1.0 cycle per byte barrier... |
| |
| # The first command-line argument names the target flavour (e.g. linux64le). |
| $flavour = shift; |
| |
| # Select the save-slot size, the link-register save offset and the |
| # width-specific compare / store-with-update / load / store mnemonics that |
| # are interpolated into the assembly templates further down. |
| if ($flavour =~ /64/) { |
| ($SIZE_T,$UCMP,$STU,$POP,$PUSH) = (8,"cmpld","stdu","ld","std"); |
| $LRSAVE = 2*$SIZE_T; |
| } |
| elsif ($flavour =~ /32/) { |
| ($SIZE_T,$UCMP,$STU,$POP,$PUSH) = (4,"cmplw","stwu","lwz","stw"); |
| $LRSAVE = $SIZE_T; |
| } |
| else { |
| die "nonsense $flavour"; |
| } |
| |
| # Endianness follows the flavour name: a trailing "le" (e.g. linux64le) |
| # makes $LITTLE_ENDIAN non-zero (it is set to $SIZE_T), otherwise it is 0. |
| $LITTLE_ENDIAN = $flavour =~ /le$/ ? $SIZE_T : 0; |
| |
| # Directory part of this script's path (including the trailing separator), |
| # or the empty string when the script was invoked without a directory. |
| $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : ""; |
| |
| # Look for the perlasm translator next to this script first, then in |
| # ../../perlasm relative to it; give up if neither file exists. |
| ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or |
| ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or |
| die "can't locate ppc-xlate.pl"; |
| |
| # Pipe everything printed to STDOUT through the translator. The next |
| # command-line argument is appended to the translator's command line. |
| # The low-precedence "or" is required: "||" would bind to the command |
| # string, which is always true, so a failed open would never be reported. |
| open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!"; |
| |
| # Size of the stack frame the templates allocate: 24 save slots. |
| $FRAME = $SIZE_T*24; |
| |
| # Register used as the stack pointer in the prologue/epilogue templates. |
| $sp = "r1"; |
| # Registers r3..r6 carry ctx, inp, len and padbit in the templates. |
| my ($ctx,$inp,$len,$padbit) = ("r3","r4","r5","r6"); |
| # poly1305_emit refers to the same two registers as $mac and $nonce. |
| my ($mac,$nonce) = ($inp,$len); |
| # r0 doubles as the scratch/mask register. |
| my $mask = "r0"; |
| |
| # Start the generated assembly with the common header. |
| $code = <<___; |
| .machine "any" |
| .text |
| ___ |
| if ($flavour =~ /64/) { |
| ############################################################################### |
| # base 2^64 implementation |
| # |
| # Emitted for 64-bit flavours. The templates keep the hash value as three |
| # 64-bit words at offsets 0, 8 and 16 of the context and the clamped key as |
| # two 64-bit words at offsets 32 and 40. |
| |
| # Symbolic register names for the templates below: $h0-$h2 and $d0-$d2 map |
| # to r7-r12; $r0, $r1, $s1, $t0 and $t1 map to r27-r31, which |
| # poly1305_blocks saves on entry and restores before returning. |
| my ($h0,$h1,$h2,$d0,$d1,$d2, $r0,$r1,$s1, $t0,$t1) = map("r$_",(7..12,27..31)); |
| |
| # poly1305_init_int, entry: zero the three hash words, then branch to |
| # Lno_key (skipping key setup) when the key pointer in $inp equals zero. |
| $code.=<<___; |
| .globl .poly1305_init_int |
| .align 4 |
| .poly1305_init_int: |
| xor r0,r0,r0 |
| std r0,0($ctx) # zero hash value |
| std r0,8($ctx) |
| std r0,16($ctx) |
| |
| $UCMP $inp,r0 |
| beq- Lno_key |
| ___ |
| # Little-endian flavours read the 16 bytes of key material with two plain |
| # doubleword loads. |
| $code.=<<___ if ($LITTLE_ENDIAN); |
| ld $d0,0($inp) # load key material |
| ld $d1,8($inp) |
| ___ |
| # Other flavours read four byte-reversed words (lwbrx) at offsets 0, 4, 8 |
| # and 12 and pair them into $d0 and $d1 with insrdi. |
| $code.=<<___ if (!$LITTLE_ENDIAN); |
| li $h0,4 |
| lwbrx $d0,0,$inp # load key material |
| li $d1,8 |
| lwbrx $h0,$h0,$inp |
| li $h1,12 |
| lwbrx $d1,$d1,$inp |
| lwbrx $h1,$h1,$inp |
| insrdi $d0,$h0,32,0 |
| insrdi $d1,$h1,32,0 |
| ___ |
| # This template finishes poly1305_init_int and opens poly1305_blocks. |
| # |
| # init: builds the masks 0x0ffffffc0fffffff (in $h0) and 0x0ffffffc0ffffffc |
| # (in $h1), ANDs them into the two key words, stores the result at offsets |
| # 32 and 40 of the context and returns with r3 cleared. The Lno_key path |
| # shares that return. |
| # |
| # blocks: shifts $len right by four to get the loop count (each iteration |
| # consumes 16 bytes of input) and returns straight away through Labort when |
| # the count is zero. Otherwise it allocates a $FRAME-byte frame, saves |
| # r27-r31 and the link register, loads the key and hash words from the |
| # context, precomputes s1 = r1 + (r1>>2), moves the count into CTR and |
| # sets $mask to 3 before entering Loop. |
| $code.=<<___; |
| lis $h1,0xfff # 0x0fff0000 |
| ori $h1,$h1,0xfffc # 0x0ffffffc |
| insrdi $h1,$h1,32,0 # 0x0ffffffc0ffffffc |
| ori $h0,$h1,3 # 0x0ffffffc0fffffff |
| |
| and $d0,$d0,$h0 |
| and $d1,$d1,$h1 |
| |
| std $d0,32($ctx) # store key |
| std $d1,40($ctx) |
| |
| Lno_key: |
| xor r3,r3,r3 |
| blr |
| .long 0 |
| .byte 0,12,0x14,0,0,0,2,0 |
| .size .poly1305_init_int,.-.poly1305_init_int |
| |
| .globl .poly1305_blocks |
| .align 4 |
| .poly1305_blocks: |
| srdi. $len,$len,4 |
| beq- Labort |
| |
| $STU $sp,-$FRAME($sp) |
| mflr r0 |
| $PUSH r27,`$FRAME-$SIZE_T*5`($sp) |
| $PUSH r28,`$FRAME-$SIZE_T*4`($sp) |
| $PUSH r29,`$FRAME-$SIZE_T*3`($sp) |
| $PUSH r30,`$FRAME-$SIZE_T*2`($sp) |
| $PUSH r31,`$FRAME-$SIZE_T*1`($sp) |
| $PUSH r0,`$FRAME+$LRSAVE`($sp) |
| |
| ld $r0,32($ctx) # load key |
| ld $r1,40($ctx) |
| |
| ld $h0,0($ctx) # load hash value |
| ld $h1,8($ctx) |
| ld $h2,16($ctx) |
| |
| srdi $s1,$r1,2 |
| mtctr $len |
| add $s1,$s1,$r1 # s1 = r1 + r1>>2 |
| li $mask,3 |
| b Loop |
| |
| .align 4 |
| Loop: |
| ___ |
| # Loop, step 1: fetch the next 16 input bytes into $t0 and $t1 (they are |
| # added to h0 and h1 respectively right after). Little-endian flavours use |
| # two plain doubleword loads. |
| $code.=<<___ if ($LITTLE_ENDIAN); |
| ld $t0,0($inp) # load input |
| ld $t1,8($inp) |
| ___ |
| # Other flavours read four byte-reversed words (lwbrx) at offsets 0, 4, 8 |
| # and 12 and pair them into $t0 and $t1 with insrdi; $d0 and $d1 serve as |
| # scratch here and are overwritten by the multiplication that follows. |
| $code.=<<___ if (!$LITTLE_ENDIAN); |
| li $d0,4 |
| lwbrx $t0,0,$inp # load input |
| li $t1,8 |
| lwbrx $d0,$d0,$inp |
| li $d1,12 |
| lwbrx $t1,$t1,$inp |
| lwbrx $d1,$d1,$inp |
| insrdi $t0,$d0,32,0 |
| insrdi $t1,$d1,32,0 |
| ___ |
| # This template holds the rest of the Loop body, the poly1305_blocks |
| # epilogue and the first half of poly1305_emit. |
| # |
| # Loop: advances $inp by 16, adds the loaded block to h0:h1 and $padbit |
| # (plus carry) to h2, then forms the products listed in the inline comments |
| # using r0, r1 and s1 = r1 + (r1>>2), accumulating them in d0:d1:d2. The |
| # "final reduction step" keeps the low two bits of d2 as the new h2 and |
| # adds (d2 & ~3) + ((d2 & ~3) >> 2) to the low word, propagating the carry |
| # through h1 and h2. bdnz repeats the body until CTR reaches zero. |
| # |
| # Epilogue: stores h0-h2 back to offsets 0, 8 and 16 of the context, |
| # restores r27-r31 and releases the frame. Labort is the shared return. |
| # |
| # emit: loads the three hash words and the two nonce doublewords, computes |
| # h + 5 into d0:d1:d2 and builds $mask = -(d2 >> 2). The andc/and/or |
| # sequence then keeps h0:h1 where the mask bits are clear and takes d0:d1 |
| # where they are set. |
| $code.=<<___; |
| addi $inp,$inp,16 |
| |
| addc $h0,$h0,$t0 # accumulate input |
| adde $h1,$h1,$t1 |
| |
| mulld $d0,$h0,$r0 # h0*r0 |
| mulhdu $d1,$h0,$r0 |
| adde $h2,$h2,$padbit |
| |
| mulld $t0,$h1,$s1 # h1*5*r1 |
| mulhdu $t1,$h1,$s1 |
| addc $d0,$d0,$t0 |
| adde $d1,$d1,$t1 |
| |
| mulld $t0,$h0,$r1 # h0*r1 |
| mulhdu $d2,$h0,$r1 |
| addc $d1,$d1,$t0 |
| addze $d2,$d2 |
| |
| mulld $t0,$h1,$r0 # h1*r0 |
| mulhdu $t1,$h1,$r0 |
| addc $d1,$d1,$t0 |
| adde $d2,$d2,$t1 |
| |
| mulld $t0,$h2,$s1 # h2*5*r1 |
| mulld $t1,$h2,$r0 # h2*r0 |
| addc $d1,$d1,$t0 |
| adde $d2,$d2,$t1 |
| |
| andc $t0,$d2,$mask # final reduction step |
| and $h2,$d2,$mask |
| srdi $t1,$t0,2 |
| add $t0,$t0,$t1 |
| addc $h0,$d0,$t0 |
| addze $h1,$d1 |
| addze $h2,$h2 |
| |
| bdnz Loop |
| |
| std $h0,0($ctx) # store hash value |
| std $h1,8($ctx) |
| std $h2,16($ctx) |
| |
| $POP r27,`$FRAME-$SIZE_T*5`($sp) |
| $POP r28,`$FRAME-$SIZE_T*4`($sp) |
| $POP r29,`$FRAME-$SIZE_T*3`($sp) |
| $POP r30,`$FRAME-$SIZE_T*2`($sp) |
| $POP r31,`$FRAME-$SIZE_T*1`($sp) |
| addi $sp,$sp,$FRAME |
| Labort: |
| blr |
| .long 0 |
| .byte 0,12,4,1,0x80,5,4,0 |
| .size .poly1305_blocks,.-.poly1305_blocks |
| |
| .globl .poly1305_emit |
| .align 4 |
| .poly1305_emit: |
| ld $h0,0($ctx) # load hash |
| ld $h1,8($ctx) |
| ld $h2,16($ctx) |
| ld $padbit,0($nonce) # load nonce |
| ld $nonce,8($nonce) |
| |
| addic $d0,$h0,5 # compare to modulus |
| addze $d1,$h1 |
| addze $d2,$h2 |
| |
| srdi $mask,$d2,2 # did it carry/borrow? |
| neg $mask,$mask |
| |
| andc $h0,$h0,$mask |
| and $d0,$d0,$mask |
| andc $h1,$h1,$mask |
| and $d1,$d1,$mask |
| or $h0,$h0,$d0 |
| or $h1,$h1,$d1 |
| ___ |
| # poly1305_emit, second half. |
| # Non-little-endian flavours first swap the 32-bit halves of both nonce |
| # doublewords (rotate left by 32). |
| $code.=<<___ if (!$LITTLE_ENDIAN); |
| rotldi $padbit,$padbit,32 # flip nonce words |
| rotldi $nonce,$nonce,32 |
| ___ |
| # Add the two nonce doublewords to h0:h1, carrying from the low word into |
| # the high one. |
| $code.=<<___; |
| addc $h0,$h0,$padbit # accumulate nonce |
| adde $h1,$h1,$nonce |
| ___ |
| # Little-endian flavours write the 16-byte result with two doubleword |
| # stores. |
| $code.=<<___ if ($LITTLE_ENDIAN); |
| std $h0,0($mac) # write result |
| std $h1,8($mac) |
| ___ |
| # Other flavours write four byte-reversed words (stwbrx): the low half of |
| # h0 at offset 0, its high half at 4, the low half of h1 at 8 and its high |
| # half at 12. The high halves are extracted with extrdi. |
| $code.=<<___ if (!$LITTLE_ENDIAN); |
| extrdi r0,$h0,32,0 |
| li $d0,4 |
| stwbrx $h0,0,$mac # write result |
| extrdi $h0,$h1,32,0 |
| li $d1,8 |
| stwbrx r0,$d0,$mac |
| li $d2,12 |
| stwbrx $h1,$d1,$mac |
| stwbrx $h0,$d2,$mac |
| ___ |
| # Return and close the function with its trailing table and .size entry. |
| $code.=<<___; |
| blr |
| .long 0 |
| .byte 0,12,0x14,0,0,0,3,0 |
| .size .poly1305_emit,.-.poly1305_emit |
| ___ |
| } else { |
| ############################################################################### |
| # base 2^32 implementation |
| # |
| # Emitted for 32-bit flavours. The templates keep the hash value as five |
| # 32-bit words at offsets 0-16 of the context and the clamped key as four |
| # 32-bit words at offsets 32-44. |
| |
| # Symbolic register names for the templates below, assigned in order from |
| # r7-r12 and r14-r31 (r13 is not used): $h0-$h4 are r7-r11, $r0 is r12, |
| # $r1-$r3 are r14-r16, $s1-$s3 are r17-r19, $t0-$t3 are r20-r23, |
| # $D0-$D3 are r24-r27 and $d0-$d3 are r28-r31. |
| my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $s1,$s2,$s3, |
| $t0,$t1,$t2,$t3, $D0,$D1,$D2,$D3, $d0,$d1,$d2,$d3 |
| ) = map("r$_",(7..12,14..31)); |
| |
| # poly1305_init_int, entry: zero the five hash words, then branch to |
| # Lno_key (skipping key setup) when the key pointer in $inp equals zero. |
| $code.=<<___; |
| .globl .poly1305_init_int |
| .align 4 |
| .poly1305_init_int: |
| xor r0,r0,r0 |
| stw r0,0($ctx) # zero hash value |
| stw r0,4($ctx) |
| stw r0,8($ctx) |
| stw r0,12($ctx) |
| stw r0,16($ctx) |
| |
| $UCMP $inp,r0 |
| beq- Lno_key |
| ___ |
| # Little-endian flavours read the four key words with plain word loads. |
| # The mnemonic must be lwz (load word and zero), the same one the input |
| # load in poly1305_blocks uses; PowerPC has no "lw" instruction. |
| $code.=<<___ if ($LITTLE_ENDIAN); |
| lwz $h0,0($inp) # load key material |
| lwz $h1,4($inp) |
| lwz $h2,8($inp) |
| lwz $h3,12($inp) |
| ___ |
| # Other flavours read the four key words byte-reversed (lwbrx) from |
| # offsets 0, 4, 8 and 12. |
| $code.=<<___ if (!$LITTLE_ENDIAN); |
| li $h1,4 |
| lwbrx $h0,0,$inp # load key material |
| li $h2,8 |
| lwbrx $h1,$h1,$inp |
| li $h3,12 |
| lwbrx $h2,$h2,$inp |
| lwbrx $h3,$h3,$inp |
| ___ |
| # This template finishes poly1305_init_int and opens poly1305_blocks. |
| # |
| # init: builds 0xf0000000 in $mask and 0x0ffffffc in $r0, clears the top |
| # four bits of the first key word, ANDs the other three key words with |
| # 0x0ffffffc, stores the four words at offsets 32-44 of the context and |
| # returns with r3 cleared. The Lno_key path shares that return. |
| # |
| # blocks: shifts $len right by four to get the loop count (each iteration |
| # consumes 16 bytes of input) and returns straight away through Labort when |
| # the count is zero. Otherwise it allocates a $FRAME-byte frame, saves |
| # r14-r31 and the link register, loads the key and hash words from the |
| # context, precomputes si = ri + (ri>>2) for i = 1..3, moves the count into |
| # CTR and sets $mask to 3 before entering Loop. |
| $code.=<<___; |
| lis $mask,0xf000 # 0xf0000000 |
| li $r0,-4 |
| andc $r0,$r0,$mask # 0x0ffffffc |
| |
| andc $h0,$h0,$mask |
| and $h1,$h1,$r0 |
| and $h2,$h2,$r0 |
| and $h3,$h3,$r0 |
| |
| stw $h0,32($ctx) # store key |
| stw $h1,36($ctx) |
| stw $h2,40($ctx) |
| stw $h3,44($ctx) |
| |
| Lno_key: |
| xor r3,r3,r3 |
| blr |
| .long 0 |
| .byte 0,12,0x14,0,0,0,2,0 |
| .size .poly1305_init_int,.-.poly1305_init_int |
| |
| .globl .poly1305_blocks |
| .align 4 |
| .poly1305_blocks: |
| srwi. $len,$len,4 |
| beq- Labort |
| |
| $STU $sp,-$FRAME($sp) |
| mflr r0 |
| $PUSH r14,`$FRAME-$SIZE_T*18`($sp) |
| $PUSH r15,`$FRAME-$SIZE_T*17`($sp) |
| $PUSH r16,`$FRAME-$SIZE_T*16`($sp) |
| $PUSH r17,`$FRAME-$SIZE_T*15`($sp) |
| $PUSH r18,`$FRAME-$SIZE_T*14`($sp) |
| $PUSH r19,`$FRAME-$SIZE_T*13`($sp) |
| $PUSH r20,`$FRAME-$SIZE_T*12`($sp) |
| $PUSH r21,`$FRAME-$SIZE_T*11`($sp) |
| $PUSH r22,`$FRAME-$SIZE_T*10`($sp) |
| $PUSH r23,`$FRAME-$SIZE_T*9`($sp) |
| $PUSH r24,`$FRAME-$SIZE_T*8`($sp) |
| $PUSH r25,`$FRAME-$SIZE_T*7`($sp) |
| $PUSH r26,`$FRAME-$SIZE_T*6`($sp) |
| $PUSH r27,`$FRAME-$SIZE_T*5`($sp) |
| $PUSH r28,`$FRAME-$SIZE_T*4`($sp) |
| $PUSH r29,`$FRAME-$SIZE_T*3`($sp) |
| $PUSH r30,`$FRAME-$SIZE_T*2`($sp) |
| $PUSH r31,`$FRAME-$SIZE_T*1`($sp) |
| $PUSH r0,`$FRAME+$LRSAVE`($sp) |
| |
| lwz $r0,32($ctx) # load key |
| lwz $r1,36($ctx) |
| lwz $r2,40($ctx) |
| lwz $r3,44($ctx) |
| |
| lwz $h0,0($ctx) # load hash value |
| lwz $h1,4($ctx) |
| lwz $h2,8($ctx) |
| lwz $h3,12($ctx) |
| lwz $h4,16($ctx) |
| |
| srwi $s1,$r1,2 |
| srwi $s2,$r2,2 |
| srwi $s3,$r3,2 |
| add $s1,$s1,$r1 # si = ri + ri>>2 |
| add $s2,$s2,$r2 |
| add $s3,$s3,$r3 |
| mtctr $len |
| li $mask,3 |
| b Loop |
| |
| .align 4 |
| Loop: |
| ___ |
| # Loop, step 1: fetch the next 16 input bytes as four words into $d0-$d3. |
| # Little-endian flavours use plain word loads (lwz). |
| $code.=<<___ if ($LITTLE_ENDIAN); |
| lwz $d0,0($inp) # load input |
| lwz $d1,4($inp) |
| lwz $d2,8($inp) |
| lwz $d3,12($inp) |
| ___ |
| # Other flavours read the four words byte-reversed (lwbrx); $d1-$d3 first |
| # hold the offsets 4, 8 and 12 and are then overwritten by the loaded data. |
| $code.=<<___ if (!$LITTLE_ENDIAN); |
| li $d1,4 |
| lwbrx $d0,0,$inp # load input |
| li $d2,8 |
| lwbrx $d1,$d1,$inp |
| li $d3,12 |
| lwbrx $d2,$d2,$inp |
| lwbrx $d3,$d3,$inp |
| ___ |
| # This template holds the rest of the Loop body, the poly1305_blocks |
| # epilogue and the first part of poly1305_emit. |
| # |
| # Loop: advances $inp by 16 and adds the loaded block to h0-h3 and $padbit |
| # (plus carry) to h4; the carry chain is interleaved with the first |
| # multiplications. It then forms the 32x32-bit products named in the |
| # inline comments (low halves via mullw, high halves via mulhwu) using |
| # r0-r3 and s1-s3, accumulating low halves in d0-d3 and high halves in |
| # D0-D3, and combines them into h1-h4. The "final reduction step" keeps the |
| # low two bits of h4 and adds (h4 & ~3) + ((h4 & ~3) >> 2) to the low word, |
| # propagating the carry through h1-h4. bdnz repeats until CTR is zero. |
| # |
| # Epilogue: stores h0-h4 back to offsets 0-16 of the context, restores |
| # r14-r31 and releases the frame. Labort is the shared return. |
| # |
| # emit: allocates a $FRAME-byte frame, saves r28-r31 (the registers behind |
| # $d0-$d3) and the link register, loads the five hash words, computes |
| # h + 5 into d0-d3 with the top word in $mask, and turns that into |
| # $mask = -($mask >> 2). The andc/and/or sequence keeps h0-h3 where the |
| # mask bits are clear and takes d0-d3 where they are set; the four nonce |
| # word loads are interleaved with it. Finally the nonce is added to h0-h3 |
| # with carry propagation. |
| $code.=<<___; |
| addi $inp,$inp,16 |
| |
| addc $h0,$h0,$d0 # accumulate input |
| adde $h1,$h1,$d1 |
| adde $h2,$h2,$d2 |
| |
| mullw $d0,$h0,$r0 # h0*r0 |
| mulhwu $D0,$h0,$r0 |
| |
| mullw $d1,$h0,$r1 # h0*r1 |
| mulhwu $D1,$h0,$r1 |
| |
| mullw $d2,$h0,$r2 # h0*r2 |
| mulhwu $D2,$h0,$r2 |
| |
| adde $h3,$h3,$d3 |
| adde $h4,$h4,$padbit |
| |
| mullw $d3,$h0,$r3 # h0*r3 |
| mulhwu $D3,$h0,$r3 |
| |
| mullw $t0,$h1,$s3 # h1*s3 |
| mulhwu $t1,$h1,$s3 |
| |
| mullw $t2,$h1,$r0 # h1*r0 |
| mulhwu $t3,$h1,$r0 |
| addc $d0,$d0,$t0 |
| adde $D0,$D0,$t1 |
| |
| mullw $t0,$h1,$r1 # h1*r1 |
| mulhwu $t1,$h1,$r1 |
| addc $d1,$d1,$t2 |
| adde $D1,$D1,$t3 |
| |
| mullw $t2,$h1,$r2 # h1*r2 |
| mulhwu $t3,$h1,$r2 |
| addc $d2,$d2,$t0 |
| adde $D2,$D2,$t1 |
| |
| mullw $t0,$h2,$s2 # h2*s2 |
| mulhwu $t1,$h2,$s2 |
| addc $d3,$d3,$t2 |
| adde $D3,$D3,$t3 |
| |
| mullw $t2,$h2,$s3 # h2*s3 |
| mulhwu $t3,$h2,$s3 |
| addc $d0,$d0,$t0 |
| adde $D0,$D0,$t1 |
| |
| mullw $t0,$h2,$r0 # h2*r0 |
| mulhwu $t1,$h2,$r0 |
| addc $d1,$d1,$t2 |
| adde $D1,$D1,$t3 |
| |
| mullw $t2,$h2,$r1 # h2*r1 |
| mulhwu $t3,$h2,$r1 |
| addc $d2,$d2,$t0 |
| adde $D2,$D2,$t1 |
| |
| mullw $t0,$h3,$s1 # h3*s1 |
| mulhwu $t1,$h3,$s1 |
| addc $d3,$d3,$t2 |
| adde $D3,$D3,$t3 |
| |
| mullw $t2,$h3,$s2 # h3*s2 |
| mulhwu $t3,$h3,$s2 |
| addc $d0,$d0,$t0 |
| adde $D0,$D0,$t1 |
| |
| mullw $t0,$h3,$s3 # h3*s3 |
| mulhwu $t1,$h3,$s3 |
| addc $d1,$d1,$t2 |
| adde $D1,$D1,$t3 |
| |
| mullw $t2,$h3,$r0 # h3*r0 |
| mulhwu $t3,$h3,$r0 |
| addc $d2,$d2,$t0 |
| adde $D2,$D2,$t1 |
| |
| mullw $t0,$h4,$s1 # h4*s1 |
| addc $d3,$d3,$t2 |
| adde $D3,$D3,$t3 |
| addc $d1,$d1,$t0 |
| |
| mullw $t1,$h4,$s2 # h4*s2 |
| addze $D1,$D1 |
| addc $d2,$d2,$t1 |
| addze $D2,$D2 |
| |
| mullw $t2,$h4,$s3 # h4*s3 |
| addc $d3,$d3,$t2 |
| addze $D3,$D3 |
| |
| mullw $h4,$h4,$r0 # h4*r0 |
| |
| addc $h1,$d1,$D0 |
| adde $h2,$d2,$D1 |
| adde $h3,$d3,$D2 |
| adde $h4,$h4,$D3 |
| |
| andc $D0,$h4,$mask # final reduction step |
| and $h4,$h4,$mask |
| srwi $D1,$D0,2 |
| add $D0,$D0,$D1 |
| addc $h0,$d0,$D0 |
| addze $h1,$h1 |
| addze $h2,$h2 |
| addze $h3,$h3 |
| addze $h4,$h4 |
| |
| bdnz Loop |
| |
| stw $h0,0($ctx) # store hash value |
| stw $h1,4($ctx) |
| stw $h2,8($ctx) |
| stw $h3,12($ctx) |
| stw $h4,16($ctx) |
| |
| $POP r14,`$FRAME-$SIZE_T*18`($sp) |
| $POP r15,`$FRAME-$SIZE_T*17`($sp) |
| $POP r16,`$FRAME-$SIZE_T*16`($sp) |
| $POP r17,`$FRAME-$SIZE_T*15`($sp) |
| $POP r18,`$FRAME-$SIZE_T*14`($sp) |
| $POP r19,`$FRAME-$SIZE_T*13`($sp) |
| $POP r20,`$FRAME-$SIZE_T*12`($sp) |
| $POP r21,`$FRAME-$SIZE_T*11`($sp) |
| $POP r22,`$FRAME-$SIZE_T*10`($sp) |
| $POP r23,`$FRAME-$SIZE_T*9`($sp) |
| $POP r24,`$FRAME-$SIZE_T*8`($sp) |
| $POP r25,`$FRAME-$SIZE_T*7`($sp) |
| $POP r26,`$FRAME-$SIZE_T*6`($sp) |
| $POP r27,`$FRAME-$SIZE_T*5`($sp) |
| $POP r28,`$FRAME-$SIZE_T*4`($sp) |
| $POP r29,`$FRAME-$SIZE_T*3`($sp) |
| $POP r30,`$FRAME-$SIZE_T*2`($sp) |
| $POP r31,`$FRAME-$SIZE_T*1`($sp) |
| addi $sp,$sp,$FRAME |
| Labort: |
| blr |
| .long 0 |
| .byte 0,12,4,1,0x80,18,4,0 |
| .size .poly1305_blocks,.-.poly1305_blocks |
| |
| .globl .poly1305_emit |
| .align 4 |
| .poly1305_emit: |
| $STU $sp,-$FRAME($sp) |
| mflr r0 |
| $PUSH r28,`$FRAME-$SIZE_T*4`($sp) |
| $PUSH r29,`$FRAME-$SIZE_T*3`($sp) |
| $PUSH r30,`$FRAME-$SIZE_T*2`($sp) |
| $PUSH r31,`$FRAME-$SIZE_T*1`($sp) |
| $PUSH r0,`$FRAME+$LRSAVE`($sp) |
| |
| lwz $h0,0($ctx) # load hash |
| lwz $h1,4($ctx) |
| lwz $h2,8($ctx) |
| lwz $h3,12($ctx) |
| lwz $h4,16($ctx) |
| |
| addic $d0,$h0,5 # compare to modulus |
| addze $d1,$h1 |
| addze $d2,$h2 |
| addze $d3,$h3 |
| addze $mask,$h4 |
| |
| srwi $mask,$mask,2 # did it carry/borrow? |
| neg $mask,$mask |
| |
| andc $h0,$h0,$mask |
| and $d0,$d0,$mask |
| andc $h1,$h1,$mask |
| and $d1,$d1,$mask |
| or $h0,$h0,$d0 |
| lwz $d0,0($nonce) # load nonce |
| andc $h2,$h2,$mask |
| and $d2,$d2,$mask |
| or $h1,$h1,$d1 |
| lwz $d1,4($nonce) |
| andc $h3,$h3,$mask |
| and $d3,$d3,$mask |
| or $h2,$h2,$d2 |
| lwz $d2,8($nonce) |
| or $h3,$h3,$d3 |
| lwz $d3,12($nonce) |
| |
| addc $h0,$h0,$d0 # accumulate nonce |
| adde $h1,$h1,$d1 |
| adde $h2,$h2,$d2 |
| adde $h3,$h3,$d3 |
| ___ |
| # poly1305_emit, final part: write the four result words to $mac. |
| # Little-endian flavours use plain word stores. |
| $code.=<<___ if ($LITTLE_ENDIAN); |
| stw $h0,0($mac) # write result |
| stw $h1,4($mac) |
| stw $h2,8($mac) |
| stw $h3,12($mac) |
| ___ |
| # Other flavours store each word byte-reversed (stwbrx) at offsets 0, 4, 8 |
| # and 12; $d1-$d3 are reused to hold the offsets. |
| $code.=<<___ if (!$LITTLE_ENDIAN); |
| li $d1,4 |
| stwbrx $h0,0,$mac # write result |
| li $d2,8 |
| stwbrx $h1,$d1,$mac |
| li $d3,12 |
| stwbrx $h2,$d2,$mac |
| stwbrx $h3,$d3,$mac |
| ___ |
| # Epilogue: restore r28-r31, release the frame and return. |
| $code.=<<___; |
| $POP r28,`$FRAME-$SIZE_T*4`($sp) |
| $POP r29,`$FRAME-$SIZE_T*3`($sp) |
| $POP r30,`$FRAME-$SIZE_T*2`($sp) |
| $POP r31,`$FRAME-$SIZE_T*1`($sp) |
| addi $sp,$sp,$FRAME |
| blr |
| .long 0 |
| .byte 0,12,4,1,0x80,4,3,0 |
| .size .poly1305_emit,.-.poly1305_emit |
| ___ |
| } |
| # Append the identification string emitted for both implementations. |
| $code.=<<___; |
| .asciz "Poly1305 for PPC, CRYPTOGAMS by <appro\@openssl.org>" |
| ___ |
| |
| # Replace every `...` span in the templates with the result of evaluating |
| # its contents as Perl (this resolves the frame-offset arithmetic), then |
| # write the finished text to STDOUT, which was re-opened as a pipe to the |
| # translator. The close is checked and failure is fatal. |
| $code =~ s/\`([^\`]*)\`/eval $1/gem; |
| print $code; |
| close STDOUT or die "error closing STDOUT: $!"; |