Blame - ap/lib/libssl/openssl-1.1.1o/crypto/bn/asm/c64xplus-gf2m.pl - R306

blob: 3bb8d120e96a47df83235679579bf702a794f74a [file] [log] [blame]

yuezonghe	824eb0c	2024-06-27 02:32:26 -0700	[diff] [blame^]	1	#! /usr/bin/env perl
				2	# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
				3	#
				4	# Licensed under the OpenSSL license (the "License"). You may not use
				5	# this file except in compliance with the License. You can obtain a copy
				6	# in the file LICENSE in the source distribution or at
				7	# https://www.openssl.org/source/license.html
				8
				9	#
				10	# ====================================================================
				11	# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
				12	# project. The module is, however, dual licensed under OpenSSL and
				13	# CRYPTOGAMS licenses depending on where you obtain it. For further
				14	# details see http://www.openssl.org/~appro/cryptogams/.
				15	# ====================================================================
				16	#
				17	# February 2012
				18	#
				19	# The module implements bn_GF2m_mul_2x2 polynomial multiplication
				20	# used in bn_gf2m.c. It's kind of low-hanging mechanical port from
				21	# C for the time being... The subroutine runs in 37 cycles, which is
				22	# 4.5x faster than compiler-generated code. Though comparison is
				23	# totally unfair, because this module utilizes Galois Field Multiply
				24	# instruction.
				25
				# Pick the output file: discard leading command-line arguments until one
				# looks like a file name ("name.ext"). $output is left false if none does.
				26	while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
				# Redirect STDOUT only when a file name was found, and stop with a message
				# if it cannot be opened instead of carrying on silently. The
				# three-argument form keeps the ">" mode separate from the file name.
				27	if ($output) { open(STDOUT,">",$output) or die "can't open $output: $!"; }
				28
				29	($rp,$a1,$a0,$b1,$b0)=("A4","B4","A6","B6","A8"); # argument vector
				30
				# Working registers shared by the mul_1x1_* generators below:
				#   $Alo/$Ahi       - the two halfwords of the current A operand
				#   $Alox0..$Alox3  - products of $Alo with bytes $B_0..$B_3 (A17..A20)
				#   $Ahix0..$Ahix3  - products of $Ahi with bytes $B_0..$B_3 (B17..B20)
				31	($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map("A$_",(16..20));
				32	($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map("B$_",(16..20));
				# The four bytes of the current B operand.
				33	($B_0,$B_1,$B_2,$B_3)=("B5","A5","A7","B7");
				# Registers in which the operands of the second and third multiplications
				# are staged; they reuse the $Alo and $B_1 registers.
				34	($A,$B)=($Alo,$B_1);
				# Loaded with 0xFF in the function prologue and used as the low-byte mask.
				35	$xFF="B1";
				36
				# mul_1x1_upper($A,$B) - append to $code the first half of one 32x32-bit
				# polynomial multiplication.
				#   $A, $B - names of the registers holding the two operands.
				# The emitted code splits $B into the four bytes $B_0..$B_3 and $A into the
				# halfwords $Alo/$Ahi, then issues the eight XORMPY 16x8-bit
				# multiplications. The partial products are left in $Alox0..$Alox3 and
				# $Ahix0..$Ahix3; combining them is left to mul_1x1_merged or
				# mul_1x1_lower. Everything inside the heredoc is emitted verbatim (after
				# interpolation), so its instruction order must not be changed.
				37	sub mul_1x1_upper {
				38	my ($A,$B)=@_;
				39	$code.=<<___;
				40	EXTU $B,8,24,$B_2 ; smash $B to 4 bytes
				41	\|\| AND $B,$xFF,$B_0
				42	\|\| SHRU $B,24,$B_3
				43	SHRU $A,16, $Ahi ; smash $A to two halfwords
				44	\|\| EXTU $A,16,16,$Alo
				45
				46	XORMPY $Alo,$B_2,$Alox2 ; 16x8 bits multiplication
				47	\|\| XORMPY $Ahi,$B_2,$Ahix2
				48	\|\| EXTU $B,16,24,$B_1
				49	XORMPY $Alo,$B_0,$Alox0
				50	\|\| XORMPY $Ahi,$B_0,$Ahix0
				51	XORMPY $Alo,$B_3,$Alox3
				52	\|\| XORMPY $Ahi,$B_3,$Ahix3
				53	XORMPY $Alo,$B_1,$Alox1
				54	\|\| XORMPY $Ahi,$B_1,$Ahix1
				55	___
				56	}
				# mul_1x1_merged($OUTlo,$OUThi,$A,$B) - append to $code the second half of
				# the multiplication already in flight, interleaved with the first half of
				# the next one.
				#   $OUTlo, $OUThi - registers that receive the two result words of the
				#                    multiplication being finished.
				#   $A, $B         - registers holding the operands of the multiplication
				#                    being started.
				# The XOR/SHL/SHRU/MV instructions are the same combining steps that
				# mul_1x1_lower emits; the EXTU/AND/SHRU/XORMPY instructions are the same
				# splitting and multiplying steps that mul_1x1_upper emits. The new
				# $Alo x $B_0 product is written to A1 first, because the previous $Alox0
				# value is still read afterwards by the combining code; the final MV copies
				# A1 into $Alox0. The instruction order inside the heredoc is part of the
				# schedule and must not be changed.
				57	sub mul_1x1_merged {
				58	my ($OUTlo,$OUThi,$A,$B)=@_;
				59	$code.=<<___;
				60	EXTU $B,8,24,$B_2 ; smash $B to 4 bytes
				61	\|\| AND $B,$xFF,$B_0
				62	\|\| SHRU $B,24,$B_3
				63	SHRU $A,16, $Ahi ; smash $A to two halfwords
				64	\|\| EXTU $A,16,16,$Alo
				65
				66	XOR $Ahix0,$Alox2,$Ahix0
				67	\|\| MV $Ahix2,$OUThi
				68	\|\| XORMPY $Alo,$B_2,$Alox2
				69	XORMPY $Ahi,$B_2,$Ahix2
				70	\|\| EXTU $B,16,24,$B_1
				71	\|\| XORMPY $Alo,$B_0,A1 ; $Alox0
				72	XOR $Ahix1,$Alox3,$Ahix1
				73	\|\| SHL $Ahix0,16,$OUTlo
				74	\|\| SHRU $Ahix0,16,$Ahix0
				75	XOR $Alox0,$OUTlo,$OUTlo
				76	\|\| XOR $Ahix0,$OUThi,$OUThi
				77	\|\| XORMPY $Ahi,$B_0,$Ahix0
				78	\|\| XORMPY $Alo,$B_3,$Alox3
				79	\|\| SHL $Alox1,8,$Alox1
				80	\|\| SHL $Ahix3,8,$Ahix3
				81	XOR $Alox1,$OUTlo,$OUTlo
				82	\|\| XOR $Ahix3,$OUThi,$OUThi
				83	\|\| XORMPY $Ahi,$B_3,$Ahix3
				84	\|\| SHL $Ahix1,24,$Alox1
				85	\|\| SHRU $Ahix1,8, $Ahix1
				86	XOR $Alox1,$OUTlo,$OUTlo
				87	\|\| XOR $Ahix1,$OUThi,$OUThi
				88	\|\| XORMPY $Alo,$B_1,$Alox1
				89	\|\| XORMPY $Ahi,$B_1,$Ahix1
				90	\|\| MV A1,$Alox0
				91	___
				92	}
				# mul_1x1_lower($OUTlo,$OUThi) - append to $code the second half of a
				# multiplication started by mul_1x1_upper or mul_1x1_merged.
				#   $OUTlo, $OUThi - registers that receive the two result words.
				# The emitted code shifts the eight partial products held in
				# $Alox0..$Alox3 and $Ahix0..$Ahix3 and XORs them together into
				# $OUTlo/$OUThi. It overwrites $Alox1, $Ahix0, $Ahix1 and $Ahix3 while
				# doing so. The first emitted line, ";NOP", is an assembly comment, i.e. a
				# disabled NOP. The instruction order inside the heredoc is part of the
				# schedule and must not be changed.
				93	sub mul_1x1_lower {
				94	my ($OUTlo,$OUThi)=@_;
				95	$code.=<<___;
				96	;NOP
				97	XOR $Ahix0,$Alox2,$Ahix0
				98	\|\| MV $Ahix2,$OUThi
				99	NOP
				100	XOR $Ahix1,$Alox3,$Ahix1
				101	\|\| SHL $Ahix0,16,$OUTlo
				102	\|\| SHRU $Ahix0,16,$Ahix0
				103	XOR $Alox0,$OUTlo,$OUTlo
				104	\|\| XOR $Ahix0,$OUThi,$OUThi
				105	\|\| SHL $Alox1,8,$Alox1
				106	\|\| SHL $Ahix3,8,$Ahix3
				107	XOR $Alox1,$OUTlo,$OUTlo
				108	\|\| XOR $Ahix3,$OUThi,$OUThi
				109	\|\| SHL $Ahix1,24,$Alox1
				110	\|\| SHRU $Ahix1,8, $Ahix1
				111	XOR $Alox1,$OUTlo,$OUTlo
				112	\|\| XOR $Ahix1,$OUThi,$OUThi
				113	___
				114	}
				# Assemble the text of bn_GF2m_mul_2x2 in $code. The heredoc fragments and
				# the generator calls below are concatenated in order, so a fragment that
				# starts with a "||" line continues the instruction group emitted just
				# before it.
				#
				# Prologue: section directive, the symbol alias applied when __TI_EABI__
				# is set (forced to 0 for assembler versions below 7000000), the entry
				# label, and loading the 0xFF byte mask into $xFF.
				115	$code.=<<___;
				116	.text
				117
				118	.if .ASSEMBLER_VERSION<7000000
				119	.asg 0,__TI_EABI__
				120	.endif
				121	.if __TI_EABI__
				122	.asg bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
				123	.endif
				124
				125	.global _bn_GF2m_mul_2x2
				126	_bn_GF2m_mul_2x2:
				127	.asmfunc
				128	MVK 0xFF,$xFF
				129	___
				# Three multiplications are emitted in overlapped fashion: each
				# mul_1x1_merged call finishes the previous product while starting the
				# next one.
				130	&mul_1x1_upper($a0,$b0); # a0·b0
				# Stage a1 and b1 in $A/$B as operands of the second multiplication.
				131	$code.=<<___;
				132	\|\| MV $b1,$B
				133	MV $a1,$A
				134	___
				# Finish a0*b0 into A28/B28 while starting a1*b1.
				135	&mul_1x1_merged("A28","B28",$A,$B); # a0·b0/a1·b1
				# Stage a0^a1 and b0^b1 in $A/$B as operands of the third multiplication.
				136	$code.=<<___;
				137	\|\| XOR $b0,$b1,$B
				138	XOR $a0,$a1,$A
				139	___
				# Finish a1*b1 into A31/B31 while starting (a0+a1)*(b0+b1).
				140	&mul_1x1_merged("A31","B31",$A,$B); # a1·b1/(a0+a1)·(b0+b1)
				# A29/B29 = a0*b0 + a1*b1, needed for the middle term below.
				141	$code.=<<___;
				142	XOR A28,A31,A29
				143	\|\| XOR B28,B31,B29 ; a0·b0+a1·b1
				144	___
				# Finish (a0+a1)*(b0+b1) into A30/B30.
				145	&mul_1x1_lower("A30","B30"); # (a0+a1)·(b0+b1)
				# Form the middle term in A30/B30, fold it into the two middle result
				# words, and store the four result words to $rp[0..3].
				# NOTE(review): BNOP B3 is presumably the branch back to the caller, with
				# the stores placed after it on purpose - confirm against the C64x+ ABI.
				146	$code.=<<___;
				147	\|\| BNOP B3
				148	XOR A29,A30,A30
				149	\|\| XOR B29,B30,B30 ; (a0+a1)·(b0+b1)-a0·b0-a1·b1
				150	XOR B28,A30,A30
				151	\|\| STW A28,*${rp}[0]
				152	XOR B30,A31,A31
				153	\|\| STW A30,*${rp}[1]
				154	STW A31,*${rp}[2]
				155	STW B31,*${rp}[3]
				156	.endasmfunc
				157	___
				158
				# Write out the generated assembly; checking close reports write errors
				# that only surface when the buffer is flushed.
				159	print $code;
				160	close STDOUT or die "error closing STDOUT: $!";