Blame - marvell/linux/arch/sh/kernel/cpu/sh4/softfloat.c - T108

blob: 42edf2e54e85430b1df930625cb0378d48711bc0 [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	/*
				2	* Floating point emulation support for subnormalised numbers on SH4
				3	* architecture This file is derived from the SoftFloat IEC/IEEE
				4	* Floating-point Arithmetic Package, Release 2 the original license of
				5	* which is reproduced below.
				6	*
				7	* ========================================================================
				8	*
				9	* This C source file is part of the SoftFloat IEC/IEEE Floating-point
				10	* Arithmetic Package, Release 2.
				11	*
				12	* Written by John R. Hauser. This work was made possible in part by the
				13	* International Computer Science Institute, located at Suite 600, 1947 Center
				14	* Street, Berkeley, California 94704. Funding was partially provided by the
				15	* National Science Foundation under grant MIP-9311980. The original version
				16	* of this code was written as part of a project to build a fixed-point vector
				17	* processor in collaboration with the University of California at Berkeley,
				18	* overseen by Profs. Nelson Morgan and John Wawrzynek. More information
				19	* is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
				20	* arithmetic/softfloat.html'.
				21	*
				22	* THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
				23	* has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
				24	* TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
				25	* PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
				26	* AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
				27	*
				28	* Derivative works are acceptable, even for commercial purposes, so long as
				29	* (1) they include prominent notice that the work is derivative, and (2) they
				30	* include prominent notice akin to these three paragraphs for those parts of
				31	* this code that are retained.
				32	*
				33	* ========================================================================
				34	*
				35	* SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
				36	* and Kamel Khelifi <kamel.khelifi@st.com>
				37	*/
				38	#include <linux/kernel.h>
				39	#include <cpu/fpu.h>
				40	#include <asm/div64.h>
				41
				42	#define LIT64( a ) a##LL
				43
				44	typedef char flag;
				45	typedef unsigned char uint8;
				46	typedef signed char int8;
				47	typedef int uint16;
				48	typedef int int16;
				49	typedef unsigned int uint32;
				50	typedef signed int int32;
				51
				52	typedef unsigned long long int bits64;
				53	typedef signed long long int sbits64;
				54
				55	typedef unsigned char bits8;
				56	typedef signed char sbits8;
				57	typedef unsigned short int bits16;
				58	typedef signed short int sbits16;
				59	typedef unsigned int bits32;
				60	typedef signed int sbits32;
				61
				62	typedef unsigned long long int uint64;
				63	typedef signed long long int int64;
				64
				65	typedef unsigned long int float32;
				66	typedef unsigned long long float64;
				67
				68	extern void float_raise(unsigned int flags); /* in fpu.c */
				69	extern int float_rounding_mode(void); /* in fpu.c */
				70
				71	bits64 extractFloat64Frac(float64 a);
				72	flag extractFloat64Sign(float64 a);
				73	int16 extractFloat64Exp(float64 a);
				74	int16 extractFloat32Exp(float32 a);
				75	flag extractFloat32Sign(float32 a);
				76	bits32 extractFloat32Frac(float32 a);
				77	float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
				78	void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr);
				79	float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
				80	void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr);
				81	float64 float64_sub(float64 a, float64 b);
				82	float32 float32_sub(float32 a, float32 b);
				83	float32 float32_add(float32 a, float32 b);
				84	float64 float64_add(float64 a, float64 b);
				85	float64 float64_div(float64 a, float64 b);
				86	float32 float32_div(float32 a, float32 b);
				87	float32 float32_mul(float32 a, float32 b);
				88	float64 float64_mul(float64 a, float64 b);
				89	float32 float64_to_float32(float64 a);
				90	void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
				91	bits64 * z1Ptr);
				92	void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
				93	bits64 * z1Ptr);
				94	void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);
				95
				96	static int8 countLeadingZeros32(bits32 a);
				97	static int8 countLeadingZeros64(bits64 a);
				98	static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
				99	bits64 zSig);
				100	static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
				101	static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
				102	static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
				103	static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
				104	bits32 zSig);
				105	static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
				106	static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
				107	static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
				108	static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
				109	bits64 * zSigPtr);
				110	static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
				111	static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
				112	bits32 * zSigPtr);
				113
				114	bits64 extractFloat64Frac(float64 a)
				115	{
				116	return a & LIT64(0x000FFFFFFFFFFFFF);
				117	}
				118
				119	flag extractFloat64Sign(float64 a)
				120	{
				121	return a >> 63;
				122	}
				123
				124	int16 extractFloat64Exp(float64 a)
				125	{
				126	return (a >> 52) & 0x7FF;
				127	}
				128
				129	int16 extractFloat32Exp(float32 a)
				130	{
				131	return (a >> 23) & 0xFF;
				132	}
				133
				134	flag extractFloat32Sign(float32 a)
				135	{
				136	return a >> 31;
				137	}
				138
				139	bits32 extractFloat32Frac(float32 a)
				140	{
				141	return a & 0x007FFFFF;
				142	}
				143
				144	float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
				145	{
				146	return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
				147	}
				148
				149	void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr)
				150	{
				151	bits64 z;
				152
				153	if (count == 0) {
				154	z = a;
				155	} else if (count < 64) {
				156	z = (a >> count) \| ((a << ((-count) & 63)) != 0);
				157	} else {
				158	z = (a != 0);
				159	}
				160	*zPtr = z;
				161	}
				162
				163	static int8 countLeadingZeros32(bits32 a)
				164	{
				165	static const int8 countLeadingZerosHigh[] = {
				166	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
				167	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
				168	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
				169	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
				170	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				171	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				172	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				173	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				174	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				175	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				176	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				177	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				178	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				179	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				180	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				181	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
				182	};
				183	int8 shiftCount;
				184
				185	shiftCount = 0;
				186	if (a < 0x10000) {
				187	shiftCount += 16;
				188	a <<= 16;
				189	}
				190	if (a < 0x1000000) {
				191	shiftCount += 8;
				192	a <<= 8;
				193	}
				194	shiftCount += countLeadingZerosHigh[a >> 24];
				195	return shiftCount;
				196
				197	}
				198
				199	static int8 countLeadingZeros64(bits64 a)
				200	{
				201	int8 shiftCount;
				202
				203	shiftCount = 0;
				204	if (a < ((bits64) 1) << 32) {
				205	shiftCount += 32;
				206	} else {
				207	a >>= 32;
				208	}
				209	shiftCount += countLeadingZeros32(a);
				210	return shiftCount;
				211
				212	}
				213
				214	static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
				215	{
				216	int8 shiftCount;
				217
				218	shiftCount = countLeadingZeros64(zSig) - 1;
				219	return roundAndPackFloat64(zSign, zExp - shiftCount,
				220	zSig << shiftCount);
				221
				222	}
				223
				224	static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
				225	{
				226	int16 aExp, bExp, zExp;
				227	bits64 aSig, bSig, zSig;
				228	int16 expDiff;
				229
				230	aSig = extractFloat64Frac(a);
				231	aExp = extractFloat64Exp(a);
				232	bSig = extractFloat64Frac(b);
				233	bExp = extractFloat64Exp(b);
				234	expDiff = aExp - bExp;
				235	aSig <<= 10;
				236	bSig <<= 10;
				237	if (0 < expDiff)
				238	goto aExpBigger;
				239	if (expDiff < 0)
				240	goto bExpBigger;
				241	if (aExp == 0) {
				242	aExp = 1;
				243	bExp = 1;
				244	}
				245	if (bSig < aSig)
				246	goto aBigger;
				247	if (aSig < bSig)
				248	goto bBigger;
				249	return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
				250	bExpBigger:
				251	if (bExp == 0x7FF) {
				252	return packFloat64(zSign ^ 1, 0x7FF, 0);
				253	}
				254	if (aExp == 0) {
				255	++expDiff;
				256	} else {
				257	aSig \|= LIT64(0x4000000000000000);
				258	}
				259	shift64RightJamming(aSig, -expDiff, &aSig);
				260	bSig \|= LIT64(0x4000000000000000);
				261	bBigger:
				262	zSig = bSig - aSig;
				263	zExp = bExp;
				264	zSign ^= 1;
				265	goto normalizeRoundAndPack;
				266	aExpBigger:
				267	if (aExp == 0x7FF) {
				268	return a;
				269	}
				270	if (bExp == 0) {
				271	--expDiff;
				272	} else {
				273	bSig \|= LIT64(0x4000000000000000);
				274	}
				275	shift64RightJamming(bSig, expDiff, &bSig);
				276	aSig \|= LIT64(0x4000000000000000);
				277	aBigger:
				278	zSig = aSig - bSig;
				279	zExp = aExp;
				280	normalizeRoundAndPack:
				281	--zExp;
				282	return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
				283
				284	}
				285	static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
				286	{
				287	int16 aExp, bExp, zExp;
				288	bits64 aSig, bSig, zSig;
				289	int16 expDiff;
				290
				291	aSig = extractFloat64Frac(a);
				292	aExp = extractFloat64Exp(a);
				293	bSig = extractFloat64Frac(b);
				294	bExp = extractFloat64Exp(b);
				295	expDiff = aExp - bExp;
				296	aSig <<= 9;
				297	bSig <<= 9;
				298	if (0 < expDiff) {
				299	if (aExp == 0x7FF) {
				300	return a;
				301	}
				302	if (bExp == 0) {
				303	--expDiff;
				304	} else {
				305	bSig \|= LIT64(0x2000000000000000);
				306	}
				307	shift64RightJamming(bSig, expDiff, &bSig);
				308	zExp = aExp;
				309	} else if (expDiff < 0) {
				310	if (bExp == 0x7FF) {
				311	return packFloat64(zSign, 0x7FF, 0);
				312	}
				313	if (aExp == 0) {
				314	++expDiff;
				315	} else {
				316	aSig \|= LIT64(0x2000000000000000);
				317	}
				318	shift64RightJamming(aSig, -expDiff, &aSig);
				319	zExp = bExp;
				320	} else {
				321	if (aExp == 0x7FF) {
				322	return a;
				323	}
				324	if (aExp == 0)
				325	return packFloat64(zSign, 0, (aSig + bSig) >> 9);
				326	zSig = LIT64(0x4000000000000000) + aSig + bSig;
				327	zExp = aExp;
				328	goto roundAndPack;
				329	}
				330	aSig \|= LIT64(0x2000000000000000);
				331	zSig = (aSig + bSig) << 1;
				332	--zExp;
				333	if ((sbits64) zSig < 0) {
				334	zSig = aSig + bSig;
				335	++zExp;
				336	}
				337	roundAndPack:
				338	return roundAndPackFloat64(zSign, zExp, zSig);
				339
				340	}
				341
				342	float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
				343	{
				344	return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
				345	}
				346
				347	void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr)
				348	{
				349	bits32 z;
				350	if (count == 0) {
				351	z = a;
				352	} else if (count < 32) {
				353	z = (a >> count) \| ((a << ((-count) & 31)) != 0);
				354	} else {
				355	z = (a != 0);
				356	}
				357	*zPtr = z;
				358	}
				359
				360	static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
				361	{
				362	flag roundNearestEven;
				363	int8 roundIncrement, roundBits;
				364	flag isTiny;
				365
				366	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
				367	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
				368	roundIncrement = 0x40;
				369	if (!roundNearestEven) {
				370	roundIncrement = 0;
				371	}
				372	roundBits = zSig & 0x7F;
				373	if (0xFD <= (bits16) zExp) {
				374	if ((0xFD < zExp)
				375	\|\| ((zExp == 0xFD)
				376	&& ((sbits32) (zSig + roundIncrement) < 0))
				377	) {
				378	float_raise(FPSCR_CAUSE_OVERFLOW \| FPSCR_CAUSE_INEXACT);
				379	return packFloat32(zSign, 0xFF,
				380	0) - (roundIncrement == 0);
				381	}
				382	if (zExp < 0) {
				383	isTiny = (zExp < -1)
				384	\|\| (zSig + roundIncrement < 0x80000000);
				385	shift32RightJamming(zSig, -zExp, &zSig);
				386	zExp = 0;
				387	roundBits = zSig & 0x7F;
				388	if (isTiny && roundBits)
				389	float_raise(FPSCR_CAUSE_UNDERFLOW);
				390	}
				391	}
				392	if (roundBits)
				393	float_raise(FPSCR_CAUSE_INEXACT);
				394	zSig = (zSig + roundIncrement) >> 7;
				395	zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
				396	if (zSig == 0)
				397	zExp = 0;
				398	return packFloat32(zSign, zExp, zSig);
				399
				400	}
				401
				402	static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
				403	{
				404	int8 shiftCount;
				405
				406	shiftCount = countLeadingZeros32(zSig) - 1;
				407	return roundAndPackFloat32(zSign, zExp - shiftCount,
				408	zSig << shiftCount);
				409	}
				410
				411	static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
				412	{
				413	flag roundNearestEven;
				414	int16 roundIncrement, roundBits;
				415	flag isTiny;
				416
				417	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
				418	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
				419	roundIncrement = 0x200;
				420	if (!roundNearestEven) {
				421	roundIncrement = 0;
				422	}
				423	roundBits = zSig & 0x3FF;
				424	if (0x7FD <= (bits16) zExp) {
				425	if ((0x7FD < zExp)
				426	\|\| ((zExp == 0x7FD)
				427	&& ((sbits64) (zSig + roundIncrement) < 0))
				428	) {
				429	float_raise(FPSCR_CAUSE_OVERFLOW \| FPSCR_CAUSE_INEXACT);
				430	return packFloat64(zSign, 0x7FF,
				431	0) - (roundIncrement == 0);
				432	}
				433	if (zExp < 0) {
				434	isTiny = (zExp < -1)
				435	\|\| (zSig + roundIncrement <
				436	LIT64(0x8000000000000000));
				437	shift64RightJamming(zSig, -zExp, &zSig);
				438	zExp = 0;
				439	roundBits = zSig & 0x3FF;
				440	if (isTiny && roundBits)
				441	float_raise(FPSCR_CAUSE_UNDERFLOW);
				442	}
				443	}
				444	if (roundBits)
				445	float_raise(FPSCR_CAUSE_INEXACT);
				446	zSig = (zSig + roundIncrement) >> 10;
				447	zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
				448	if (zSig == 0)
				449	zExp = 0;
				450	return packFloat64(zSign, zExp, zSig);
				451
				452	}
				453
				454	static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
				455	{
				456	int16 aExp, bExp, zExp;
				457	bits32 aSig, bSig, zSig;
				458	int16 expDiff;
				459
				460	aSig = extractFloat32Frac(a);
				461	aExp = extractFloat32Exp(a);
				462	bSig = extractFloat32Frac(b);
				463	bExp = extractFloat32Exp(b);
				464	expDiff = aExp - bExp;
				465	aSig <<= 7;
				466	bSig <<= 7;
				467	if (0 < expDiff)
				468	goto aExpBigger;
				469	if (expDiff < 0)
				470	goto bExpBigger;
				471	if (aExp == 0) {
				472	aExp = 1;
				473	bExp = 1;
				474	}
				475	if (bSig < aSig)
				476	goto aBigger;
				477	if (aSig < bSig)
				478	goto bBigger;
				479	return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
				480	bExpBigger:
				481	if (bExp == 0xFF) {
				482	return packFloat32(zSign ^ 1, 0xFF, 0);
				483	}
				484	if (aExp == 0) {
				485	++expDiff;
				486	} else {
				487	aSig \|= 0x40000000;
				488	}
				489	shift32RightJamming(aSig, -expDiff, &aSig);
				490	bSig \|= 0x40000000;
				491	bBigger:
				492	zSig = bSig - aSig;
				493	zExp = bExp;
				494	zSign ^= 1;
				495	goto normalizeRoundAndPack;
				496	aExpBigger:
				497	if (aExp == 0xFF) {
				498	return a;
				499	}
				500	if (bExp == 0) {
				501	--expDiff;
				502	} else {
				503	bSig \|= 0x40000000;
				504	}
				505	shift32RightJamming(bSig, expDiff, &bSig);
				506	aSig \|= 0x40000000;
				507	aBigger:
				508	zSig = aSig - bSig;
				509	zExp = aExp;
				510	normalizeRoundAndPack:
				511	--zExp;
				512	return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
				513
				514	}
				515
				516	static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
				517	{
				518	int16 aExp, bExp, zExp;
				519	bits32 aSig, bSig, zSig;
				520	int16 expDiff;
				521
				522	aSig = extractFloat32Frac(a);
				523	aExp = extractFloat32Exp(a);
				524	bSig = extractFloat32Frac(b);
				525	bExp = extractFloat32Exp(b);
				526	expDiff = aExp - bExp;
				527	aSig <<= 6;
				528	bSig <<= 6;
				529	if (0 < expDiff) {
				530	if (aExp == 0xFF) {
				531	return a;
				532	}
				533	if (bExp == 0) {
				534	--expDiff;
				535	} else {
				536	bSig \|= 0x20000000;
				537	}
				538	shift32RightJamming(bSig, expDiff, &bSig);
				539	zExp = aExp;
				540	} else if (expDiff < 0) {
				541	if (bExp == 0xFF) {
				542	return packFloat32(zSign, 0xFF, 0);
				543	}
				544	if (aExp == 0) {
				545	++expDiff;
				546	} else {
				547	aSig \|= 0x20000000;
				548	}
				549	shift32RightJamming(aSig, -expDiff, &aSig);
				550	zExp = bExp;
				551	} else {
				552	if (aExp == 0xFF) {
				553	return a;
				554	}
				555	if (aExp == 0)
				556	return packFloat32(zSign, 0, (aSig + bSig) >> 6);
				557	zSig = 0x40000000 + aSig + bSig;
				558	zExp = aExp;
				559	goto roundAndPack;
				560	}
				561	aSig \|= 0x20000000;
				562	zSig = (aSig + bSig) << 1;
				563	--zExp;
				564	if ((sbits32) zSig < 0) {
				565	zSig = aSig + bSig;
				566	++zExp;
				567	}
				568	roundAndPack:
				569	return roundAndPackFloat32(zSign, zExp, zSig);
				570
				571	}
				572
				573	float64 float64_sub(float64 a, float64 b)
				574	{
				575	flag aSign, bSign;
				576
				577	aSign = extractFloat64Sign(a);
				578	bSign = extractFloat64Sign(b);
				579	if (aSign == bSign) {
				580	return subFloat64Sigs(a, b, aSign);
				581	} else {
				582	return addFloat64Sigs(a, b, aSign);
				583	}
				584
				585	}
				586
				587	float32 float32_sub(float32 a, float32 b)
				588	{
				589	flag aSign, bSign;
				590
				591	aSign = extractFloat32Sign(a);
				592	bSign = extractFloat32Sign(b);
				593	if (aSign == bSign) {
				594	return subFloat32Sigs(a, b, aSign);
				595	} else {
				596	return addFloat32Sigs(a, b, aSign);
				597	}
				598
				599	}
				600
				601	float32 float32_add(float32 a, float32 b)
				602	{
				603	flag aSign, bSign;
				604
				605	aSign = extractFloat32Sign(a);
				606	bSign = extractFloat32Sign(b);
				607	if (aSign == bSign) {
				608	return addFloat32Sigs(a, b, aSign);
				609	} else {
				610	return subFloat32Sigs(a, b, aSign);
				611	}
				612
				613	}
				614
				615	float64 float64_add(float64 a, float64 b)
				616	{
				617	flag aSign, bSign;
				618
				619	aSign = extractFloat64Sign(a);
				620	bSign = extractFloat64Sign(b);
				621	if (aSign == bSign) {
				622	return addFloat64Sigs(a, b, aSign);
				623	} else {
				624	return subFloat64Sigs(a, b, aSign);
				625	}
				626	}
				627
				628	static void
				629	normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
				630	{
				631	int8 shiftCount;
				632
				633	shiftCount = countLeadingZeros64(aSig) - 11;
				634	*zSigPtr = aSig << shiftCount;
				635	*zExpPtr = 1 - shiftCount;
				636	}
				637
				638	void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
				639	bits64 * z1Ptr)
				640	{
				641	bits64 z1;
				642
				643	z1 = a1 + b1;
				644	*z1Ptr = z1;
				645	*z0Ptr = a0 + b0 + (z1 < a1);
				646	}
				647
				648	void
				649	sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
				650	bits64 * z1Ptr)
				651	{
				652	*z1Ptr = a1 - b1;
				653	*z0Ptr = a0 - b0 - (a1 < b1);
				654	}
				655
				656	static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
				657	{
				658	bits64 b0, b1;
				659	bits64 rem0, rem1, term0, term1;
				660	bits64 z, tmp;
				661	if (b <= a0)
				662	return LIT64(0xFFFFFFFFFFFFFFFF);
				663	b0 = b >> 32;
				664	tmp = a0;
				665	do_div(tmp, b0);
				666
				667	z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : tmp << 32;
				668	mul64To128(b, z, &term0, &term1);
				669	sub128(a0, a1, term0, term1, &rem0, &rem1);
				670	while (((sbits64) rem0) < 0) {
				671	z -= LIT64(0x100000000);
				672	b1 = b << 32;
				673	add128(rem0, rem1, b0, b1, &rem0, &rem1);
				674	}
				675	rem0 = (rem0 << 32) \| (rem1 >> 32);
				676	tmp = rem0;
				677	do_div(tmp, b0);
				678	z \|= (b0 << 32 <= rem0) ? 0xFFFFFFFF : tmp;
				679	return z;
				680	}
				681
				682	void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
				683	{
				684	bits32 aHigh, aLow, bHigh, bLow;
				685	bits64 z0, zMiddleA, zMiddleB, z1;
				686
				687	aLow = a;
				688	aHigh = a >> 32;
				689	bLow = b;
				690	bHigh = b >> 32;
				691	z1 = ((bits64) aLow) * bLow;
				692	zMiddleA = ((bits64) aLow) * bHigh;
				693	zMiddleB = ((bits64) aHigh) * bLow;
				694	z0 = ((bits64) aHigh) * bHigh;
				695	zMiddleA += zMiddleB;
				696	z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
				697	zMiddleA <<= 32;
				698	z1 += zMiddleA;
				699	z0 += (z1 < zMiddleA);
				700	*z1Ptr = z1;
				701	*z0Ptr = z0;
				702
				703	}
				704
				705	static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
				706	bits32 * zSigPtr)
				707	{
				708	int8 shiftCount;
				709
				710	shiftCount = countLeadingZeros32(aSig) - 8;
				711	*zSigPtr = aSig << shiftCount;
				712	*zExpPtr = 1 - shiftCount;
				713
				714	}
				715
				716	float64 float64_div(float64 a, float64 b)
				717	{
				718	flag aSign, bSign, zSign;
				719	int16 aExp, bExp, zExp;
				720	bits64 aSig, bSig, zSig;
				721	bits64 rem0, rem1;
				722	bits64 term0, term1;
				723
				724	aSig = extractFloat64Frac(a);
				725	aExp = extractFloat64Exp(a);
				726	aSign = extractFloat64Sign(a);
				727	bSig = extractFloat64Frac(b);
				728	bExp = extractFloat64Exp(b);
				729	bSign = extractFloat64Sign(b);
				730	zSign = aSign ^ bSign;
				731	if (aExp == 0x7FF) {
				732	if (bExp == 0x7FF) {
				733	}
				734	return packFloat64(zSign, 0x7FF, 0);
				735	}
				736	if (bExp == 0x7FF) {
				737	return packFloat64(zSign, 0, 0);
				738	}
				739	if (bExp == 0) {
				740	if (bSig == 0) {
				741	if ((aExp \| aSig) == 0) {
				742	float_raise(FPSCR_CAUSE_INVALID);
				743	}
				744	return packFloat64(zSign, 0x7FF, 0);
				745	}
				746	normalizeFloat64Subnormal(bSig, &bExp, &bSig);
				747	}
				748	if (aExp == 0) {
				749	if (aSig == 0)
				750	return packFloat64(zSign, 0, 0);
				751	normalizeFloat64Subnormal(aSig, &aExp, &aSig);
				752	}
				753	zExp = aExp - bExp + 0x3FD;
				754	aSig = (aSig \| LIT64(0x0010000000000000)) << 10;
				755	bSig = (bSig \| LIT64(0x0010000000000000)) << 11;
				756	if (bSig <= (aSig + aSig)) {
				757	aSig >>= 1;
				758	++zExp;
				759	}
				760	zSig = estimateDiv128To64(aSig, 0, bSig);
				761	if ((zSig & 0x1FF) <= 2) {
				762	mul64To128(bSig, zSig, &term0, &term1);
				763	sub128(aSig, 0, term0, term1, &rem0, &rem1);
				764	while ((sbits64) rem0 < 0) {
				765	--zSig;
				766	add128(rem0, rem1, 0, bSig, &rem0, &rem1);
				767	}
				768	zSig \|= (rem1 != 0);
				769	}
				770	return roundAndPackFloat64(zSign, zExp, zSig);
				771
				772	}
				773
				774	float32 float32_div(float32 a, float32 b)
				775	{
				776	flag aSign, bSign, zSign;
				777	int16 aExp, bExp, zExp;
				778	bits32 aSig, bSig;
				779	uint64_t zSig;
				780
				781	aSig = extractFloat32Frac(a);
				782	aExp = extractFloat32Exp(a);
				783	aSign = extractFloat32Sign(a);
				784	bSig = extractFloat32Frac(b);
				785	bExp = extractFloat32Exp(b);
				786	bSign = extractFloat32Sign(b);
				787	zSign = aSign ^ bSign;
				788	if (aExp == 0xFF) {
				789	if (bExp == 0xFF) {
				790	}
				791	return packFloat32(zSign, 0xFF, 0);
				792	}
				793	if (bExp == 0xFF) {
				794	return packFloat32(zSign, 0, 0);
				795	}
				796	if (bExp == 0) {
				797	if (bSig == 0) {
				798	return packFloat32(zSign, 0xFF, 0);
				799	}
				800	normalizeFloat32Subnormal(bSig, &bExp, &bSig);
				801	}
				802	if (aExp == 0) {
				803	if (aSig == 0)
				804	return packFloat32(zSign, 0, 0);
				805	normalizeFloat32Subnormal(aSig, &aExp, &aSig);
				806	}
				807	zExp = aExp - bExp + 0x7D;
				808	aSig = (aSig \| 0x00800000) << 7;
				809	bSig = (bSig \| 0x00800000) << 8;
				810	if (bSig <= (aSig + aSig)) {
				811	aSig >>= 1;
				812	++zExp;
				813	}
				814	zSig = (((bits64) aSig) << 32);
				815	do_div(zSig, bSig);
				816
				817	if ((zSig & 0x3F) == 0) {
				818	zSig \|= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
				819	}
				820	return roundAndPackFloat32(zSign, zExp, (bits32)zSig);
				821
				822	}
				823
				824	float32 float32_mul(float32 a, float32 b)
				825	{
				826	char aSign, bSign, zSign;
				827	int aExp, bExp, zExp;
				828	unsigned int aSig, bSig;
				829	unsigned long long zSig64;
				830	unsigned int zSig;
				831
				832	aSig = extractFloat32Frac(a);
				833	aExp = extractFloat32Exp(a);
				834	aSign = extractFloat32Sign(a);
				835	bSig = extractFloat32Frac(b);
				836	bExp = extractFloat32Exp(b);
				837	bSign = extractFloat32Sign(b);
				838	zSign = aSign ^ bSign;
				839	if (aExp == 0) {
				840	if (aSig == 0)
				841	return packFloat32(zSign, 0, 0);
				842	normalizeFloat32Subnormal(aSig, &aExp, &aSig);
				843	}
				844	if (bExp == 0) {
				845	if (bSig == 0)
				846	return packFloat32(zSign, 0, 0);
				847	normalizeFloat32Subnormal(bSig, &bExp, &bSig);
				848	}
				849	if ((bExp == 0xff && bSig == 0) \|\| (aExp == 0xff && aSig == 0))
				850	return roundAndPackFloat32(zSign, 0xff, 0);
				851
				852	zExp = aExp + bExp - 0x7F;
				853	aSig = (aSig \| 0x00800000) << 7;
				854	bSig = (bSig \| 0x00800000) << 8;
				855	shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
				856	zSig = zSig64;
				857	if (0 <= (signed int)(zSig << 1)) {
				858	zSig <<= 1;
				859	--zExp;
				860	}
				861	return roundAndPackFloat32(zSign, zExp, zSig);
				862
				863	}
				864
				865	float64 float64_mul(float64 a, float64 b)
				866	{
				867	char aSign, bSign, zSign;
				868	int aExp, bExp, zExp;
				869	unsigned long long int aSig, bSig, zSig0, zSig1;
				870
				871	aSig = extractFloat64Frac(a);
				872	aExp = extractFloat64Exp(a);
				873	aSign = extractFloat64Sign(a);
				874	bSig = extractFloat64Frac(b);
				875	bExp = extractFloat64Exp(b);
				876	bSign = extractFloat64Sign(b);
				877	zSign = aSign ^ bSign;
				878
				879	if (aExp == 0) {
				880	if (aSig == 0)
				881	return packFloat64(zSign, 0, 0);
				882	normalizeFloat64Subnormal(aSig, &aExp, &aSig);
				883	}
				884	if (bExp == 0) {
				885	if (bSig == 0)
				886	return packFloat64(zSign, 0, 0);
				887	normalizeFloat64Subnormal(bSig, &bExp, &bSig);
				888	}
				889	if ((aExp == 0x7ff && aSig == 0) \|\| (bExp == 0x7ff && bSig == 0))
				890	return roundAndPackFloat64(zSign, 0x7ff, 0);
				891
				892	zExp = aExp + bExp - 0x3FF;
				893	aSig = (aSig \| 0x0010000000000000LL) << 10;
				894	bSig = (bSig \| 0x0010000000000000LL) << 11;
				895	mul64To128(aSig, bSig, &zSig0, &zSig1);
				896	zSig0 \|= (zSig1 != 0);
				897	if (0 <= (signed long long int)(zSig0 << 1)) {
				898	zSig0 <<= 1;
				899	--zExp;
				900	}
				901	return roundAndPackFloat64(zSign, zExp, zSig0);
				902	}
				903
				904	/*
				905	* -------------------------------------------------------------------------------
				906	* Returns the result of converting the double-precision floating-point value
				907	* `a' to the single-precision floating-point format. The conversion is
				908	* performed according to the IEC/IEEE Standard for Binary Floating-point
				909	* Arithmetic.
				910	* -------------------------------------------------------------------------------
				911	* */
				912	float32 float64_to_float32(float64 a)
				913	{
				914	flag aSign;
				915	int16 aExp;
				916	bits64 aSig;
				917	bits32 zSig;
				918
				919	aSig = extractFloat64Frac( a );
				920	aExp = extractFloat64Exp( a );
				921	aSign = extractFloat64Sign( a );
				922
				923	shift64RightJamming( aSig, 22, &aSig );
				924	zSig = aSig;
				925	if ( aExp \|\| zSig ) {
				926	zSig \|= 0x40000000;
				927	aExp -= 0x381;
				928	}
				929	return roundAndPackFloat32(aSign, aExp, zSig);
				930	}