/* Copyright (C) 1996-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Richard Henderson <rth@tamu.edu>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

 | 19 | #include "div_libc.h" | 
 | 20 |  | 
 | 21 | #undef FRAME | 
 | 22 | #ifdef __alpha_fix__ | 
 | 23 | #define FRAME 0 | 
 | 24 | #else | 
 | 25 | #define FRAME 16 | 
 | 26 | #endif | 
 | 27 |  | 
 | 28 | #undef X | 
 | 29 | #undef Y | 
 | 30 | #define X $17 | 
 | 31 | #define Y $18 | 
 | 32 |  | 
	.set noat

/* ldiv -- signed 64-bit division returning quotient and remainder.

   Inputs:
	$16	address of the two-quadword result object
	$17 (X)	dividend
	$18 (Y)	divisor

   Outputs:
	0($16)	quotient
	8($16)	remainder, computed as X - quotient * Y
	$0	the result address (copy of $16)

   Division by zero raises GEN_INTDIV through PAL_gentrap; if that trap
   returns, both result fields are set to zero.

   Method: both operands are converted to double and divided with the
   /c qualifier.  When X survives a 53-bit sign-extension round trip the
   FP quotient is taken as exact.  Otherwise ($x_big) the FP quotient is
   only an estimate, which is corrected with integer shift/subtract
   loops operating on the magnitudes of X and Y.

   The FPCR is saved in $f10 on entry and written back before returning.
   Registers written: $0, $1, t1-t7, AT, $f0, $f1, $f10 (X and Y are
   modified temporarily on the $x_big path and restored before use).  */

	.align 4
	.globl ldiv
	.ent ldiv
ldiv:
	.frame sp, FRAME, ra
#if FRAME > 0
	lda	sp, -FRAME(sp)
#endif
#ifdef PROF
	.set	macro
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.set	nomacro
	.prologue 1
#else
	.prologue 0
#endif

	/* Reject a zero divisor before touching any FP state.  */
	beq	Y, $divbyzero
	excb
	mf_fpcr	$f10

	/* Move X and Y into $f0 and $f1 (scratch slots 0 and 8).  */
	_ITOFT2	X, $f0, 0, Y, $f1, 8

	.align	4
	cvtqt	$f0, $f0
	cvtqt	$f1, $f1
	divt/c	$f0, $f1, $f0
	unop

	/* Check to see if X fit in the double as an exact value: it does
	   iff sign-extending its low 53 bits reproduces X.  */
	sll	X, (64-53), AT
	sra	AT, (64-53), AT
	cmpeq	X, AT, AT
	beq	AT, $x_big

	/* If we get here, we're expecting exact results from the division.
	   Do nothing else besides convert and clean up.  */
	cvttq/c	$f0, $f0
	excb
	mt_fpcr	$f10
	_FTOIT	$f0, $0, 0

	/* Common exit.  Expects the final signed quotient in $0 and the
	   caller's original X and Y; derives the remainder from them.  */
$egress:
	mulq	$0, Y, $1
	subq	X, $1, $1

	stq	$0, 0($16)
	stq	$1, 8($16)
	mov	$16, $0

#if FRAME > 0
	lda	sp, FRAME(sp)
#endif
	ret

	.align	4
$x_big:
	/* If we get here, X is large enough that we don't expect exact
	   results from the fp division.  Our task is to take the fp
	   result, figure out how far it's off from the correct result
	   and compute a fixup.  */

#define Q	v0		/* quotient */
#define R	t0		/* remainder */
#define SY	t1		/* scaled Y */
#define S	t2		/* scalar */
#define QY	t3		/* Q*Y */
#define OX	t6		/* X as passed by the caller */
#define OY	t7		/* Y as passed by the caller */

	/* The fixup code below can only handle unsigned values.  t5 is
	   cleared here and set by $fix_sign_in when the quotient must be
	   negated at the end.  The signed operands are kept in OX/OY
	   because $fix_sign_in replaces X and Y by their magnitudes,
	   while $egress needs the signed values for the remainder.  */
	or	X, Y, AT
	mov	$31, t5
	mov	X, OX
	mov	Y, OY
	blt	AT, $fix_sign_in
$fix_sign_in_ret1:
	cvttq/c	$f0, $f0

	_FTOIT	$f0, Q, 8
$fix_sign_in_ret2:
	/* Q is the non-negative quotient estimate; X and Y are
	   magnitudes from here to $q_low_ret.  */
	mulq	Q, Y, QY
	excb
	mt_fpcr	$f10

	.align	4
	/* Q*Y > X: the estimate is too high.  */
	subq	QY, X, R
	mov	Y, SY
	mov	1, S
	bgt	R, $q_high

$q_high_ret:
	/* X > Q*Y: the estimate is (possibly) too low.  */
	subq	X, QY, R
	mov	Y, SY
	mov	1, S
	bgt	R, $q_low

$q_low_ret:
	/* Apply the result sign to Q, then hand the original signed
	   operands back to $egress so that the remainder it computes is
	   X - Q*Y in terms of the caller's values.  */
	negq	Q, t4
	cmovlbs	t5, t4, Q
	mov	OX, X
	mov	OY, Y
	br	$egress

	.align	4
	/* The quotient that we computed was too large.  We need to reduce
	   it by S such that Y*S >= R.  Obviously the closer we get to the
	   correct value the better, but overshooting high is ok, as we'll
	   fix that up later.  */
0:
	addq	SY, SY, SY
	addq	S, S, S
$q_high:
	cmpult	SY, R, AT
	bne	AT, 0b

	subq	Q, S, Q
	unop
	subq	QY, SY, QY
	br	$q_high_ret

	.align	4
	/* The quotient that we computed was too small.  Divide the current
	   remainder (R) by Y and add that to the existing quotient (Q).
	   The expectation, of course, is that R is much smaller than X.  */
	/* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
	   already have a copy of Y in SY and the value 1 in S.  */
0:
	addq	SY, SY, SY
	addq	S, S, S
$q_low:
	cmpult	SY, R, AT
	bne	AT, 0b

	/* Shift-down and subtract loop.  Each iteration compares our scaled
	   Y (SY) with the remainder (R); if SY <= R then SY is subtracted
	   from R and Y's scalar (S) is added to the quotient (Q).  Both
	   candidates are computed unconditionally (t3 is reused as a
	   temporary; QY is dead by now) and committed with cmovne.  */
2:	addq	Q, S, t3
	srl	S, 1, S
	cmpule	SY, R, AT
	subq	R, SY, t4

	cmovne	AT, t3, Q
	cmovne	AT, t4, R
	srl	SY, 1, SY
	bne	S, 2b

	br	$q_low_ret

	.align	4
$fix_sign_in:
	/* If we got here, then X|Y is negative.  Need to adjust everything
	   such that we're doing unsigned division in the fixup loop.  */
	/* T5 is true if result should be negative.  */
	xor	X, Y, AT
	cmplt	AT, 0, t5
	cmplt	X, 0, AT
	negq	X, t0

	cmovne	AT, t0, X
	cmplt	Y, 0, AT
	negq	Y, t0

	cmovne	AT, t0, Y
	blbc	t5, $fix_sign_in_ret1

	/* Signs differ, so the fp quotient is negative: convert it here
	   and negate it to get the non-negative estimate the fixup
	   code expects.  */
	cvttq/c	$f0, $f0
	_FTOIT	$f0, Q, 8
	.align	3
	negq	Q, Q
	br	$fix_sign_in_ret2

$divbyzero:
	/* Keep the result address in v0 (it is also the return value),
	   since a0 is needed for the trap code.  */
	mov	a0, v0
	lda	a0, GEN_INTDIV
	call_pal PAL_gentrap
	/* Reached only if the trap returns: report quotient = remainder = 0.  */
	stq	zero, 0(v0)
	stq	zero, 8(v0)

#if FRAME > 0
	lda	sp, FRAME(sp)
#endif
	ret

	.end	ldiv

/* lldiv and imaxdiv are provided as weak aliases of ldiv (presumably
   because long, long long and intmax_t are all 64 bits wide on this
   target -- confirm against the ABI).  */
weak_alias (ldiv, lldiv)
weak_alias (ldiv, imaxdiv)