| xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame] | 1 | /* Copyright (C) 2004-2016 Free Software Foundation, Inc. | 
 | 2 |    This file is part of the GNU C Library. | 
 | 3 |  | 
 | 4 |    The GNU C Library is free software; you can redistribute it and/or | 
 | 5 |    modify it under the terms of the GNU Lesser General Public | 
 | 6 |    License as published by the Free Software Foundation; either | 
 | 7 |    version 2.1 of the License, or (at your option) any later version. | 
 | 8 |  | 
 | 9 |    The GNU C Library is distributed in the hope that it will be useful, | 
 | 10 |    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 | 11 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
 | 12 |    Lesser General Public License for more details. | 
 | 13 |  | 
 | 14 |    You should have received a copy of the GNU Lesser General Public | 
 | 15 |    License along with the GNU C Library.  If not, see | 
 | 16 |    <http://www.gnu.org/licenses/>.  */ | 
 | 17 |  | 
 | 18 | #include "div_libc.h" | 
 | 19 |  | 
 | 20 |  | 
 | 21 | /* 64-bit unsigned long remainder.  These are not normal C functions.  Argument | 
 | 22 |    registers are t10 and t11, the result goes in t12.  Only t12 and AT may be | 
 | 23 |    clobbered. | 
 | 24 |  | 
 | 25 |    Theory of operation here is that we can use the FPU divider for virtually | 
 | 26 |    all operands that we see: all dividend values between -2**53 and 2**53-1 | 
 | 27 |    can be computed directly.  Note that divisor values need not be checked | 
 | 28 |    against that range because the rounded fp value will be close enough such | 
 | 29 |    that the quotient is < 1, which will properly be truncated to zero when we | 
 | 30 |    convert back to integer. | 
 | 31 |  | 
 | 32 |    When the dividend is outside the range for which we can compute exact | 
 | 33 |    results, we use the fp quotient as an estimate from which we begin refining | 
 | 34 |    an exact integral value.  This reduces the number of iterations in the | 
 | 35 |    shift-and-subtract loop significantly. | 
 | 36 |  | 
 | 37 |    The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE | 
 | 38 |    for cvttq/c even without /sui being set.  It will not, however, properly | 
 | 39 |    raise the exception, so we don't have to worry about FPCR_INED being clear | 
 | 40 |    and so dying by SIGFPE.  */ | 
 | 41 |  | 
	.text
	.align	4
	.globl	__remqu
	.type	__remqu, @funcnoplt
	.usepv	__remqu, no

	/* __remqu -- 64-bit unsigned remainder: RV = X % Y.

	   X (dividend), Y (divisor), RV (result), AT, FRAME and the
	   _ITOFT2/_ITOFS/_FTOIT helpers come from div_libc.h.  Every
	   register other than RV and AT is preserved, including the FP
	   registers and the FPCR that this routine uses internally.

	   Exit paths:
	     $powerof2   Y is zero or a power of two: mask (or trap on 0).
	     fast path   X < 2**53: the fp quotient is exact.
	     $x_big      X >= 2**53: fp quotient is an estimate, refined
			 by shift-and-subtract.
	     $y_is_neg   Y has bit 63 set: the quotient is 0 or 1.

	   Frame slots as used below:
	      0(sp)  saved $f0
	      8(sp)  saved $f1; after $f1 is reloaded, reused as _FTOIT
		     scratch and then as the t4 save slot
	     16(sp)  int<->fp transfer scratch; later the t0 save slot
	     24(sp)  int<->fp transfer scratch / saved $f2; later t1
	     32(sp)  saved t2
	     40(sp)  saved t3
	     48(sp)  saved $f3 ($f3 holds the entry FPCR while we work)  */

	cfi_startproc
	cfi_return_column (RA)
__remqu:
	lda	sp, -FRAME(sp)
	cfi_def_cfa_offset (FRAME)
	CALL_MCOUNT

	/* Get the fp divide insn issued as quickly as possible.  After
	   that's done, we have at least 22 cycles until its results are
	   ready -- all the time in the world to figure out how we're
	   going to use the results.  */
	subq	Y, 1, AT
	stt	$f0, 0(sp)
	and	Y, AT, AT		/* Y & (Y-1) == 0 iff Y is 0 or 2**k.  */

	stt	$f1, 8(sp)
	excb
	stt	$f3, 48(sp)
	beq	AT, $powerof2
	cfi_rel_offset ($f0, 0)
	cfi_rel_offset ($f1, 8)
	cfi_rel_offset ($f3, 48)

	_ITOFT2	X, $f0, 16, Y, $f1, 24
	mf_fpcr	$f3			/* $f3 = entry FPCR; every exit past
					   this point must restore both.  */
	cvtqt	$f0, $f0
	cvtqt	$f1, $f1

	blt	X, $x_is_neg
	divt/c	$f0, $f1, $f0

	/* Check to see if Y was mis-converted as signed value.  */
	ldt	$f1, 8(sp)
	blt	Y, $y_is_neg

	/* Check to see if X fit in the double as an exact value.  */
	srl	X, 53, AT
	bne	AT, $x_big

	/* If we get here, we're expecting exact results from the division.
	   Do nothing else besides convert, compute remainder, clean up.  */
	cvttq/c	$f0, $f0
	excb
	mt_fpcr	$f3
	_FTOIT	$f0, AT, 16

	mulq	AT, Y, AT		/* AT = Q*Y.  */
	ldt	$f0, 0(sp)
	ldt	$f3, 48(sp)
	lda	sp, FRAME(sp)
	cfi_remember_state
	cfi_restore ($f0)
	cfi_restore ($f1)
	cfi_restore ($f3)
	cfi_def_cfa_offset (0)

	.align	4
	subq	X, AT, RV		/* RV = X - Q*Y.  */
	ret	$31, (RA), 1

	.align	4
	cfi_restore_state
$x_is_neg:
	/* If we get here, X is so big that bit 63 is set, which made the
	   conversion come out negative.  Fix it up lest we not even get
	   a good estimate.  */
	ldah	AT, 0x5f80		/* 2**64 as float.  */
	stt	$f2, 24(sp)
	cfi_rel_offset ($f2, 24)
	_ITOFS	AT, $f2, 16

	addt	$f0, $f2, $f0
	divt/c	$f0, $f1, $f0

	/* Ok, we've now the divide issued.  Continue with other checks.  */
	.align	4
	ldt	$f1, 8(sp)
	unop
	ldt	$f2, 24(sp)
	blt	Y, $y_is_neg
	cfi_restore ($f1)
	cfi_restore ($f2)
	cfi_remember_state	/* for y_is_neg */

	.align	4
$x_big:
	/* If we get here, X is large enough that we don't expect exact
	   results, and neither X nor Y got mis-translated for the fp
	   division.  Our task is to take the fp result, figure out how
	   far it's off from the correct result and compute a fixup.  */
	stq	t0, 16(sp)
	stq	t1, 24(sp)
	stq	t2, 32(sp)
	stq	t3, 40(sp)
	cfi_rel_offset (t0, 16)
	cfi_rel_offset (t1, 24)
	cfi_rel_offset (t2, 32)
	cfi_rel_offset (t3, 40)

#define Q	t0		/* quotient */
#define R	RV		/* remainder */
#define SY	t1		/* scaled Y */
#define S	t2		/* scalar */
#define QY	t3		/* Q*Y */

	cvttq/c	$f0, $f0
	_FTOIT	$f0, Q, 8
	mulq	Q, Y, QY

	.align	4
	stq	t4, 8(sp)
	excb
	ldt	$f0, 0(sp)
	mt_fpcr	$f3
	cfi_rel_offset (t4, 8)
	cfi_restore ($f0)

	/* R = Q*Y - X: positive means the estimate overshot.  */
	subq	QY, X, R
	mov	Y, SY
	mov	1, S
	bgt	R, $q_high

$q_high_ret:
	/* R = X - Q*Y: positive means there is still remainder to
	   reduce against Y.  */
	subq	X, QY, R
	mov	Y, SY
	mov	1, S
	bgt	R, $q_low

$q_low_ret:
	ldq	t4, 8(sp)
	ldq	t0, 16(sp)
	ldq	t1, 24(sp)
	ldq	t2, 32(sp)

	ldq	t3, 40(sp)
	ldt	$f3, 48(sp)
	lda	sp, FRAME(sp)
	cfi_remember_state
	cfi_restore (t0)
	cfi_restore (t1)
	cfi_restore (t2)
	cfi_restore (t3)
	cfi_restore (t4)
	cfi_restore ($f3)
	cfi_def_cfa_offset (0)
	ret	$31, (RA), 1

	.align	4
	cfi_restore_state
	/* The quotient that we computed was too large.  We need to reduce
	   it by S such that Y*S >= R.  Obviously the closer we get to the
	   correct value the better, but overshooting high is ok, as we'll
	   fix that up later.  */
0:
	addq	SY, SY, SY
	addq	S, S, S
$q_high:
	cmpult	SY, R, AT
	bne	AT, 0b

	subq	Q, S, Q
	unop
	subq	QY, SY, QY
	br	$q_high_ret

	.align	4
	/* The quotient that we computed was too small.  Divide the
	   current remainder (R) by Y and add that to the existing
	   quotient (Q).  The expectation, of course, is that R is much
	   smaller than X.  */
	/* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
	   already have a copy of Y in SY and the value 1 in S.  */
0:
	addq	SY, SY, SY
	addq	S, S, S
$q_low:
	cmpult	SY, R, AT
	bne	AT, 0b

	/* Shift-down and subtract loop.  Each iteration compares our scaled
	   Y (SY) with the remainder (R); if SY <= R then R still holds at
	   least S multiples of Y, so add S to the quotient (Q) and take
	   SY out of R.  t3 (QY) is dead by now and is reused as a temp.  */
2:	addq	Q, S, t3
	srl	S, 1, S
	cmpule	SY, R, AT
	subq	R, SY, t4

	cmovne	AT, t3, Q
	cmovne	AT, t4, R
	srl	SY, 1, SY
	bne	S, 2b

	br	$q_low_ret

	.align	4
	cfi_restore_state
$y_is_neg:
	/* If we get here, Y is so big that bit 63 is set.  The results
	   from the divide will be completely wrong.  Fortunately, the
	   quotient must be either 0 or 1, so the remainder must be X
	   or X-Y, so just compute it directly.

	   Both branches into this label happen after mf_fpcr has
	   overwritten $f3 and after the fp conversions and divide were
	   issued, so the saved FPCR must be written back and $f3 must
	   be reloaded from the frame here, exactly as on the other
	   exit paths.  $f1 (and $f2 when it was used) were already
	   reloaded before branching.  */
	cmpule	Y, X, AT
	excb
	mt_fpcr	$f3
	subq	X, Y, RV
	ldt	$f0, 0(sp)
	ldt	$f3, 48(sp)
	cmoveq	AT, X, RV		/* Y > X: remainder is X itself.  */

	lda	sp, FRAME(sp)
	cfi_restore ($f0)
	cfi_restore ($f3)
	cfi_def_cfa_offset (0)
	ret	$31, (RA), 1

	.align	4
	cfi_def_cfa_offset (FRAME)
$powerof2:
	/* Y is zero or a power of two.  No FP register has been modified
	   yet (only stored), so nothing needs reloading.  For a power of
	   two the remainder is just X & (Y-1).  */
	subq	Y, 1, AT
	beq	Y, DIVBYZERO
	and	X, AT, RV
	lda	sp, FRAME(sp)
	cfi_def_cfa_offset (0)
	ret	$31, (RA), 1

	cfi_endproc
	.size	__remqu, .-__remqu

	DO_DIVBYZERO