| xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame] | 1 | /* Copyright (C) 2004-2016 Free Software Foundation, Inc. | 
|  | 2 | This file is part of the GNU C Library. | 
|  | 3 |  | 
|  | 4 | The GNU C Library is free software; you can redistribute it and/or | 
|  | 5 | modify it under the terms of the GNU Lesser General Public | 
|  | 6 | License as published by the Free Software Foundation; either | 
|  | 7 | version 2.1 of the License, or (at your option) any later version. | 
|  | 8 |  | 
|  | 9 | The GNU C Library is distributed in the hope that it will be useful, | 
|  | 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 12 | Lesser General Public License for more details. | 
|  | 13 |  | 
|  | 14 | You should have received a copy of the GNU Lesser General Public | 
|  | 15 | License along with the GNU C Library.  If not, see | 
|  | 16 | <http://www.gnu.org/licenses/>.  */ | 
|  | 17 |  | 
|  | 18 | #include "div_libc.h" | 
|  | 19 |  | 
|  | 20 |  | 
|  | 21 | /* 64-bit unsigned long remainder.  These are not normal C functions.  Argument | 
|  | 22 | registers are t10 and t11, the result goes in t12.  Only t12 and AT may be | 
|  | 23 | clobbered. | 
|  | 24 |  | 
|  | 25 | Theory of operation here is that we can use the FPU divider for virtually | 
|  | 26 | all operands that we see: all dividend values between -2**53 and 2**53-1 | 
|  | 27 | can be computed directly.  Note that divisor values need not be checked | 
|  | 28 | against that range because the rounded fp value will be close enough such | 
|  | 29 | that the quotient is < 1, which will properly be truncated to zero when we | 
|  | 30 | convert back to integer. | 
|  | 31 |  | 
|  | 32 | When the dividend is outside the range for which we can compute exact | 
|  | 33 | results, we use the fp quotient as an estimate from which we begin refining | 
|  | 34 | an exact integral value.  This reduces the number of iterations in the | 
|  | 35 | shift-and-subtract loop significantly. | 
|  | 36 |  | 
|  | 37 | The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE | 
|  | 38 | for cvttq/c even without /sui being set.  It will not, however, properly | 
|  | 39 | raise the exception, so we don't have to worry about FPCR_INED being clear | 
|  | 40 | and so dying by SIGFPE.  */ | 
|  | 41 |  | 
/* __remqu: unsigned 64-bit remainder, RV = X % Y.

   X, Y, RV, AT, RA, FRAME, the _ITOFT2, _ITOFS and _FTOIT helpers,
   CALL_MCOUNT, DIVBYZERO and DO_DIVBYZERO are not defined in this file
   (presumably they come from div_libc.h -- confirm there).

   Frame slot usage, as observed in the code below:
	 0(sp)	$f0
	 8(sp)	$f1 until it is reloaded; later _FTOIT argument and t4
	16(sp)	numeric argument to the int<->fp helpers; later t0
	24(sp)	numeric argument to _ITOFT2; $f2 in $x_is_neg; later t1
	32(sp)	t2
	40(sp)	t3
	48(sp)	$f3
   (The numeric helper arguments look like scratch frame offsets;
   verify against the macro definitions.)

   Invariant: once "mf_fpcr $f3" has executed, $f3 holds the caller's
   FPCR.  Every exit reached after that point must write the FPCR back
   (excb; mt_fpcr $f3) and reload $f3 from 48(sp), otherwise the caller
   sees a clobbered $f3.  The $powerof2 exit branches away before that
   point and so has nothing to restore.  */

	.text
	.align	4
	.globl	__remqu
	.type	__remqu, @funcnoplt
	.usepv	__remqu, no

	cfi_startproc
	cfi_return_column (RA)
__remqu:
	lda	sp, -FRAME(sp)
	cfi_def_cfa_offset (FRAME)
	CALL_MCOUNT

	/* Get the fp divide insn issued as quickly as possible.  After
	   that's done, we have at least 22 cycles until its results are
	   ready -- all the time in the world to figure out how we're
	   going to use the results.  */
	subq	Y, 1, AT
	stt	$f0, 0(sp)
	and	Y, AT, AT		/* AT = Y & (Y-1): zero iff Y is 0 or 2**k.  */

	stt	$f1, 8(sp)
	excb
	stt	$f3, 48(sp)
	beq	AT, $powerof2		/* No fp register has been modified yet.  */
	cfi_rel_offset ($f0, 0)
	cfi_rel_offset ($f1, 8)
	cfi_rel_offset ($f3, 48)

	_ITOFT2	X, $f0, 16, Y, $f1, 24
	mf_fpcr	$f3			/* $f3 = caller's FPCR from here on.  */
	cvtqt	$f0, $f0
	cvtqt	$f1, $f1

	blt	X, $x_is_neg		/* Bit 63 of X set: conversion went negative.  */
	divt/c	$f0, $f1, $f0

	/* Check to see if Y was mis-converted as signed value.  */
	ldt	$f1, 8(sp)		/* $f1 is no longer needed; restore it now.  */
	blt	Y, $y_is_neg

	/* Check to see if X fit in the double as an exact value.  */
	srl	X, 53, AT
	bne	AT, $x_big

	/* If we get here, we're expecting exact results from the division.
	   Do nothing else besides convert, compute remainder, clean up.  */
	cvttq/c	$f0, $f0
	excb
	mt_fpcr	$f3
	_FTOIT	$f0, AT, 16

	mulq	AT, Y, AT		/* AT = quotient * Y.  */
	ldt	$f0, 0(sp)
	ldt	$f3, 48(sp)
	lda	sp, FRAME(sp)
	cfi_remember_state
	cfi_restore ($f0)
	cfi_restore ($f1)
	cfi_restore ($f3)
	cfi_def_cfa_offset (0)

	.align	4
	subq	X, AT, RV		/* RV = X - quotient * Y.  */
	ret	$31, (RA), 1

	.align	4
	cfi_restore_state
$x_is_neg:
	/* If we get here, X is so big that bit 63 is set, which made the
	   conversion come out negative.  Fix it up lest we not even get
	   a good estimate.  */
	ldah	AT, 0x5f80		/* 2**64 as float.  */
	stt	$f2, 24(sp)
	cfi_rel_offset ($f2, 24)
	_ITOFS	AT, $f2, 16

	addt	$f0, $f2, $f0		/* Undo the signed misreading: add 2**64.  */
	divt/c	$f0, $f1, $f0

	/* Ok, we've now the divide issued.  Continue with other checks.  */
	.align	4
	ldt	$f1, 8(sp)
	unop
	ldt	$f2, 24(sp)
	blt	Y, $y_is_neg
	cfi_restore ($f1)
	cfi_restore ($f2)
	cfi_remember_state	/* for y_is_neg */

	/* Falls through: an X with bit 63 set never fits exactly.  */
	.align	4
$x_big:
	/* If we get here, X is large enough that we don't expect exact
	   results, and neither X nor Y got mis-translated for the fp
	   division.  Our task is to take the fp result, figure out how
	   far it's off from the correct result and compute a fixup.  */
	stq	t0, 16(sp)
	stq	t1, 24(sp)
	stq	t2, 32(sp)
	stq	t3, 40(sp)
	cfi_rel_offset (t0, 16)
	cfi_rel_offset (t1, 24)
	cfi_rel_offset (t2, 32)
	cfi_rel_offset (t3, 40)

#define Q	t0		/* quotient */
#define R	RV		/* remainder */
#define SY	t1		/* scaled Y */
#define S	t2		/* scalar */
#define QY	t3		/* Q*Y */

	cvttq/c	$f0, $f0
	_FTOIT	$f0, Q, 8
	mulq	Q, Y, QY

	.align	4
	stq	t4, 8(sp)		/* Slot 8 is free: $f1 was already reloaded.  */
	excb
	ldt	$f0, 0(sp)
	mt_fpcr	$f3
	cfi_rel_offset (t4, 8)
	cfi_restore ($f0)

	/* Estimate too high?  R = Q*Y - X is then the positive excess.  */
	subq	QY, X, R
	mov	Y, SY
	mov	1, S
	bgt	R, $q_high

$q_high_ret:
	/* Estimate too low?  R = X - Q*Y is then the remainder so far.  */
	subq	X, QY, R
	mov	Y, SY
	mov	1, S
	bgt	R, $q_low

$q_low_ret:
	/* R is an alias of RV, so the result is already in place.  */
	ldq	t4, 8(sp)
	ldq	t0, 16(sp)
	ldq	t1, 24(sp)
	ldq	t2, 32(sp)

	ldq	t3, 40(sp)
	ldt	$f3, 48(sp)
	lda	sp, FRAME(sp)
	cfi_remember_state
	cfi_restore (t0)
	cfi_restore (t1)
	cfi_restore (t2)
	cfi_restore (t3)
	cfi_restore (t4)
	cfi_restore ($f3)
	cfi_def_cfa_offset (0)
	ret	$31, (RA), 1

	.align	4
	cfi_restore_state
	/* The quotient that we computed was too large.  We need to reduce
	   it by S such that Y*S >= R.  Obviously the closer we get to the
	   correct value the better, but overshooting high is ok, as we'll
	   fix that up later.  */
0:
	addq	SY, SY, SY
	addq	S, S, S
$q_high:
	cmpult	SY, R, AT		/* Keep doubling while SY < R (unsigned).  */
	bne	AT, 0b

	subq	Q, S, Q
	unop
	subq	QY, SY, QY		/* Keep QY == Q*Y after lowering Q by S.  */
	br	$q_high_ret

	.align	4
	/* The quotient that we computed was too small.  Divide Y by the
	   current remainder (R) and add that to the existing quotient (Q).
	   The expectation, of course, is that R is much smaller than X.  */
	/* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
	   already have a copy of Y in SY and the value 1 in S.  */
0:
	addq	SY, SY, SY
	addq	S, S, S
$q_low:
	cmpult	SY, R, AT
	bne	AT, 0b

	/* Shift-down and subtract loop.  Each iteration compares our scaled
	   Y (SY) with the remainder (R); if SY <= R then X is divisible by
	   Y's scalar (S) so add it to the quotient (Q).  */
2:	addq	Q, S, t3		/* t3 (QY) is dead here; reuse it as Q+S.  */
	srl	S, 1, S
	cmpule	SY, R, AT
	subq	R, SY, t4

	cmovne	AT, t3, Q
	cmovne	AT, t4, R
	srl	SY, 1, SY
	bne	S, 2b

	br	$q_low_ret

	.align	4
	cfi_restore_state
$y_is_neg:
	/* If we get here, Y is so big that bit 63 is set.  The results
	   from the divide will be completely wrong.  Fortunately, the
	   quotient must be either 0 or 1, so the remainder must be X
	   or X-Y, so just compute it directly.

	   Both ways of reaching this label have already executed
	   "mf_fpcr $f3" and issued the divt/c, and have already reloaded
	   $f1 (and $f2 where it was used).  So, like every other exit
	   taken after that point, we must put the caller's FPCR back and
	   reload $f3 from its save slot before returning; only $f0 and
	   $f3 remain to be restored.  */
	cmpule	Y, X, AT		/* AT = (Y <= X), i.e. quotient is 1.  */
	excb				/* Let the divide settle before writing FPCR.  */
	mt_fpcr	$f3
	subq	X, Y, RV
	ldt	$f0, 0(sp)
	ldt	$f3, 48(sp)
	cmoveq	AT, X, RV		/* Quotient 0: the remainder is X itself.  */

	lda	sp, FRAME(sp)
	cfi_restore ($f0)
	cfi_restore ($f3)
	cfi_def_cfa_offset (0)
	ret	$31, (RA), 1

	.align	4
	cfi_def_cfa_offset (FRAME)
$powerof2:
	/* Y & (Y-1) was zero, so Y is a power of two or zero.  For a power
	   of two the remainder is simply X & (Y-1).  Zero goes to DIVBYZERO
	   (defined outside this file).  $f0, $f1 and $f3 were stored but
	   never modified on this path, so nothing needs reloading.  */
	subq	Y, 1, AT
	beq	Y, DIVBYZERO
	and	X, AT, RV
	lda	sp, FRAME(sp)
	cfi_def_cfa_offset (0)
	ret	$31, (RA), 1

	cfi_endproc
	.size	__remqu, .-__remqu

	/* Presumably emits the DIVBYZERO handler branched to above;
	   the macro is defined outside this file.  */
	DO_DIVBYZERO