blob: 280405b47e71942b01ed7d8afdff6cc22f33cfcc [file] [log] [blame]
/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2 * Note: I added some stuff for use with gnupg
3 *
4 * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
5 * 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
6 *
7 * This file is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Library General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or (at your
10 * option) any later version.
11 *
12 * This file is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
15 * License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this file; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20 * MA 02111-1307, USA. */
21
22/* You have to define the following before including this file:
23 *
24 * UWtype -- An unsigned type, default type for operations (typically a "word")
25 * UHWtype -- An unsigned type, at least half the size of UWtype.
 * UDWtype -- An unsigned type, at least twice as large as UWtype.
27 * W_TYPE_SIZE -- size in bits of UWtype
28 *
29 * SItype, USItype -- Signed and unsigned 32 bit types.
30 * DItype, UDItype -- Signed and unsigned 64 bit types.
31 *
32 * On a 32 bit machine UWtype should typically be USItype;
33 * on a 64 bit machine, UWtype should typically be UDItype.
34*/
35
/* Word-size helpers derived from W_TYPE_SIZE: __ll_B is the half-word
 * base 2^(W_TYPE_SIZE/2); __ll_lowpart/__ll_highpart extract the low and
 * high halves of a UWtype for schoolbook multi-precision arithmetic. */
#define __BITS4 (W_TYPE_SIZE / 4)
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))

/* This is used to make sure no undesirable sharing between different libraries
   that use this file takes place. */
#ifndef __MPN
#define __MPN(x) __##x
#endif
46
47/* Define auxiliary asm macros.
48 *
 * 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
 * UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
51 * word product in HIGH_PROD and LOW_PROD.
52 *
53 * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
54 * UDWtype product. This is just a variant of umul_ppmm.
55
56 * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
57 * denominator) divides a UDWtype, composed by the UWtype integers
58 * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
59 * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
60 * than DENOMINATOR for correct operation. If, in addition, the most
61 * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
62 * UDIV_NEEDS_NORMALIZATION is defined to 1.
63 * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
64 * denominator). Like udiv_qrnnd but the numbers are signed. The quotient
65 * is rounded towards 0.
66 *
67 * 5) count_leading_zeros(count, x) counts the number of zero-bits from the
68 * msb to the first non-zero bit in the UWtype X. This is the number of
69 * steps X needs to be shifted left to set the msb. Undefined for X == 0,
70 * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
71 *
72 * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
73 * from the least significant end.
74 *
75 * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
76 * high_addend_2, low_addend_2) adds two UWtype integers, composed by
77 * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
78 * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
79 * (i.e. carry out) is not stored anywhere, and is lost.
80 *
81 * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
82 * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
83 * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
84 * LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE
85 * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
86 * and is lost.
87 *
88 * If any of these macros are left undefined for a particular CPU,
89 * C macros are used. */
90
91/* The CPUs come in alphabetical order below.
92 *
93 * Please add support for more CPUs here, or improve the current support
94 * for the CPUs below! */
95
/* Everything below is GCC inline assembly; with NO_ASM (or a non-GNU
 * compiler) the generic C fallbacks elsewhere in this file are used. */
#if defined(__GNUC__) && !defined(NO_ASM)

/* We sometimes need to clobber "cc" with gcc2, but that would not be
   understood by gcc1. Use cpp to avoid major code duplication. */
#if __GNUC__ < 2
#define __CLOBBER_CC
#define __AND_CLOBBER_CC
#else /* __GNUC__ >= 2 */
#define __CLOBBER_CC : "cc"
#define __AND_CLOBBER_CC , "cc"
#endif /* __GNUC__ < 2 */
107
/***************************************
 ************** A29K *****************
 ***************************************/
#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32
/* Double-word add: "add" produces the carry that "addc" consumes. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("add %1,%4,%5\n" \
		"addc %0,%2,%3" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "%r" ((USItype)(ah)), \
		  "rI" ((USItype)(bh)), \
		  "%r" ((USItype)(al)), \
		  "rI" ((USItype)(bl)))
/* Double-word subtract with borrow propagated via "subc". */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("sub %1,%4,%5\n" \
		"subc %0,%2,%3" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "r" ((USItype)(ah)), \
		  "rI" ((USItype)(bh)), \
		  "r" ((USItype)(al)), \
		  "rI" ((USItype)(bl)))
/* 32x32->64 multiply: multiplu yields the low word, multmu the high. */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("multiplu %0,%1,%2" \
		: "=r" ((USItype)(xl)) \
		: "r" (__m0), \
		  "r" (__m1)); \
	__asm__ ("multmu %0,%1,%2" \
		: "=r" ((USItype)(xh)) \
		: "r" (__m0), \
		  "r" (__m1)); \
} while (0)
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("dividu %0,%3,%4" \
		: "=r" ((USItype)(q)), \
		  "=q" ((USItype)(r)) \
		: "1" ((USItype)(n1)), \
		  "r" ((USItype)(n0)), \
		  "r" ((USItype)(d)))

#define count_leading_zeros(count, x) \
	__asm__ ("clz %0,%1" \
		: "=r" ((USItype)(count)) \
		: "r" ((USItype)(x)))
#define COUNT_LEADING_ZEROS_0 32
#endif /* __a29k__ */
156
/* DEC Alpha: umulh supplies the high 64 bits, a plain C multiply the low. */
#if defined(__alpha) && W_TYPE_SIZE == 64
#define umul_ppmm(ph, pl, m0, m1) \
do { \
	UDItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("umulh %r1,%2,%0" \
		: "=r" ((UDItype) ph) \
		: "%rJ" (__m0), \
		  "rI" (__m1)); \
	(pl) = __m0 * __m1; \
} while (0)
#define UMUL_TIME 46
#ifndef LONGLONG_STANDALONE
/* Division is done out of line by __udiv_qrnnd (assembly helper
 * elsewhere in the project; K&R-style declaration kept as-is). */
#define udiv_qrnnd(q, r, n1, n0, d) \
do { UDItype __r; \
	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
	(r) = __r; \
} while (0)
extern UDItype __udiv_qrnnd();
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
#endif /* __alpha */
178
/***************************************
 ************** ARM ******************
 ***************************************/
#if defined(__arm__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("adds %1, %4, %5\n" \
		"adc %0, %2, %3" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "%r" ((USItype)(ah)), \
		  "rI" ((USItype)(bh)), \
		  "%r" ((USItype)(al)), \
		  "rI" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subs %1, %4, %5\n" \
		"sbc %0, %2, %3" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "r" ((USItype)(ah)), \
		  "rI" ((USItype)(bh)), \
		  "r" ((USItype)(al)), \
		  "rI" ((USItype)(bl)))
#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__
/* ARMv2/v3 have no umull: build the 64-bit product from four 16x16
 * partial products (schoolbook), carrying 0x10000 into the high word
 * when the central sum overflows. Scratch regs r0-r2 are clobbered. */
#define umul_ppmm(xh, xl, a, b) \
	__asm__ ("%@ Inlined umul_ppmm\n" \
		"mov %|r0, %2, lsr #16 @ AAAA\n" \
		"mov %|r2, %3, lsr #16 @ BBBB\n" \
		"bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \
		"bic %0, %3, %|r2, lsl #16 @ bbbb\n" \
		"mul %1, %|r1, %|r2 @ aaaa * BBBB\n" \
		"mul %|r2, %|r0, %|r2 @ AAAA * BBBB\n" \
		"mul %|r1, %0, %|r1 @ aaaa * bbbb\n" \
		"mul %0, %|r0, %0 @ AAAA * bbbb\n" \
		"adds %|r0, %1, %0 @ central sum\n" \
		"addcs %|r2, %|r2, #65536\n" \
		"adds %1, %|r1, %|r0, lsl #16\n" \
		"adc %0, %|r2, %|r0, lsr #16" \
		: "=&r" ((USItype)(xh)), \
		  "=r" ((USItype)(xl)) \
		: "r" ((USItype)(a)), \
		  "r" ((USItype)(b)) \
		: "r0", "r1", "r2")
#else
/* ARMv4 and later: single umull instruction. */
#define umul_ppmm(xh, xl, a, b) \
	__asm__ ("%@ Inlined umul_ppmm\n" \
		"umull %r1, %r0, %r2, %r3" \
		: "=&r" ((USItype)(xh)), \
		  "=r" ((USItype)(xl)) \
		: "r" ((USItype)(a)), \
		  "r" ((USItype)(b)) \
		: "r0", "r1")
#endif
#define UMUL_TIME 20
#define UDIV_TIME 100
#endif /* __arm__ */
234
/***************************************
 ************** CLIPPER **************
 ***************************************/
/* These use GCC statement expressions ({ ... }) to return a value. */
#if defined(__clipper__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
	({union {UDItype __ll; \
		struct {USItype __l, __h; } __i; \
	} __xx; \
	__asm__ ("mulwux %2,%0" \
		: "=r" (__xx.__ll) \
		: "%0" ((USItype)(u)), \
		  "r" ((USItype)(v))); \
	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
#define smul_ppmm(w1, w0, u, v) \
	({union {DItype __ll; \
		struct {SItype __l, __h; } __i; \
	} __xx; \
	__asm__ ("mulwx %2,%0" \
		: "=r" (__xx.__ll) \
		: "%0" ((SItype)(u)), \
		  "r" ((SItype)(v))); \
	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
#define __umulsidi3(u, v) \
	({UDItype __w; \
	__asm__ ("mulwux %2,%0" \
		: "=r" (__w) \
		: "%0" ((USItype)(u)), \
		  "r" ((USItype)(v))); \
	__w; })
#endif /* __clipper__ */
265
/***************************************
 ************** GMICRO ***************
 ***************************************/
#if defined(__gmicro__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("add.w %5,%1\n" \
		"addx %3,%0" \
		: "=g" ((USItype)(sh)), \
		  "=&g" ((USItype)(sl)) \
		: "%0" ((USItype)(ah)), \
		  "g" ((USItype)(bh)), \
		  "%1" ((USItype)(al)), \
		  "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("sub.w %5,%1\n" \
		"subx %3,%0" \
		: "=g" ((USItype)(sh)), \
		  "=&g" ((USItype)(sl)) \
		: "0" ((USItype)(ah)), \
		  "g" ((USItype)(bh)), \
		  "1" ((USItype)(al)), \
		  "g" ((USItype)(bl)))
#define umul_ppmm(ph, pl, m0, m1) \
	__asm__ ("mulx %3,%0,%1" \
		: "=g" ((USItype)(ph)), \
		  "=r" ((USItype)(pl)) \
		: "%0" ((USItype)(m0)), \
		  "g" ((USItype)(m1)))
#define udiv_qrnnd(q, r, nh, nl, d) \
	__asm__ ("divx %4,%0,%1" \
		: "=g" ((USItype)(q)), \
		  "=r" ((USItype)(r)) \
		: "1" ((USItype)(nh)), \
		  "0" ((USItype)(nl)), \
		  "g" ((USItype)(d)))
/* bsch/1 scans for the first set bit starting from an initial count of 0. */
#define count_leading_zeros(count, x) \
	__asm__ ("bsch/1 %1,%0" \
		: "=g" (count) \
		: "g" ((USItype)(x)), \
		  "0" ((USItype)0))
#endif
307
/***************************************
 ************** HPPA *****************
 ***************************************/
#if defined(__hppa) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("add %4,%5,%1\n" \
		"addc %2,%3,%0" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "%rM" ((USItype)(ah)), \
		  "rM" ((USItype)(bh)), \
		  "%rM" ((USItype)(al)), \
		  "rM" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("sub %4,%5,%1\n" \
		"subb %2,%3,%0" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "rM" ((USItype)(ah)), \
		  "rM" ((USItype)(bh)), \
		  "rM" ((USItype)(al)), \
		  "rM" ((USItype)(bl)))
#if defined(_PA_RISC1_1)
/* PA-RISC 1.1 has xmpyu in the FPU; operands must live in FP registers. */
#define umul_ppmm(wh, wl, u, v) \
do { \
	union {UDItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __xx; \
	__asm__ ("xmpyu %1,%2,%0" \
		: "=*f" (__xx.__ll) \
		: "*f" ((USItype)(u)), \
		  "*f" ((USItype)(v))); \
	(wh) = __xx.__i.__h; \
	(wl) = __xx.__i.__l; \
} while (0)
#define UMUL_TIME 8
#define UDIV_TIME 60
#else
#define UMUL_TIME 40
#define UDIV_TIME 80
#endif
#ifndef LONGLONG_STANDALONE
/* Division handled out of line by the __udiv_qrnnd assembly helper. */
#define udiv_qrnnd(q, r, n1, n0, d) \
do { USItype __r; \
	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
	(r) = __r; \
} while (0)
extern USItype __udiv_qrnnd();
#endif /* LONGLONG_STANDALONE */
/* Binary-search clz: each extru,= tests a half/quarter/... of the
 * remaining bits and conditionally shifts down, accumulating the count. */
#define count_leading_zeros(count, x) \
do { \
	USItype __tmp; \
	__asm__ ( \
		"ldi 1,%0\n" \
		"extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
		"extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \
		"ldo 16(%0),%0 ; Yes. Perform add.\n" \
		"extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
		"extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \
		"ldo 8(%0),%0 ; Yes. Perform add.\n" \
		"extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
		"extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \
		"ldo 4(%0),%0 ; Yes. Perform add.\n" \
		"extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
		"extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \
		"ldo 2(%0),%0 ; Yes. Perform add.\n" \
		"extru %1,30,1,%1 ; Extract bit 1.\n" \
		"sub %0,%1,%0 ; Subtract it. " \
		: "=r" (count), "=r" (__tmp) : "1" (x)); \
} while (0)
#endif /* hppa */
379
/***************************************
 ************** I370 *****************
 ***************************************/
#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32
/* "mr" is a signed 32x32->64 multiply; the trailing adjustment converts
 * the signed product to the unsigned product umul_ppmm must deliver. */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	union {UDItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __xx; \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("mr %0,%3" \
		: "=r" (__xx.__i.__h), \
		  "=r" (__xx.__i.__l) \
		: "%1" (__m0), \
		  "r" (__m1)); \
	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
	(xh) += ((((SItype) __m0 >> 31) & __m1) \
		+ (((SItype) __m1 >> 31) & __m0)); \
} while (0)
#define smul_ppmm(xh, xl, m0, m1) \
do { \
	union {DItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __xx; \
	__asm__ ("mr %0,%3" \
		: "=r" (__xx.__i.__h), \
		  "=r" (__xx.__i.__l) \
		: "%1" (m0), \
		  "r" (m1)); \
	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
} while (0)
#define sdiv_qrnnd(q, r, n1, n0, d) \
do { \
	union {DItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __xx; \
	__xx.__i.__h = n1; __xx.__i.__l = n0; \
	__asm__ ("dr %0,%2" \
		: "=r" (__xx.__ll) \
		: "0" (__xx.__ll), "r" (d)); \
	(q) = __xx.__i.__l; (r) = __xx.__i.__h; \
} while (0)
#endif
423
/***************************************
 ************** I386 *****************
 ***************************************/
/* FIX(review): removed a stray "#undef __i386__" that preceded this
 * conditional.  It made the #if below always false (disabling every
 * x86 asm macro in this section) and clobbered a compiler-predefined
 * macro for the remainder of the translation unit. */
#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32
/* Double-word add: addl sets CF, adcl folds it into the high word. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addl %5,%1\n" \
		"adcl %3,%0" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "%0" ((USItype)(ah)), \
		  "g" ((USItype)(bh)), \
		  "%1" ((USItype)(al)), \
		  "g" ((USItype)(bl)))
/* Double-word subtract: subl sets CF (borrow), sbbl consumes it. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subl %5,%1\n" \
		"sbbl %3,%0" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "0" ((USItype)(ah)), \
		  "g" ((USItype)(bh)), \
		  "1" ((USItype)(al)), \
		  "g" ((USItype)(bl)))
/* mull: EDX:EAX = EAX * operand. */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("mull %3" \
		: "=a" ((USItype)(w0)), \
		  "=d" ((USItype)(w1)) \
		: "%0" ((USItype)(u)), \
		  "rm" ((USItype)(v)))
/* divl: divides EDX:EAX, quotient in EAX, remainder in EDX. */
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("divl %4" \
		: "=a" ((USItype)(q)), \
		  "=d" ((USItype)(r)) \
		: "0" ((USItype)(n0)), \
		  "1" ((USItype)(n1)), \
		  "rm" ((USItype)(d)))
/* bsrl returns the index of the highest set bit; XOR 31 converts it
 * to a leading-zero count.  Undefined for x == 0 (no CLZ_0 here). */
#define count_leading_zeros(count, x) \
do { \
	USItype __cbtmp; \
	__asm__ ("bsrl %1,%0" \
		: "=r" (__cbtmp) : "rm" ((USItype)(x))); \
	(count) = __cbtmp ^ 31; \
} while (0)
#define count_trailing_zeros(count, x) \
	__asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
#ifndef UMUL_TIME
#define UMUL_TIME 40
#endif
#ifndef UDIV_TIME
#define UDIV_TIME 40
#endif
#endif /* 80x86 */
476
/***************************************
 ************** I860 *****************
 ***************************************/
#if defined(__i860__) && W_TYPE_SIZE == 32
/* Double-word right shift: r = low word of (h:l) >> c.
 * FIX(review): the original asm was missing the ':' that separates the
 * instruction template from the output operand list, so this macro
 * could never have compiled if instantiated. */
#define rshift_rhlc(r, h, l, c) \
	__asm__ ("shr %3,r0,r0\n" \
		"shrd %1,%2,%0" \
		: "=r" (r) : "r" (h), "r" (l), "rn" (c))
#endif /* i860 */
486
/***************************************
 ************** I960 *****************
 ***************************************/
#if defined(__i960__) && W_TYPE_SIZE == 32
/* "cmpo 1,0" clears the condition bit so the first addc adds with no
 * carry-in; carry then chains into the second addc. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("cmpo 1,0\n" \
		"addc %5,%4,%1\n" \
		"addc %3,%2,%0" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "%dI" ((USItype)(ah)), \
		  "dI" ((USItype)(bh)), \
		  "%dI" ((USItype)(al)), \
		  "dI" ((USItype)(bl)))
/* "cmpo 0,0" presets the borrow for the subc chain. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("cmpo 0,0\n" \
		"subc %5,%4,%1\n" \
		"subc %3,%2,%0" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "dI" ((USItype)(ah)), \
		  "dI" ((USItype)(bh)), \
		  "dI" ((USItype)(al)), \
		  "dI" ((USItype)(bl)))
#define umul_ppmm(w1, w0, u, v) \
	({union {UDItype __ll; \
		struct {USItype __l, __h; } __i; \
	} __xx; \
	__asm__ ("emul %2,%1,%0" \
		: "=d" (__xx.__ll) \
		: "%dI" ((USItype)(u)), \
		  "dI" ((USItype)(v))); \
	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
#define __umulsidi3(u, v) \
	({UDItype __w; \
	__asm__ ("emul %2,%1,%0" \
		: "=d" (__w) \
		: "%dI" ((USItype)(u)), \
		  "dI" ((USItype)(v))); \
	__w; })
/* FIX(review): the original declared only __nn but wrote the result
 * into an undeclared __rq, which could never have compiled.  Both
 * unions are now declared. */
#define udiv_qrnnd(q, r, nh, nl, d) \
do { \
	union {UDItype __ll; \
		struct {USItype __l, __h; } __i; \
	} __nn, __rq; \
	__nn.__i.__h = (nh); __nn.__i.__l = (nl); \
	__asm__ ("ediv %d,%n,%0" \
		: "=d" (__rq.__ll) \
		: "dI" (__nn.__ll), \
		  "dI" ((USItype)(d))); \
	(r) = __rq.__i.__l; (q) = __rq.__i.__h; \
} while (0)
/* scanbit finds the highest set bit; XOR 31 turns it into a clz. */
#define count_leading_zeros(count, x) \
do { \
	USItype __cbtmp; \
	__asm__ ("scanbit %1,%0" \
		: "=r" (__cbtmp) \
		: "r" ((USItype)(x))); \
	(count) = __cbtmp ^ 31; \
} while (0)
#define COUNT_LEADING_ZEROS_0 (-32) /* sic */
#if defined(__i960mx) /* what is the proper symbol to test??? */
/* FIX(review): added the missing "while (0)" closing the do-block. */
#define rshift_rhlc(r, h, l, c) \
do { \
	union {UDItype __ll; \
		struct {USItype __l, __h; } __i; \
	} __nn; \
	__nn.__i.__h = (h); __nn.__i.__l = (l); \
	__asm__ ("shre %2,%1,%0" \
		: "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
} while (0)
#endif /* i960mx */
#endif /* i960 */
560
/***************************************
 ************** 68000 ****************
 ***************************************/
#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("add%.l %5,%1\n" \
		"addx%.l %3,%0" \
		: "=d" ((USItype)(sh)), \
		  "=&d" ((USItype)(sl)) \
		: "%0" ((USItype)(ah)), \
		  "d" ((USItype)(bh)), \
		  "%1" ((USItype)(al)), \
		  "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("sub%.l %5,%1\n" \
		"subx%.l %3,%0" \
		: "=d" ((USItype)(sh)), \
		  "=&d" ((USItype)(sl)) \
		: "0" ((USItype)(ah)), \
		  "d" ((USItype)(bh)), \
		  "1" ((USItype)(al)), \
		  "g" ((USItype)(bl)))
#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020))
/* 68020+ has 32x32->64 mulu.l and 64/32 divu.l/divs.l. */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("mulu%.l %3,%1:%0" \
		: "=d" ((USItype)(w0)), \
		  "=d" ((USItype)(w1)) \
		: "%0" ((USItype)(u)), \
		  "dmi" ((USItype)(v)))
#define UMUL_TIME 45
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("divu%.l %4,%1:%0" \
		: "=d" ((USItype)(q)), \
		  "=d" ((USItype)(r)) \
		: "0" ((USItype)(n0)), \
		  "1" ((USItype)(n1)), \
		  "dmi" ((USItype)(d)))
#define UDIV_TIME 90
#define sdiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("divs%.l %4,%1:%0" \
		: "=d" ((USItype)(q)), \
		  "=d" ((USItype)(r)) \
		: "0" ((USItype)(n0)), \
		  "1" ((USItype)(n1)), \
		  "dmi" ((USItype)(d)))
/* bfffo: bit-field find first one, i.e. count leading zeros. */
#define count_leading_zeros(count, x) \
	__asm__ ("bfffo %1{%b2:%b2},%0" \
		: "=d" ((USItype)(count)) \
		: "od" ((USItype)(x)), "n" (0))
#define COUNT_LEADING_ZEROS_0 32
#else /* not mc68020 */
/* Plain 68000: compose the 64-bit product from 16x16 "mulu" pieces. */
#define umul_ppmm(xh, xl, a, b) \
do { USItype __umul_tmp1, __umul_tmp2; \
	__asm__ ("| Inlined umul_ppmm\n" \
		"move%.l %5,%3\n" \
		"move%.l %2,%0\n" \
		"move%.w %3,%1\n" \
		"swap %3\n" \
		"swap %0\n" \
		"mulu %2,%1\n" \
		"mulu %3,%0\n" \
		"mulu %2,%3\n" \
		"swap %2\n" \
		"mulu %5,%2\n" \
		"add%.l %3,%2\n" \
		"jcc 1f\n" \
		"add%.l %#0x10000,%0\n" \
		"1: move%.l %2,%3\n" \
		"clr%.w %2\n" \
		"swap %2\n" \
		"swap %3\n" \
		"clr%.w %3\n" \
		"add%.l %3,%1\n" \
		"addx%.l %2,%0\n" \
		"| End inlined umul_ppmm" \
		: "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
		  "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
		: "%2" ((USItype)(a)), "d" ((USItype)(b))); \
} while (0)
#define UMUL_TIME 100
#define UDIV_TIME 400
#endif /* not mc68020 */
#endif /* mc68000 */
644
/***************************************
 ************** 88000 ****************
 ***************************************/
#if defined(__m88000__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addu.co %1,%r4,%r5\n" \
		"addu.ci %0,%r2,%r3" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "%rJ" ((USItype)(ah)), \
		  "rJ" ((USItype)(bh)), \
		  "%rJ" ((USItype)(al)), \
		  "rJ" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subu.co %1,%r4,%r5\n" \
		"subu.ci %0,%r2,%r3" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "rJ" ((USItype)(ah)), \
		  "rJ" ((USItype)(bh)), \
		  "rJ" ((USItype)(al)), \
		  "rJ" ((USItype)(bl)))
/* ff1 finds the highest set bit; XOR 31 converts to a clz count. */
#define count_leading_zeros(count, x) \
do { \
	USItype __cbtmp; \
	__asm__ ("ff1 %0,%1" \
		: "=r" (__cbtmp) \
		: "r" ((USItype)(x))); \
	(count) = __cbtmp ^ 31; \
} while (0)
#define COUNT_LEADING_ZEROS_0 63 /* sic */
#if defined(__m88110__)
#define umul_ppmm(wh, wl, u, v) \
do { \
	union {UDItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __x; \
	__asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
	(wh) = __x.__i.__h; \
	(wl) = __x.__i.__l; \
} while (0)
/* FIX(review): the original read "__q.__l", which is not a member of
 * the union (only __ll and __i exist) and could not have compiled;
 * it must go through the struct as __q.__i.__l. */
#define udiv_qrnnd(q, r, n1, n0, d) \
	({union {UDItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __x, __q; \
	__x.__i.__h = (n1); __x.__i.__l = (n0); \
	__asm__ ("divu.d %0,%1,%2" \
		: "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
	(r) = (n0) - __q.__i.__l * (d); (q) = __q.__i.__l; })
#define UMUL_TIME 5
#define UDIV_TIME 25
#else
#define UMUL_TIME 17
#define UDIV_TIME 150
#endif /* __m88110__ */
#endif /* __m88000__ */
701
/***************************************
 ************** MIPS *****************
 ***************************************/
#if defined(__mips__) && W_TYPE_SIZE == 32
/* FIX(review): the original test "__GNUC__ >= 4 && __GNUC_MINOR__ >= 4"
 * is false for e.g. GCC 5.0-5.3 (minor < 4), dropping those compilers
 * onto the asm path whose "=l"/"=h" constraints were removed from
 * modern GCC.  Test the full (major, minor) pair instead. */
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
/* GCC 4.4+: let the compiler expand a 64-bit multiply itself. */
#define umul_ppmm(w1, w0, u, v) \
do { \
	UDItype __ll = (UDItype)(u) * (v); \
	w1 = __ll >> 32; \
	w0 = __ll; \
} while (0)
#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
/* GCC 2.7+: multu with results taken straight from LO/HI. */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("multu %2,%3" \
		: "=l" ((USItype)(w0)), \
		  "=h" ((USItype)(w1)) \
		: "d" ((USItype)(u)), \
		  "d" ((USItype)(v)))
#else
/* Older GCC: move LO/HI out explicitly with mflo/mfhi. */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("multu %2,%3\n" \
		"mflo %0\n" \
		"mfhi %1" \
		: "=d" ((USItype)(w0)), \
		  "=d" ((USItype)(w1)) \
		: "d" ((USItype)(u)), \
		  "d" ((USItype)(v)))
#endif
#define UMUL_TIME 10
#define UDIV_TIME 100
#endif /* __mips__ */
733
/***************************************
 ************** MIPS/64 **************
 ***************************************/
#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
/* FIX(review): same version-test defect as the 32-bit MIPS section --
 * "__GNUC__ >= 4 && __GNUC_MINOR__ >= 4" is false for GCC 5.0-5.3,
 * selecting the obsolete "=l"/"=h" asm path.  Compare (major, minor). */
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
/* GCC 4.4+: use a 128-bit (TImode) multiply in plain C. */
#define umul_ppmm(w1, w0, u, v) \
do { \
	typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \
	__ll_UTItype __ll = (__ll_UTItype)(u) * (v); \
	w1 = __ll >> 64; \
	w0 = __ll; \
} while (0)
#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("dmultu %2,%3" \
		: "=l" ((UDItype)(w0)), \
		  "=h" ((UDItype)(w1)) \
		: "d" ((UDItype)(u)), \
		  "d" ((UDItype)(v)))
#else
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("dmultu %2,%3\n" \
		"mflo %0\n" \
		"mfhi %1" \
		: "=d" ((UDItype)(w0)), \
		  "=d" ((UDItype)(w1)) \
		: "d" ((UDItype)(u)), \
		  "d" ((UDItype)(v)))
#endif
#define UMUL_TIME 20
#define UDIV_TIME 140
#endif /* __mips__ */
766
/***************************************
 ************** 32000 ****************
 ***************************************/
#if defined(__ns32000__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
	({union {UDItype __ll; \
		struct {USItype __l, __h; } __i; \
	} __xx; \
	__asm__ ("meid %2,%0" \
		: "=g" (__xx.__ll) \
		: "%0" ((USItype)(u)), \
		  "g" ((USItype)(v))); \
	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
#define __umulsidi3(u, v) \
	({UDItype __w; \
	__asm__ ("meid %2,%0" \
		: "=g" (__w) \
		: "%0" ((USItype)(u)), \
		  "g" ((USItype)(v))); \
	__w; })
/* deid: extended divide of the 64-bit pair, remainder low / quotient high. */
#define udiv_qrnnd(q, r, n1, n0, d) \
	({union {UDItype __ll; \
		struct {USItype __l, __h; } __i; \
	} __xx; \
	__xx.__i.__h = (n1); __xx.__i.__l = (n0); \
	__asm__ ("deid %2,%0" \
		: "=g" (__xx.__ll) \
		: "0" (__xx.__ll), \
		  "g" ((USItype)(d))); \
	(r) = __xx.__i.__l; (q) = __xx.__i.__h; })
#define count_trailing_zeros(count, x) \
do { \
	__asm__("ffsd %2,%0" \
		: "=r"((USItype) (count)) \
		: "0"((USItype) 0), "r"((USItype) (x))); \
} while (0)
#endif /* __ns32000__ */
804
/***************************************
 ************** PPC ******************
 ***************************************/
#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32
/* The {old|new} asm spellings select POWER vs PowerPC mnemonics.
 * Constant 0 / ~0 high words use the cheaper addze/addme forms. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
	if (__builtin_constant_p(bh) && (bh) == 0) \
		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
			: "=r" ((USItype)(sh)), \
			  "=&r" ((USItype)(sl)) \
			: "%r" ((USItype)(ah)), \
			  "%r" ((USItype)(al)), \
			  "rI" ((USItype)(bl))); \
	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
			: "=r" ((USItype)(sh)), \
			  "=&r" ((USItype)(sl)) \
			: "%r" ((USItype)(ah)), \
			  "%r" ((USItype)(al)), \
			  "rI" ((USItype)(bl))); \
	else \
		__asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
			: "=r" ((USItype)(sh)), \
			  "=&r" ((USItype)(sl)) \
			: "%r" ((USItype)(ah)), \
			  "r" ((USItype)(bh)), \
			  "%r" ((USItype)(al)), \
			  "rI" ((USItype)(bl))); \
} while (0)
/* Subtract is computed as subf; constant 0 / ~0 words again pick the
 * short subfze/subfme/addme/addze forms. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
	if (__builtin_constant_p(ah) && (ah) == 0) \
		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
			: "=r" ((USItype)(sh)), \
			  "=&r" ((USItype)(sl)) \
			: "r" ((USItype)(bh)), \
			  "rI" ((USItype)(al)), \
			  "r" ((USItype)(bl))); \
	else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \
		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
			: "=r" ((USItype)(sh)), \
			  "=&r" ((USItype)(sl)) \
			: "r" ((USItype)(bh)), \
			  "rI" ((USItype)(al)), \
			  "r" ((USItype)(bl))); \
	else if (__builtin_constant_p(bh) && (bh) == 0) \
		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
			: "=r" ((USItype)(sh)), \
			  "=&r" ((USItype)(sl)) \
			: "r" ((USItype)(ah)), \
			  "rI" ((USItype)(al)), \
			  "r" ((USItype)(bl))); \
	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
			: "=r" ((USItype)(sh)), \
			  "=&r" ((USItype)(sl)) \
			: "r" ((USItype)(ah)), \
			  "rI" ((USItype)(al)), \
			  "r" ((USItype)(bl))); \
	else \
		__asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
			: "=r" ((USItype)(sh)), \
			  "=&r" ((USItype)(sl)) \
			: "r" ((USItype)(ah)), \
			  "r" ((USItype)(bh)), \
			  "rI" ((USItype)(al)), \
			  "r" ((USItype)(bl))); \
} while (0)
#define count_leading_zeros(count, x) \
	__asm__ ("{cntlz|cntlzw} %0,%1" \
		: "=r" ((USItype)(count)) \
		: "r" ((USItype)(x)))
#define COUNT_LEADING_ZEROS_0 32
#if defined(_ARCH_PPC)
/* PowerPC: mulhwu for the high word, plain C multiply for the low. */
#define umul_ppmm(ph, pl, m0, m1) \
do { \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("mulhwu %0,%1,%2" \
		: "=r" ((USItype) ph) \
		: "%r" (__m0), \
		  "r" (__m1)); \
	(pl) = __m0 * __m1; \
} while (0)
#define UMUL_TIME 15
#define smul_ppmm(ph, pl, m0, m1) \
do { \
	SItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("mulhw %0,%1,%2" \
		: "=r" ((SItype) ph) \
		: "%r" (__m0), \
		  "r" (__m1)); \
	(pl) = __m0 * __m1; \
} while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120
#else
/* Classic POWER: "mul" is signed, so correct the high word to get the
 * unsigned product (same trick as the I370 section). */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("mul %0,%2,%3" \
		: "=r" ((USItype)(xh)), \
		  "=q" ((USItype)(xl)) \
		: "r" (__m0), \
		  "r" (__m1)); \
	(xh) += ((((SItype) __m0 >> 31) & __m1) \
		+ (((SItype) __m1 >> 31) & __m0)); \
} while (0)
#define UMUL_TIME 8
#define smul_ppmm(xh, xl, m0, m1) \
	__asm__ ("mul %0,%2,%3" \
		: "=r" ((SItype)(xh)), \
		  "=q" ((SItype)(xl)) \
		: "r" (m0), \
		  "r" (m1))
#define SMUL_TIME 4
#define sdiv_qrnnd(q, r, nh, nl, d) \
	__asm__ ("div %0,%2,%4" \
		: "=r" ((SItype)(q)), "=q" ((SItype)(r)) \
		: "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d)))
#define UDIV_TIME 100
#endif
#endif /* Power architecture variants. */
927
/***************************************
 ************** PYR ******************
 ***************************************/
#if defined(__pyr__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addw %5,%1\n" \
		"addwc %3,%0" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "%0" ((USItype)(ah)), \
		  "g" ((USItype)(bh)), \
		  "%1" ((USItype)(al)), \
		  "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subw %5,%1\n" \
		"subwb %3,%0" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "0" ((USItype)(ah)), \
		  "g" ((USItype)(bh)), \
		  "1" ((USItype)(al)), \
		  "g" ((USItype)(bl)))
	/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
#define umul_ppmm(w1, w0, u, v) \
	({union {UDItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __xx; \
	__asm__ ("movw %1,%R0\n" \
		"uemul %2,%0" \
		: "=&r" (__xx.__ll) \
		: "g" ((USItype) (u)), \
		  "g" ((USItype)(v))); \
	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
#endif /* __pyr__ */
962
/***************************************
 ************** RT/ROMP **************
 ***************************************/
#if defined(__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("a %1,%5\n" \
		"ae %0,%3" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "%0" ((USItype)(ah)), \
		  "r" ((USItype)(bh)), \
		  "%1" ((USItype)(al)), \
		  "r" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("s %1,%5\n" \
		"se %0,%3" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "0" ((USItype)(ah)), \
		  "r" ((USItype)(bh)), \
		  "1" ((USItype)(al)), \
		  "r" ((USItype)(bl)))
/* ROMP multiply-step: sixteen "m" steps process two bits each; the
 * result is signed, so the trailing adjustment makes it unsigned. */
#define umul_ppmm(ph, pl, m0, m1) \
do { \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ( \
		"s r2,r2\n" \
		"mts r10,%2\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"cas %0,r2,r0\n" \
		"mfs r10,%1" \
		: "=r" ((USItype)(ph)), \
		  "=r" ((USItype)(pl)) \
		: "%r" (__m0), \
		  "r" (__m1) \
		: "r2"); \
	(ph) += ((((SItype) __m0 >> 31) & __m1) \
		+ (((SItype) __m1 >> 31) & __m0)); \
} while (0)
#define UMUL_TIME 20
#define UDIV_TIME 200
/* clz works on 16-bit halves: scan the high half when nonzero,
 * otherwise scan the low half and add 16. */
#define count_leading_zeros(count, x) \
do { \
	if ((x) >= 0x10000) \
		__asm__ ("clz %0,%1" \
			: "=r" ((USItype)(count)) \
			: "r" ((USItype)(x) >> 16)); \
	else { \
		__asm__ ("clz %0,%1" \
			: "=r" ((USItype)(count)) \
			: "r" ((USItype)(x))); \
		(count) += 16; \
	} \
} while (0)
#endif /* RT/ROMP */
1033
1034/***************************************
1035 ************** SH2 ******************
1036 ***************************************/
#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \
 && W_TYPE_SIZE == 32
/* SuperH: "dmulu.l" computes the full 64-bit unsigned product into the
   MACH:MACL register pair; "sts" copies the two halves into the
   outputs.  MACL/MACH are listed as clobbers since the asm writes
   them. */
#define umul_ppmm(w1, w0, u, v) \
 __asm__ ( \
 "dmulu.l %2,%3\n" \
 "sts macl,%1\n" \
 "sts mach,%0" \
 : "=r" ((USItype)(w1)), \
 "=r" ((USItype)(w0)) \
 : "r" ((USItype)(u)), \
 "r" ((USItype)(v)) \
 : "macl", "mach")
#define UMUL_TIME 5
#endif
1051
1052/***************************************
1053 ************** SPARC ****************
1054 ***************************************/
#if defined(__sparc__) && W_TYPE_SIZE == 32
/* SPARC two-word add: "addcc" adds the low words and sets the carry
   flag, "addx" adds the high words plus that carry.  The "rJ"
   constraints additionally allow %g0 for a zero operand, "rI" a small
   immediate; the condition codes are clobbered. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 __asm__ ("addcc %r4,%5,%1\n" \
 "addx %r2,%3,%0" \
 : "=r" ((USItype)(sh)), \
 "=&r" ((USItype)(sl)) \
 : "%rJ" ((USItype)(ah)), \
 "rI" ((USItype)(bh)), \
 "%rJ" ((USItype)(al)), \
 "rI" ((USItype)(bl)) \
 __CLOBBER_CC)
/* Two-word subtract with borrow ("subcc" / "subx"), same scheme. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 __asm__ ("subcc %r4,%5,%1\n" \
 "subx %r2,%3,%0" \
 : "=r" ((USItype)(sh)), \
 "=&r" ((USItype)(sl)) \
 : "rJ" ((USItype)(ah)), \
 "rI" ((USItype)(bh)), \
 "rJ" ((USItype)(al)), \
 "rI" ((USItype)(bl)) \
 __CLOBBER_CC)
#if defined(__sparc_v8__)
/* Don't match an immediate range because: (1) it is not often useful;
   (2) the 'I' constraint thinks of the range as a 13-bit signed
   interval, while we want to match a 13-bit interval, sign-extended to
   32 bits, but INTERPRETED AS UNSIGNED. */
/* v8 hardware multiply: "umul" leaves the product low word in %1 and
   the high word in the %y register, read back with "rd %y". */
#define umul_ppmm(w1, w0, u, v) \
 __asm__ ("umul %2,%3,%1;rd %%y,%0" \
 : "=r" ((USItype)(w1)), \
 "=r" ((USItype)(w0)) \
 : "r" ((USItype)(u)), \
 "r" ((USItype)(v)))
#define UMUL_TIME 5
#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */
/* 64/32 divide: the high word n1 is loaded into %y first (the nops fill
   the delay after writing %y), then "udiv" divides (n1:n0) by d.  Only
   the quotient comes from the hardware; the remainder is reconstructed
   in C as n0 - q*d. */
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
 USItype __q; \
 __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
 : "=r" ((USItype)(__q)) \
 : "r" ((USItype)(n1)), \
 "r" ((USItype)(n0)), \
 "r" ((USItype)(d))); \
 (r) = (n0) - __q * (d); \
 (q) = __q; \
} while (0)
#define UDIV_TIME 25
#endif /* SUPERSPARC */
1102#else /* ! __sparc_v8__ */
#if defined(__sparclite__)
/* This has hardware multiply but not divide. It also has two additional
   instructions scan (ffs from high bit) and divscc. */
/* Same umul/%y scheme as v8. */
#define umul_ppmm(w1, w0, u, v) \
 __asm__ ("umul %2,%3,%1;rd %%y,%0" \
 : "=r" ((USItype)(w1)), \
 "=r" ((USItype)(w0)) \
 : "r" ((USItype)(u)), \
 "r" ((USItype)(v)))
#define UMUL_TIME 5
/* 64/32 divide built from 32 "divscc" divide-step instructions; %y holds
   the running high part, %g1 the partial quotient.  The final branch
   adds d back when the last remainder step went negative. */
#define udiv_qrnnd(q, r, n1, n0, d) \
 __asm__ ("! Inlined udiv_qrnnd\n" \
 "wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
 "tst %%g0\n" \
 "divscc %3,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%%g1\n" \
 "divscc %%g1,%4,%0\n" \
 "rd %%y,%1\n" \
 "bl,a 1f\n" \
 "add %1,%4,%1\n" \
 "1: ! End of inline udiv_qrnnd" \
 : "=r" ((USItype)(q)), \
 "=r" ((USItype)(r)) \
 : "r" ((USItype)(n1)), \
 "r" ((USItype)(n0)), \
 "rI" ((USItype)(d)) \
 : "%g1" __AND_CLOBBER_CC)
#define UDIV_TIME 37
/* Count leading zeros with the SPARClite "scan" instruction: the result
   is written into (count), the value scanned is (x).
   NOTE(review): the operands were swapped here — the output constraint
   was on (x) and the input on (count) — so the macro wrote its result
   into the argument instead of the count.  Fixed to match the macro's
   contract (and GCC's / the Linux kernel's longlong.h). */
#define count_leading_zeros(count, x) \
 __asm__ ("scan %1,0,%0" \
 : "=r" ((USItype)(count)) \
 : "r" ((USItype)(x)))
1164/* Early sparclites return 63 for an argument of 0, but they warn that future
1165 implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0
1166 undefined. */
1167#endif /* __sparclite__ */
1168#endif /* __sparc_v8__ */
1169 /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
#ifndef umul_ppmm
/* SPARC v7 has no multiply instruction: build the 32x32->64 unsigned
   product from 32 "mulscc" multiply-step instructions (plus a final
   step with 0).  The multiplier is placed in %y; %g2 holds the
   correction term (v's sign mask AND u) that converts the signed
   stepper result into an unsigned product.  The "Don't move" comments
   mark instructions that must stay inside the %y write-delay window. */
#define umul_ppmm(w1, w0, u, v) \
 __asm__ ("! Inlined umul_ppmm\n" \
 "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \
 "sra %3,31,%%g2 ! Don't move this insn\n" \
 "and %2,%%g2,%%g2 ! Don't move this insn\n" \
 "andcc %%g0,0,%%g1 ! Don't move this insn\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,%3,%%g1\n" \
 "mulscc %%g1,0,%%g1\n" \
 "add %%g1,%%g2,%0\n" \
 "rd %%y,%1" \
 : "=r" ((USItype)(w1)), \
 "=r" ((USItype)(w0)) \
 : "%rI" ((USItype)(u)), \
 "r" ((USItype)(v)) \
 : "%g1", "%g2" __AND_CLOBBER_CC)
#define UMUL_TIME 39 /* 39 instructions */
/* It's quite necessary to add this much assembler for the sparc.
 The default udiv_qrnnd (in C) is more than 10 times slower! */
/* Shift-and-subtract restoring division of (n1:n0) by d, 32 iterations
   counted down in %g1; the quotient bits accumulate (complemented) in
   %0 and are fixed up by the final "xnor".  Operands: %0 = n0/q,
   %1 = n1/r (in-out via matching constraints), %2 = d. */
#define udiv_qrnnd(q, r, n1, n0, d) \
 __asm__ ("! Inlined udiv_qrnnd\n\t" \
 "mov 32,%%g1\n\t" \
 "subcc %1,%2,%%g0\n\t" \
 "1: bcs 5f\n\t" \
 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
 "sub %1,%2,%1 ! this kills msb of n\n\t" \
 "addx %1,%1,%1 ! so this can't give carry\n\t" \
 "subcc %%g1,1,%%g1\n\t" \
 "2: bne 1b\n\t" \
 "subcc %1,%2,%%g0\n\t" \
 "bcs 3f\n\t" \
 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
 "b 3f\n\t" \
 "sub %1,%2,%1 ! this kills msb of n\n\t" \
 "4: sub %1,%2,%1\n\t" \
 "5: addxcc %1,%1,%1\n\t" \
 "bcc 2b\n\t" \
 "subcc %%g1,1,%%g1\n\t" \
 "! Got carry from n. Subtract next step to cancel this carry.\n\t" \
 "bne 4b\n\t" \
 "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \
 "sub %1,%2,%1\n\t" \
 "3: xnor %0,0,%0\n\t" \
 "! End of inline udiv_qrnnd\n" \
 : "=&r" ((USItype)(q)), \
 "=&r" ((USItype)(r)) \
 : "r" ((USItype)(d)), \
 "1" ((USItype)(n1)), \
 "0" ((USItype)(n0)) : "%g1", "cc")
#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */
#endif
1252#endif /* __sparc__ */
1253
1254/***************************************
1255 ************** VAX ******************
1256 ***************************************/
#if defined(__vax__) && W_TYPE_SIZE == 32
/* VAX two-word add: "addl2" adds the low words, "adwc" (add with carry)
   the high words. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 __asm__ ("addl2 %5,%1\n" \
 "adwc %3,%0" \
 : "=g" ((USItype)(sh)), \
 "=&g" ((USItype)(sl)) \
 : "%0" ((USItype)(ah)), \
 "g" ((USItype)(bh)), \
 "%1" ((USItype)(al)), \
 "g" ((USItype)(bl)))
/* Two-word subtract with borrow ("subl2" / "sbwc"). */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 __asm__ ("subl2 %5,%1\n" \
 "sbwc %3,%0" \
 : "=g" ((USItype)(sh)), \
 "=&g" ((USItype)(sl)) \
 : "0" ((USItype)(ah)), \
 "g" ((USItype)(bh)), \
 "1" ((USItype)(al)), \
 "g" ((USItype)(bl)))
/* "emul" forms a signed 32x32->64 product; the two correction lines
   afterwards add back (sign(m0) & m1) + (sign(m1) & m0) to the high
   word, converting it to the unsigned product. */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
 union {UDItype __ll; \
 struct {USItype __l, __h; } __i; \
 } __xx; \
 USItype __m0 = (m0), __m1 = (m1); \
 __asm__ ("emul %1,%2,$0,%0" \
 : "=g" (__xx.__ll) \
 : "g" (__m0), \
 "g" (__m1)); \
 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
 (xh) += ((((SItype) __m0 >> 31) & __m1) \
 + (((SItype) __m1 >> 31) & __m0)); \
} while (0)
/* Signed 64/32 divide: "ediv" divides the double word (n1:n0) by d,
   producing signed quotient and remainder. */
#define sdiv_qrnnd(q, r, n1, n0, d) \
do { \
 union {DItype __ll; \
 struct {SItype __l, __h; } __i; \
 } __xx; \
 __xx.__i.__h = n1; __xx.__i.__l = n0; \
 __asm__ ("ediv %3,%2,%0,%1" \
 : "=g" (q), "=g" (r) \
 : "g" (__xx.__ll), "g" (d)); \
} while (0)
#endif /* __vax__ */
1301
1302/***************************************
1303 ************** Z8000 ****************
1304 ***************************************/
#if defined(__z8000__) && W_TYPE_SIZE == 16
/* Z8000: 16-bit word size.  Two-word add/sub with carry/borrow
   ("add"/"adc", "sub"/"sbc"); the %H operand modifiers select the
   16-bit subregisters (NOTE(review): per the Z8000 gcc backend —
   confirm against that backend's documentation). */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
 : "=r" ((unsigned int)(sh)), \
 "=&r" ((unsigned int)(sl)) \
 : "%0" ((unsigned int)(ah)), \
 "r" ((unsigned int)(bh)), \
 "%1" ((unsigned int)(al)), \
 "rQR" ((unsigned int)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
 : "=r" ((unsigned int)(sh)), \
 "=&r" ((unsigned int)(sl)) \
 : "0" ((unsigned int)(ah)), \
 "r" ((unsigned int)(bh)), \
 "1" ((unsigned int)(al)), \
 "rQR" ((unsigned int)(bl)))
/* 16x16->32 multiply: "mult" produces a signed product into a register
   pair; the two correction lines convert the high word to the unsigned
   product (same fix-up as the VAX version above, with 15 in place of
   31 for the 16-bit word size). */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
 union {long int __ll; \
 struct {unsigned int __h, __l; } __i; \
 } __xx; \
 unsigned int __m0 = (m0), __m1 = (m1); \
 __asm__ ("mult %S0,%H3" \
 : "=r" (__xx.__i.__h), \
 "=r" (__xx.__i.__l) \
 : "%1" (__m0), \
 "rQR" (__m1)); \
 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
 (xh) += ((((signed int) __m0 >> 15) & __m1) \
 + (((signed int) __m1 >> 15) & __m0)); \
} while (0)
#endif /* __z8000__ */
1338
1339#endif /* __GNUC__ */
1340
1341/***************************************
1342 *********** Generic Versions ********
1343 ***************************************/
1344#if !defined(umul_ppmm) && defined(__umulsidi3)
/* Split the double-word product of m0 and m1 (computed by __umulsidi3)
   into high word ph and low word pl.
   NOTE(review): was wrapped in bare { } braces, which breaks use as a
   single statement (e.g. "if (c) umul_ppmm(...); else ...") — wrapped
   in the standard do { } while (0) idiom, as in the other multi-
   statement macros in this file. */
#define umul_ppmm(ph, pl, m0, m1) \
do { \
 UDWtype __ll = __umulsidi3(m0, m1); \
 (ph) = (UWtype) (__ll >> W_TYPE_SIZE); \
 (pl) = (UWtype) __ll; \
} while (0)
1351#endif
1352
#if !defined(__umulsidi3)
/* Full-width multiply as an expression: yields the UDWtype product of
   u and v, assembled from the two half-products that umul_ppmm
   delivers.  (Uses the GNU statement-expression extension.) */
#define __umulsidi3(u, v) \
 ({UWtype __ph, __pl; \
 umul_ppmm(__ph, __pl, u, v); \
 ((UDWtype) __ph << W_TYPE_SIZE) | __pl; })
#endif
1359
1360 /* If this machine has no inline assembler, use C macros. */
1361
#if !defined(add_ssaaaa)
/* Generic C double-word add: (sh:sl) = (ah:al) + (bh:bl).  Carry out
   of the low-word add is detected by the unsigned wrap-around test
   (__lsum < (al)) and propagated into the high word. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
 UWtype __lsum = (al) + (bl); \
 (sh) = (ah) + (bh) + (__lsum < (al)); \
 (sl) = __lsum; \
} while (0)
#endif
1371
#if !defined(sub_ddmmss)
/* Generic C double-word subtract: (sh:sl) = (ah:al) - (bh:bl).  A
   borrow out of the low word shows up as unsigned wrap-around
   (__ldiff > (al)) and is subtracted from the high word. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
 UWtype __ldiff = (al) - (bl); \
 (sh) = (ah) - (bh) - (__ldiff > (al)); \
 (sl) = __ldiff; \
} while (0)
#endif
1381
#if !defined(umul_ppmm)
/* Generic C full multiply, (w1:w0) = u * v, via four half-word partial
   products: u = (uh:ul), v = (vh:vl), so
   u*v = uh*vh*B^2 + (uh*vl + ul*vh)*B + ul*vl  with B = 2^(W/2). */
#define umul_ppmm(w1, w0, u, v) \
do { \
 UWtype __lo_lo, __lo_hi, __hi_lo, __hi_hi; \
 UHWtype __u_lo, __v_lo, __u_hi, __v_hi; \
 UWtype __uu = (u), __vv = (v); \
 \
 __u_lo = __ll_lowpart(__uu); \
 __u_hi = __ll_highpart(__uu); \
 __v_lo = __ll_lowpart(__vv); \
 __v_hi = __ll_highpart(__vv); \
 \
 __lo_lo = (UWtype) __u_lo * __v_lo; \
 __lo_hi = (UWtype) __u_lo * __v_hi; \
 __hi_lo = (UWtype) __u_hi * __v_lo; \
 __hi_hi = (UWtype) __u_hi * __v_hi; \
 \
 __lo_hi += __ll_highpart(__lo_lo); /* cannot overflow */ \
 __lo_hi += __hi_lo; /* but this sum can */ \
 if (__lo_hi < __hi_lo) /* carry out of the middle sum? */ \
 __hi_hi += __ll_B; /* propagate it into the top */ \
 \
 (w1) = __hi_hi + __ll_highpart(__lo_hi); \
 (w0) = (__ll_lowpart(__lo_hi) << W_TYPE_SIZE/2) + __ll_lowpart(__lo_lo); \
} while (0)
#endif
1408
#if !defined(smul_ppmm)
/* Generic signed multiply, (w1:w0) = (signed)u * (signed)v: compute the
   unsigned product, then subtract the correction term for each negative
   operand so the high word becomes the signed-product high word.
   NOTE(review): the guard used to be "#if !defined(umul_ppmm)", which
   is always false at this point — the block above guarantees umul_ppmm
   is defined — so smul_ppmm could never be defined.  Guard on
   smul_ppmm itself, as GCC's longlong.h does. */
#define smul_ppmm(w1, w0, u, v) \
do { \
 UWtype __w1; \
 UWtype __m0 = (u), __m1 = (v); \
 umul_ppmm(__w1, w0, __m0, __m1); \
 (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
 - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
} while (0)
#endif
1419
 /* Define this unconditionally, so it can be used for debugging. */
/* Generic C division of the two-word value (n1:n0) by d, producing
   quotient q and remainder r.  Works by schoolbook long division in
   half-words: split d into (d1:d0), divide the top part by d1 to get an
   estimated quotient digit, then correct the estimate (at most twice)
   when the partial remainder would go negative; repeat for the low
   digit.  The divisor d must be normalized, i.e. have its most
   significant bit set — that is why UDIV_NEEDS_NORMALIZATION is set to
   1 when this macro is used as udiv_qrnnd below. */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
do { \
 UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
 __d1 = __ll_highpart(d); \
 __d0 = __ll_lowpart(d); \
 \
 __r1 = (n1) % __d1; \
 __q1 = (n1) / __d1; \
 __m = (UWtype) __q1 * __d0; \
 __r1 = __r1 * __ll_B | __ll_highpart(n0); \
 if (__r1 < __m) { \
 __q1--, __r1 += (d); \
 if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
 if (__r1 < __m) \
 __q1--, __r1 += (d); \
 } \
 __r1 -= __m; \
 \
 __r0 = __r1 % __d1; \
 __q0 = __r1 / __d1; \
 __m = (UWtype) __q0 * __d0; \
 __r0 = __r0 * __ll_B | __ll_lowpart(n0); \
 if (__r0 < __m) { \
 __q0--, __r0 += (d); \
 if (__r0 >= (d)) \
 if (__r0 < __m) \
 __q0--, __r0 += (d); \
 } \
 __r0 -= __m; \
 \
 (q) = (UWtype) __q1 * __ll_B | __q0; \
 (r) = __r0; \
} while (0)
1454
1455/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
1456 __udiv_w_sdiv (defined in libgcc or elsewhere). */
#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
/* Adapt a machine that only has signed division: delegate to the
   library routine udiv_w_sdiv, which returns the quotient and stores
   the remainder through its first argument.
   NOTE(review): __MPN is a name-decoration macro assumed to be supplied
   by the including MPI library — confirm it is defined by the headers
   that include this file. */
#define udiv_qrnnd(q, r, nh, nl, d) \
do { \
 UWtype __r; \
 (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
 (r) = __r; \
} while (0)
#endif
1465
 /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
#if !defined(udiv_qrnnd)
/* The C fallback works digit-by-digit and needs a normalized divisor
   (high bit set); announce that to callers. */
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif
1471
/* NOTE(review): this #undef makes the #if below always true, discarding
   any architecture-specific count_leading_zeros defined earlier in this
   file.  Presumably deliberate (forcing the table-driven version and
   its well-defined COUNT_LEADING_ZEROS_0) — confirm before removing. */
#undef count_leading_zeros
#if !defined(count_leading_zeros)
/* 256-entry lookup table: __clz_tab[v] is the bit-length of v (0..255).
   Must be provided by the library that includes this file. */
 extern
#ifdef __STDC__
 const
#endif
 unsigned char __clz_tab[];
/* Generic count of leading zeros: locate the highest nonzero 8-bit
   chunk of x (by comparisons for word sizes up to 32 bits, by a byte
   loop otherwise), then finish with the table lookup. */
#define count_leading_zeros(count, x) \
do { \
 UWtype __xr = (x); \
 UWtype __a; \
 \
 if (W_TYPE_SIZE <= 32) { \
 __a = __xr < ((UWtype) 1 << 2*__BITS4) \
 ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \
 : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \
 } \
 else { \
 for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
 if (((__xr >> __a) & 0xff) != 0) \
 break; \
 } \
 \
 (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
} while (0)
 /* This version gives a well-defined value for zero. */
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1500
#if !defined(count_trailing_zeros)
/* Define count_trailing_zeros in terms of count_leading_zeros: isolate
   the lowest set bit with (x & -x), count its leading zeros, and turn
   that bit position into a trailing-zero count.  For x == 0 the result
   is whatever count_leading_zeros yields for 0. */
#define count_trailing_zeros(count, x) \
do { \
 UWtype __ctz_val = (x); \
 UWtype __ctz_lz; \
 count_leading_zeros(__ctz_lz, __ctz_val & -__ctz_val); \
 (count) = W_TYPE_SIZE - 1 - __ctz_lz; \
} while (0)
#endif
1512
/* Default: a udiv_qrnnd defined above that did not say otherwise does
   not require the divisor to be pre-shifted (normalized). */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif