/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

#ifndef _MATH_PRIVATE_H_
#define _MATH_PRIVATE_H_

#include <endian.h>
#include <sys/types.h>

/* The original fdlibm code used statements like:
	n0 = ((*(int*)&one)>>29)^1;		* index of high word *
	ix0 = *(n0+(int*)&x);			* high word of x *
	ix1 = *((1-n0)+(int*)&x);		* low word of x *
   to dig two 32 bit words out of the 64 bit IEEE floating point
   value.  That is non-ANSI, and, moreover, the gcc instruction
   scheduler gets it wrong.  We instead use the following macros.
   Unlike the original code, we determine the endianness at compile
   time, not at run time; I don't see much benefit to selecting
   endianness at run time.  */

/* A union which permits us to convert between a double and two 32 bit
   ints.  The member names say which half each word is: msw is the more
   significant word, lsw the less significant one.  Only the order of the
   two members differs between the variants below.  */

/*
 * Math on arm is special (read: stupid):
 * For FPA, float words are always big-endian.
 * For VFP, float words follow the memory system mode.
 * For Maverick, float words are always little-endian.
 */

#if !defined(__MAVERICK__) && ((__BYTE_ORDER == __BIG_ENDIAN) || \
    (!defined(__VFP_FP__) && (defined(__arm__) || defined(__thumb__))))

/* The more significant word comes first in memory.  */
typedef union
{
  double value;
  struct
  {
    u_int32_t msw;
    u_int32_t lsw;
  } parts;
} ieee_double_shape_type;

#else

/* The less significant word comes first in memory.  */
typedef union
{
  double value;
  struct
  {
    u_int32_t lsw;
    u_int32_t msw;
  } parts;
} ieee_double_shape_type;

#endif

/* Get two 32 bit ints from a double.
   (d) is evaluated once and copied into a scratch union; (ix0) then
   receives the more significant word and (ix1) the less significant
   word, in that order.  */

#define EXTRACT_WORDS(ix0,ix1,d) \
do { \
  ieee_double_shape_type ew_u; \
  ew_u.value = (d); \
  (ix0) = ew_u.parts.msw; \
  (ix1) = ew_u.parts.lsw; \
} while (0)

/* Get the more significant 32 bit int from a double.
   (d) is evaluated once; the result is assigned to (i).  */

#define GET_HIGH_WORD(i,d) \
do { \
  ieee_double_shape_type gh_u; \
  gh_u.value = (d); \
  (i) = gh_u.parts.msw; \
} while (0)

/* Get the less significant 32 bit int from a double.
   (d) is evaluated once; the result is assigned to (i).  */

#define GET_LOW_WORD(i,d) \
do { \
  ieee_double_shape_type gl_u; \
  gl_u.value = (d); \
  (i) = gl_u.parts.lsw; \
} while (0)

/* Set a double from two 32 bit ints.
   (ix0) becomes the more significant word and (ix1) the less
   significant word of the value stored into (d).  Each argument is
   evaluated once.  */

#define INSERT_WORDS(d,ix0,ix1) \
do { \
  ieee_double_shape_type iw_u; \
  iw_u.parts.msw = (ix0); \
  iw_u.parts.lsw = (ix1); \
  (d) = iw_u.value; \
} while (0)

/* Set the more significant 32 bits of a double from an int.
   The less significant word of (d) is left as it was.  Note that (d)
   appears twice in the expansion (read, then written), so it must be
   an lvalue without side effects.  */

#define SET_HIGH_WORD(d,v) \
do { \
  ieee_double_shape_type sh_u; \
  sh_u.value = (d); \
  sh_u.parts.msw = (v); \
  (d) = sh_u.value; \
} while (0)

/* Set the less significant 32 bits of a double from an int.
   The more significant word of (d) is left as it was.  Note that (d)
   appears twice in the expansion (read, then written), so it must be
   an lvalue without side effects.  */

#define SET_LOW_WORD(d,v) \
do { \
  ieee_double_shape_type sl_u; \
  sl_u.value = (d); \
  sl_u.parts.lsw = (v); \
  (d) = sl_u.value; \
} while (0)

/* A union which permits us to convert between a float and a 32 bit
   int.  Both members overlay the same storage, so writing one and
   reading the other reinterprets the bits.  */

typedef union
{
  float value;
  u_int32_t word;
} ieee_float_shape_type;

/* Get a 32 bit int from a float.
   (d) is evaluated once; its bit pattern is assigned to (i).  */

#define GET_FLOAT_WORD(i,d) \
do { \
  ieee_float_shape_type gf_u; \
  gf_u.value = (d); \
  (i) = gf_u.word; \
} while (0)

/* Set a float from a 32 bit int.
   (i) is evaluated once and stored into (d) as a raw bit pattern.  */

#define SET_FLOAT_WORD(d,i) \
do { \
  ieee_float_shape_type sf_u; \
  sf_u.word = (i); \
  (d) = sf_u.value; \
} while (0)

/* The prototypes below are tagged with attribute_hidden, which this
   header does not define itself.  If the build has not supplied a
   definition, fall back to an empty one so the declarations still
   parse (they then simply carry no extra attribute).  */
#ifndef attribute_hidden
# define attribute_hidden
#endif

/* ieee style elementary functions */
extern double __ieee754_sqrt (double) attribute_hidden;
extern double __ieee754_acos (double) attribute_hidden;
extern double __ieee754_acosh (double) attribute_hidden;
extern double __ieee754_log (double) attribute_hidden;
extern double __ieee754_log2 (double) attribute_hidden;
extern double __ieee754_atanh (double) attribute_hidden;
extern double __ieee754_asin (double) attribute_hidden;
extern double __ieee754_atan2 (double,double) attribute_hidden;
extern double __ieee754_exp (double) attribute_hidden;
extern double __ieee754_cosh (double) attribute_hidden;
extern double __ieee754_fmod (double,double) attribute_hidden;
extern double __ieee754_pow (double,double) attribute_hidden;
extern double __ieee754_lgamma_r (double,int *) attribute_hidden;
/*extern double __ieee754_gamma_r (double,int *) attribute_hidden;*/
extern double __ieee754_lgamma (double) attribute_hidden;
/*extern double __ieee754_gamma (double) attribute_hidden;*/
extern double __ieee754_log10 (double) attribute_hidden;
extern double __ieee754_sinh (double) attribute_hidden;
extern double __ieee754_hypot (double,double) attribute_hidden;
extern double __ieee754_j0 (double) attribute_hidden;
extern double __ieee754_j1 (double) attribute_hidden;
extern double __ieee754_y0 (double) attribute_hidden;
extern double __ieee754_y1 (double) attribute_hidden;
extern double __ieee754_jn (int,double) attribute_hidden;
extern double __ieee754_yn (int,double) attribute_hidden;
extern double __ieee754_remainder (double,double) attribute_hidden;
extern int __ieee754_rem_pio2 (double,double*) attribute_hidden;
extern double __ieee754_scalb (double,double) attribute_hidden;

/* fdlibm kernel function */
/* __kernel_standard is only declared when _IEEE_LIBM is not defined.  */
#ifndef _IEEE_LIBM
extern double __kernel_standard (double,double,int) attribute_hidden;
#endif
extern double __kernel_sin (double,double,int) attribute_hidden;
extern double __kernel_cos (double,double) attribute_hidden;
extern double __kernel_tan (double,double,int) attribute_hidden;
extern int __kernel_rem_pio2 (double*,double*,int,int,int,const int*) attribute_hidden;

/*
 * math_opt_barrier(x): safely load x, even if it was manipulated
 * by non-floating point operations. This macro returns the value of x.
 * This ensures the compiler does not (ab)use its knowledge about x's value
 * and does not optimize future operations. Example:
 *   float x;
 *   SET_FLOAT_WORD(x, 0x80000001); // sets a bit pattern
 *   y = math_opt_barrier(x); // "compiler, do not cheat!"
 *   y = y * y; // compiler can't optimize, must use real multiply insn
 *
 * math_force_eval(x): force expression x to be evaluated.
 * Useful if the compiler might otherwise eliminate the expression
 * as unused. This macro returns no value.
 * Example: "void fn(float f) { f = f * f; }"
 *   versus "void fn(float f) { f = f * f; math_force_eval(f); }"
 *
 * Currently, math_force_eval(x) stores x into
 * a floating point register or memory *of the appropriate size*.
 * There is no guarantee this will not change.
 */
#if defined(__i386__)
/* i386 variant of math_opt_barrier: copies x into a local of the same
   type (x is evaluated once), passes it through an empty asm as an
   in/out operand, and yields the local as the value of the statement
   expression.  */
#define math_opt_barrier(x) ({ \
	__typeof(x) __x = (x); \
	/* "t": load x into top-of-stack fpreg */ \
	__asm__ ("" : "=t" (__x) : "0" (__x)); \
	__x; \
})
/* i386 variant of math_force_eval: evaluates x once into a local and
   feeds it to an empty volatile asm as an input operand.  The operand
   constraint is picked from the size of x's type.  Yields no value.  */
#define math_force_eval(x) do { \
	__typeof(x) __x = (x); \
	if (sizeof(__x) <= sizeof(double)) { \
		/* "m": store x into a memory location */ \
		__asm__ __volatile__ ("" : : "m" (__x)); \
	} else { /* long double */ \
		/* "f": load x into (any) fpreg */ \
		__asm__ __volatile__ ("" : : "f" (__x)); \
	} \
} while (0)
#endif

#if defined(__x86_64__)
/* x86_64 variant of math_opt_barrier: copies x into a local of the
   same type (x is evaluated once), passes it through an empty asm as
   an in/out operand whose register class is picked from the size of
   x's type, and yields the local as the value of the statement
   expression.  */
#define math_opt_barrier(x) ({ \
	__typeof(x) __x = (x); \
	if (sizeof(__x) <= sizeof(double)) { \
		/* "x": load into XMM SSE register */ \
		__asm__ ("" : "=x" (__x) : "0" (__x)); \
	} else { /* long double */ \
		/* "t": load x into top-of-stack fpreg */ \
		__asm__ ("" : "=t" (__x) : "0" (__x)); \
	} \
	__x; \
})
/* x86_64 variant of math_force_eval: evaluates x once into a local and
   feeds it to an empty volatile asm as an input operand.  The operand
   constraint is picked from the size of x's type.  Yields no value.  */
#define math_force_eval(x) do { \
	__typeof(x) __x = (x); \
	if (sizeof(__x) <= sizeof(double)) { \
		/* "x": load into XMM SSE register */ \
		__asm__ __volatile__ ("" : : "x" (__x)); \
	} else { /* long double */ \
		/* "f": load x into (any) fpreg */ \
		__asm__ __volatile__ ("" : : "f" (__x)); \
	} \
} while (0)
#endif

/* Default implementations force store to a memory location */
/* Used only when no target-specific definition was made earlier.
   math_opt_barrier evaluates x once into a local, passes the local
   through an empty asm as a read-write memory operand, and yields it.  */
#ifndef math_opt_barrier
#define math_opt_barrier(x) ({ \
	__typeof(x) __x = (x); \
	__asm__ ("" : "+m" (__x)); \
	__x; \
})
#endif
/* math_force_eval evaluates x once into a local and hands the local to
   an empty volatile asm as a memory input.  Yields no value.  */
#ifndef math_force_eval
#define math_force_eval(x) do { \
	__typeof(x) __x = (x); \
	__asm__ __volatile__ ("" : : "m" (__x)); \
} while (0)
#endif


259#endif /* _MATH_PRIVATE_H_ */