/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <stdint.h>
#include <tls.h>	/* For tcbhead_t.  */


typedef int8_t atomic8_t;
typedef uint8_t uatomic8_t;
typedef int_fast8_t atomic_fast8_t;
typedef uint_fast8_t uatomic_fast8_t;

typedef int16_t atomic16_t;
typedef uint16_t uatomic16_t;
typedef int_fast16_t atomic_fast16_t;
typedef uint_fast16_t uatomic_fast16_t;

typedef int32_t atomic32_t;
typedef uint32_t uatomic32_t;
typedef int_fast32_t atomic_fast32_t;
typedef uint_fast32_t uatomic_fast32_t;

typedef int64_t atomic64_t;
typedef uint64_t uatomic64_t;
typedef int_fast64_t atomic_fast64_t;
typedef uint_fast64_t uatomic_fast64_t;

typedef intptr_t atomicptr_t;
typedef uintptr_t uatomicptr_t;
typedef intmax_t atomic_max_t;
typedef uintmax_t uatomic_max_t;


#ifndef LOCK_PREFIX
# ifdef UP
#  define LOCK_PREFIX	/* nothing */
# else
#  define LOCK_PREFIX "lock;"
# endif
#endif

#define __HAVE_64B_ATOMICS 0
#define USE_ATOMIC_COMPILER_BUILTINS 0
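/* These feature macros are consumed by the generic include/atomic.h:
   __HAVE_64B_ATOMICS == 0 says that 8-byte objects cannot be handled
   atomically here, and USE_ATOMIC_COMPILER_BUILTINS == 0 makes the
   generic code build the atomic operations from the hand-written
   definitions below instead of the __atomic_* compiler builtins.  */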


#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
  __sync_val_compare_and_swap (mem, oldval, newval)
#define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
  (! __sync_bool_compare_and_swap (mem, oldval, newval))
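
/* The _val_ form returns the value *MEM held before the operation; the
   _bool_ form returns zero iff the exchange took place (note the negation
   of __sync_bool_compare_and_swap, which itself returns nonzero on
   success).  */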


#define __arch_c_compare_and_exchange_val_8_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile ("cmpl $0, %%gs:%P5\n\t"                                \
                       "je 0f\n\t"                                            \
                       "lock\n"                                               \
                       "0:\tcmpxchgb %b2, %1"				      \
		       : "=a" (ret), "=m" (*mem)			      \
		       : "q" (newval), "m" (*mem), "0" (oldval),	      \
			 "i" (offsetof (tcbhead_t, multiple_threads)));	      \
     ret; })

#define __arch_c_compare_and_exchange_val_16_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile ("cmpl $0, %%gs:%P5\n\t"                                \
                       "je 0f\n\t"                                            \
                       "lock\n"                                               \
                       "0:\tcmpxchgw %w2, %1"				      \
		       : "=a" (ret), "=m" (*mem)			      \
		       : "r" (newval), "m" (*mem), "0" (oldval),	      \
			 "i" (offsetof (tcbhead_t, multiple_threads)));	      \
     ret; })

#define __arch_c_compare_and_exchange_val_32_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile ("cmpl $0, %%gs:%P5\n\t"                                \
                       "je 0f\n\t"                                            \
                       "lock\n"                                               \
                       "0:\tcmpxchgl %2, %1"				      \
		       : "=a" (ret), "=m" (*mem)			      \
		       : "r" (newval), "m" (*mem), "0" (oldval),	      \
			 "i" (offsetof (tcbhead_t, multiple_threads)));	      \
     ret; })
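
/* The __arch_c_* ("conditional lock") variants read the multiple_threads
   flag in the TCB through %gs and, while the process is still
   single-threaded, branch over the lock prefix so the cmpxchg runs
   unlocked.  The catomic_* operations further down rely on the same
   trick.  */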

/* XXX We do not really need 64-bit compare-and-exchange.  At least not
   at the moment.  Using it would cause portability problems, since few
   other 32-bit architectures support such an operation.  So don't define
   any code for now.  If it is really going to be used, the code below
   can be used on Intel Pentium and later, but NOT on i486.  */
#if 1
# define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval)	      \
  ({ __typeof (*mem) ret = *(mem);					      \
     abort ();								      \
     ret = (newval);							      \
     ret = (oldval);							      \
     ret; })
# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval)	      \
  ({ __typeof (*mem) ret = *(mem);					      \
     abort ();								      \
     ret = (newval);							      \
     ret = (oldval);							      \
     ret; })
#else
# ifdef __PIC__
#  define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile ("xchgl %2, %%ebx\n\t"				      \
		       LOCK_PREFIX "cmpxchg8b %1\n\t"			      \
		       "xchgl %2, %%ebx"				      \
		       : "=A" (ret), "=m" (*mem)			      \
		       : "DS" (((unsigned long long int) (newval))	      \
			       & 0xffffffff),				      \
			 "c" (((unsigned long long int) (newval)) >> 32),     \
			 "m" (*mem), "a" (((unsigned long long int) (oldval)) \
					  & 0xffffffff),		      \
			 "d" (((unsigned long long int) (oldval)) >> 32));    \
     ret; })

#  define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile ("xchgl %2, %%ebx\n\t"				      \
		       "cmpl $0, %%gs:%P7\n\t"				      \
		       "je 0f\n\t"					      \
		       "lock\n"						      \
		       "0:\tcmpxchg8b %1\n\t"				      \
		       "xchgl %2, %%ebx"				      \
		       : "=A" (ret), "=m" (*mem)			      \
		       : "DS" (((unsigned long long int) (newval))	      \
			       & 0xffffffff),				      \
			 "c" (((unsigned long long int) (newval)) >> 32),     \
			 "m" (*mem), "a" (((unsigned long long int) (oldval)) \
					  & 0xffffffff),		      \
			 "d" (((unsigned long long int) (oldval)) >> 32),     \
			 "i" (offsetof (tcbhead_t, multiple_threads)));	      \
     ret; })
# else
#  define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile (LOCK_PREFIX "cmpxchg8b %1"			      \
		       : "=A" (ret), "=m" (*mem)			      \
		       : "b" (((unsigned long long int) (newval))	      \
			      & 0xffffffff),				      \
			 "c" (((unsigned long long int) (newval)) >> 32),     \
			 "m" (*mem), "a" (((unsigned long long int) (oldval)) \
					  & 0xffffffff),		      \
			 "d" (((unsigned long long int) (oldval)) >> 32));    \
     ret; })

#  define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret;						      \
     __asm __volatile ("cmpl $0, %%gs:%P7\n\t"				      \
		       "je 0f\n\t"					      \
		       "lock\n"						      \
		       "0:\tcmpxchg8b %1"				      \
		       : "=A" (ret), "=m" (*mem)			      \
		       : "b" (((unsigned long long int) (newval))	      \
			      & 0xffffffff),				      \
			 "c" (((unsigned long long int) (newval)) >> 32),     \
			 "m" (*mem), "a" (((unsigned long long int) (oldval)) \
					  & 0xffffffff),		      \
			 "d" (((unsigned long long int) (oldval)) >> 32),     \
			 "i" (offsetof (tcbhead_t, multiple_threads)));	      \
     ret; })
# endif
#endif
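
/* In the disabled branch above, cmpxchg8b compares %edx:%eax with the
   8-byte memory operand and, on a match, stores %ecx:%ebx into it.  The
   __PIC__ variants swap the low word of the new value into %ebx with
   xchgl around the instruction because %ebx is reserved as the GOT
   pointer in position-independent code.  */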


/* Note that we need no lock prefix here: an xchg instruction with a
   memory operand is implicitly locked on x86.  */
#define atomic_exchange_acq(mem, newvalue) \
  ({ __typeof (*mem) result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile ("xchgb %b0, %1"				      \
			 : "=q" (result), "=m" (*mem)			      \
			 : "0" (newvalue), "m" (*mem));			      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile ("xchgw %w0, %1"				      \
			 : "=r" (result), "=m" (*mem)			      \
			 : "0" (newvalue), "m" (*mem));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile ("xchgl %0, %1"					      \
			 : "=r" (result), "=m" (*mem)			      \
			 : "0" (newvalue), "m" (*mem));			      \
     else								      \
       {								      \
	 result = 0;							      \
	 abort ();							      \
       }								      \
     result; })
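
/* Sketch of how this composes (illustration only, not something defined
   by this header): a simple test-and-set spin lock can be acquired with

     while (atomic_exchange_acq (&lock, 1) != 0)
       atomic_spin_nop ();

   where lock is an int that is 0 when free; atomic_spin_nop is defined
   further down in this file.  */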


#define __arch_exchange_and_add_body(lock, pfx, mem, value) \
  ({ __typeof (*mem) __result;						      \
     __typeof (value) __addval = (value);				      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (lock "xaddb %b0, %1"				      \
			 : "=q" (__result), "=m" (*mem)			      \
			 : "0" (__addval), "m" (*mem),			      \
			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (lock "xaddw %w0, %1"				      \
			 : "=r" (__result), "=m" (*mem)			      \
			 : "0" (__addval), "m" (*mem),			      \
			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (lock "xaddl %0, %1"				      \
			 : "=r" (__result), "=m" (*mem)			      \
			 : "0" (__addval), "m" (*mem),			      \
			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
     else								      \
       {								      \
	 __typeof (mem) __memp = (mem);					      \
	 __typeof (*mem) __tmpval;					      \
	 __result = *__memp;						      \
	 do								      \
	   __tmpval = __result;						      \
	 while ((__result = pfx##_compare_and_exchange_val_64_acq	      \
		 (__memp, __result + __addval, __result)) != __tmpval);      \
       }								      \
     __result; })

#define atomic_exchange_and_add(mem, value) \
  __sync_fetch_and_add (mem, value)
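
/* __sync_fetch_and_add returns the value *MEM held before the addition;
   for 1-, 2- and 4-byte objects GCC typically expands it to a single
   lock xadd, matching the hand-written variants above.  */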

#define __arch_exchange_and_add_cprefix \
  "cmpl $0, %%gs:%P4\n\tje 0f\n\tlock\n0:\t"

#define catomic_exchange_and_add(mem, value) \
  __arch_exchange_and_add_body (__arch_exchange_and_add_cprefix, __arch_c,    \
				mem, value)


#define __arch_add_body(lock, pfx, mem, value) \
  do {									      \
    if (__builtin_constant_p (value) && (value) == 1)			      \
      atomic_increment (mem);						      \
    else if (__builtin_constant_p (value) && (value) == -1)		      \
      atomic_decrement (mem);						      \
    else if (sizeof (*mem) == 1)					      \
      __asm __volatile (lock "addb %b1, %0"				      \
			: "=m" (*mem)					      \
			: "iq" (value), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "addw %w1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (value), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "addl %1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (value), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      {									      \
	__typeof (value) __addval = (value);				      \
	__typeof (mem) __memp = (mem);					      \
	__typeof (*mem) __oldval = *__memp;				      \
	__typeof (*mem) __tmpval;					      \
	do								      \
	  __tmpval = __oldval;						      \
	while ((__oldval = pfx##_compare_and_exchange_val_64_acq	      \
		(__memp, __oldval + __addval, __oldval)) != __tmpval);	      \
      }									      \
  } while (0)
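
/* Constant +1/-1 additions are routed to atomic_increment and
   atomic_decrement (defined below) so the shorter inc/dec forms are used.
   The final branch is a generic compare-and-exchange retry loop: it
   re-reads the current value and retries the CAS until it succeeds.  */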

#define atomic_add(mem, value) \
  __arch_add_body (LOCK_PREFIX, __arch, mem, value)

#define __arch_add_cprefix \
  "cmpl $0, %%gs:%P3\n\tje 0f\n\tlock\n0:\t"

#define catomic_add(mem, value) \
  __arch_add_body (__arch_add_cprefix, __arch_c, mem, value)


#define atomic_add_negative(mem, value) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "addb %b2, %0; sets %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "iq" (value), "m" (*mem));			      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "addw %w2, %0; sets %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" (value), "m" (*mem));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "addl %2, %0; sets %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" (value), "m" (*mem));			      \
     else								      \
       abort ();							      \
     __result; })


#define atomic_add_zero(mem, value) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "addb %b2, %0; setz %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "iq" (value), "m" (*mem));			      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "addw %w2, %0; setz %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" (value), "m" (*mem));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "addl %2, %0; setz %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "ir" (value), "m" (*mem));			      \
     else								      \
       abort ();							      \
     __result; })
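
/* The sets/setz instructions in the two macros above capture the sign and
   zero flags produced by the locked add, so the result reports whether
   the new value is negative resp. zero.  */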


#define __arch_increment_body(lock, pfx, mem) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (lock "incb %b0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "incw %w0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "incl %0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      {									      \
	__typeof (mem) __memp = (mem);					      \
	__typeof (*mem) __oldval = *__memp;				      \
	__typeof (*mem) __tmpval;					      \
	do								      \
	  __tmpval = __oldval;						      \
	while ((__oldval = pfx##_compare_and_exchange_val_64_acq	      \
		(__memp, __oldval + 1, __oldval)) != __tmpval);	      \
      }									      \
  } while (0)

#define atomic_increment(mem) __arch_increment_body (LOCK_PREFIX, __arch, mem)

#define __arch_increment_cprefix \
  "cmpl $0, %%gs:%P2\n\tje 0f\n\tlock\n0:\t"

#define catomic_increment(mem) \
  __arch_increment_body (__arch_increment_cprefix, __arch_c, mem)

#define atomic_increment_and_test(mem) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "incb %b0; sete %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "incw %w0; sete %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "incl %0; sete %1"			      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else								      \
       abort ();							      \
     __result; })


#define __arch_decrement_body(lock, pfx, mem) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (lock "decb %b0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "decw %w0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "decl %0"					      \
			: "=m" (*mem)					      \
			: "m" (*mem),					      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      {									      \
	__typeof (mem) __memp = (mem);					      \
	__typeof (*mem) __oldval = *__memp;				      \
	__typeof (*mem) __tmpval;					      \
	do								      \
	  __tmpval = __oldval;						      \
	while ((__oldval = pfx##_compare_and_exchange_val_64_acq	      \
		(__memp, __oldval - 1, __oldval)) != __tmpval);	      \
      }									      \
  } while (0)

#define atomic_decrement(mem) __arch_decrement_body (LOCK_PREFIX, __arch, mem)

#define __arch_decrement_cprefix \
  "cmpl $0, %%gs:%P2\n\tje 0f\n\tlock\n0:\t"

#define catomic_decrement(mem) \
  __arch_decrement_body (__arch_decrement_cprefix, __arch_c, mem)


#define atomic_decrement_and_test(mem) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "decb %b0; sete %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "decw %w0; sete %1"		      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "decl %0; sete %1"			      \
			 : "=m" (*mem), "=qm" (__result)		      \
			 : "m" (*mem));					      \
     else								      \
       abort ();							      \
     __result; })


#define atomic_bit_set(mem, bit) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (LOCK_PREFIX "orb %b2, %0"			      \
			: "=m" (*mem)					      \
			: "m" (*mem), "iq" (1 << (bit)));		      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (LOCK_PREFIX "orw %w2, %0"			      \
			: "=m" (*mem)					      \
			: "m" (*mem), "ir" (1 << (bit)));		      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (LOCK_PREFIX "orl %2, %0"			      \
			: "=m" (*mem)					      \
			: "m" (*mem), "ir" (1 << (bit)));		      \
    else								      \
      abort ();								      \
  } while (0)


#define atomic_bit_test_set(mem, bit) \
  ({ unsigned char __result;						      \
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (LOCK_PREFIX "btsb %3, %1; setc %0"		      \
			 : "=q" (__result), "=m" (*mem)			      \
			 : "m" (*mem), "ir" (bit));			      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (LOCK_PREFIX "btsw %3, %1; setc %0"		      \
			 : "=q" (__result), "=m" (*mem)			      \
			 : "m" (*mem), "ir" (bit));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (LOCK_PREFIX "btsl %3, %1; setc %0"		      \
			 : "=q" (__result), "=m" (*mem)			      \
			 : "m" (*mem), "ir" (bit));			      \
     else								      \
       abort ();							      \
     __result; })


#define atomic_spin_nop() asm ("rep; nop")
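
/* "rep; nop" encodes the PAUSE instruction: it tells the CPU that this is
   a spin-wait loop, which saves power and avoids a memory-order
   mis-speculation penalty when the loop exits, and it decodes as a plain
   nop on processors that predate it.  */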


#define __arch_and_body(lock, mem, mask) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (lock "andb %b1, %0"				      \
			: "=m" (*mem)					      \
			: "iq" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "andw %w1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "andl %1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      abort ();								      \
  } while (0)

#define __arch_cprefix \
  "cmpl $0, %%gs:%P3\n\tje 0f\n\tlock\n0:\t"

#define atomic_and(mem, mask) __arch_and_body (LOCK_PREFIX, mem, mask)

#define catomic_and(mem, mask) __arch_and_body (__arch_cprefix, mem, mask)


#define __arch_or_body(lock, mem, mask) \
  do {									      \
    if (sizeof (*mem) == 1)						      \
      __asm __volatile (lock "orb %b1, %0"				      \
			: "=m" (*mem)					      \
			: "iq" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 2)					      \
      __asm __volatile (lock "orw %w1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else if (sizeof (*mem) == 4)					      \
      __asm __volatile (lock "orl %1, %0"				      \
			: "=m" (*mem)					      \
			: "ir" (mask), "m" (*mem),			      \
			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
    else								      \
      abort ();								      \
  } while (0)

#define atomic_or(mem, mask) __arch_or_body (LOCK_PREFIX, mem, mask)

#define catomic_or(mem, mask) __arch_or_body (__arch_cprefix, mem, mask)

/* We don't use mfence because it is supposedly slower due to having to
   provide stronger guarantees (e.g., regarding self-modifying code).  Any
   lock-prefixed read-modify-write already acts as a full memory barrier,
   and the word at the top of the stack is almost always in the local
   cache, so the locked orl is cheap.  */
#define atomic_full_barrier() \
    __asm __volatile (LOCK_PREFIX "orl $0, (%%esp)" ::: "memory")
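/* The read and write barriers below only need to constrain the compiler:
   the x86 memory model already keeps loads ordered with respect to other
   loads and stores ordered with respect to other stores, so no fence
   instruction is required.  */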
#define atomic_read_barrier() __asm ("" ::: "memory")
#define atomic_write_barrier() __asm ("" ::: "memory")