/*
 * Copyright 2018-2019 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the OpenSSL license (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/*
 * Contemporary compilers implement lock-free atomic memory access
 * primitives that facilitate writing low-overhead "thread-opportunistic"
 * or even genuinely multi-threaded code. "Thread-opportunistic" code is
 * code where an exact result is not required, e.g. some statistics, or
 * where the execution flow doesn't have to be unambiguous. The simplest
 * example is lazy "constant" initialization, where one can synchronize on
 * the variable itself, e.g.
 *
 *     if (var == NOT_YET_INITIALIZED)
 *         var = function_returning_same_value();
 *
 * This does work provided that loads and stores are single-instruction
 * operations (and integer ones are on *all* supported platforms), but
 * it upsets Thread Sanitizer. The suggested solution is
 *
 *     if (tsan_load(&var) == NOT_YET_INITIALIZED)
 *         tsan_store(&var, function_returning_same_value());
 *
 * The production machine code is the same, so one may wonder why bother.
 * Having Thread Sanitizer accept "thread-opportunistic" code lets one
 * move on to troubleshooting real bugs.
 *
 * Resolving Thread Sanitizer nits was the initial purpose of this module,
 * but it was later extended with more nuanced primitives that are useful
 * even in "non-opportunistic" scenarios, most notably checking whether a
 * shared structure is fully initialized so that the initialization lock
 * can be bypassed. One can view the macros defined in this module as
 * "annotations" for thread-safe lock-free code, "Thread-Safe ANnotations"...
 *
 * It's assumed that ATOMIC_{LONG|INT}_LOCK_FREE have the same value as
 * ATOMIC_POINTER_LOCK_FREE. The check for >= 2 ensures that the
 * corresponding operations are always lock-free and hence inlined; note
 * that statistics counters become accurate in that case.
 *
 * A special note about TSAN_QUALIFIER: it may be undesirable to use it in
 * a shared header, because whether operations on a specific variable or
 * member are atomic may be irrelevant to other modules. In that case one
 * can apply TSAN_QUALIFIER in a cast at the specific call sites where it
 * has to count, as in the sketch following this comment.
 */
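
/*
 * A minimal usage sketch; "num_calls", "stats" and "hits" are hypothetical
 * names used for illustration only:
 *
 *     static TSAN_QUALIFIER int num_calls = 0;
 *
 *     tsan_counter(&num_calls);
 *
 * And the cast form mentioned above, for a member declared without the
 * qualifier in a shared header:
 *
 *     tsan_counter((TSAN_QUALIFIER int *)&stats->hits);
 */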

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L \
    && !defined(__STDC_NO_ATOMICS__)
# include <stdatomic.h>

# if defined(ATOMIC_POINTER_LOCK_FREE) \
          && ATOMIC_POINTER_LOCK_FREE >= 2
#  define TSAN_QUALIFIER _Atomic
#  define tsan_load(ptr) atomic_load_explicit((ptr), memory_order_relaxed)
#  define tsan_store(ptr, val) atomic_store_explicit((ptr), (val), memory_order_relaxed)
#  define tsan_counter(ptr) atomic_fetch_add_explicit((ptr), 1, memory_order_relaxed)
#  define tsan_decr(ptr) atomic_fetch_add_explicit((ptr), -1, memory_order_relaxed)
#  define tsan_ld_acq(ptr) atomic_load_explicit((ptr), memory_order_acquire)
#  define tsan_st_rel(ptr, val) atomic_store_explicit((ptr), (val), memory_order_release)
# endif

#elif defined(__GNUC__) && defined(__ATOMIC_RELAXED)

# if defined(__GCC_ATOMIC_POINTER_LOCK_FREE) \
          && __GCC_ATOMIC_POINTER_LOCK_FREE >= 2
#  define TSAN_QUALIFIER volatile
#  define tsan_load(ptr) __atomic_load_n((ptr), __ATOMIC_RELAXED)
#  define tsan_store(ptr, val) __atomic_store_n((ptr), (val), __ATOMIC_RELAXED)
#  define tsan_counter(ptr) __atomic_fetch_add((ptr), 1, __ATOMIC_RELAXED)
#  define tsan_decr(ptr) __atomic_fetch_add((ptr), -1, __ATOMIC_RELAXED)
#  define tsan_ld_acq(ptr) __atomic_load_n((ptr), __ATOMIC_ACQUIRE)
#  define tsan_st_rel(ptr, val) __atomic_store_n((ptr), (val), __ATOMIC_RELEASE)
# endif

#elif defined(_MSC_VER) && _MSC_VER >= 1200 \
      && (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \
          defined(_M_ARM64) || (defined(_M_ARM) && _M_ARM >= 7 && !defined(_WIN32_WCE)))
/*
 * There is a subtle dependency on the /volatile:<iso|ms> command-line
 * option. "ms" implies the same semantics as memory_order_acquire for
 * loads and memory_order_release for stores, while "iso" implies
 * memory_order_relaxed for both. The real complication is that the
 * defaults differ between x86 and ARM. There is an explanation for that:
 * "ms" is backward compatible with earlier compiler versions, while
 * multi-processor ARM can be viewed as a brand-new platform to MSC and
 * its users, and since the non-relaxed semantics take their toll in
 * additional instructions and penalties, it makes sense to default to
 * "iso" there...
 */
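/*
 * For example, one can pin the relaxed semantics at build time instead of
 * relying on the per-architecture default; the compiler then predefines
 * _ISO_VOLATILE, which suppresses the plain-access tsan_ld_acq/tsan_st_rel
 * definitions below ("module.c" is a hypothetical file name):
 *
 *     cl /volatile:iso /c module.c
 */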
# define TSAN_QUALIFIER volatile
# if defined(_M_ARM) || defined(_M_ARM64)
#  define _InterlockedExchangeAdd _InterlockedExchangeAdd_nf
#  pragma intrinsic(_InterlockedExchangeAdd_nf)
#  pragma intrinsic(__iso_volatile_load32, __iso_volatile_store32)
#  ifdef _WIN64
#   define _InterlockedExchangeAdd64 _InterlockedExchangeAdd64_nf
#   pragma intrinsic(_InterlockedExchangeAdd64_nf)
#   pragma intrinsic(__iso_volatile_load64, __iso_volatile_store64)
#   define tsan_load(ptr) (sizeof(*(ptr)) == 8 ? __iso_volatile_load64(ptr) \
                                               : __iso_volatile_load32(ptr))
#   define tsan_store(ptr, val) (sizeof(*(ptr)) == 8 ? __iso_volatile_store64((ptr), (val)) \
                                                     : __iso_volatile_store32((ptr), (val)))
#  else
#   define tsan_load(ptr) __iso_volatile_load32(ptr)
#   define tsan_store(ptr, val) __iso_volatile_store32((ptr), (val))
#  endif
# else
#  define tsan_load(ptr) (*(ptr))
#  define tsan_store(ptr, val) (*(ptr) = (val))
# endif
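/*
 * Note that sizeof(*(ptr)) in the dispatching macros above and below is a
 * compile-time constant, so the conditional folds away and only the
 * intrinsic matching the operand's width remains in the generated code.
 */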
# pragma intrinsic(_InterlockedExchangeAdd)
# ifdef _WIN64
#  pragma intrinsic(_InterlockedExchangeAdd64)
#  define tsan_counter(ptr) (sizeof(*(ptr)) == 8 ? _InterlockedExchangeAdd64((ptr), 1) \
                                                 : _InterlockedExchangeAdd((ptr), 1))
#  define tsan_decr(ptr) (sizeof(*(ptr)) == 8 ? _InterlockedExchangeAdd64((ptr), -1) \
                                              : _InterlockedExchangeAdd((ptr), -1))
# else
#  define tsan_counter(ptr) _InterlockedExchangeAdd((ptr), 1)
#  define tsan_decr(ptr) _InterlockedExchangeAdd((ptr), -1)
# endif
# if !defined(_ISO_VOLATILE)
#  define tsan_ld_acq(ptr) (*(ptr))
#  define tsan_st_rel(ptr, val) (*(ptr) = (val))
# endif

#endif

#ifndef TSAN_QUALIFIER

# define TSAN_QUALIFIER volatile
# define tsan_load(ptr) (*(ptr))
# define tsan_store(ptr, val) (*(ptr) = (val))
# define tsan_counter(ptr) ((*(ptr))++)
# define tsan_decr(ptr) ((*(ptr))--)
/*
 * The lack of tsan_ld_acq and tsan_st_rel here means that the compiler
 * support is not sophisticated enough to provide them. Code that relies
 * on them should be guarded with #ifdef tsan_ld_acq and fall back to a
 * locked path, as sketched at the end of this file.
 */

#endif
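
/*
 * A sketch of the suggested fallback pattern; "lock", "obj", "ready" and
 * "do_init" are hypothetical names used for illustration only, and
 * obj->ready is assumed to be declared with TSAN_QUALIFIER:
 *
 *     # ifdef tsan_ld_acq
 *     if (tsan_ld_acq(&obj->ready))
 *         return obj;
 *     # endif
 *     CRYPTO_THREAD_write_lock(lock);
 *     if (!obj->ready) {
 *         do_init(obj);
 *     # ifdef tsan_st_rel
 *         tsan_st_rel(&obj->ready, 1);
 *     # else
 *         obj->ready = 1;
 *     # endif
 *     }
 *     CRYPTO_THREAD_unlock(lock);
 *     return obj;
 *
 * The acquire load pairs with the release store: a thread that observes a
 * non-zero "ready" also observes everything do_init() wrote, so the
 * initialization lock can be safely bypassed on the fast path.
 */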