// SPDX-License-Identifier: GPL-2.0
/*
 * Shadow Call Stack support.
 *
 * Copyright (C) 2019 Google LLC
 */

#include <linux/cpuhotplug.h>
#include <linux/kasan.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/scs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/vmstat.h>
#include <asm/scs.h>

static inline void *__scs_base(struct task_struct *tsk)
{
	/*
	 * To minimize the risk of exposure, architectures may clear a
	 * task's thread_info::shadow_call_stack while that task is
	 * running, and only save/restore the active shadow call stack
	 * pointer when the usual register may be clobbered (e.g. across
	 * context switches).
	 *
	 * The shadow call stack is aligned to SCS_SIZE, and grows
	 * upwards, so we can mask out the low bits to extract the base
	 * when the task is not running.
	 */
	return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
}

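/*
 * scs_magic() points to the last slot of a shadow stack; scs_set_magic()
 * stores SCS_END_MAGIC there so that an overflow (the stack grows upwards)
 * can later be detected by scs_corrupted().
 */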
static inline unsigned long *scs_magic(void *s)
{
	return (unsigned long *)(s + SCS_SIZE) - 1;
}

static inline void scs_set_magic(void *s)
{
	*scs_magic(s) = SCS_END_MAGIC;
}

#ifdef CONFIG_SHADOW_CALL_STACK_VMAP

/* Matches NR_CACHED_STACKS for VMAP_STACK */
#define NR_CACHED_SCS 2
static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);

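/*
 * Reuse a stack from this CPU's cache when possible (zeroing it first);
 * otherwise allocate a page-sized, SCS_SIZE-aligned stack from the
 * vmalloc area.
 */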
static void *scs_alloc(int node)
{
	int i;
	void *s;

	for (i = 0; i < NR_CACHED_SCS; i++) {
		s = this_cpu_xchg(scs_cache[i], NULL);
		if (s) {
			memset(s, 0, SCS_SIZE);
			goto out;
		}
	}

	/*
	 * We allocate a full page for the shadow stack, which should be
	 * more than we need. Check the assumption nevertheless.
	 */
	BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);

	s = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
				 VMALLOC_START, VMALLOC_END,
				 GFP_SCS, PAGE_KERNEL, 0,
				 node, __builtin_return_address(0));

out:
	if (s)
		scs_set_magic(s);
	/* TODO: poison for KASAN, unpoison in scs_free */

	return s;
}

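/*
 * Try to park the stack in an empty per-CPU cache slot; if all slots are
 * taken, free it with vfree_atomic(), which is safe to call from contexts
 * that cannot sleep.
 */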
static void scs_free(void *s)
{
	int i;

	for (i = 0; i < NR_CACHED_SCS; i++)
		if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
			return;

	vfree_atomic(s);
}

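/* Return the struct page backing the task's shadow stack, for accounting. */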
static struct page *__scs_page(struct task_struct *tsk)
{
	return vmalloc_to_page(__scs_base(tsk));
}

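/* CPU hotplug teardown callback: drop the stacks cached for the dead CPU. */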
static int scs_cleanup(unsigned int cpu)
{
	int i;
	void **cache = per_cpu_ptr(scs_cache, cpu);

	for (i = 0; i < NR_CACHED_SCS; i++) {
		vfree(cache[i]);
		cache[i] = NULL;
	}

	return 0;
}

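/* Register scs_cleanup() so the per-CPU cache is drained on CPU hot-unplug. */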
void __init scs_init(void)
{
	WARN_ON(cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
				  scs_cleanup) < 0);
}

#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */

static struct kmem_cache *scs_cache;

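/*
 * Allocate an SCS_SIZE object from the dedicated slab cache. The object is
 * poisoned here and unpoisoned again in scs_free(), so KASAN can catch
 * unintentional accesses to the shadow stack in between.
 */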
static inline void *scs_alloc(int node)
{
	void *s;

	s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
	if (s) {
		scs_set_magic(s);
		/*
		 * Poison the allocation to catch unintentional accesses to
		 * the shadow stack when KASAN is enabled.
		 */
		kasan_poison_object_data(scs_cache, s);
	}

	return s;
}

static inline void scs_free(void *s)
{
	kasan_unpoison_object_data(scs_cache, s);
	kmem_cache_free(scs_cache, s);
}

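/* Slab objects live in the direct mapping, so virt_to_page() is sufficient. */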
static struct page *__scs_page(struct task_struct *tsk)
{
	return virt_to_page(__scs_base(tsk));
}

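/*
 * Create the slab cache with both object size and alignment set to SCS_SIZE,
 * which lets __scs_base() recover the base by masking the low bits.
 */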
void __init scs_init(void)
{
	scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
				      0, NULL);
	WARN_ON(!scs_cache);
}

#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */

void scs_task_reset(struct task_struct *tsk)
{
	/*
	 * Reset the shadow stack to the base address in case the task
	 * is reused.
	 */
	task_set_scs(tsk, __scs_base(tsk));
}

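/*
 * Adjust the zone's NR_KERNEL_SCS_BYTES counter by +/- SCS_SIZE so shadow
 * call stack memory shows up in vmstat; @account is 1 or -1.
 */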
static void scs_account(struct task_struct *tsk, int account)
{
	mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_BYTES,
			    account * SCS_SIZE);
}

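/* Allocate a shadow call stack for a new task and make it the active one. */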
int scs_prepare(struct task_struct *tsk, int node)
{
	void *s;

	s = scs_alloc(node);
	if (!s)
		return -ENOMEM;

	task_set_scs(tsk, s);
	scs_account(tsk, 1);

	return 0;
}

#ifdef CONFIG_DEBUG_STACK_USAGE
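/*
 * Count the entries used between the base and the first empty slot, and
 * report when a task sets a new system-wide high-water mark. The cmpxchg
 * loop keeps concurrent updates to 'highest' from being lost.
 */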
static void scs_check_usage(struct task_struct *tsk)
{
	static unsigned long highest;

	unsigned long *p = __scs_base(tsk);
	unsigned long *end = scs_magic(p);
	unsigned long prev, curr = highest, used = 0;

	for (; p < end; ++p) {
		if (!READ_ONCE_NOCHECK(*p))
			break;
		used += sizeof(*p);
	}

	while (used > curr) {
		prev = cmpxchg_relaxed(&highest, curr, used);

		if (prev == curr) {
			pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
				tsk->comm, task_pid_nr(tsk), used);
			break;
		}

		curr = prev;
	}
}
#else
static inline void scs_check_usage(struct task_struct *tsk)
{
}
#endif

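/* Returns true if the end magic was overwritten, i.e. the stack overflowed. */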
bool scs_corrupted(struct task_struct *tsk)
{
	unsigned long *magic = scs_magic(__scs_base(tsk));

	return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
}

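/*
 * Free a task's shadow call stack when the task is destroyed, after
 * checking for overflow and recording stack usage.
 */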
void scs_release(struct task_struct *tsk)
{
	void *s;

	s = __scs_base(tsk);
	if (!s)
		return;

	WARN_ON(scs_corrupted(tsk));
	scs_check_usage(tsk);

	scs_account(tsk, -1);
	task_set_scs(tsk, NULL);
	scs_free(s);
}