// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
| 3 | #include <linux/sched.h> |
| 4 | #include <linux/ptrace.h> |
| 5 | #include <stdint.h> |
| 6 | #include <stddef.h> |
| 7 | #include <stdbool.h> |
| 8 | #include <linux/bpf.h> |
| 9 | #include "bpf_helpers.h" |
| 10 | |
/* Sizes, in bytes, of the fixed character buffers used by Symbol and Event. */
#define FUNCTION_NAME_LEN	64	/* Symbol.name */
#define FILE_NAME_LEN		128	/* Symbol.file */
#define TASK_COMM_LEN		16	/* Event.comm */
| 14 | |
/*
 * Offsets that are added to pointers read from the traced process in order
 * to locate individual fields. Each member is named <Structure>_<field>.
 */
typedef struct {
	/* Offsets applied to the thread-state pointer. */
	int PyThreadState_frame;
	int PyThreadState_thread;

	/* Offsets applied to a frame pointer. */
	int PyFrameObject_back;
	int PyFrameObject_code;
	int PyFrameObject_lineno;

	/* Offsets applied to a code-object pointer. */
	int PyCodeObject_filename;
	int PyCodeObject_name;

	/* Offsets applied to a string-object pointer. */
	int String_data;
	int String_size;
} OffsetConfig;
| 26 | |
| 27 | typedef struct { |
| 28 | uintptr_t current_state_addr; |
| 29 | uintptr_t tls_key_addr; |
| 30 | OffsetConfig offsets; |
| 31 | bool use_tls; |
| 32 | } PidData; |
| 33 | |
/* Counters kept in statsmap. */
typedef struct {
	/* Bumped once per sample that reaches the perf-output step. */
	uint32_t success;
} Stats;
| 37 | |
| 38 | typedef struct { |
| 39 | char name[FUNCTION_NAME_LEN]; |
| 40 | char file[FILE_NAME_LEN]; |
| 41 | } Symbol; |
| 42 | |
| 43 | typedef struct { |
| 44 | uint32_t pid; |
| 45 | uint32_t tid; |
| 46 | char comm[TASK_COMM_LEN]; |
| 47 | int32_t kernel_stack_id; |
| 48 | int32_t user_stack_id; |
| 49 | bool thread_current; |
| 50 | bool pthread_match; |
| 51 | bool stack_complete; |
| 52 | int16_t stack_len; |
| 53 | int32_t stack[STACK_MAX_LEN]; |
| 54 | |
| 55 | int has_meta; |
| 56 | int metadata; |
| 57 | char dummy_safeguard; |
| 58 | } Event; |
| 59 | |
| 60 | |
/* Local process-id type; also matches the `int` key type of pidmap. */
typedef int pid_t;
| 62 | |
/* Raw pointers collected while inspecting a single Python frame. */
typedef struct {
	void *f_back;		/* PyFrameObject.f_back: the previous frame */
	void *f_code;		/* PyFrameObject.f_code: pointer to the PyCodeObject */
	void *co_filename;	/* PyCodeObject.co_filename */
	void *co_name;		/* PyCodeObject.co_name */
} FrameData;
| 69 | |
| 70 | static __always_inline void *get_thread_state(void *tls_base, PidData *pidData) |
| 71 | { |
| 72 | void* thread_state; |
| 73 | int key; |
| 74 | |
| 75 | bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr); |
| 76 | bpf_probe_read(&thread_state, sizeof(thread_state), |
| 77 | tls_base + 0x310 + key * 0x10 + 0x08); |
| 78 | return thread_state; |
| 79 | } |
| 80 | |
| 81 | static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData, |
| 82 | FrameData *frame, Symbol *symbol) |
| 83 | { |
| 84 | // read data from PyFrameObject |
| 85 | bpf_probe_read(&frame->f_back, |
| 86 | sizeof(frame->f_back), |
| 87 | frame_ptr + pidData->offsets.PyFrameObject_back); |
| 88 | bpf_probe_read(&frame->f_code, |
| 89 | sizeof(frame->f_code), |
| 90 | frame_ptr + pidData->offsets.PyFrameObject_code); |
| 91 | |
| 92 | // read data from PyCodeObject |
| 93 | if (!frame->f_code) |
| 94 | return false; |
| 95 | bpf_probe_read(&frame->co_filename, |
| 96 | sizeof(frame->co_filename), |
| 97 | frame->f_code + pidData->offsets.PyCodeObject_filename); |
| 98 | bpf_probe_read(&frame->co_name, |
| 99 | sizeof(frame->co_name), |
| 100 | frame->f_code + pidData->offsets.PyCodeObject_name); |
| 101 | // read actual names into symbol |
| 102 | if (frame->co_filename) |
| 103 | bpf_probe_read_str(&symbol->file, |
| 104 | sizeof(symbol->file), |
| 105 | frame->co_filename + pidData->offsets.String_data); |
| 106 | if (frame->co_name) |
| 107 | bpf_probe_read_str(&symbol->name, |
| 108 | sizeof(symbol->name), |
| 109 | frame->co_name + pidData->offsets.String_data); |
| 110 | return true; |
| 111 | } |
| 112 | |
| 113 | struct { |
| 114 | __uint(type, BPF_MAP_TYPE_HASH); |
| 115 | __uint(max_entries, 1); |
| 116 | __type(key, int); |
| 117 | __type(value, PidData); |
| 118 | } pidmap SEC(".maps"); |
| 119 | |
| 120 | struct { |
| 121 | __uint(type, BPF_MAP_TYPE_HASH); |
| 122 | __uint(max_entries, 1); |
| 123 | __type(key, int); |
| 124 | __type(value, Event); |
| 125 | } eventmap SEC(".maps"); |
| 126 | |
| 127 | struct { |
| 128 | __uint(type, BPF_MAP_TYPE_HASH); |
| 129 | __uint(max_entries, 1); |
| 130 | __type(key, Symbol); |
| 131 | __type(value, int); |
| 132 | } symbolmap SEC(".maps"); |
| 133 | |
| 134 | struct { |
| 135 | __uint(type, BPF_MAP_TYPE_ARRAY); |
| 136 | __uint(max_entries, 1); |
| 137 | __type(key, int); |
| 138 | __type(value, Stats); |
| 139 | } statsmap SEC(".maps"); |
| 140 | |
| 141 | struct { |
| 142 | __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); |
| 143 | __uint(max_entries, 32); |
| 144 | __uint(key_size, sizeof(int)); |
| 145 | __uint(value_size, sizeof(int)); |
| 146 | } perfmap SEC(".maps"); |
| 147 | |
| 148 | struct { |
| 149 | __uint(type, BPF_MAP_TYPE_STACK_TRACE); |
| 150 | __uint(max_entries, 1000); |
| 151 | __uint(key_size, sizeof(int)); |
| 152 | __uint(value_size, sizeof(long long) * 127); |
| 153 | } stackmap SEC(".maps"); |
| 154 | |
| 155 | static __always_inline int __on_event(struct pt_regs *ctx) |
| 156 | { |
| 157 | uint64_t pid_tgid = bpf_get_current_pid_tgid(); |
| 158 | pid_t pid = (pid_t)(pid_tgid >> 32); |
| 159 | PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid); |
| 160 | if (!pidData) |
| 161 | return 0; |
| 162 | |
| 163 | int zero = 0; |
| 164 | Event* event = bpf_map_lookup_elem(&eventmap, &zero); |
| 165 | if (!event) |
| 166 | return 0; |
| 167 | |
| 168 | event->pid = pid; |
| 169 | |
| 170 | event->tid = (pid_t)pid_tgid; |
| 171 | bpf_get_current_comm(&event->comm, sizeof(event->comm)); |
| 172 | |
| 173 | event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK); |
| 174 | event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0); |
| 175 | |
| 176 | void* thread_state_current = (void*)0; |
| 177 | bpf_probe_read(&thread_state_current, |
| 178 | sizeof(thread_state_current), |
| 179 | (void*)(long)pidData->current_state_addr); |
| 180 | |
| 181 | struct task_struct* task = (struct task_struct*)bpf_get_current_task(); |
| 182 | void* tls_base = (void*)task; |
| 183 | |
| 184 | void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData) |
| 185 | : thread_state_current; |
| 186 | event->thread_current = thread_state == thread_state_current; |
| 187 | |
| 188 | if (pidData->use_tls) { |
| 189 | uint64_t pthread_created; |
| 190 | uint64_t pthread_self; |
| 191 | bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10); |
| 192 | |
| 193 | bpf_probe_read(&pthread_created, |
| 194 | sizeof(pthread_created), |
| 195 | thread_state + pidData->offsets.PyThreadState_thread); |
| 196 | event->pthread_match = pthread_created == pthread_self; |
| 197 | } else { |
| 198 | event->pthread_match = 1; |
| 199 | } |
| 200 | |
| 201 | if (event->pthread_match || !pidData->use_tls) { |
| 202 | void* frame_ptr; |
| 203 | FrameData frame; |
| 204 | Symbol sym = {}; |
| 205 | int cur_cpu = bpf_get_smp_processor_id(); |
| 206 | |
| 207 | bpf_probe_read(&frame_ptr, |
| 208 | sizeof(frame_ptr), |
| 209 | thread_state + pidData->offsets.PyThreadState_frame); |
| 210 | |
| 211 | int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); |
| 212 | if (symbol_counter == NULL) |
| 213 | return 0; |
| 214 | #ifdef NO_UNROLL |
| 215 | #pragma clang loop unroll(disable) |
| 216 | #else |
| 217 | #pragma clang loop unroll(full) |
| 218 | #endif |
| 219 | /* Unwind python stack */ |
| 220 | for (int i = 0; i < STACK_MAX_LEN; ++i) { |
| 221 | if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) { |
| 222 | int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; |
| 223 | int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym); |
| 224 | if (!symbol_id) { |
| 225 | bpf_map_update_elem(&symbolmap, &sym, &zero, 0); |
| 226 | symbol_id = bpf_map_lookup_elem(&symbolmap, &sym); |
| 227 | if (!symbol_id) |
| 228 | return 0; |
| 229 | } |
| 230 | if (*symbol_id == new_symbol_id) |
| 231 | (*symbol_counter)++; |
| 232 | event->stack[i] = *symbol_id; |
| 233 | event->stack_len = i + 1; |
| 234 | frame_ptr = frame.f_back; |
| 235 | } |
| 236 | } |
| 237 | event->stack_complete = frame_ptr == NULL; |
| 238 | } else { |
| 239 | event->stack_complete = 1; |
| 240 | } |
| 241 | |
| 242 | Stats* stats = bpf_map_lookup_elem(&statsmap, &zero); |
| 243 | if (stats) |
| 244 | stats->success++; |
| 245 | |
| 246 | event->has_meta = 0; |
| 247 | bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata)); |
| 248 | return 0; |
| 249 | } |
| 250 | |
| 251 | SEC("raw_tracepoint/kfree_skb") |
| 252 | int on_event(struct pt_regs* ctx) |
| 253 | { |
| 254 | int i, ret = 0; |
| 255 | ret |= __on_event(ctx); |
| 256 | ret |= __on_event(ctx); |
| 257 | ret |= __on_event(ctx); |
| 258 | ret |= __on_event(ctx); |
| 259 | ret |= __on_event(ctx); |
| 260 | return ret; |
| 261 | } |
| 262 | |
| 263 | char _license[] SEC("license") = "GPL"; |