| rjw | 1f88458 | 2022-01-06 17:20:42 +0800 | [diff] [blame] | 1 | /* | 
|  | 2 | * mm/percpu-debug.c | 
|  | 3 | * | 
|  | 4 | * Copyright (C) 2017		Facebook Inc. | 
|  | 5 | * Copyright (C) 2017		Dennis Zhou <dennisz@fb.com> | 
|  | 6 | * | 
|  | 7 | * This file is released under the GPLv2. | 
|  | 8 | * | 
|  | 9 | * Prints statistics about the percpu allocator and backing chunks. | 
|  | 10 | */ | 
|  | 11 | #include <linux/debugfs.h> | 
|  | 12 | #include <linux/list.h> | 
|  | 13 | #include <linux/percpu.h> | 
|  | 14 | #include <linux/seq_file.h> | 
|  | 15 | #include <linux/sort.h> | 
|  | 16 | #include <linux/vmalloc.h> | 
|  | 17 |  | 
|  | 18 | #include "percpu-internal.h" | 
|  | 19 |  | 
|  | 20 | #define P(X, Y) \ | 
|  | 21 | seq_printf(m, "  %-20s: %12lld\n", X, (long long int)Y) | 
|  | 22 |  | 
|  | 23 | struct percpu_stats pcpu_stats; | 
|  | 24 | struct pcpu_alloc_info pcpu_stats_ai; | 
|  | 25 |  | 
/*
 * sort() comparator that orders ints ascending.
 *
 * The operands are compared rather than subtracted: the difference of two
 * ints can fall outside the range of int (e.g. INT_MAX - (-1)), which would
 * report the wrong ordering.
 *
 * Returns -1, 0 or 1 when *a is less than, equal to, or greater than *b.
 */
static int cmpint(const void *a, const void *b)
{
	int lhs = *(const int *)a;
	int rhs = *(const int *)b;

	return (lhs > rhs) - (lhs < rhs);
}
|  | 30 |  | 
|  | 31 | /* | 
|  | 32 | * Iterates over all chunks to find the max nr_alloc entries. | 
|  | 33 | */ | 
|  | 34 | static int find_max_nr_alloc(void) | 
|  | 35 | { | 
|  | 36 | struct pcpu_chunk *chunk; | 
|  | 37 | int slot, max_nr_alloc; | 
|  | 38 |  | 
|  | 39 | max_nr_alloc = 0; | 
|  | 40 | for (slot = 0; slot < pcpu_nr_slots; slot++) | 
|  | 41 | list_for_each_entry(chunk, &pcpu_slot[slot], list) | 
|  | 42 | max_nr_alloc = max(max_nr_alloc, chunk->nr_alloc); | 
|  | 43 |  | 
|  | 44 | return max_nr_alloc; | 
|  | 45 | } | 
|  | 46 |  | 
|  | 47 | /* | 
|  | 48 | * Prints out chunk state. Fragmentation is considered between | 
|  | 49 | * the beginning of the chunk to the last allocation. | 
|  | 50 | * | 
|  | 51 | * All statistics are in bytes unless stated otherwise. | 
|  | 52 | */ | 
|  | 53 | static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk, | 
|  | 54 | int *buffer) | 
|  | 55 | { | 
|  | 56 | int i, last_alloc, as_len, start, end; | 
|  | 57 | int *alloc_sizes, *p; | 
|  | 58 | /* statistics */ | 
|  | 59 | int sum_frag = 0, max_frag = 0; | 
|  | 60 | int cur_min_alloc = 0, cur_med_alloc = 0, cur_max_alloc = 0; | 
|  | 61 |  | 
|  | 62 | alloc_sizes = buffer; | 
|  | 63 |  | 
|  | 64 | /* | 
|  | 65 | * find_last_bit returns the start value if nothing found. | 
|  | 66 | * Therefore, we must determine if it is a failure of find_last_bit | 
|  | 67 | * and set the appropriate value. | 
|  | 68 | */ | 
|  | 69 | last_alloc = find_last_bit(chunk->alloc_map, | 
|  | 70 | pcpu_chunk_map_bits(chunk) - | 
|  | 71 | chunk->end_offset / PCPU_MIN_ALLOC_SIZE - 1); | 
|  | 72 | last_alloc = test_bit(last_alloc, chunk->alloc_map) ? | 
|  | 73 | last_alloc + 1 : 0; | 
|  | 74 |  | 
|  | 75 | as_len = 0; | 
|  | 76 | start = chunk->start_offset / PCPU_MIN_ALLOC_SIZE; | 
|  | 77 |  | 
|  | 78 | /* | 
|  | 79 | * If a bit is set in the allocation map, the bound_map identifies | 
|  | 80 | * where the allocation ends.  If the allocation is not set, the | 
|  | 81 | * bound_map does not identify free areas as it is only kept accurate | 
|  | 82 | * on allocation, not free. | 
|  | 83 | * | 
|  | 84 | * Positive values are allocations and negative values are free | 
|  | 85 | * fragments. | 
|  | 86 | */ | 
|  | 87 | while (start < last_alloc) { | 
|  | 88 | if (test_bit(start, chunk->alloc_map)) { | 
|  | 89 | end = find_next_bit(chunk->bound_map, last_alloc, | 
|  | 90 | start + 1); | 
|  | 91 | alloc_sizes[as_len] = 1; | 
|  | 92 | } else { | 
|  | 93 | end = find_next_bit(chunk->alloc_map, last_alloc, | 
|  | 94 | start + 1); | 
|  | 95 | alloc_sizes[as_len] = -1; | 
|  | 96 | } | 
|  | 97 |  | 
|  | 98 | alloc_sizes[as_len++] *= (end - start) * PCPU_MIN_ALLOC_SIZE; | 
|  | 99 |  | 
|  | 100 | start = end; | 
|  | 101 | } | 
|  | 102 |  | 
|  | 103 | /* | 
|  | 104 | * The negative values are free fragments and thus sorting gives the | 
|  | 105 | * free fragments at the beginning in largest first order. | 
|  | 106 | */ | 
|  | 107 | if (as_len > 0) { | 
|  | 108 | sort(alloc_sizes, as_len, sizeof(int), cmpint, NULL); | 
|  | 109 |  | 
|  | 110 | /* iterate through the unallocated fragments */ | 
|  | 111 | for (i = 0, p = alloc_sizes; *p < 0 && i < as_len; i++, p++) { | 
|  | 112 | sum_frag -= *p; | 
|  | 113 | max_frag = max(max_frag, -1 * (*p)); | 
|  | 114 | } | 
|  | 115 |  | 
|  | 116 | cur_min_alloc = alloc_sizes[i]; | 
|  | 117 | cur_med_alloc = alloc_sizes[(i + as_len - 1) / 2]; | 
|  | 118 | cur_max_alloc = alloc_sizes[as_len - 1]; | 
|  | 119 | } | 
|  | 120 |  | 
|  | 121 | P("nr_alloc", chunk->nr_alloc); | 
|  | 122 | P("max_alloc_size", chunk->max_alloc_size); | 
|  | 123 | P("empty_pop_pages", chunk->nr_empty_pop_pages); | 
|  | 124 | P("first_bit", chunk->first_bit); | 
|  | 125 | P("free_bytes", chunk->free_bytes); | 
|  | 126 | P("contig_bytes", chunk->contig_bits * PCPU_MIN_ALLOC_SIZE); | 
|  | 127 | P("sum_frag", sum_frag); | 
|  | 128 | P("max_frag", max_frag); | 
|  | 129 | P("cur_min_alloc", cur_min_alloc); | 
|  | 130 | P("cur_med_alloc", cur_med_alloc); | 
|  | 131 | P("cur_max_alloc", cur_max_alloc); | 
|  | 132 | seq_putc(m, '\n'); | 
|  | 133 | } | 
|  | 134 |  | 
|  | 135 | static int percpu_stats_show(struct seq_file *m, void *v) | 
|  | 136 | { | 
|  | 137 | struct pcpu_chunk *chunk; | 
|  | 138 | int slot, max_nr_alloc; | 
|  | 139 | int *buffer; | 
|  | 140 |  | 
|  | 141 | alloc_buffer: | 
|  | 142 | spin_lock_irq(&pcpu_lock); | 
|  | 143 | max_nr_alloc = find_max_nr_alloc(); | 
|  | 144 | spin_unlock_irq(&pcpu_lock); | 
|  | 145 |  | 
|  | 146 | /* there can be at most this many free and allocated fragments */ | 
|  | 147 | buffer = vmalloc((2 * max_nr_alloc + 1) * sizeof(int)); | 
|  | 148 | if (!buffer) | 
|  | 149 | return -ENOMEM; | 
|  | 150 |  | 
|  | 151 | spin_lock_irq(&pcpu_lock); | 
|  | 152 |  | 
|  | 153 | /* if the buffer allocated earlier is too small */ | 
|  | 154 | if (max_nr_alloc < find_max_nr_alloc()) { | 
|  | 155 | spin_unlock_irq(&pcpu_lock); | 
|  | 156 | vfree(buffer); | 
|  | 157 | goto alloc_buffer; | 
|  | 158 | } | 
|  | 159 |  | 
|  | 160 | #define PL(X) \ | 
|  | 161 | seq_printf(m, "  %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X) | 
|  | 162 |  | 
|  | 163 | seq_printf(m, | 
|  | 164 | "Percpu Memory Statistics\n" | 
|  | 165 | "Allocation Info:\n" | 
|  | 166 | "----------------------------------------\n"); | 
|  | 167 | PL(unit_size); | 
|  | 168 | PL(static_size); | 
|  | 169 | PL(reserved_size); | 
|  | 170 | PL(dyn_size); | 
|  | 171 | PL(atom_size); | 
|  | 172 | PL(alloc_size); | 
|  | 173 | seq_putc(m, '\n'); | 
|  | 174 |  | 
|  | 175 | #undef PL | 
|  | 176 |  | 
|  | 177 | #define PU(X) \ | 
|  | 178 | seq_printf(m, "  %-20s: %12llu\n", #X, (unsigned long long)pcpu_stats.X) | 
|  | 179 |  | 
|  | 180 | seq_printf(m, | 
|  | 181 | "Global Stats:\n" | 
|  | 182 | "----------------------------------------\n"); | 
|  | 183 | PU(nr_alloc); | 
|  | 184 | PU(nr_dealloc); | 
|  | 185 | PU(nr_cur_alloc); | 
|  | 186 | PU(nr_max_alloc); | 
|  | 187 | PU(nr_chunks); | 
|  | 188 | PU(nr_max_chunks); | 
|  | 189 | PU(min_alloc_size); | 
|  | 190 | PU(max_alloc_size); | 
|  | 191 | P("empty_pop_pages", pcpu_nr_empty_pop_pages); | 
|  | 192 | seq_putc(m, '\n'); | 
|  | 193 |  | 
|  | 194 | #undef PU | 
|  | 195 |  | 
|  | 196 | seq_printf(m, | 
|  | 197 | "Per Chunk Stats:\n" | 
|  | 198 | "----------------------------------------\n"); | 
|  | 199 |  | 
|  | 200 | if (pcpu_reserved_chunk) { | 
|  | 201 | seq_puts(m, "Chunk: <- Reserved Chunk\n"); | 
|  | 202 | chunk_map_stats(m, pcpu_reserved_chunk, buffer); | 
|  | 203 | } | 
|  | 204 |  | 
|  | 205 | for (slot = 0; slot < pcpu_nr_slots; slot++) { | 
|  | 206 | list_for_each_entry(chunk, &pcpu_slot[slot], list) { | 
|  | 207 | if (chunk == pcpu_first_chunk) { | 
|  | 208 | seq_puts(m, "Chunk: <- First Chunk\n"); | 
|  | 209 | chunk_map_stats(m, chunk, buffer); | 
|  | 210 |  | 
|  | 211 |  | 
|  | 212 | } else { | 
|  | 213 | seq_puts(m, "Chunk:\n"); | 
|  | 214 | chunk_map_stats(m, chunk, buffer); | 
|  | 215 | } | 
|  | 216 |  | 
|  | 217 | } | 
|  | 218 | } | 
|  | 219 |  | 
|  | 220 | spin_unlock_irq(&pcpu_lock); | 
|  | 221 |  | 
|  | 222 | vfree(buffer); | 
|  | 223 |  | 
|  | 224 | return 0; | 
|  | 225 | } | 
|  | 226 |  | 
|  | 227 | static int percpu_stats_open(struct inode *inode, struct file *filp) | 
|  | 228 | { | 
|  | 229 | return single_open(filp, percpu_stats_show, NULL); | 
|  | 230 | } | 
|  | 231 |  | 
|  | 232 | static const struct file_operations percpu_stats_fops = { | 
|  | 233 | .open		= percpu_stats_open, | 
|  | 234 | .read		= seq_read, | 
|  | 235 | .llseek		= seq_lseek, | 
|  | 236 | .release	= single_release, | 
|  | 237 | }; | 
|  | 238 |  | 
|  | 239 | static int __init init_percpu_stats_debugfs(void) | 
|  | 240 | { | 
|  | 241 | debugfs_create_file("percpu_stats", 0444, NULL, NULL, | 
|  | 242 | &percpu_stats_fops); | 
|  | 243 |  | 
|  | 244 | return 0; | 
|  | 245 | } | 
|  | 246 |  | 
|  | 247 | late_initcall(init_percpu_stats_debugfs); |