// SPDX-License-Identifier: GPL-2.0-only
/*
 * sysctl.c: General linux system control interface
 *
 * Begun 24 March 1995, Stephen Tweedie
 * Added /proc support, Dec 1995
 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
 * Dynamic registration fixes, Stephen Tweedie.
 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
 *   Horn.
 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
 *   Wendling.
 * The list_for_each() macro wasn't appropriate for the sysctl loop.
 *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
 */
21
22#include <linux/module.h>
23#include <linux/aio.h>
24#include <linux/mm.h>
25#include <linux/swap.h>
26#include <linux/slab.h>
27#include <linux/sysctl.h>
28#include <linux/bitmap.h>
29#include <linux/signal.h>
30#include <linux/printk.h>
31#include <linux/proc_fs.h>
32#include <linux/security.h>
33#include <linux/ctype.h>
34#include <linux/kmemleak.h>
35#include <linux/fs.h>
36#include <linux/init.h>
37#include <linux/kernel.h>
38#include <linux/kobject.h>
39#include <linux/net.h>
40#include <linux/sysrq.h>
41#include <linux/highuid.h>
42#include <linux/writeback.h>
43#include <linux/ratelimit.h>
44#include <linux/compaction.h>
45#include <linux/hugetlb.h>
46#include <linux/initrd.h>
47#include <linux/key.h>
48#include <linux/times.h>
49#include <linux/limits.h>
50#include <linux/dcache.h>
51#include <linux/dnotify.h>
52#include <linux/syscalls.h>
53#include <linux/vmstat.h>
54#include <linux/nfs_fs.h>
55#include <linux/acpi.h>
56#include <linux/reboot.h>
57#include <linux/ftrace.h>
58#include <linux/perf_event.h>
59#include <linux/kprobes.h>
60#include <linux/pipe_fs_i.h>
61#include <linux/oom.h>
62#include <linux/kmod.h>
63#include <linux/capability.h>
64#include <linux/binfmts.h>
65#include <linux/sched/sysctl.h>
66#include <linux/sched/coredump.h>
67#include <linux/kexec.h>
68#include <linux/bpf.h>
69#include <linux/mount.h>
70#include <linux/userfaultfd_k.h>
71
72#include "../lib/kstrtox.h"
73
74#include <linux/uaccess.h>
75#include <asm/processor.h>
76
77#ifdef CONFIG_X86
78#include <asm/nmi.h>
79#include <asm/stacktrace.h>
80#include <asm/io.h>
81#endif
82#ifdef CONFIG_SPARC
83#include <asm/setup.h>
84#endif
85#ifdef CONFIG_BSD_PROCESS_ACCT
86#include <linux/acct.h>
87#endif
88#ifdef CONFIG_RT_MUTEXES
89#include <linux/rtmutex.h>
90#endif
91#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
92#include <linux/lockdep.h>
93#endif
94#ifdef CONFIG_CHR_DEV_SG
95#include <scsi/sg.h>
96#endif
97#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
98#include <linux/stackleak.h>
99#endif
100#ifdef CONFIG_LOCKUP_DETECTOR
101#include <linux/nmi.h>
102#endif
103
104#if defined(CONFIG_SYSCTL)
105
/*
 * Variables that live in other translation units and are not declared by
 * any header, so they are declared by hand here.
 */

/* Always declared. */
extern int suid_dumpable;
extern int pid_max;
extern int pid_max_min;
extern int pid_max_max;
extern int extra_free_kbytes;
extern int percpu_pagelist_fraction;
extern int latencytop_enabled;
extern unsigned int sysctl_nr_open_min;
extern unsigned int sysctl_nr_open_max;

/* Backing storage for the core_* entries; only with CONFIG_COREDUMP. */
#ifdef CONFIG_COREDUMP
extern int core_uses_pid;
extern char core_pattern[];
extern unsigned int core_pipe_limit;
#endif

/* Only declared when the kernel is built without CONFIG_MMU. */
#ifndef CONFIG_MMU
extern int sysctl_nr_trim_pages;
#endif
122
123/* Constants used for minimum and maximum */
124#ifdef CONFIG_LOCKUP_DETECTOR
125static int sixty = 60;
126#endif
127
128static int __maybe_unused neg_one = -1;
129static int __maybe_unused two = 2;
130static int __maybe_unused four = 4;
131static unsigned long zero_ul;
132static unsigned long one_ul = 1;
133static unsigned long long_max = LONG_MAX;
134static int one_hundred = 100;
135static int one_thousand = 1000;
136#ifdef CONFIG_PRINTK
137static int ten_thousand = 10000;
138#endif
139#ifdef CONFIG_PERF_EVENTS
140static int six_hundred_forty_kb = 640 * 1024;
141#endif
142
143/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
144static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
145
146/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
147static int maxolduid = 65535;
148static int minolduid;
149
150static int ngroups_max = NGROUPS_MAX;
151static const int cap_last_cap = CAP_LAST_CAP;
152
153/*
154 * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
155 * and hung_task_check_interval_secs
156 */
157#ifdef CONFIG_DETECT_HUNG_TASK
158static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
159#endif
160
161#ifdef CONFIG_INOTIFY_USER
162#include <linux/inotify.h>
163#endif
/*
 * Architecture- or option-specific variables defined elsewhere. Each one is
 * only declared when the matching config symbol is set, mirroring the
 * conditionals around the kern_table entries that reference them.
 */

/* Backs the "soft-power" entry. */
#ifdef CONFIG_PARISC
extern int pwrsw_enabled;
#endif

/* Backs the "unaligned-trap" entry. */
#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
extern int unaligned_enabled;
#endif

/* Backs the "unaligned-dump-stack" entry. */
#ifdef CONFIG_IA64
extern int unaligned_dump_stack;
#endif

/* Backs the "ignore-unaligned-usertrap" entry. */
#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
extern int no_unaligned_warning;
#endif
182
183#ifdef CONFIG_PROC_SYSCTL
184
/**
 * enum sysctl_writes_mode - how the file position is treated on sysctl writes
 *
 * @SYSCTL_WRITES_LEGACY: every write syscall has to carry the complete
 *	value; repeated writes through the same file descriptor overwrite
 *	the value no matter what the file position is, and a non-zero
 *	initial position is accepted silently.
 * @SYSCTL_WRITES_WARN: behaves like the legacy mode, except that a warning
 *	is emitted when the initial file position is not 0.
 * @SYSCTL_WRITES_STRICT: numeric entries may only be written at file
 *	position 0, with the whole value inside the buffer of that one write
 *	syscall. String entries honour the file position, limited to the
 *	maximum buffer length: data past that length is ignored and
 *	successive writes append to the buffer.
 *
 * The selected mode decides how the current file position influences an
 * update of a sysctl value made through the proc interface.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY	= -1,
	SYSCTL_WRITES_WARN	= 0,
	SYSCTL_WRITES_STRICT	= 1,
};

/* Active write mode; exposed as the "sysctl_writes_strict" entry. */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
211
212static int proc_do_cad_pid(struct ctl_table *table, int write,
213 void __user *buffer, size_t *lenp, loff_t *ppos);
214static int proc_taint(struct ctl_table *table, int write,
215 void __user *buffer, size_t *lenp, loff_t *ppos);
216#endif
217
218#ifdef CONFIG_PRINTK
219static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
220 void __user *buffer, size_t *lenp, loff_t *ppos);
221#endif
222
223static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
224 void __user *buffer, size_t *lenp, loff_t *ppos);
225#ifdef CONFIG_COREDUMP
226static int proc_dostring_coredump(struct ctl_table *table, int write,
227 void __user *buffer, size_t *lenp, loff_t *ppos);
228#endif
229static int proc_dopipe_max_size(struct ctl_table *table, int write,
230 void __user *buffer, size_t *lenp, loff_t *ppos);
231
#ifdef CONFIG_MAGIC_SYSRQ
/*
 * Value behind the "sysrq" entry of kern_table.
 * Note: sysrq code uses its own private copy.
 */
static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;

/*
 * Proc handler for the "sysrq" entry.
 *
 * The access itself is delegated to proc_dointvec(). After a successful
 * write, the resulting __sysrq_enabled value is forwarded to
 * sysrq_toggle_support().
 *
 * Returns 0 on success, or the error reported by proc_dointvec().
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp,
				loff_t *ppos)
{
	int ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (!ret && write)
		sysrq_toggle_support(__sysrq_enabled);

	return ret;
}

#endif
253
#ifdef CONFIG_BPF_SYSCALL

/*
 * Default, empty notification hook called by bpf_unpriv_handler(). It is
 * marked __weak so that a strong definition provided elsewhere replaces it.
 */
void __weak unpriv_ebpf_notify(int new_state)
{
}

/*
 * bpf_unpriv_handler - proc handler for an int whose value 1 is sticky
 * @table:  table entry; ->data must point at the int being controlled
 * @write:  non-zero for a write access, zero for a read
 * @buffer: buffer passed through to proc_dointvec_minmax()
 * @lenp:   in/out length, passed through to proc_dointvec_minmax()
 * @ppos:   file position, passed through to proc_dointvec_minmax()
 *
 * Writes require CAP_SYS_ADMIN. Once the stored value is 1, a write of any
 * other value is refused. After a write, unpriv_ebpf_notify() is called with
 * the resulting value; reads never call it, because they cannot change the
 * state.
 *
 * NOTE(review): every other handler prototype in this file declares @buffer
 * as "void __user *"; the missing annotation here looks like an oversight -
 * confirm against the proc_handler type used by this tree.
 *
 * Return: 0 on success, -EPERM when the caller lacks CAP_SYS_ADMIN or tries
 * to leave the locked state, otherwise the result of proc_dointvec_minmax().
 */
static int bpf_unpriv_handler(struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, unpriv_enable = *(int *)table->data;
	bool locked_state = unpriv_enable == 1;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/*
	 * Operate on a local copy so that a value which ends up being
	 * rejected below is never stored in table->data.
	 */
	tmp.data = &unpriv_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (locked_state && unpriv_enable != 1)
			return -EPERM;
		*(int *)table->data = unpriv_enable;
	}

	/* Only writes can alter the state, so do not notify on plain reads. */
	if (write)
		unpriv_ebpf_notify(unpriv_enable);

	return ret;
}
#endif
283
284static struct ctl_table kern_table[];
285static struct ctl_table vm_table[];
286static struct ctl_table fs_table[];
287static struct ctl_table debug_table[];
288static struct ctl_table dev_table[];
289extern struct ctl_table random_table[];
290#ifdef CONFIG_EPOLL
291extern struct ctl_table epoll_table[];
292#endif
293
294#ifdef CONFIG_FW_LOADER_USER_HELPER
295extern struct ctl_table firmware_config_table[];
296#endif
297
298#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
299 defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
300int sysctl_legacy_va_layout;
301#endif
302
303/* The default sysctl tables: */
304
305static struct ctl_table sysctl_base_table[] = {
306 {
307 .procname = "kernel",
308 .mode = 0555,
309 .child = kern_table,
310 },
311 {
312 .procname = "vm",
313 .mode = 0555,
314 .child = vm_table,
315 },
316 {
317 .procname = "fs",
318 .mode = 0555,
319 .child = fs_table,
320 },
321 {
322 .procname = "debug",
323 .mode = 0555,
324 .child = debug_table,
325 },
326 {
327 .procname = "dev",
328 .mode = 0555,
329 .child = dev_table,
330 },
331 { }
332};
333
/* Bounds for the scheduler tunables that only exist with CONFIG_SCHED_DEBUG. */
#ifdef CONFIG_SCHED_DEBUG
/* Granularity range: 100 usecs up to 1 second. */
static int min_sched_granularity_ns = 100000;
static int max_sched_granularity_ns = NSEC_PER_SEC;

/* Wakeup granularity range: 0 usecs up to 1 second. */
static int min_wakeup_granularity_ns;
static int max_wakeup_granularity_ns = NSEC_PER_SEC;

#ifdef CONFIG_SMP
/* Accepted range for sched_tunable_scaling. */
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END - 1;
#endif /* CONFIG_SMP */
#endif /* CONFIG_SCHED_DEBUG */

/* Range 0..1000 for the extfrag threshold; only with CONFIG_COMPACTION. */
#ifdef CONFIG_COMPACTION
static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000;
#endif
349
350static struct ctl_table kern_table[] = {
351 {
352 .procname = "sched_child_runs_first",
353 .data = &sysctl_sched_child_runs_first,
354 .maxlen = sizeof(unsigned int),
355 .mode = 0644,
356 .proc_handler = proc_dointvec,
357 },
358#ifdef CONFIG_SCHED_DEBUG
359 {
360 .procname = "sched_min_granularity_ns",
361 .data = &sysctl_sched_min_granularity,
362 .maxlen = sizeof(unsigned int),
363 .mode = 0644,
364 .proc_handler = sched_proc_update_handler,
365 .extra1 = &min_sched_granularity_ns,
366 .extra2 = &max_sched_granularity_ns,
367 },
368 {
369 .procname = "sched_latency_ns",
370 .data = &sysctl_sched_latency,
371 .maxlen = sizeof(unsigned int),
372 .mode = 0644,
373 .proc_handler = sched_proc_update_handler,
374 .extra1 = &min_sched_granularity_ns,
375 .extra2 = &max_sched_granularity_ns,
376 },
377 {
378 .procname = "sched_wakeup_granularity_ns",
379 .data = &sysctl_sched_wakeup_granularity,
380 .maxlen = sizeof(unsigned int),
381 .mode = 0644,
382 .proc_handler = sched_proc_update_handler,
383 .extra1 = &min_wakeup_granularity_ns,
384 .extra2 = &max_wakeup_granularity_ns,
385 },
386#ifdef CONFIG_SMP
387 {
388 .procname = "sched_tunable_scaling",
389 .data = &sysctl_sched_tunable_scaling,
390 .maxlen = sizeof(enum sched_tunable_scaling),
391 .mode = 0644,
392 .proc_handler = sched_proc_update_handler,
393 .extra1 = &min_sched_tunable_scaling,
394 .extra2 = &max_sched_tunable_scaling,
395 },
396 {
397 .procname = "sched_migration_cost_ns",
398 .data = &sysctl_sched_migration_cost,
399 .maxlen = sizeof(unsigned int),
400 .mode = 0644,
401 .proc_handler = proc_dointvec,
402 },
403 {
404 .procname = "sched_nr_migrate",
405 .data = &sysctl_sched_nr_migrate,
406 .maxlen = sizeof(unsigned int),
407 .mode = 0644,
408 .proc_handler = proc_dointvec,
409 },
410#ifdef CONFIG_SCHEDSTATS
411 {
412 .procname = "sched_schedstats",
413 .data = NULL,
414 .maxlen = sizeof(unsigned int),
415 .mode = 0644,
416 .proc_handler = sysctl_schedstats,
417 .extra1 = SYSCTL_ZERO,
418 .extra2 = SYSCTL_ONE,
419 },
420#endif /* CONFIG_SCHEDSTATS */
421#endif /* CONFIG_SMP */
422#ifdef CONFIG_NUMA_BALANCING
423 {
424 .procname = "numa_balancing_scan_delay_ms",
425 .data = &sysctl_numa_balancing_scan_delay,
426 .maxlen = sizeof(unsigned int),
427 .mode = 0644,
428 .proc_handler = proc_dointvec,
429 },
430 {
431 .procname = "numa_balancing_scan_period_min_ms",
432 .data = &sysctl_numa_balancing_scan_period_min,
433 .maxlen = sizeof(unsigned int),
434 .mode = 0644,
435 .proc_handler = proc_dointvec,
436 },
437 {
438 .procname = "numa_balancing_scan_period_max_ms",
439 .data = &sysctl_numa_balancing_scan_period_max,
440 .maxlen = sizeof(unsigned int),
441 .mode = 0644,
442 .proc_handler = proc_dointvec,
443 },
444 {
445 .procname = "numa_balancing_scan_size_mb",
446 .data = &sysctl_numa_balancing_scan_size,
447 .maxlen = sizeof(unsigned int),
448 .mode = 0644,
449 .proc_handler = proc_dointvec_minmax,
450 .extra1 = SYSCTL_ONE,
451 },
452 {
453 .procname = "numa_balancing",
454 .data = NULL, /* filled in by handler */
455 .maxlen = sizeof(unsigned int),
456 .mode = 0644,
457 .proc_handler = sysctl_numa_balancing,
458 .extra1 = SYSCTL_ZERO,
459 .extra2 = SYSCTL_ONE,
460 },
461#endif /* CONFIG_NUMA_BALANCING */
462#endif /* CONFIG_SCHED_DEBUG */
463 {
464 .procname = "sched_rt_period_us",
465 .data = &sysctl_sched_rt_period,
466 .maxlen = sizeof(unsigned int),
467 .mode = 0644,
468 .proc_handler = sched_rt_handler,
469 .extra1 = SYSCTL_ONE,
470 .extra2 = SYSCTL_INT_MAX,
471 },
472 {
473 .procname = "sched_rt_runtime_us",
474 .data = &sysctl_sched_rt_runtime,
475 .maxlen = sizeof(int),
476 .mode = 0644,
477 .proc_handler = sched_rt_handler,
478 .extra1 = &neg_one,
479 .extra2 = SYSCTL_INT_MAX,
480 },
481 {
482 .procname = "sched_rr_timeslice_ms",
483 .data = &sysctl_sched_rr_timeslice,
484 .maxlen = sizeof(int),
485 .mode = 0644,
486 .proc_handler = sched_rr_handler,
487 },
488#ifdef CONFIG_UCLAMP_TASK
489 {
490 .procname = "sched_util_clamp_min",
491 .data = &sysctl_sched_uclamp_util_min,
492 .maxlen = sizeof(unsigned int),
493 .mode = 0644,
494 .proc_handler = sysctl_sched_uclamp_handler,
495 },
496 {
497 .procname = "sched_util_clamp_max",
498 .data = &sysctl_sched_uclamp_util_max,
499 .maxlen = sizeof(unsigned int),
500 .mode = 0644,
501 .proc_handler = sysctl_sched_uclamp_handler,
502 },
503 {
504 .procname = "sched_util_clamp_min_rt_default",
505 .data = &sysctl_sched_uclamp_util_min_rt_default,
506 .maxlen = sizeof(unsigned int),
507 .mode = 0644,
508 .proc_handler = sysctl_sched_uclamp_handler,
509 },
510#endif
511#ifdef CONFIG_SCHED_AUTOGROUP
512 {
513 .procname = "sched_autogroup_enabled",
514 .data = &sysctl_sched_autogroup_enabled,
515 .maxlen = sizeof(unsigned int),
516 .mode = 0644,
517 .proc_handler = proc_dointvec_minmax,
518 .extra1 = SYSCTL_ZERO,
519 .extra2 = SYSCTL_ONE,
520 },
521#endif
522#ifdef CONFIG_CFS_BANDWIDTH
523 {
524 .procname = "sched_cfs_bandwidth_slice_us",
525 .data = &sysctl_sched_cfs_bandwidth_slice,
526 .maxlen = sizeof(unsigned int),
527 .mode = 0644,
528 .proc_handler = proc_dointvec_minmax,
529 .extra1 = SYSCTL_ONE,
530 },
531#endif
532#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
533 {
534 .procname = "sched_energy_aware",
535 .data = &sysctl_sched_energy_aware,
536 .maxlen = sizeof(unsigned int),
537 .mode = 0644,
538 .proc_handler = sched_energy_aware_handler,
539 .extra1 = SYSCTL_ZERO,
540 .extra2 = SYSCTL_ONE,
541 },
542#endif
543#ifdef CONFIG_PROVE_LOCKING
544 {
545 .procname = "prove_locking",
546 .data = &prove_locking,
547 .maxlen = sizeof(int),
548 .mode = 0644,
549 .proc_handler = proc_dointvec,
550 },
551#endif
552#ifdef CONFIG_LOCK_STAT
553 {
554 .procname = "lock_stat",
555 .data = &lock_stat,
556 .maxlen = sizeof(int),
557 .mode = 0644,
558 .proc_handler = proc_dointvec,
559 },
560#endif
561 {
562 .procname = "panic",
563 .data = &panic_timeout,
564 .maxlen = sizeof(int),
565 .mode = 0644,
566 .proc_handler = proc_dointvec,
567 },
568#ifdef CONFIG_COREDUMP
569 {
570 .procname = "core_uses_pid",
571 .data = &core_uses_pid,
572 .maxlen = sizeof(int),
573 .mode = 0644,
574 .proc_handler = proc_dointvec,
575 },
576 {
577 .procname = "core_pattern",
578 .data = core_pattern,
579 .maxlen = CORENAME_MAX_SIZE,
580 .mode = 0644,
581 .proc_handler = proc_dostring_coredump,
582 },
583 {
584 .procname = "core_pipe_limit",
585 .data = &core_pipe_limit,
586 .maxlen = sizeof(unsigned int),
587 .mode = 0644,
588 .proc_handler = proc_dointvec,
589 },
590#endif
591#ifdef CONFIG_PROC_SYSCTL
592 {
593 .procname = "tainted",
594 .maxlen = sizeof(long),
595 .mode = 0644,
596 .proc_handler = proc_taint,
597 },
598 {
599 .procname = "sysctl_writes_strict",
600 .data = &sysctl_writes_strict,
601 .maxlen = sizeof(int),
602 .mode = 0644,
603 .proc_handler = proc_dointvec_minmax,
604 .extra1 = &neg_one,
605 .extra2 = SYSCTL_ONE,
606 },
607#endif
608#ifdef CONFIG_LATENCYTOP
609 {
610 .procname = "latencytop",
611 .data = &latencytop_enabled,
612 .maxlen = sizeof(int),
613 .mode = 0644,
614 .proc_handler = sysctl_latencytop,
615 },
616#endif
617#ifdef CONFIG_BLK_DEV_INITRD
618 {
619 .procname = "real-root-dev",
620 .data = &real_root_dev,
621 .maxlen = sizeof(int),
622 .mode = 0644,
623 .proc_handler = proc_dointvec,
624 },
625#endif
626 {
627 .procname = "print-fatal-signals",
628 .data = &print_fatal_signals,
629 .maxlen = sizeof(int),
630 .mode = 0644,
631 .proc_handler = proc_dointvec,
632 },
633#ifdef CONFIG_SPARC
634 {
635 .procname = "reboot-cmd",
636 .data = reboot_command,
637 .maxlen = 256,
638 .mode = 0644,
639 .proc_handler = proc_dostring,
640 },
641 {
642 .procname = "stop-a",
643 .data = &stop_a_enabled,
644 .maxlen = sizeof (int),
645 .mode = 0644,
646 .proc_handler = proc_dointvec,
647 },
648 {
649 .procname = "scons-poweroff",
650 .data = &scons_pwroff,
651 .maxlen = sizeof (int),
652 .mode = 0644,
653 .proc_handler = proc_dointvec,
654 },
655#endif
656#ifdef CONFIG_SPARC64
657 {
658 .procname = "tsb-ratio",
659 .data = &sysctl_tsb_ratio,
660 .maxlen = sizeof (int),
661 .mode = 0644,
662 .proc_handler = proc_dointvec,
663 },
664#endif
665#ifdef CONFIG_PARISC
666 {
667 .procname = "soft-power",
668 .data = &pwrsw_enabled,
669 .maxlen = sizeof (int),
670 .mode = 0644,
671 .proc_handler = proc_dointvec,
672 },
673#endif
674#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
675 {
676 .procname = "unaligned-trap",
677 .data = &unaligned_enabled,
678 .maxlen = sizeof (int),
679 .mode = 0644,
680 .proc_handler = proc_dointvec,
681 },
682#endif
683 {
684 .procname = "ctrl-alt-del",
685 .data = &C_A_D,
686 .maxlen = sizeof(int),
687 .mode = 0644,
688 .proc_handler = proc_dointvec,
689 },
690#ifdef CONFIG_FUNCTION_TRACER
691 {
692 .procname = "ftrace_enabled",
693 .data = &ftrace_enabled,
694 .maxlen = sizeof(int),
695 .mode = 0644,
696 .proc_handler = ftrace_enable_sysctl,
697 },
698#endif
699#ifdef CONFIG_STACK_TRACER
700 {
701 .procname = "stack_tracer_enabled",
702 .data = &stack_tracer_enabled,
703 .maxlen = sizeof(int),
704 .mode = 0644,
705 .proc_handler = stack_trace_sysctl,
706 },
707#endif
708#ifdef CONFIG_TRACING
709 {
710 .procname = "ftrace_dump_on_oops",
711 .data = &ftrace_dump_on_oops,
712 .maxlen = sizeof(int),
713 .mode = 0644,
714 .proc_handler = proc_dointvec,
715 },
716 {
717 .procname = "traceoff_on_warning",
718 .data = &__disable_trace_on_warning,
719 .maxlen = sizeof(__disable_trace_on_warning),
720 .mode = 0644,
721 .proc_handler = proc_dointvec,
722 },
723 {
724 .procname = "tracepoint_printk",
725 .data = &tracepoint_printk,
726 .maxlen = sizeof(tracepoint_printk),
727 .mode = 0644,
728 .proc_handler = tracepoint_printk_sysctl,
729 },
730#endif
731#ifdef CONFIG_KEXEC_CORE
732 {
733 .procname = "kexec_load_disabled",
734 .data = &kexec_load_disabled,
735 .maxlen = sizeof(int),
736 .mode = 0644,
737 /* only handle a transition from default "0" to "1" */
738 .proc_handler = proc_dointvec_minmax,
739 .extra1 = SYSCTL_ONE,
740 .extra2 = SYSCTL_ONE,
741 },
742#endif
743#ifdef CONFIG_MODULES
744 {
745 .procname = "modprobe",
746 .data = &modprobe_path,
747 .maxlen = KMOD_PATH_LEN,
748 .mode = 0644,
749 .proc_handler = proc_dostring,
750 },
751 {
752 .procname = "modules_disabled",
753 .data = &modules_disabled,
754 .maxlen = sizeof(int),
755 .mode = 0644,
756 /* only handle a transition from default "0" to "1" */
757 .proc_handler = proc_dointvec_minmax,
758 .extra1 = SYSCTL_ONE,
759 .extra2 = SYSCTL_ONE,
760 },
761#endif
762#ifdef CONFIG_UEVENT_HELPER
763 {
764 .procname = "hotplug",
765 .data = &uevent_helper,
766 .maxlen = UEVENT_HELPER_PATH_LEN,
767 .mode = 0644,
768 .proc_handler = proc_dostring,
769 },
770#endif
771#ifdef CONFIG_CHR_DEV_SG
772 {
773 .procname = "sg-big-buff",
774 .data = &sg_big_buff,
775 .maxlen = sizeof (int),
776 .mode = 0444,
777 .proc_handler = proc_dointvec,
778 },
779#endif
780#ifdef CONFIG_BSD_PROCESS_ACCT
781 {
782 .procname = "acct",
783 .data = &acct_parm,
784 .maxlen = 3*sizeof(int),
785 .mode = 0644,
786 .proc_handler = proc_dointvec,
787 },
788#endif
789#ifdef CONFIG_MAGIC_SYSRQ
790 {
791 .procname = "sysrq",
792 .data = &__sysrq_enabled,
793 .maxlen = sizeof (int),
794 .mode = 0644,
795 .proc_handler = sysrq_sysctl_handler,
796 },
797#endif
798#ifdef CONFIG_PROC_SYSCTL
799 {
800 .procname = "cad_pid",
801 .data = NULL,
802 .maxlen = sizeof (int),
803 .mode = 0600,
804 .proc_handler = proc_do_cad_pid,
805 },
806#endif
807 {
808 .procname = "threads-max",
809 .data = NULL,
810 .maxlen = sizeof(int),
811 .mode = 0644,
812 .proc_handler = sysctl_max_threads,
813 },
814 {
815 .procname = "random",
816 .mode = 0555,
817 .child = random_table,
818 },
819 {
820 .procname = "usermodehelper",
821 .mode = 0555,
822 .child = usermodehelper_table,
823 },
824#ifdef CONFIG_FW_LOADER_USER_HELPER
825 {
826 .procname = "firmware_config",
827 .mode = 0555,
828 .child = firmware_config_table,
829 },
830#endif
831 {
832 .procname = "overflowuid",
833 .data = &overflowuid,
834 .maxlen = sizeof(int),
835 .mode = 0644,
836 .proc_handler = proc_dointvec_minmax,
837 .extra1 = &minolduid,
838 .extra2 = &maxolduid,
839 },
840 {
841 .procname = "overflowgid",
842 .data = &overflowgid,
843 .maxlen = sizeof(int),
844 .mode = 0644,
845 .proc_handler = proc_dointvec_minmax,
846 .extra1 = &minolduid,
847 .extra2 = &maxolduid,
848 },
849#ifdef CONFIG_S390
850#ifdef CONFIG_MATHEMU
851 {
852 .procname = "ieee_emulation_warnings",
853 .data = &sysctl_ieee_emulation_warnings,
854 .maxlen = sizeof(int),
855 .mode = 0644,
856 .proc_handler = proc_dointvec,
857 },
858#endif
859 {
860 .procname = "userprocess_debug",
861 .data = &show_unhandled_signals,
862 .maxlen = sizeof(int),
863 .mode = 0644,
864 .proc_handler = proc_dointvec,
865 },
866#endif
867 {
868 .procname = "pid_max",
869 .data = &pid_max,
870 .maxlen = sizeof (int),
871 .mode = 0644,
872 .proc_handler = proc_dointvec_minmax,
873 .extra1 = &pid_max_min,
874 .extra2 = &pid_max_max,
875 },
876 {
877 .procname = "panic_on_oops",
878 .data = &panic_on_oops,
879 .maxlen = sizeof(int),
880 .mode = 0644,
881 .proc_handler = proc_dointvec,
882 },
883 {
884 .procname = "panic_print",
885 .data = &panic_print,
886 .maxlen = sizeof(unsigned long),
887 .mode = 0644,
888 .proc_handler = proc_doulongvec_minmax,
889 },
890#if defined CONFIG_PRINTK
891 {
892 .procname = "printk",
893 .data = &console_loglevel,
894 .maxlen = 4*sizeof(int),
895 .mode = 0644,
896 .proc_handler = proc_dointvec,
897 },
898 {
899 .procname = "printk_ratelimit",
900 .data = &printk_ratelimit_state.interval,
901 .maxlen = sizeof(int),
902 .mode = 0644,
903 .proc_handler = proc_dointvec_jiffies,
904 },
905 {
906 .procname = "printk_ratelimit_burst",
907 .data = &printk_ratelimit_state.burst,
908 .maxlen = sizeof(int),
909 .mode = 0644,
910 .proc_handler = proc_dointvec,
911 },
912 {
913 .procname = "printk_delay",
914 .data = &printk_delay_msec,
915 .maxlen = sizeof(int),
916 .mode = 0644,
917 .proc_handler = proc_dointvec_minmax,
918 .extra1 = SYSCTL_ZERO,
919 .extra2 = &ten_thousand,
920 },
921 {
922 .procname = "printk_devkmsg",
923 .data = devkmsg_log_str,
924 .maxlen = DEVKMSG_STR_MAX_SIZE,
925 .mode = 0644,
926 .proc_handler = devkmsg_sysctl_set_loglvl,
927 },
928 {
929 .procname = "dmesg_restrict",
930 .data = &dmesg_restrict,
931 .maxlen = sizeof(int),
932 .mode = 0644,
933 .proc_handler = proc_dointvec_minmax_sysadmin,
934 .extra1 = SYSCTL_ZERO,
935 .extra2 = SYSCTL_ONE,
936 },
937 {
938 .procname = "kptr_restrict",
939 .data = &kptr_restrict,
940 .maxlen = sizeof(int),
941 .mode = 0644,
942 .proc_handler = proc_dointvec_minmax_sysadmin,
943 .extra1 = SYSCTL_ZERO,
944 .extra2 = &two,
945 },
946#endif
947 {
948 .procname = "ngroups_max",
949 .data = &ngroups_max,
950 .maxlen = sizeof (int),
951 .mode = 0444,
952 .proc_handler = proc_dointvec,
953 },
954 {
955 .procname = "cap_last_cap",
956 .data = (void *)&cap_last_cap,
957 .maxlen = sizeof(int),
958 .mode = 0444,
959 .proc_handler = proc_dointvec,
960 },
961#if defined(CONFIG_LOCKUP_DETECTOR)
962 {
963 .procname = "watchdog",
964 .data = &watchdog_user_enabled,
965 .maxlen = sizeof(int),
966 .mode = 0644,
967 .proc_handler = proc_watchdog,
968 .extra1 = SYSCTL_ZERO,
969 .extra2 = SYSCTL_ONE,
970 },
971 {
972 .procname = "watchdog_thresh",
973 .data = &watchdog_thresh,
974 .maxlen = sizeof(int),
975 .mode = 0644,
976 .proc_handler = proc_watchdog_thresh,
977 .extra1 = SYSCTL_ZERO,
978 .extra2 = &sixty,
979 },
980 {
981 .procname = "nmi_watchdog",
982 .data = &nmi_watchdog_user_enabled,
983 .maxlen = sizeof(int),
984 .mode = NMI_WATCHDOG_SYSCTL_PERM,
985 .proc_handler = proc_nmi_watchdog,
986 .extra1 = SYSCTL_ZERO,
987 .extra2 = SYSCTL_ONE,
988 },
989 {
990 .procname = "watchdog_cpumask",
991 .data = &watchdog_cpumask_bits,
992 .maxlen = NR_CPUS,
993 .mode = 0644,
994 .proc_handler = proc_watchdog_cpumask,
995 },
996#ifdef CONFIG_SOFTLOCKUP_DETECTOR
997 {
998 .procname = "soft_watchdog",
999 .data = &soft_watchdog_user_enabled,
1000 .maxlen = sizeof(int),
1001 .mode = 0644,
1002 .proc_handler = proc_soft_watchdog,
1003 .extra1 = SYSCTL_ZERO,
1004 .extra2 = SYSCTL_ONE,
1005 },
1006 {
1007 .procname = "softlockup_panic",
1008 .data = &softlockup_panic,
1009 .maxlen = sizeof(int),
1010 .mode = 0644,
1011 .proc_handler = proc_dointvec_minmax,
1012 .extra1 = SYSCTL_ZERO,
1013 .extra2 = SYSCTL_ONE,
1014 },
1015#ifdef CONFIG_SMP
1016 {
1017 .procname = "softlockup_all_cpu_backtrace",
1018 .data = &sysctl_softlockup_all_cpu_backtrace,
1019 .maxlen = sizeof(int),
1020 .mode = 0644,
1021 .proc_handler = proc_dointvec_minmax,
1022 .extra1 = SYSCTL_ZERO,
1023 .extra2 = SYSCTL_ONE,
1024 },
1025#endif /* CONFIG_SMP */
1026#endif
1027#ifdef CONFIG_HARDLOCKUP_DETECTOR
1028 {
1029 .procname = "hardlockup_panic",
1030 .data = &hardlockup_panic,
1031 .maxlen = sizeof(int),
1032 .mode = 0644,
1033 .proc_handler = proc_dointvec_minmax,
1034 .extra1 = SYSCTL_ZERO,
1035 .extra2 = SYSCTL_ONE,
1036 },
1037#ifdef CONFIG_SMP
1038 {
1039 .procname = "hardlockup_all_cpu_backtrace",
1040 .data = &sysctl_hardlockup_all_cpu_backtrace,
1041 .maxlen = sizeof(int),
1042 .mode = 0644,
1043 .proc_handler = proc_dointvec_minmax,
1044 .extra1 = SYSCTL_ZERO,
1045 .extra2 = SYSCTL_ONE,
1046 },
1047#endif /* CONFIG_SMP */
1048#endif
1049#endif
1050
1051#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
1052 {
1053 .procname = "unknown_nmi_panic",
1054 .data = &unknown_nmi_panic,
1055 .maxlen = sizeof (int),
1056 .mode = 0644,
1057 .proc_handler = proc_dointvec,
1058 },
1059#endif
1060#if defined(CONFIG_X86)
1061 {
1062 .procname = "panic_on_unrecovered_nmi",
1063 .data = &panic_on_unrecovered_nmi,
1064 .maxlen = sizeof(int),
1065 .mode = 0644,
1066 .proc_handler = proc_dointvec,
1067 },
1068 {
1069 .procname = "panic_on_io_nmi",
1070 .data = &panic_on_io_nmi,
1071 .maxlen = sizeof(int),
1072 .mode = 0644,
1073 .proc_handler = proc_dointvec,
1074 },
1075#ifdef CONFIG_DEBUG_STACKOVERFLOW
1076 {
1077 .procname = "panic_on_stackoverflow",
1078 .data = &sysctl_panic_on_stackoverflow,
1079 .maxlen = sizeof(int),
1080 .mode = 0644,
1081 .proc_handler = proc_dointvec,
1082 },
1083#endif
1084 {
1085 .procname = "bootloader_type",
1086 .data = &bootloader_type,
1087 .maxlen = sizeof (int),
1088 .mode = 0444,
1089 .proc_handler = proc_dointvec,
1090 },
1091 {
1092 .procname = "bootloader_version",
1093 .data = &bootloader_version,
1094 .maxlen = sizeof (int),
1095 .mode = 0444,
1096 .proc_handler = proc_dointvec,
1097 },
1098 {
1099 .procname = "io_delay_type",
1100 .data = &io_delay_type,
1101 .maxlen = sizeof(int),
1102 .mode = 0644,
1103 .proc_handler = proc_dointvec,
1104 },
1105#endif
1106#if defined(CONFIG_MMU)
1107 {
1108 .procname = "randomize_va_space",
1109 .data = &randomize_va_space,
1110 .maxlen = sizeof(int),
1111 .mode = 0644,
1112 .proc_handler = proc_dointvec,
1113 },
1114#endif
1115#if defined(CONFIG_S390) && defined(CONFIG_SMP)
1116 {
1117 .procname = "spin_retry",
1118 .data = &spin_retry,
1119 .maxlen = sizeof (int),
1120 .mode = 0644,
1121 .proc_handler = proc_dointvec,
1122 },
1123#endif
1124#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1125 {
1126 .procname = "acpi_video_flags",
1127 .data = &acpi_realmode_flags,
1128 .maxlen = sizeof (unsigned long),
1129 .mode = 0644,
1130 .proc_handler = proc_doulongvec_minmax,
1131 },
1132#endif
1133#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1134 {
1135 .procname = "ignore-unaligned-usertrap",
1136 .data = &no_unaligned_warning,
1137 .maxlen = sizeof (int),
1138 .mode = 0644,
1139 .proc_handler = proc_dointvec,
1140 },
1141#endif
1142#ifdef CONFIG_IA64
1143 {
1144 .procname = "unaligned-dump-stack",
1145 .data = &unaligned_dump_stack,
1146 .maxlen = sizeof (int),
1147 .mode = 0644,
1148 .proc_handler = proc_dointvec,
1149 },
1150#endif
1151#ifdef CONFIG_DETECT_HUNG_TASK
1152 {
1153 .procname = "hung_task_panic",
1154 .data = &sysctl_hung_task_panic,
1155 .maxlen = sizeof(int),
1156 .mode = 0644,
1157 .proc_handler = proc_dointvec_minmax,
1158 .extra1 = SYSCTL_ZERO,
1159 .extra2 = SYSCTL_ONE,
1160 },
1161 {
1162 .procname = "hung_task_check_count",
1163 .data = &sysctl_hung_task_check_count,
1164 .maxlen = sizeof(int),
1165 .mode = 0644,
1166 .proc_handler = proc_dointvec_minmax,
1167 .extra1 = SYSCTL_ZERO,
1168 },
1169 {
1170 .procname = "hung_task_timeout_secs",
1171 .data = &sysctl_hung_task_timeout_secs,
1172 .maxlen = sizeof(unsigned long),
1173 .mode = 0644,
1174 .proc_handler = proc_dohung_task_timeout_secs,
1175 .extra2 = &hung_task_timeout_max,
1176 },
1177 {
1178 .procname = "hung_task_check_interval_secs",
1179 .data = &sysctl_hung_task_check_interval_secs,
1180 .maxlen = sizeof(unsigned long),
1181 .mode = 0644,
1182 .proc_handler = proc_dohung_task_timeout_secs,
1183 .extra2 = &hung_task_timeout_max,
1184 },
1185 {
1186 .procname = "hung_task_warnings",
1187 .data = &sysctl_hung_task_warnings,
1188 .maxlen = sizeof(int),
1189 .mode = 0644,
1190 .proc_handler = proc_dointvec_minmax,
1191 .extra1 = &neg_one,
1192 },
1193#endif
1194#ifdef CONFIG_RT_MUTEXES
1195 {
1196 .procname = "max_lock_depth",
1197 .data = &max_lock_depth,
1198 .maxlen = sizeof(int),
1199 .mode = 0644,
1200 .proc_handler = proc_dointvec,
1201 },
1202#endif
1203 {
1204 .procname = "poweroff_cmd",
1205 .data = &poweroff_cmd,
1206 .maxlen = POWEROFF_CMD_PATH_LEN,
1207 .mode = 0644,
1208 .proc_handler = proc_dostring,
1209 },
1210#ifdef CONFIG_KEYS
1211 {
1212 .procname = "keys",
1213 .mode = 0555,
1214 .child = key_sysctls,
1215 },
1216#endif
1217#ifdef CONFIG_PERF_EVENTS
1218 /*
1219 * User-space scripts rely on the existence of this file
1220 * as a feature check for perf_events being enabled.
1221 *
1222 * So it's an ABI, do not remove!
1223 */
1224 {
1225 .procname = "perf_event_paranoid",
1226 .data = &sysctl_perf_event_paranoid,
1227 .maxlen = sizeof(sysctl_perf_event_paranoid),
1228 .mode = 0644,
1229 .proc_handler = proc_dointvec,
1230 },
1231 {
1232 .procname = "perf_event_mlock_kb",
1233 .data = &sysctl_perf_event_mlock,
1234 .maxlen = sizeof(sysctl_perf_event_mlock),
1235 .mode = 0644,
1236 .proc_handler = proc_dointvec,
1237 },
1238 {
1239 .procname = "perf_event_max_sample_rate",
1240 .data = &sysctl_perf_event_sample_rate,
1241 .maxlen = sizeof(sysctl_perf_event_sample_rate),
1242 .mode = 0644,
1243 .proc_handler = perf_proc_update_handler,
1244 .extra1 = SYSCTL_ONE,
1245 },
1246 {
1247 .procname = "perf_cpu_time_max_percent",
1248 .data = &sysctl_perf_cpu_time_max_percent,
1249 .maxlen = sizeof(sysctl_perf_cpu_time_max_percent),
1250 .mode = 0644,
1251 .proc_handler = perf_cpu_time_max_percent_handler,
1252 .extra1 = SYSCTL_ZERO,
1253 .extra2 = &one_hundred,
1254 },
1255 {
1256 .procname = "perf_event_max_stack",
1257 .data = &sysctl_perf_event_max_stack,
1258 .maxlen = sizeof(sysctl_perf_event_max_stack),
1259 .mode = 0644,
1260 .proc_handler = perf_event_max_stack_handler,
1261 .extra1 = SYSCTL_ZERO,
1262 .extra2 = &six_hundred_forty_kb,
1263 },
1264 {
1265 .procname = "perf_event_max_contexts_per_stack",
1266 .data = &sysctl_perf_event_max_contexts_per_stack,
1267 .maxlen = sizeof(sysctl_perf_event_max_contexts_per_stack),
1268 .mode = 0644,
1269 .proc_handler = perf_event_max_stack_handler,
1270 .extra1 = SYSCTL_ZERO,
1271 .extra2 = &one_thousand,
1272 },
1273#endif
1274 {
1275 .procname = "panic_on_warn",
1276 .data = &panic_on_warn,
1277 .maxlen = sizeof(int),
1278 .mode = 0644,
1279 .proc_handler = proc_dointvec_minmax,
1280 .extra1 = SYSCTL_ZERO,
1281 .extra2 = SYSCTL_ONE,
1282 },
1283#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1284 {
1285 .procname = "timer_migration",
1286 .data = &sysctl_timer_migration,
1287 .maxlen = sizeof(unsigned int),
1288 .mode = 0644,
1289 .proc_handler = timer_migration_handler,
1290 .extra1 = SYSCTL_ZERO,
1291 .extra2 = SYSCTL_ONE,
1292 },
1293#endif
1294#ifdef CONFIG_BPF_SYSCALL
1295 {
1296 .procname = "unprivileged_bpf_disabled",
1297 .data = &sysctl_unprivileged_bpf_disabled,
1298 .maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
1299 .mode = 0644,
1300 .proc_handler = bpf_unpriv_handler,
1301 .extra1 = SYSCTL_ZERO,
1302 .extra2 = &two,
1303 },
1304 {
1305 .procname = "bpf_stats_enabled",
1306 .data = &bpf_stats_enabled_key.key,
1307 .maxlen = sizeof(bpf_stats_enabled_key),
1308 .mode = 0644,
1309 .proc_handler = proc_do_static_key,
1310 },
1311#endif
1312#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1313 {
1314 .procname = "panic_on_rcu_stall",
1315 .data = &sysctl_panic_on_rcu_stall,
1316 .maxlen = sizeof(sysctl_panic_on_rcu_stall),
1317 .mode = 0644,
1318 .proc_handler = proc_dointvec_minmax,
1319 .extra1 = SYSCTL_ZERO,
1320 .extra2 = SYSCTL_ONE,
1321 },
1322#endif
1323#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
1324 {
1325 .procname = "stack_erasing",
1326 .data = NULL,
1327 .maxlen = sizeof(int),
1328 .mode = 0600,
1329 .proc_handler = stack_erasing_sysctl,
1330 .extra1 = SYSCTL_ZERO,
1331 .extra2 = SYSCTL_ONE,
1332 },
1333#endif
1334 {
1335 .procname = "rdup",
1336 .data = &ramdump_pm80x_flag,
1337 .maxlen = sizeof(int),
1338 .mode = 0600,
1339 .proc_handler = ramdump_pm80x_flag_sysctl,
1340 },
1341#ifdef CONFIG_UBIFS_FS
1342 {
1343 .procname = "ubifs_error",
1344 .data = &ramdump_ubifs_flag,
1345 .maxlen = sizeof(int),
1346 .mode = 0600,
1347 .proc_handler = ramdump_ubifs_flag_sysctl,
1348 },
1349#endif
1350 { }
1351};
1352
1353static struct ctl_table vm_table[] = {
1354 {
1355 .procname = "overcommit_memory",
1356 .data = &sysctl_overcommit_memory,
1357 .maxlen = sizeof(sysctl_overcommit_memory),
1358 .mode = 0644,
1359 .proc_handler = proc_dointvec_minmax,
1360 .extra1 = SYSCTL_ZERO,
1361 .extra2 = &two,
1362 },
1363 {
1364 .procname = "panic_on_oom",
1365 .data = &sysctl_panic_on_oom,
1366 .maxlen = sizeof(sysctl_panic_on_oom),
1367 .mode = 0644,
1368 .proc_handler = proc_dointvec_minmax,
1369 .extra1 = SYSCTL_ZERO,
1370 .extra2 = &two,
1371 },
1372 {
1373 .procname = "oom_kill_allocating_task",
1374 .data = &sysctl_oom_kill_allocating_task,
1375 .maxlen = sizeof(sysctl_oom_kill_allocating_task),
1376 .mode = 0644,
1377 .proc_handler = proc_dointvec,
1378 },
1379 {
1380 .procname = "oom_dump_tasks",
1381 .data = &sysctl_oom_dump_tasks,
1382 .maxlen = sizeof(sysctl_oom_dump_tasks),
1383 .mode = 0644,
1384 .proc_handler = proc_dointvec,
1385 },
1386 {
1387 .procname = "overcommit_ratio",
1388 .data = &sysctl_overcommit_ratio,
1389 .maxlen = sizeof(sysctl_overcommit_ratio),
1390 .mode = 0644,
1391 .proc_handler = overcommit_ratio_handler,
1392 },
1393 {
1394 .procname = "overcommit_kbytes",
1395 .data = &sysctl_overcommit_kbytes,
1396 .maxlen = sizeof(sysctl_overcommit_kbytes),
1397 .mode = 0644,
1398 .proc_handler = overcommit_kbytes_handler,
1399 },
1400 {
1401 .procname = "page-cluster",
1402 .data = &page_cluster,
1403 .maxlen = sizeof(int),
1404 .mode = 0644,
1405 .proc_handler = proc_dointvec_minmax,
1406 .extra1 = SYSCTL_ZERO,
1407 },
1408 {
1409 .procname = "dirty_background_ratio",
1410 .data = &dirty_background_ratio,
1411 .maxlen = sizeof(dirty_background_ratio),
1412 .mode = 0644,
1413 .proc_handler = dirty_background_ratio_handler,
1414 .extra1 = SYSCTL_ZERO,
1415 .extra2 = &one_hundred,
1416 },
1417 {
1418 .procname = "dirty_background_bytes",
1419 .data = &dirty_background_bytes,
1420 .maxlen = sizeof(dirty_background_bytes),
1421 .mode = 0644,
1422 .proc_handler = dirty_background_bytes_handler,
1423 .extra1 = &one_ul,
1424 },
1425 {
1426 .procname = "dirty_ratio",
1427 .data = &vm_dirty_ratio,
1428 .maxlen = sizeof(vm_dirty_ratio),
1429 .mode = 0644,
1430 .proc_handler = dirty_ratio_handler,
1431 .extra1 = SYSCTL_ZERO,
1432 .extra2 = &one_hundred,
1433 },
1434 {
1435 .procname = "dirty_bytes",
1436 .data = &vm_dirty_bytes,
1437 .maxlen = sizeof(vm_dirty_bytes),
1438 .mode = 0644,
1439 .proc_handler = dirty_bytes_handler,
1440 .extra1 = &dirty_bytes_min,
1441 },
1442 {
1443 .procname = "dirty_writeback_centisecs",
1444 .data = &dirty_writeback_interval,
1445 .maxlen = sizeof(dirty_writeback_interval),
1446 .mode = 0644,
1447 .proc_handler = dirty_writeback_centisecs_handler,
1448 },
1449 {
1450 .procname = "dirty_expire_centisecs",
1451 .data = &dirty_expire_interval,
1452 .maxlen = sizeof(dirty_expire_interval),
1453 .mode = 0644,
1454 .proc_handler = proc_dointvec_minmax,
1455 .extra1 = SYSCTL_ZERO,
1456 },
1457 {
1458 .procname = "dirtytime_expire_seconds",
1459 .data = &dirtytime_expire_interval,
1460 .maxlen = sizeof(dirtytime_expire_interval),
1461 .mode = 0644,
1462 .proc_handler = dirtytime_interval_handler,
1463 .extra1 = SYSCTL_ZERO,
1464 },
1465 {
1466 .procname = "swappiness",
1467 .data = &vm_swappiness,
1468 .maxlen = sizeof(vm_swappiness),
1469 .mode = 0644,
1470 .proc_handler = proc_dointvec_minmax,
1471 .extra1 = SYSCTL_ZERO,
1472 .extra2 = &one_hundred,
1473 },
1474#ifdef CONFIG_NUMA
1475 {
1476 .procname = "numa_stat",
1477 .data = &sysctl_vm_numa_stat,
1478 .maxlen = sizeof(int),
1479 .mode = 0644,
1480 .proc_handler = sysctl_vm_numa_stat_handler,
1481 .extra1 = SYSCTL_ZERO,
1482 .extra2 = SYSCTL_ONE,
1483 },
1484#endif
1485#ifdef CONFIG_HUGETLB_PAGE
1486 {
1487 .procname = "nr_hugepages",
1488 .data = NULL,
1489 .maxlen = sizeof(unsigned long),
1490 .mode = 0644,
1491 .proc_handler = hugetlb_sysctl_handler,
1492 },
1493#ifdef CONFIG_NUMA
1494 {
1495 .procname = "nr_hugepages_mempolicy",
1496 .data = NULL,
1497 .maxlen = sizeof(unsigned long),
1498 .mode = 0644,
1499 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1500 },
1501#endif
1502 {
1503 .procname = "hugetlb_shm_group",
1504 .data = &sysctl_hugetlb_shm_group,
1505 .maxlen = sizeof(gid_t),
1506 .mode = 0644,
1507 .proc_handler = proc_dointvec,
1508 },
1509 {
1510 .procname = "nr_overcommit_hugepages",
1511 .data = NULL,
1512 .maxlen = sizeof(unsigned long),
1513 .mode = 0644,
1514 .proc_handler = hugetlb_overcommit_handler,
1515 },
1516#endif
1517 {
1518 .procname = "lowmem_reserve_ratio",
1519 .data = &sysctl_lowmem_reserve_ratio,
1520 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
1521 .mode = 0644,
1522 .proc_handler = lowmem_reserve_ratio_sysctl_handler,
1523 },
1524 {
1525 .procname = "drop_caches",
1526 .data = &sysctl_drop_caches,
1527 .maxlen = sizeof(int),
1528 .mode = 0200,
1529 .proc_handler = drop_caches_sysctl_handler,
1530 .extra1 = SYSCTL_ONE,
1531 .extra2 = &four,
1532 },
1533#ifdef CONFIG_COMPACTION
1534 {
1535 .procname = "compact_memory",
1536 .data = &sysctl_compact_memory,
1537 .maxlen = sizeof(int),
1538 .mode = 0200,
1539 .proc_handler = sysctl_compaction_handler,
1540 },
1541 {
1542 .procname = "extfrag_threshold",
1543 .data = &sysctl_extfrag_threshold,
1544 .maxlen = sizeof(int),
1545 .mode = 0644,
1546 .proc_handler = proc_dointvec_minmax,
1547 .extra1 = &min_extfrag_threshold,
1548 .extra2 = &max_extfrag_threshold,
1549 },
1550 {
1551 .procname = "compact_unevictable_allowed",
1552 .data = &sysctl_compact_unevictable_allowed,
1553 .maxlen = sizeof(int),
1554 .mode = 0644,
1555 .proc_handler = proc_dointvec,
1556 .extra1 = SYSCTL_ZERO,
1557 .extra2 = SYSCTL_ONE,
1558 },
1559
1560#endif /* CONFIG_COMPACTION */
1561 {
1562 .procname = "min_free_kbytes",
1563 .data = &min_free_kbytes,
1564 .maxlen = sizeof(min_free_kbytes),
1565 .mode = 0644,
1566 .proc_handler = min_free_kbytes_sysctl_handler,
1567 .extra1 = SYSCTL_ZERO,
1568 },
1569 {
1570 .procname = "watermark_boost_factor",
1571 .data = &watermark_boost_factor,
1572 .maxlen = sizeof(watermark_boost_factor),
1573 .mode = 0644,
1574 .proc_handler = watermark_boost_factor_sysctl_handler,
1575 .extra1 = SYSCTL_ZERO,
1576 },
1577 {
1578 .procname = "watermark_scale_factor",
1579 .data = &watermark_scale_factor,
1580 .maxlen = sizeof(watermark_scale_factor),
1581 .mode = 0644,
1582 .proc_handler = watermark_scale_factor_sysctl_handler,
1583 .extra1 = SYSCTL_ONE,
1584 .extra2 = &one_thousand,
1585 },
1586 {
1587 .procname = "extra_free_kbytes",
1588 .data = &extra_free_kbytes,
1589 .maxlen = sizeof(extra_free_kbytes),
1590 .mode = 0644,
1591 .proc_handler = extra_free_kbytes_sysctl_handler,
1592 .extra1 = SYSCTL_ZERO,
1593 },
1594 {
1595 .procname = "percpu_pagelist_fraction",
1596 .data = &percpu_pagelist_fraction,
1597 .maxlen = sizeof(percpu_pagelist_fraction),
1598 .mode = 0644,
1599 .proc_handler = percpu_pagelist_fraction_sysctl_handler,
1600 .extra1 = SYSCTL_ZERO,
1601 },
1602 {
1603 .procname = "page_lock_unfairness",
1604 .data = &sysctl_page_lock_unfairness,
1605 .maxlen = sizeof(sysctl_page_lock_unfairness),
1606 .mode = 0644,
1607 .proc_handler = proc_dointvec_minmax,
1608 .extra1 = SYSCTL_ZERO,
1609 },
1610#ifdef CONFIG_MMU
1611 {
1612 .procname = "max_map_count",
1613 .data = &sysctl_max_map_count,
1614 .maxlen = sizeof(sysctl_max_map_count),
1615 .mode = 0644,
1616 .proc_handler = proc_dointvec_minmax,
1617 .extra1 = SYSCTL_ZERO,
1618 },
1619#else
1620 {
1621 .procname = "nr_trim_pages",
1622 .data = &sysctl_nr_trim_pages,
1623 .maxlen = sizeof(sysctl_nr_trim_pages),
1624 .mode = 0644,
1625 .proc_handler = proc_dointvec_minmax,
1626 .extra1 = SYSCTL_ZERO,
1627 },
1628#endif
1629 {
1630 .procname = "laptop_mode",
1631 .data = &laptop_mode,
1632 .maxlen = sizeof(laptop_mode),
1633 .mode = 0644,
1634 .proc_handler = proc_dointvec_jiffies,
1635 },
1636 {
1637 .procname = "block_dump",
1638 .data = &block_dump,
1639 .maxlen = sizeof(block_dump),
1640 .mode = 0644,
1641 .proc_handler = proc_dointvec_minmax,
1642 .extra1 = SYSCTL_ZERO,
1643 },
1644 {
1645 .procname = "vfs_cache_pressure",
1646 .data = &sysctl_vfs_cache_pressure,
1647 .maxlen = sizeof(sysctl_vfs_cache_pressure),
1648 .mode = 0644,
1649 .proc_handler = proc_dointvec_minmax,
1650 .extra1 = SYSCTL_ZERO,
1651 },
1652#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
1653 defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
1654 {
1655 .procname = "legacy_va_layout",
1656 .data = &sysctl_legacy_va_layout,
1657 .maxlen = sizeof(sysctl_legacy_va_layout),
1658 .mode = 0644,
1659 .proc_handler = proc_dointvec_minmax,
1660 .extra1 = SYSCTL_ZERO,
1661 },
1662#endif
1663#ifdef CONFIG_NUMA
1664 {
1665 .procname = "zone_reclaim_mode",
1666 .data = &node_reclaim_mode,
1667 .maxlen = sizeof(node_reclaim_mode),
1668 .mode = 0644,
1669 .proc_handler = proc_dointvec_minmax,
1670 .extra1 = SYSCTL_ZERO,
1671 },
1672 {
1673 .procname = "min_unmapped_ratio",
1674 .data = &sysctl_min_unmapped_ratio,
1675 .maxlen = sizeof(sysctl_min_unmapped_ratio),
1676 .mode = 0644,
1677 .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
1678 .extra1 = SYSCTL_ZERO,
1679 .extra2 = &one_hundred,
1680 },
1681 {
1682 .procname = "min_slab_ratio",
1683 .data = &sysctl_min_slab_ratio,
1684 .maxlen = sizeof(sysctl_min_slab_ratio),
1685 .mode = 0644,
1686 .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
1687 .extra1 = SYSCTL_ZERO,
1688 .extra2 = &one_hundred,
1689 },
1690#endif
1691#ifdef CONFIG_SMP
1692 {
1693 .procname = "stat_interval",
1694 .data = &sysctl_stat_interval,
1695 .maxlen = sizeof(sysctl_stat_interval),
1696 .mode = 0644,
1697 .proc_handler = proc_dointvec_jiffies,
1698 },
1699 {
1700 .procname = "stat_refresh",
1701 .data = NULL,
1702 .maxlen = 0,
1703 .mode = 0600,
1704 .proc_handler = vmstat_refresh,
1705 },
1706#endif
1707#ifdef CONFIG_MMU
1708 {
1709 .procname = "mmap_min_addr",
1710 .data = &dac_mmap_min_addr,
1711 .maxlen = sizeof(unsigned long),
1712 .mode = 0644,
1713 .proc_handler = mmap_min_addr_handler,
1714 },
1715#endif
1716#ifdef CONFIG_NUMA
1717 {
1718 .procname = "numa_zonelist_order",
1719 .data = &numa_zonelist_order,
1720 .maxlen = NUMA_ZONELIST_ORDER_LEN,
1721 .mode = 0644,
1722 .proc_handler = numa_zonelist_order_handler,
1723 },
1724#endif
1725#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1726 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1727 {
1728 .procname = "vdso_enabled",
1729#ifdef CONFIG_X86_32
1730 .data = &vdso32_enabled,
1731 .maxlen = sizeof(vdso32_enabled),
1732#else
1733 .data = &vdso_enabled,
1734 .maxlen = sizeof(vdso_enabled),
1735#endif
1736 .mode = 0644,
1737 .proc_handler = proc_dointvec,
1738 .extra1 = SYSCTL_ZERO,
1739 },
1740#endif
1741#ifdef CONFIG_HIGHMEM
1742 {
1743 .procname = "highmem_is_dirtyable",
1744 .data = &vm_highmem_is_dirtyable,
1745 .maxlen = sizeof(vm_highmem_is_dirtyable),
1746 .mode = 0644,
1747 .proc_handler = proc_dointvec_minmax,
1748 .extra1 = SYSCTL_ZERO,
1749 .extra2 = SYSCTL_ONE,
1750 },
1751#endif
1752#ifdef CONFIG_MEMORY_FAILURE
1753 {
1754 .procname = "memory_failure_early_kill",
1755 .data = &sysctl_memory_failure_early_kill,
1756 .maxlen = sizeof(sysctl_memory_failure_early_kill),
1757 .mode = 0644,
1758 .proc_handler = proc_dointvec_minmax,
1759 .extra1 = SYSCTL_ZERO,
1760 .extra2 = SYSCTL_ONE,
1761 },
1762 {
1763 .procname = "memory_failure_recovery",
1764 .data = &sysctl_memory_failure_recovery,
1765 .maxlen = sizeof(sysctl_memory_failure_recovery),
1766 .mode = 0644,
1767 .proc_handler = proc_dointvec_minmax,
1768 .extra1 = SYSCTL_ZERO,
1769 .extra2 = SYSCTL_ONE,
1770 },
1771#endif
1772 {
1773 .procname = "user_reserve_kbytes",
1774 .data = &sysctl_user_reserve_kbytes,
1775 .maxlen = sizeof(sysctl_user_reserve_kbytes),
1776 .mode = 0644,
1777 .proc_handler = proc_doulongvec_minmax,
1778 },
1779 {
1780 .procname = "admin_reserve_kbytes",
1781 .data = &sysctl_admin_reserve_kbytes,
1782 .maxlen = sizeof(sysctl_admin_reserve_kbytes),
1783 .mode = 0644,
1784 .proc_handler = proc_doulongvec_minmax,
1785 },
1786#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1787 {
1788 .procname = "mmap_rnd_bits",
1789 .data = &mmap_rnd_bits,
1790 .maxlen = sizeof(mmap_rnd_bits),
1791 .mode = 0600,
1792 .proc_handler = proc_dointvec_minmax,
1793 .extra1 = (void *)&mmap_rnd_bits_min,
1794 .extra2 = (void *)&mmap_rnd_bits_max,
1795 },
1796#endif
1797#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1798 {
1799 .procname = "mmap_rnd_compat_bits",
1800 .data = &mmap_rnd_compat_bits,
1801 .maxlen = sizeof(mmap_rnd_compat_bits),
1802 .mode = 0600,
1803 .proc_handler = proc_dointvec_minmax,
1804 .extra1 = (void *)&mmap_rnd_compat_bits_min,
1805 .extra2 = (void *)&mmap_rnd_compat_bits_max,
1806 },
1807#endif
1808#ifdef CONFIG_USERFAULTFD
1809 {
1810 .procname = "unprivileged_userfaultfd",
1811 .data = &sysctl_unprivileged_userfaultfd,
1812 .maxlen = sizeof(sysctl_unprivileged_userfaultfd),
1813 .mode = 0644,
1814 .proc_handler = proc_dointvec_minmax,
1815 .extra1 = SYSCTL_ZERO,
1816 .extra2 = SYSCTL_ONE,
1817 },
1818#endif
1819 { }
1820};
1821
1822static struct ctl_table fs_table[] = {
1823 {
1824 .procname = "inode-nr",
1825 .data = &inodes_stat,
1826 .maxlen = 2*sizeof(long),
1827 .mode = 0444,
1828 .proc_handler = proc_nr_inodes,
1829 },
1830 {
1831 .procname = "inode-state",
1832 .data = &inodes_stat,
1833 .maxlen = 7*sizeof(long),
1834 .mode = 0444,
1835 .proc_handler = proc_nr_inodes,
1836 },
1837 {
1838 .procname = "file-nr",
1839 .data = &files_stat,
1840 .maxlen = sizeof(files_stat),
1841 .mode = 0444,
1842 .proc_handler = proc_nr_files,
1843 },
1844 {
1845 .procname = "file-max",
1846 .data = &files_stat.max_files,
1847 .maxlen = sizeof(files_stat.max_files),
1848 .mode = 0644,
1849 .proc_handler = proc_doulongvec_minmax,
1850 .extra1 = &zero_ul,
1851 .extra2 = &long_max,
1852 },
1853 {
1854 .procname = "nr_open",
1855 .data = &sysctl_nr_open,
1856 .maxlen = sizeof(unsigned int),
1857 .mode = 0644,
1858 .proc_handler = proc_dointvec_minmax,
1859 .extra1 = &sysctl_nr_open_min,
1860 .extra2 = &sysctl_nr_open_max,
1861 },
1862 {
1863 .procname = "dentry-state",
1864 .data = &dentry_stat,
1865 .maxlen = 6*sizeof(long),
1866 .mode = 0444,
1867 .proc_handler = proc_nr_dentry,
1868 },
1869 {
1870 .procname = "overflowuid",
1871 .data = &fs_overflowuid,
1872 .maxlen = sizeof(int),
1873 .mode = 0644,
1874 .proc_handler = proc_dointvec_minmax,
1875 .extra1 = &minolduid,
1876 .extra2 = &maxolduid,
1877 },
1878 {
1879 .procname = "overflowgid",
1880 .data = &fs_overflowgid,
1881 .maxlen = sizeof(int),
1882 .mode = 0644,
1883 .proc_handler = proc_dointvec_minmax,
1884 .extra1 = &minolduid,
1885 .extra2 = &maxolduid,
1886 },
1887#ifdef CONFIG_FILE_LOCKING
1888 {
1889 .procname = "leases-enable",
1890 .data = &leases_enable,
1891 .maxlen = sizeof(int),
1892 .mode = 0644,
1893 .proc_handler = proc_dointvec,
1894 },
1895#endif
1896#ifdef CONFIG_DNOTIFY
1897 {
1898 .procname = "dir-notify-enable",
1899 .data = &dir_notify_enable,
1900 .maxlen = sizeof(int),
1901 .mode = 0644,
1902 .proc_handler = proc_dointvec,
1903 },
1904#endif
1905#ifdef CONFIG_MMU
1906#ifdef CONFIG_FILE_LOCKING
1907 {
1908 .procname = "lease-break-time",
1909 .data = &lease_break_time,
1910 .maxlen = sizeof(int),
1911 .mode = 0644,
1912 .proc_handler = proc_dointvec,
1913 },
1914#endif
1915#ifdef CONFIG_AIO
1916 {
1917 .procname = "aio-nr",
1918 .data = &aio_nr,
1919 .maxlen = sizeof(aio_nr),
1920 .mode = 0444,
1921 .proc_handler = proc_doulongvec_minmax,
1922 },
1923 {
1924 .procname = "aio-max-nr",
1925 .data = &aio_max_nr,
1926 .maxlen = sizeof(aio_max_nr),
1927 .mode = 0644,
1928 .proc_handler = proc_doulongvec_minmax,
1929 },
1930#endif /* CONFIG_AIO */
1931#ifdef CONFIG_INOTIFY_USER
1932 {
1933 .procname = "inotify",
1934 .mode = 0555,
1935 .child = inotify_table,
1936 },
1937#endif
1938#ifdef CONFIG_EPOLL
1939 {
1940 .procname = "epoll",
1941 .mode = 0555,
1942 .child = epoll_table,
1943 },
1944#endif
1945#endif
1946 {
1947 .procname = "protected_symlinks",
1948 .data = &sysctl_protected_symlinks,
1949 .maxlen = sizeof(int),
1950 .mode = 0600,
1951 .proc_handler = proc_dointvec_minmax,
1952 .extra1 = SYSCTL_ZERO,
1953 .extra2 = SYSCTL_ONE,
1954 },
1955 {
1956 .procname = "protected_hardlinks",
1957 .data = &sysctl_protected_hardlinks,
1958 .maxlen = sizeof(int),
1959 .mode = 0600,
1960 .proc_handler = proc_dointvec_minmax,
1961 .extra1 = SYSCTL_ZERO,
1962 .extra2 = SYSCTL_ONE,
1963 },
1964 {
1965 .procname = "protected_fifos",
1966 .data = &sysctl_protected_fifos,
1967 .maxlen = sizeof(int),
1968 .mode = 0600,
1969 .proc_handler = proc_dointvec_minmax,
1970 .extra1 = SYSCTL_ZERO,
1971 .extra2 = &two,
1972 },
1973 {
1974 .procname = "protected_regular",
1975 .data = &sysctl_protected_regular,
1976 .maxlen = sizeof(int),
1977 .mode = 0600,
1978 .proc_handler = proc_dointvec_minmax,
1979 .extra1 = SYSCTL_ZERO,
1980 .extra2 = &two,
1981 },
1982 {
1983 .procname = "suid_dumpable",
1984 .data = &suid_dumpable,
1985 .maxlen = sizeof(int),
1986 .mode = 0644,
1987 .proc_handler = proc_dointvec_minmax_coredump,
1988 .extra1 = SYSCTL_ZERO,
1989 .extra2 = &two,
1990 },
1991#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1992 {
1993 .procname = "binfmt_misc",
1994 .mode = 0555,
1995 .child = sysctl_mount_point,
1996 },
1997#endif
1998 {
1999 .procname = "pipe-max-size",
2000 .data = &pipe_max_size,
2001 .maxlen = sizeof(pipe_max_size),
2002 .mode = 0644,
2003 .proc_handler = proc_dopipe_max_size,
2004 },
2005 {
2006 .procname = "pipe-user-pages-hard",
2007 .data = &pipe_user_pages_hard,
2008 .maxlen = sizeof(pipe_user_pages_hard),
2009 .mode = 0644,
2010 .proc_handler = proc_doulongvec_minmax,
2011 },
2012 {
2013 .procname = "pipe-user-pages-soft",
2014 .data = &pipe_user_pages_soft,
2015 .maxlen = sizeof(pipe_user_pages_soft),
2016 .mode = 0644,
2017 .proc_handler = proc_doulongvec_minmax,
2018 },
2019 {
2020 .procname = "mount-max",
2021 .data = &sysctl_mount_max,
2022 .maxlen = sizeof(unsigned int),
2023 .mode = 0644,
2024 .proc_handler = proc_dointvec_minmax,
2025 .extra1 = SYSCTL_ONE,
2026 },
2027 { }
2028};
2029
2030static struct ctl_table debug_table[] = {
2031#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
2032 {
2033 .procname = "exception-trace",
2034 .data = &show_unhandled_signals,
2035 .maxlen = sizeof(int),
2036 .mode = 0644,
2037 .proc_handler = proc_dointvec
2038 },
2039#endif
2040#if defined(CONFIG_OPTPROBES)
2041 {
2042 .procname = "kprobes-optimization",
2043 .data = &sysctl_kprobes_optimization,
2044 .maxlen = sizeof(int),
2045 .mode = 0644,
2046 .proc_handler = proc_kprobes_optimization_handler,
2047 .extra1 = SYSCTL_ZERO,
2048 .extra2 = SYSCTL_ONE,
2049 },
2050#endif
2051 { }
2052};
2053
2054static struct ctl_table dev_table[] = {
2055 { }
2056};
2057
2058int __init sysctl_init(void)
2059{
2060 struct ctl_table_header *hdr;
2061
2062 hdr = register_sysctl_table(sysctl_base_table);
2063 kmemleak_not_leak(hdr);
2064 return 0;
2065}
2066
2067#endif /* CONFIG_SYSCTL */
2068
2069/*
2070 * /proc/sys support
2071 */
2072
2073#ifdef CONFIG_PROC_SYSCTL
2074
2075static int _proc_do_string(char *data, int maxlen, int write,
2076 char __user *buffer,
2077 size_t *lenp, loff_t *ppos)
2078{
2079 size_t len;
2080 char __user *p;
2081 char c;
2082
2083 if (!data || !maxlen || !*lenp) {
2084 *lenp = 0;
2085 return 0;
2086 }
2087
2088 if (write) {
2089 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
2090 /* Only continue writes not past the end of buffer. */
2091 len = strlen(data);
2092 if (len > maxlen - 1)
2093 len = maxlen - 1;
2094
2095 if (*ppos > len)
2096 return 0;
2097 len = *ppos;
2098 } else {
2099 /* Start writing from beginning of buffer. */
2100 len = 0;
2101 }
2102
2103 *ppos += *lenp;
2104 p = buffer;
2105 while ((p - buffer) < *lenp && len < maxlen - 1) {
2106 if (get_user(c, p++))
2107 return -EFAULT;
2108 if (c == 0 || c == '\n')
2109 break;
2110 data[len++] = c;
2111 }
2112 data[len] = 0;
2113 } else {
2114 len = strlen(data);
2115 if (len > maxlen)
2116 len = maxlen;
2117
2118 if (*ppos > len) {
2119 *lenp = 0;
2120 return 0;
2121 }
2122
2123 data += *ppos;
2124 len -= *ppos;
2125
2126 if (len > *lenp)
2127 len = *lenp;
2128 if (len)
2129 if (copy_to_user(buffer, data, len))
2130 return -EFAULT;
2131 if (len < *lenp) {
2132 if (put_user('\n', buffer + len))
2133 return -EFAULT;
2134 len++;
2135 }
2136 *lenp = len;
2137 *ppos += len;
2138 }
2139 return 0;
2140}
2141
2142static void warn_sysctl_write(struct ctl_table *table)
2143{
2144 pr_warn_once("%s wrote to %s when file position was not 0!\n"
2145 "This will not be supported in the future. To silence this\n"
2146 "warning, set kernel.sysctl_writes_strict = -1\n",
2147 current->comm, table->procname);
2148}
2149
2150/**
2151 * proc_first_pos_non_zero_ignore - check if first position is allowed
2152 * @ppos: file position
2153 * @table: the sysctl table
2154 *
2155 * Returns true if the first position is non-zero and the sysctl_writes_strict
2156 * mode indicates this is not allowed for numeric input types. String proc
2157 * handlers can ignore the return value.
2158 */
2159static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2160 struct ctl_table *table)
2161{
2162 if (!*ppos)
2163 return false;
2164
2165 switch (sysctl_writes_strict) {
2166 case SYSCTL_WRITES_STRICT:
2167 return true;
2168 case SYSCTL_WRITES_WARN:
2169 warn_sysctl_write(table);
2170 return false;
2171 default:
2172 return false;
2173 }
2174}
2175
2176/**
2177 * proc_dostring - read a string sysctl
2178 * @table: the sysctl table
2179 * @write: %TRUE if this is a write to the sysctl file
2180 * @buffer: the user buffer
2181 * @lenp: the size of the user buffer
2182 * @ppos: file position
2183 *
2184 * Reads/writes a string from/to the user buffer. If the kernel
2185 * buffer provided is not large enough to hold the string, the
2186 * string is truncated. The copied string is %NULL-terminated.
2187 * If the string is being read by the user process, it is copied
2188 * and a newline '\n' is added. It is truncated if the buffer is
2189 * not large enough.
2190 *
2191 * Returns 0 on success.
2192 */
2193int proc_dostring(struct ctl_table *table, int write,
2194 void __user *buffer, size_t *lenp, loff_t *ppos)
2195{
2196 if (write)
2197 proc_first_pos_non_zero_ignore(ppos, table);
2198
2199 return _proc_do_string((char *)(table->data), table->maxlen, write,
2200 (char __user *)buffer, lenp, ppos);
2201}
2202
/*
 * Advance *buf past leading whitespace, looking at no more than *size
 * bytes, and reduce *size by the number of bytes skipped.  The buffer is
 * treated as a sized region; a NUL terminator is not required.
 */
static void proc_skip_spaces(char **buf, size_t *size)
{
	char *cur = *buf;
	size_t remaining = *size;

	while (remaining && isspace(*cur)) {
		cur++;
		remaining--;
	}

	*buf = cur;
	*size = remaining;
}
2212
/*
 * Advance *buf past a leading run of the byte @v, looking at no more than
 * *size bytes, and reduce *size by the number of bytes skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	char *cur = *buf;
	size_t remaining = *size;

	while (remaining && *cur == v) {
		cur++;
		remaining--;
	}

	*buf = cur;
	*size = remaining;
}
2222
2223/**
2224 * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
2225 * fail on overflow
2226 *
2227 * @cp: kernel buffer containing the string to parse
2228 * @endp: pointer to store the trailing characters
2229 * @base: the base to use
2230 * @res: where the parsed integer will be stored
2231 *
2232 * In case of success 0 is returned and @res will contain the parsed integer,
2233 * @endp will hold any trailing characters.
2234 * This function will fail the parse on overflow. If there wasn't an overflow
2235 * the function will defer the decision what characters count as invalid to the
2236 * caller.
2237 */
2238static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
2239 unsigned long *res)
2240{
2241 unsigned long long result;
2242 unsigned int rv;
2243
2244 cp = _parse_integer_fixup_radix(cp, &base);
2245 rv = _parse_integer(cp, base, &result);
2246 if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
2247 return -ERANGE;
2248
2249 cp += rv;
2250
2251 if (endp)
2252 *endp = (char *)cp;
2253
2254 *res = (unsigned long)result;
2255 return 0;
2256}
2257
2258#define TMPBUFLEN 22
2259/**
2260 * proc_get_long - reads an ASCII formatted integer from a user buffer
2261 *
2262 * @buf: a kernel buffer
2263 * @size: size of the kernel buffer
2264 * @val: this is where the number will be stored
2265 * @neg: set to %TRUE if number is negative
2266 * @perm_tr: a vector which contains the allowed trailers
2267 * @perm_tr_len: size of the perm_tr vector
2268 * @tr: pointer to store the trailer character
2269 *
2270 * In case of success %0 is returned and @buf and @size are updated with
2271 * the amount of bytes read. If @tr is non-NULL and a trailing
2272 * character exists (size is non-zero after returning from this
2273 * function), @tr is updated with the trailing character.
2274 */
2275static int proc_get_long(char **buf, size_t *size,
2276 unsigned long *val, bool *neg,
2277 const char *perm_tr, unsigned perm_tr_len, char *tr)
2278{
2279 char *p, tmp[TMPBUFLEN];
2280 ssize_t len = *size;
2281
2282 if (len <= 0)
2283 return -EINVAL;
2284
2285 if (len > TMPBUFLEN - 1)
2286 len = TMPBUFLEN - 1;
2287
2288 memcpy(tmp, *buf, len);
2289
2290 tmp[len] = 0;
2291 p = tmp;
2292 if (*p == '-' && *size > 1) {
2293 *neg = true;
2294 p++;
2295 } else
2296 *neg = false;
2297 if (!isdigit(*p))
2298 return -EINVAL;
2299
2300 if (strtoul_lenient(p, &p, 0, val))
2301 return -EINVAL;
2302
2303 len = p - tmp;
2304
2305 /* We don't know if the next char is whitespace thus we may accept
2306 * invalid integers (e.g. 1234...a) or two integers instead of one
2307 * (e.g. 123...1). So lets not allow such large numbers. */
2308 if (len == TMPBUFLEN - 1)
2309 return -EINVAL;
2310
2311 if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
2312 return -EINVAL;
2313
2314 if (tr && (len < *size))
2315 *tr = *p;
2316
2317 *buf += len;
2318 *size -= len;
2319
2320 return 0;
2321}
2322
2323/**
2324 * proc_put_long - converts an integer to a decimal ASCII formatted string
2325 *
2326 * @buf: the user buffer
2327 * @size: the size of the user buffer
2328 * @val: the integer to be converted
2329 * @neg: sign of the number, %TRUE for negative
2330 *
2331 * In case of success %0 is returned and @buf and @size are updated with
2332 * the amount of bytes written.
2333 */
2334static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2335 bool neg)
2336{
2337 int len;
2338 char tmp[TMPBUFLEN], *p = tmp;
2339
2340 sprintf(p, "%s%lu", neg ? "-" : "", val);
2341 len = strlen(tmp);
2342 if (len > *size)
2343 len = *size;
2344 if (copy_to_user(*buf, tmp, len))
2345 return -EFAULT;
2346 *size -= len;
2347 *buf += len;
2348 return 0;
2349}
2350#undef TMPBUFLEN
2351
2352static int proc_put_char(void __user **buf, size_t *size, char c)
2353{
2354 if (*size) {
2355 char __user **buffer = (char __user **)buf;
2356 if (put_user(c, *buffer))
2357 return -EFAULT;
2358 (*size)--, (*buffer)++;
2359 *buf = *buffer;
2360 }
2361 return 0;
2362}
2363
/*
 * Default conversion between an int and its sign/magnitude form.
 *
 * write != 0: combine *negp and *lvalp into *valp; returns -EINVAL (and
 *             leaves *valp untouched) when the magnitude does not fit
 *             into an int of that sign.
 * write == 0: split *valp into *negp and *lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	unsigned long magnitude, limit;
	bool negative;

	if (!write) {
		const int cur = *valp;

		negative = cur < 0;
		*negp = negative;
		*lvalp = negative ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	negative = *negp;
	magnitude = *lvalp;

	/* The negative range reaches one further than the positive one. */
	limit = (unsigned long) INT_MAX + (negative ? 1 : 0);
	if (magnitude > limit)
		return -EINVAL;

	*valp = negative ? -magnitude : magnitude;
	return 0;
}
2390
/*
 * Default conversion between an unsigned int and an unsigned long.
 *
 * write != 0: store *lvalp into *valp; returns -EINVAL (and leaves *valp
 *             untouched) when the value exceeds UINT_MAX.
 * write == 0: widen *valp into *lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)*valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	*valp = *lvalp;
	return 0;
}
2405
/*
 * Characters allowed to follow a number.  This is a plain byte array used
 * together with sizeof(), not a C string: there is no NUL terminator.
 */
static const char proc_wspace_sep[] = {
	' ', '\t', '\n',
};
2407
/*
 * __do_proc_dointvec - shared read/write engine for int-vector sysctls
 * @tbl_data: array of ints to read from or write to
 * @table:    sysctl table entry; table->maxlen gives the array size in bytes
 * @write:    non-zero when userspace is writing
 * @buffer:   user buffer holding (write) or receiving (read) ASCII text
 * @lenp:     in: size of @buffer; out: number of bytes consumed/produced
 * @ppos:     file position; advanced by the final *@lenp
 * @conv:     converter between (sign, magnitude) and the stored int;
 *            do_proc_dointvec_conv is used when NULL
 * @data:     opaque cookie passed through to @conv
 *
 * On read, values are emitted tab-separated and followed by a newline if
 * space remains. On write, at most PAGE_SIZE - 1 bytes are copied in and
 * parsed as whitespace-separated numbers.
 *
 * Returns 0 on success or a negative errno. A write that stores no value
 * at all fails with the pending error, or -EINVAL if there is none.
 */
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
		  int write, void __user *buffer,
		  size_t *lenp, loff_t *ppos,
		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
			      int write, void *data),
		  void *data)
{
	int *i, vleft, first = 1, err = 0;
	size_t left;
	char *kbuf = NULL, *p;

	/* Nothing to do: no data, empty buffer, or a read past offset 0. */
	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	i = (int *) tbl_data;
	vleft = table->maxlen / sizeof(*i);	/* number of ints in the array */
	left = *lenp;

	if (!conv)
		conv = do_proc_dointvec_conv;

	if (write) {
		/* When the helper says so, skip parsing but still advance *ppos. */
		if (proc_first_pos_non_zero_ignore(ppos, table))
			goto out;

		/* Cap the copy so the NUL-terminated buffer fits in one page. */
		if (left > PAGE_SIZE - 1)
			left = PAGE_SIZE - 1;
		p = kbuf = memdup_user_nul(buffer, left);
		if (IS_ERR(kbuf))
			return PTR_ERR(kbuf);
	}

	/* "first" stays set until one element has been fully processed. */
	for (; left && vleft--; i++, first=0) {
		unsigned long lval;
		bool neg;

		if (write) {
			proc_skip_spaces(&p, &left);

			if (!left)
				break;
			err = proc_get_long(&p, &left, &lval, &neg,
					     proc_wspace_sep,
					     sizeof(proc_wspace_sep), NULL);
			if (err)
				break;
			/* Any converter failure is reported as -EINVAL. */
			if (conv(&neg, &lval, i, 1, data)) {
				err = -EINVAL;
				break;
			}
		} else {
			if (conv(&neg, &lval, i, 0, data)) {
				err = -EINVAL;
				break;
			}
			/* Separate successive values with a tab. */
			if (!first)
				err = proc_put_char(&buffer, &left, '\t');
			if (err)
				break;
			err = proc_put_long(&buffer, &left, lval, neg);
			if (err)
				break;
		}
	}

	/* Terminate read output with a newline when at least one value fit. */
	if (!write && !first && left && !err)
		err = proc_put_char(&buffer, &left, '\n');
	if (write && !err && left)
		proc_skip_spaces(&p, &left);
	if (write) {
		kfree(kbuf);
		/* No element was stored: report the error, or -EINVAL if none. */
		if (first)
			return err ? : -EINVAL;
	}
	/* Report how much of the buffer was actually used. */
	*lenp -= left;
out:
	*ppos += *lenp;
	return err;
}
2489
2490static int do_proc_dointvec(struct ctl_table *table, int write,
2491 void __user *buffer, size_t *lenp, loff_t *ppos,
2492 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2493 int write, void *data),
2494 void *data)
2495{
2496 return __do_proc_dointvec(table->data, table, write,
2497 buffer, lenp, ppos, conv, data);
2498}
2499
2500static int do_proc_douintvec_w(unsigned int *tbl_data,
2501 struct ctl_table *table,
2502 void __user *buffer,
2503 size_t *lenp, loff_t *ppos,
2504 int (*conv)(unsigned long *lvalp,
2505 unsigned int *valp,
2506 int write, void *data),
2507 void *data)
2508{
2509 unsigned long lval;
2510 int err = 0;
2511 size_t left;
2512 bool neg;
2513 char *kbuf = NULL, *p;
2514
2515 left = *lenp;
2516
2517 if (proc_first_pos_non_zero_ignore(ppos, table))
2518 goto bail_early;
2519
2520 if (left > PAGE_SIZE - 1)
2521 left = PAGE_SIZE - 1;
2522
2523 p = kbuf = memdup_user_nul(buffer, left);
2524 if (IS_ERR(kbuf))
2525 return -EINVAL;
2526
2527 proc_skip_spaces(&p, &left);
2528 if (!left) {
2529 err = -EINVAL;
2530 goto out_free;
2531 }
2532
2533 err = proc_get_long(&p, &left, &lval, &neg,
2534 proc_wspace_sep,
2535 sizeof(proc_wspace_sep), NULL);
2536 if (err || neg) {
2537 err = -EINVAL;
2538 goto out_free;
2539 }
2540
2541 if (conv(&lval, tbl_data, 1, data)) {
2542 err = -EINVAL;
2543 goto out_free;
2544 }
2545
2546 if (!err && left)
2547 proc_skip_spaces(&p, &left);
2548
2549out_free:
2550 kfree(kbuf);
2551 if (err)
2552 return -EINVAL;
2553
2554 return 0;
2555
2556 /* This is in keeping with old __do_proc_dointvec() */
2557bail_early:
2558 *ppos += *lenp;
2559 return err;
2560}
2561
2562static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2563 size_t *lenp, loff_t *ppos,
2564 int (*conv)(unsigned long *lvalp,
2565 unsigned int *valp,
2566 int write, void *data),
2567 void *data)
2568{
2569 unsigned long lval;
2570 int err = 0;
2571 size_t left;
2572
2573 left = *lenp;
2574
2575 if (conv(&lval, tbl_data, 0, data)) {
2576 err = -EINVAL;
2577 goto out;
2578 }
2579
2580 err = proc_put_long(&buffer, &left, lval, false);
2581 if (err || !left)
2582 goto out;
2583
2584 err = proc_put_char(&buffer, &left, '\n');
2585
2586out:
2587 *lenp -= left;
2588 *ppos += *lenp;
2589
2590 return err;
2591}
2592
2593static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2594 int write, void __user *buffer,
2595 size_t *lenp, loff_t *ppos,
2596 int (*conv)(unsigned long *lvalp,
2597 unsigned int *valp,
2598 int write, void *data),
2599 void *data)
2600{
2601 unsigned int *i, vleft;
2602
2603 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2604 *lenp = 0;
2605 return 0;
2606 }
2607
2608 i = (unsigned int *) tbl_data;
2609 vleft = table->maxlen / sizeof(*i);
2610
2611 /*
2612 * Arrays are not supported, keep this simple. *Do not* add
2613 * support for them.
2614 */
2615 if (vleft != 1) {
2616 *lenp = 0;
2617 return -EINVAL;
2618 }
2619
2620 if (!conv)
2621 conv = do_proc_douintvec_conv;
2622
2623 if (write)
2624 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2625 conv, data);
2626 return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2627}
2628
2629static int do_proc_douintvec(struct ctl_table *table, int write,
2630 void __user *buffer, size_t *lenp, loff_t *ppos,
2631 int (*conv)(unsigned long *lvalp,
2632 unsigned int *valp,
2633 int write, void *data),
2634 void *data)
2635{
2636 return __do_proc_douintvec(table->data, table, write,
2637 buffer, lenp, ppos, conv, data);
2638}
2639
2640/**
2641 * proc_dointvec - read a vector of integers
2642 * @table: the sysctl table
2643 * @write: %TRUE if this is a write to the sysctl file
2644 * @buffer: the user buffer
2645 * @lenp: the size of the user buffer
2646 * @ppos: file position
2647 *
2648 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2649 * values from/to the user buffer, treated as an ASCII string.
2650 *
2651 * Returns 0 on success.
2652 */
2653int proc_dointvec(struct ctl_table *table, int write,
2654 void __user *buffer, size_t *lenp, loff_t *ppos)
2655{
2656 return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2657}
2658
2659/**
2660 * proc_douintvec - read a vector of unsigned integers
2661 * @table: the sysctl table
2662 * @write: %TRUE if this is a write to the sysctl file
2663 * @buffer: the user buffer
2664 * @lenp: the size of the user buffer
2665 * @ppos: file position
2666 *
2667 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2668 * values from/to the user buffer, treated as an ASCII string.
2669 *
2670 * Returns 0 on success.
2671 */
2672int proc_douintvec(struct ctl_table *table, int write,
2673 void __user *buffer, size_t *lenp, loff_t *ppos)
2674{
2675 return do_proc_douintvec(table, write, buffer, lenp, ppos,
2676 do_proc_douintvec_conv, NULL);
2677}
2678
2679/*
2680 * Taint values can only be increased
2681 * This means we can safely use a temporary.
2682 */
2683static int proc_taint(struct ctl_table *table, int write,
2684 void __user *buffer, size_t *lenp, loff_t *ppos)
2685{
2686 struct ctl_table t;
2687 unsigned long tmptaint = get_taint();
2688 int err;
2689
2690 if (write && !capable(CAP_SYS_ADMIN))
2691 return -EPERM;
2692
2693 t = *table;
2694 t.data = &tmptaint;
2695 err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2696 if (err < 0)
2697 return err;
2698
2699 if (write) {
2700 /*
2701 * Poor man's atomic or. Not worth adding a primitive
2702 * to everyone's atomic.h for this
2703 */
2704 int i;
2705 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2706 if ((tmptaint >> i) & 1)
2707 add_taint(i, LOCKDEP_STILL_OK);
2708 }
2709 }
2710
2711 return err;
2712}
2713
#ifdef CONFIG_PRINTK
/*
 * proc_dointvec_minmax_sysadmin - proc_dointvec_minmax() whose writes are
 * restricted to CAP_SYS_ADMIN
 *
 * Reads are always forwarded. Writes without the capability fail with
 * -EPERM.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	if (!write || capable(CAP_SYS_ADMIN))
		return proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	return -EPERM;
}
#endif
2724
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};
2738
2739static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
2740 int *valp,
2741 int write, void *data)
2742{
2743 int tmp, ret;
2744 struct do_proc_dointvec_minmax_conv_param *param = data;
2745 /*
2746 * If writing, first do so via a temporary local int so we can
2747 * bounds-check it before touching *valp.
2748 */
2749 int *ip = write ? &tmp : valp;
2750
2751 ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
2752 if (ret)
2753 return ret;
2754
2755 if (write) {
2756 if ((param->min && *param->min > tmp) ||
2757 (param->max && *param->max < tmp))
2758 return -EINVAL;
2759 *valp = tmp;
2760 }
2761
2762 return 0;
2763}
2764
2765/**
2766 * proc_dointvec_minmax - read a vector of integers with min/max values
2767 * @table: the sysctl table
2768 * @write: %TRUE if this is a write to the sysctl file
2769 * @buffer: the user buffer
2770 * @lenp: the size of the user buffer
2771 * @ppos: file position
2772 *
2773 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2774 * values from/to the user buffer, treated as an ASCII string.
2775 *
2776 * This routine will ensure the values are within the range specified by
2777 * table->extra1 (min) and table->extra2 (max).
2778 *
2779 * Returns 0 on success or -EINVAL on write when the range check fails.
2780 */
2781int proc_dointvec_minmax(struct ctl_table *table, int write,
2782 void __user *buffer, size_t *lenp, loff_t *ppos)
2783{
2784 struct do_proc_dointvec_minmax_conv_param param = {
2785 .min = (int *) table->extra1,
2786 .max = (int *) table->extra2,
2787 };
2788 return do_proc_dointvec(table, write, buffer, lenp, ppos,
2789 do_proc_dointvec_minmax_conv, &param);
2790}
2791
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * The do_proc_douintvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_douintvec_minmax() handler.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};
2805
2806static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
2807 unsigned int *valp,
2808 int write, void *data)
2809{
2810 int ret;
2811 unsigned int tmp;
2812 struct do_proc_douintvec_minmax_conv_param *param = data;
2813 /* write via temporary local uint for bounds-checking */
2814 unsigned int *up = write ? &tmp : valp;
2815
2816 ret = do_proc_douintvec_conv(lvalp, up, write, data);
2817 if (ret)
2818 return ret;
2819
2820 if (write) {
2821 if ((param->min && *param->min > tmp) ||
2822 (param->max && *param->max < tmp))
2823 return -ERANGE;
2824
2825 *valp = tmp;
2826 }
2827
2828 return 0;
2829}
2830
2831/**
2832 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2833 * @table: the sysctl table
2834 * @write: %TRUE if this is a write to the sysctl file
2835 * @buffer: the user buffer
2836 * @lenp: the size of the user buffer
2837 * @ppos: file position
2838 *
2839 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2840 * values from/to the user buffer, treated as an ASCII string. Negative
2841 * strings are not allowed.
2842 *
2843 * This routine will ensure the values are within the range specified by
2844 * table->extra1 (min) and table->extra2 (max). There is a final sanity
2845 * check for UINT_MAX to avoid having to support wrap around uses from
2846 * userspace.
2847 *
2848 * Returns 0 on success or -ERANGE on write when the range check fails.
2849 */
2850int proc_douintvec_minmax(struct ctl_table *table, int write,
2851 void __user *buffer, size_t *lenp, loff_t *ppos)
2852{
2853 struct do_proc_douintvec_minmax_conv_param param = {
2854 .min = (unsigned int *) table->extra1,
2855 .max = (unsigned int *) table->extra2,
2856 };
2857 return do_proc_douintvec(table, write, buffer, lenp, ppos,
2858 do_proc_douintvec_minmax_conv, &param);
2859}
2860
2861static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
2862 unsigned int *valp,
2863 int write, void *data)
2864{
2865 if (write) {
2866 unsigned int val;
2867
2868 val = round_pipe_size(*lvalp);
2869 if (val == 0)
2870 return -EINVAL;
2871
2872 *valp = val;
2873 } else {
2874 unsigned int val = *valp;
2875 *lvalp = (unsigned long) val;
2876 }
2877
2878 return 0;
2879}
2880
2881static int proc_dopipe_max_size(struct ctl_table *table, int write,
2882 void __user *buffer, size_t *lenp, loff_t *ppos)
2883{
2884 return do_proc_douintvec(table, write, buffer, lenp, ppos,
2885 do_proc_dopipe_max_size_conv, NULL);
2886}
2887
/*
 * validate_coredump_safety - warn about an unsafe core_pattern
 *
 * With CONFIG_COREDUMP, logs a warning when suid_dumpable is
 * SUID_DUMP_ROOT while core_pattern is neither an absolute path ('/')
 * nor a pipe handler ('|'). Compiles to nothing otherwise.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;
	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
2901
2902static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2903 void __user *buffer, size_t *lenp, loff_t *ppos)
2904{
2905 int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2906 if (!error)
2907 validate_coredump_safety();
2908 return error;
2909}
2910
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring_coredump - proc_dostring() followed by a coredump safety
 * check
 *
 * The check runs only when the underlying handler succeeds; its result
 * is returned unchanged.
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dostring(table, write, buffer, lenp, ppos);
	if (ret)
		return ret;

	validate_coredump_safety();
	return 0;
}
#endif
2921
2922static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2923 void __user *buffer,
2924 size_t *lenp, loff_t *ppos,
2925 unsigned long convmul,
2926 unsigned long convdiv)
2927{
2928 unsigned long *i, *min, *max;
2929 int vleft, first = 1, err = 0;
2930 size_t left;
2931 char *kbuf = NULL, *p;
2932
2933 if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2934 *lenp = 0;
2935 return 0;
2936 }
2937
2938 i = (unsigned long *) data;
2939 min = (unsigned long *) table->extra1;
2940 max = (unsigned long *) table->extra2;
2941 vleft = table->maxlen / sizeof(unsigned long);
2942 left = *lenp;
2943
2944 if (write) {
2945 if (proc_first_pos_non_zero_ignore(ppos, table))
2946 goto out;
2947
2948 if (left > PAGE_SIZE - 1)
2949 left = PAGE_SIZE - 1;
2950 p = kbuf = memdup_user_nul(buffer, left);
2951 if (IS_ERR(kbuf))
2952 return PTR_ERR(kbuf);
2953 }
2954
2955 for (; left && vleft--; i++, first = 0) {
2956 unsigned long val;
2957
2958 if (write) {
2959 bool neg;
2960
2961 proc_skip_spaces(&p, &left);
2962 if (!left)
2963 break;
2964
2965 err = proc_get_long(&p, &left, &val, &neg,
2966 proc_wspace_sep,
2967 sizeof(proc_wspace_sep), NULL);
2968 if (err)
2969 break;
2970 if (neg)
2971 continue;
2972 val = convmul * val / convdiv;
2973 if ((min && val < *min) || (max && val > *max)) {
2974 err = -EINVAL;
2975 break;
2976 }
2977 *i = val;
2978 } else {
2979 val = convdiv * (*i) / convmul;
2980 if (!first) {
2981 err = proc_put_char(&buffer, &left, '\t');
2982 if (err)
2983 break;
2984 }
2985 err = proc_put_long(&buffer, &left, val, false);
2986 if (err)
2987 break;
2988 }
2989 }
2990
2991 if (!write && !first && left && !err)
2992 err = proc_put_char(&buffer, &left, '\n');
2993 if (write && !err)
2994 proc_skip_spaces(&p, &left);
2995 if (write) {
2996 kfree(kbuf);
2997 if (first)
2998 return err ? : -EINVAL;
2999 }
3000 *lenp -= left;
3001out:
3002 *ppos += *lenp;
3003 return err;
3004}
3005
3006static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
3007 void __user *buffer,
3008 size_t *lenp, loff_t *ppos,
3009 unsigned long convmul,
3010 unsigned long convdiv)
3011{
3012 return __do_proc_doulongvec_minmax(table->data, table, write,
3013 buffer, lenp, ppos, convmul, convdiv);
3014}
3015
3016/**
3017 * proc_doulongvec_minmax - read a vector of long integers with min/max values
3018 * @table: the sysctl table
3019 * @write: %TRUE if this is a write to the sysctl file
3020 * @buffer: the user buffer
3021 * @lenp: the size of the user buffer
3022 * @ppos: file position
3023 *
3024 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
3025 * values from/to the user buffer, treated as an ASCII string.
3026 *
3027 * This routine will ensure the values are within the range specified by
3028 * table->extra1 (min) and table->extra2 (max).
3029 *
3030 * Returns 0 on success.
3031 */
3032int proc_doulongvec_minmax(struct ctl_table *table, int write,
3033 void __user *buffer, size_t *lenp, loff_t *ppos)
3034{
3035 return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
3036}
3037
3038/**
3039 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
3040 * @table: the sysctl table
3041 * @write: %TRUE if this is a write to the sysctl file
3042 * @buffer: the user buffer
3043 * @lenp: the size of the user buffer
3044 * @ppos: file position
3045 *
3046 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
3047 * values from/to the user buffer, treated as an ASCII string. The values
3048 * are treated as milliseconds, and converted to jiffies when they are stored.
3049 *
3050 * This routine will ensure the values are within the range specified by
3051 * table->extra1 (min) and table->extra2 (max).
3052 *
3053 * Returns 0 on success.
3054 */
3055int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3056 void __user *buffer,
3057 size_t *lenp, loff_t *ppos)
3058{
3059 return do_proc_doulongvec_minmax(table, write, buffer,
3060 lenp, ppos, HZ, 1000l);
3061}
3062
3063
3064static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
3065 int *valp,
3066 int write, void *data)
3067{
3068 if (write) {
3069 if (*lvalp > INT_MAX / HZ)
3070 return 1;
3071 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
3072 } else {
3073 int val = *valp;
3074 unsigned long lval;
3075 if (val < 0) {
3076 *negp = true;
3077 lval = -(unsigned long)val;
3078 } else {
3079 *negp = false;
3080 lval = (unsigned long)val;
3081 }
3082 *lvalp = lval / HZ;
3083 }
3084 return 0;
3085}
3086
3087static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
3088 int *valp,
3089 int write, void *data)
3090{
3091 if (write) {
3092 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
3093 return 1;
3094 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
3095 } else {
3096 int val = *valp;
3097 unsigned long lval;
3098 if (val < 0) {
3099 *negp = true;
3100 lval = -(unsigned long)val;
3101 } else {
3102 *negp = false;
3103 lval = (unsigned long)val;
3104 }
3105 *lvalp = jiffies_to_clock_t(lval);
3106 }
3107 return 0;
3108}
3109
3110static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
3111 int *valp,
3112 int write, void *data)
3113{
3114 if (write) {
3115 unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
3116
3117 if (jif > INT_MAX)
3118 return 1;
3119 *valp = (int)jif;
3120 } else {
3121 int val = *valp;
3122 unsigned long lval;
3123 if (val < 0) {
3124 *negp = true;
3125 lval = -(unsigned long)val;
3126 } else {
3127 *negp = false;
3128 lval = (unsigned long)val;
3129 }
3130 *lvalp = jiffies_to_msecs(lval);
3131 }
3132 return 0;
3133}
3134
3135/**
3136 * proc_dointvec_jiffies - read a vector of integers as seconds
3137 * @table: the sysctl table
3138 * @write: %TRUE if this is a write to the sysctl file
3139 * @buffer: the user buffer
3140 * @lenp: the size of the user buffer
3141 * @ppos: file position
3142 *
3143 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3144 * values from/to the user buffer, treated as an ASCII string.
3145 * The values read are assumed to be in seconds, and are converted into
3146 * jiffies.
3147 *
3148 * Returns 0 on success.
3149 */
3150int proc_dointvec_jiffies(struct ctl_table *table, int write,
3151 void __user *buffer, size_t *lenp, loff_t *ppos)
3152{
3153 return do_proc_dointvec(table,write,buffer,lenp,ppos,
3154 do_proc_dointvec_jiffies_conv,NULL);
3155}
3156
3157/**
3158 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
3159 * @table: the sysctl table
3160 * @write: %TRUE if this is a write to the sysctl file
3161 * @buffer: the user buffer
3162 * @lenp: the size of the user buffer
3163 * @ppos: pointer to the file position
3164 *
3165 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3166 * values from/to the user buffer, treated as an ASCII string.
3167 * The values read are assumed to be in 1/USER_HZ seconds, and
3168 * are converted into jiffies.
3169 *
3170 * Returns 0 on success.
3171 */
3172int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3173 void __user *buffer, size_t *lenp, loff_t *ppos)
3174{
3175 return do_proc_dointvec(table,write,buffer,lenp,ppos,
3176 do_proc_dointvec_userhz_jiffies_conv,NULL);
3177}
3178
3179/**
3180 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
3181 * @table: the sysctl table
3182 * @write: %TRUE if this is a write to the sysctl file
3183 * @buffer: the user buffer
3184 * @lenp: the size of the user buffer
3185 * @ppos: file position
3186 * @ppos: the current position in the file
3187 *
3188 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3189 * values from/to the user buffer, treated as an ASCII string.
3190 * The values read are assumed to be in 1/1000 seconds, and
3191 * are converted into jiffies.
3192 *
3193 * Returns 0 on success.
3194 */
3195int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3196 void __user *buffer, size_t *lenp, loff_t *ppos)
3197{
3198 return do_proc_dointvec(table, write, buffer, lenp, ppos,
3199 do_proc_dointvec_ms_jiffies_conv, NULL);
3200}
3201
3202static int proc_do_cad_pid(struct ctl_table *table, int write,
3203 void __user *buffer, size_t *lenp, loff_t *ppos)
3204{
3205 struct pid *new_pid;
3206 pid_t tmp;
3207 int r;
3208
3209 tmp = pid_vnr(cad_pid);
3210
3211 r = __do_proc_dointvec(&tmp, table, write, buffer,
3212 lenp, ppos, NULL, NULL);
3213 if (r || !write)
3214 return r;
3215
3216 new_pid = find_get_pid(tmp);
3217 if (!new_pid)
3218 return -ESRCH;
3219
3220 put_pid(xchg(&cad_pid, new_pid));
3221 return 0;
3222}
3223
/**
 * proc_do_large_bitmap - read/write from/to a large bitmap
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * The bitmap is stored at table->data and the bitmap length (in bits)
 * in table->maxlen.
 *
 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
 * large bitmaps may be represented in a compact manner. Writing into
 * the file will clear the bitmap then update it with the given input.
 * A write at a non-zero file position ORs the parsed bits into the
 * existing bitmap instead of replacing it.
 *
 * Returns 0 on success.
 */
int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err = 0;
	bool first = 1;
	size_t left = *lenp;
	unsigned long bitmap_len = table->maxlen;
	unsigned long *bitmap = *(unsigned long **) table->data;
	unsigned long *tmp_bitmap = NULL;
	/* Terminators accepted after the first and second number of a range. */
	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;

	/* Nothing to do: no bitmap, empty buffer, or a read past offset 0. */
	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	if (write) {
		char *kbuf, *p;
		size_t skipped = 0;

		if (left > PAGE_SIZE - 1) {
			left = PAGE_SIZE - 1;
			/* How much of the buffer we'll skip this pass */
			skipped = *lenp - left;
		}

		p = kbuf = memdup_user_nul(buffer, left);
		if (IS_ERR(kbuf))
			return PTR_ERR(kbuf);

		/* Parse into a scratch bitmap; the real one changes only on success. */
		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
		if (!tmp_bitmap) {
			kfree(kbuf);
			return -ENOMEM;
		}
		proc_skip_char(&p, &left, '\n');
		while (!err && left) {
			unsigned long val_a, val_b;
			bool neg;
			size_t saved_left;

			/* In case we stop parsing mid-number, we can reset */
			saved_left = left;
			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
					     sizeof(tr_a), &c);
			/*
			 * If we consumed the entirety of a truncated buffer or
			 * only one char is left (may be a "-"), then stop here,
			 * reset, & come back for more.
			 */
			if ((left <= 1) && skipped) {
				left = saved_left;
				break;
			}

			if (err)
				break;
			if (val_a >= bitmap_len || neg) {
				err = -EINVAL;
				break;
			}

			/* A lone number is the range val_a-val_a. */
			val_b = val_a;
			/* Step over the terminator that ended the number. */
			if (left) {
				p++;
				left--;
			}

			if (c == '-') {
				err = proc_get_long(&p, &left, &val_b,
						     &neg, tr_b, sizeof(tr_b),
						     &c);
				/*
				 * If we consumed all of a truncated buffer,
				 * then stop here, reset, & come back for more.
				 */
				if (!left && skipped) {
					left = saved_left;
					break;
				}

				if (err)
					break;
				if (val_b >= bitmap_len || neg ||
				    val_a > val_b) {
					err = -EINVAL;
					break;
				}
				if (left) {
					p++;
					left--;
				}
			}

			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
			first = 0;
			proc_skip_char(&p, &left, '\n');
		}
		kfree(kbuf);
		/* Bytes beyond the page-sized copy count as not consumed. */
		left += skipped;
	} else {
		unsigned long bit_a, bit_b = 0;

		/* Emit each run of set bits as "a" or "a-b", comma-separated. */
		while (left) {
			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
			if (bit_a >= bitmap_len)
				break;
			bit_b = find_next_zero_bit(bitmap, bitmap_len,
						   bit_a + 1) - 1;

			if (!first) {
				err = proc_put_char(&buffer, &left, ',');
				if (err)
					break;
			}
			err = proc_put_long(&buffer, &left, bit_a, false);
			if (err)
				break;
			if (bit_a != bit_b) {
				err = proc_put_char(&buffer, &left, '-');
				if (err)
					break;
				err = proc_put_long(&buffer, &left, bit_b, false);
				if (err)
					break;
			}

			first = 0; bit_b++;
		}
		if (!err)
			err = proc_put_char(&buffer, &left, '\n');
	}

	if (!err) {
		if (write) {
			/* Continuation writes accumulate; a write at offset 0 replaces. */
			if (*ppos)
				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
			else
				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
		}
		*lenp -= left;
		*ppos += *lenp;
	}

	bitmap_free(tmp_bitmap);
	return err;
}
3388
3389#else /* CONFIG_PROC_SYSCTL */
3390
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dostring(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3396
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3402
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_douintvec(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3408
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_minmax(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3414
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_douintvec_minmax(struct ctl_table *table, int write,
			  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3420
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_jiffies(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3426
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3432
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3438
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_doulongvec_minmax(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3444
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
				      void __user *buffer,
				      size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3451
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3457
3458#endif /* CONFIG_PROC_SYSCTL */
3459
#if defined(CONFIG_SYSCTL)
/*
 * proc_do_static_key - expose a static key as a 0/1 sysctl
 * @table: the sysctl table; table->data points at the struct static_key
 * @write: non-zero when userspace is writing
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * The key's enabled state is copied into a local int and handed to
 * proc_dointvec_minmax() through a temporary table bounded to
 * SYSCTL_ZERO..SYSCTL_ONE. After a successful write the key is enabled or
 * disabled to match. A function-local mutex serialises the sequence.
 *
 * Writes require CAP_SYS_ADMIN (-EPERM otherwise). Returns the result of
 * proc_dointvec_minmax().
 */
int proc_do_static_key(struct ctl_table *table, int write,
		       void __user *buffer, size_t *lenp,
		       loff_t *ppos)
{
	static DEFINE_MUTEX(static_key_mutex);
	struct static_key *key = (struct static_key *)table->data;
	int enabled, rc;
	struct ctl_table proxy = {
		.data   = &enabled,
		.maxlen = sizeof(enabled),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&static_key_mutex);

	enabled = static_key_enabled(key);
	rc = proc_dointvec_minmax(&proxy, write, buffer, lenp, ppos);
	if (!rc && write) {
		if (enabled)
			static_key_enable(key);
		else
			static_key_disable(key);
	}

	mutex_unlock(&static_key_mutex);
	return rc;
}
#endif
/*
 * The handlers above are defined twice (real implementations and -ENOSYS
 * stubs), so the exports are grouped here once rather than placed after
 * each definition in both branches. Exception granted :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(proc_do_large_bitmap);