Blame - marvell/linux/fs/proc/base.c - T108

blob: 669f9a27ff7678118b09f732dd3b483cbd944dec [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* linux/fs/proc/base.c
				4	*
				5	* Copyright (C) 1991, 1992 Linus Torvalds
				6	*
				7	* proc base directory handling functions
				8	*
				9	* 1999, Al Viro. Rewritten. Now it covers the whole per-process part.
				10	* Instead of using magical inumbers to determine the kind of object
				11	* we allocate and fill in-core inodes upon lookup. They don't even
				12	* go into icache. We cache the reference to task_struct upon lookup too.
				13	* Eventually it should become a filesystem in its own. We don't use the
				14	* rest of procfs anymore.
				15	*
				16	*
				17	* Changelog:
				18	* 17-Jan-2005
				19	* Allan Bezerra
				20	* Bruna Moreira <bruna.moreira@indt.org.br>
				21	* Edjard Mota <edjard.mota@indt.org.br>
				22	* Ilias Biris <ilias.biris@indt.org.br>
				23	* Mauricio Lin <mauricio.lin@indt.org.br>
				24	*
				25	* Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
				26	*
				27	* A new process specific entry (smaps) included in /proc. It shows the
				28	* size of rss for each memory area. The maps entry lacks information
				29	* about physical memory size (rss) for each mapped file, i.e.,
				30	* rss information for executables and library files.
				31	* This additional information is useful for any tools that need to know
				32	* about physical memory consumption for a process specific library.
				33	*
				34	* Changelog:
				35	* 21-Feb-2005
				36	* Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
				37	* Pud inclusion in the page table walking.
				38	*
				39	* ChangeLog:
				40	* 10-Mar-2005
				41	* 10LE Instituto Nokia de Tecnologia - INdT:
				42	* A better way to walk through the page table as suggested by Hugh Dickins.
				43	*
				44	* Simo Piiroinen <simo.piiroinen@nokia.com>:
				45	* Smaps information related to shared, private, clean and dirty pages.
				46	*
				47	* Paul Mundt <paul.mundt@nokia.com>:
				48	* Overall revision about smaps.
				49	*/
				50
				51	#include <linux/uaccess.h>
				52
				53	#include <linux/errno.h>
				54	#include <linux/time.h>
				55	#include <linux/proc_fs.h>
				56	#include <linux/stat.h>
				57	#include <linux/task_io_accounting_ops.h>
				58	#include <linux/init.h>
				59	#include <linux/capability.h>
				60	#include <linux/file.h>
				61	#include <linux/fdtable.h>
				62	#include <linux/generic-radix-tree.h>
				63	#include <linux/string.h>
				64	#include <linux/seq_file.h>
				65	#include <linux/namei.h>
				66	#include <linux/mnt_namespace.h>
				67	#include <linux/mm.h>
				68	#include <linux/swap.h>
				69	#include <linux/rcupdate.h>
				70	#include <linux/kallsyms.h>
				71	#include <linux/stacktrace.h>
				72	#include <linux/resource.h>
				73	#include <linux/module.h>
				74	#include <linux/mount.h>
				75	#include <linux/security.h>
				76	#include <linux/ptrace.h>
				77	#include <linux/tracehook.h>
				78	#include <linux/printk.h>
				79	#include <linux/cache.h>
				80	#include <linux/cgroup.h>
				81	#include <linux/cpuset.h>
				82	#include <linux/audit.h>
				83	#include <linux/poll.h>
				84	#include <linux/nsproxy.h>
				85	#include <linux/oom.h>
				86	#include <linux/elf.h>
				87	#include <linux/pid_namespace.h>
				88	#include <linux/user_namespace.h>
				89	#include <linux/fs_parser.h>
				90	#include <linux/fs_struct.h>
				91	#include <linux/slab.h>
				92	#include <linux/sched/autogroup.h>
				93	#include <linux/sched/mm.h>
				94	#include <linux/sched/coredump.h>
				95	#include <linux/sched/debug.h>
				96	#include <linux/sched/stat.h>
				97	#include <linux/posix-timers.h>
				98	#include <linux/cpufreq_times.h>
				99	#include <trace/events/oom.h>
				100	#include "internal.h"
				101	#include "fd.h"
				102
				103	#include "../../lib/kstrtox.h"
				104
				105	/* NOTE:
				106	* Implementing inode permission operations in /proc is almost
				107	* certainly an error. Permission checks need to happen during
				108	* each system call not at open time. The reason is that most of
				109	* what we wish to check for permissions in /proc varies at runtime.
				110	*
				111	* The classic example of a problem is opening file descriptors
				112	* in /proc for a task before it execs a suid executable.
				113	*/
				114
				115	static u8 nlink_tid __ro_after_init;
				116	static u8 nlink_tgid __ro_after_init;
				117
				/*
				 * Policy deciding whether accesses through /proc/<pid>/mem may set
				 * FOLL_FORCE (consumed by proc_mem_foll_force()).
				 */
				enum proc_mem_force {
					PROC_MEM_FORCE_ALWAYS,	/* FOLL_FORCE is always added */
					PROC_MEM_FORCE_PTRACE,	/* only when the caller is the active tracer */
					PROC_MEM_FORCE_NEVER	/* FOLL_FORCE is never added */
				};
				123
				124	static enum proc_mem_force proc_mem_force_override __ro_after_init =
				125	IS_ENABLED(CONFIG_PROC_MEM_NO_FORCE) ? PROC_MEM_FORCE_NEVER :
				126	IS_ENABLED(CONFIG_PROC_MEM_FORCE_PTRACE) ? PROC_MEM_FORCE_PTRACE :
				127	PROC_MEM_FORCE_ALWAYS;
				128
				129	static const struct constant_table proc_mem_force_table[] __initconst = {
				130	{ "always", PROC_MEM_FORCE_ALWAYS },
				131	{ "ptrace", PROC_MEM_FORCE_PTRACE },
				132	{ "never", PROC_MEM_FORCE_NEVER },
				133	{ }
				134	};
				135
				136	static int __init early_proc_mem_force_override(char *buf)
				137	{
				138	if (!buf)
				139	return -EINVAL;
				140
				141	/*
				142	* lookup_constant() defaults to proc_mem_force_override to preseve
				143	* the initial Kconfig choice in case an invalid param gets passed.
				144	*/
				145	proc_mem_force_override = lookup_constant(proc_mem_force_table,
				146	buf, proc_mem_force_override);
				147
				148	return 0;
				149	}
				150	early_param("proc_mem.force_override", early_proc_mem_force_override);
				151
				152	struct pid_entry {
				153	const char *name;
				154	unsigned int len;
				155	umode_t mode;
				156	const struct inode_operations *iop;
				157	const struct file_operations *fop;
				158	union proc_op op;
				159	};
				160
				/*
				 * Initializer helpers for struct pid_entry tables.
				 *
				 * NOD()  - generic entry; NAME must be a string literal because the
				 *          length is computed with sizeof(NAME) - 1.
				 * DIR()  - directory with the given inode and file operations.
				 * LNK()  - symlink (mode S_IRWXUGO) resolved through @get_link.
				 * REG()  - regular file with its own file operations.
				 * ONE()  - regular file served by proc_single_file_operations, with
				 *          @show as the proc_show callback.
				 * ATTR() - regular file served by proc_pid_attr_operations for @LSM.
				 */
				#define NOD(NAME, MODE, IOP, FOP, OP) {			\
					.name = (NAME),					\
					.len  = sizeof(NAME) - 1,			\
					.mode = MODE,					\
					.iop  = IOP,					\
					.fop  = FOP,					\
					.op   = OP,					\
				}

				#define DIR(NAME, MODE, iops, fops)	\
					NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
				#define LNK(NAME, get_link)					\
					NOD(NAME, (S_IFLNK|S_IRWXUGO),				\
						&proc_pid_link_inode_operations, NULL,		\
						{ .proc_get_link = get_link } )
				#define REG(NAME, MODE, fops)				\
					NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
				#define ONE(NAME, MODE, show)				\
					NOD(NAME, (S_IFREG|(MODE)),			\
						NULL, &proc_single_file_operations,	\
						{ .proc_show = show } )
				#define ATTR(LSM, NAME, MODE)				\
					NOD(NAME, (S_IFREG|(MODE)),			\
						NULL, &proc_pid_attr_operations,	\
						{ .lsm = LSM })
				186
				187	/*
				188	* Count the number of hardlinks for the pid_entry table, excluding the .
				189	* and .. links.
				190	*/
				191	static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
				192	unsigned int n)
				193	{
				194	unsigned int i;
				195	unsigned int count;
				196
				197	count = 2;
				198	for (i = 0; i < n; ++i) {
				199	if (S_ISDIR(entries[i].mode))
				200	++count;
				201	}
				202
				203	return count;
				204	}
				205
				206	static int get_task_root(struct task_struct task, struct path root)
				207	{
				208	int result = -ENOENT;
				209
				210	task_lock(task);
				211	if (task->fs) {
				212	get_fs_root(task->fs, root);
				213	result = 0;
				214	}
				215	task_unlock(task);
				216	return result;
				217	}
				218
				219	static int proc_cwd_link(struct dentry dentry, struct path path)
				220	{
				221	struct task_struct *task = get_proc_task(d_inode(dentry));
				222	int result = -ENOENT;
				223
				224	if (task) {
				225	task_lock(task);
				226	if (task->fs) {
				227	get_fs_pwd(task->fs, path);
				228	result = 0;
				229	}
				230	task_unlock(task);
				231	put_task_struct(task);
				232	}
				233	return result;
				234	}
				235
				236	static int proc_root_link(struct dentry dentry, struct path path)
				237	{
				238	struct task_struct *task = get_proc_task(d_inode(dentry));
				239	int result = -ENOENT;
				240
				241	if (task) {
				242	result = get_task_root(task, path);
				243	put_task_struct(task);
				244	}
				245	return result;
				246	}
				247
				248	/*
				249	* If the user used setproctitle(), we just get the string from
				250	* user space at arg_start, and limit it to a maximum of one page.
				251	*/
				252	static ssize_t get_mm_proctitle(struct mm_struct mm, char __user buf,
				253	size_t count, unsigned long pos,
				254	unsigned long arg_start)
				255	{
				256	char *page;
				257	int ret, got;
				258
				259	if (pos >= PAGE_SIZE)
				260	return 0;
				261
				262	page = (char *)__get_free_page(GFP_KERNEL);
				263	if (!page)
				264	return -ENOMEM;
				265
				266	ret = 0;
				267	got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
				268	if (got > 0) {
				269	int len = strnlen(page, got);
				270
				271	/* Include the NUL character if it was found */
				272	if (len < got)
				273	len++;
				274
				275	if (len > pos) {
				276	len -= pos;
				277	if (len > count)
				278	len = count;
				279	len -= copy_to_user(buf, page+pos, len);
				280	if (!len)
				281	len = -EFAULT;
				282	ret = len;
				283	}
				284	}
				285	free_page((unsigned long)page);
				286	return ret;
				287	}
				288
				289	static ssize_t get_mm_cmdline(struct mm_struct mm, char __user buf,
				290	size_t count, loff_t *ppos)
				291	{
				292	unsigned long arg_start, arg_end, env_start, env_end;
				293	unsigned long pos, len;
				294	char *page, c;
				295
				296	/* Check if process spawned far enough to have cmdline. */
				297	if (!mm->env_end)
				298	return 0;
				299
				300	spin_lock(&mm->arg_lock);
				301	arg_start = mm->arg_start;
				302	arg_end = mm->arg_end;
				303	env_start = mm->env_start;
				304	env_end = mm->env_end;
				305	spin_unlock(&mm->arg_lock);
				306
				307	if (arg_start >= arg_end)
				308	return 0;
				309
				310	/*
				311	* We allow setproctitle() to overwrite the argument
				312	* strings, and overflow past the original end. But
				313	* only when it overflows into the environment area.
				314	*/
				315	if (env_start != arg_end \|\| env_end < env_start)
				316	env_start = env_end = arg_end;
				317	len = env_end - arg_start;
				318
				319	/* We're not going to care if "ppos" has high bits set /
				320	pos = *ppos;
				321	if (pos >= len)
				322	return 0;
				323	if (count > len - pos)
				324	count = len - pos;
				325	if (!count)
				326	return 0;
				327
				328	/*
				329	* Magical special case: if the argv[] end byte is not
				330	* zero, the user has overwritten it with setproctitle(3).
				331	*
				332	* Possible future enhancement: do this only once when
				333	* pos is 0, and set a flag in the 'struct file'.
				334	*/
				335	if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
				336	return get_mm_proctitle(mm, buf, count, pos, arg_start);
				337
				338	/*
				339	* For the non-setproctitle() case we limit things strictly
				340	* to the [arg_start, arg_end[ range.
				341	*/
				342	pos += arg_start;
				343	if (pos < arg_start \|\| pos >= arg_end)
				344	return 0;
				345	if (count > arg_end - pos)
				346	count = arg_end - pos;
				347
				348	page = (char *)__get_free_page(GFP_KERNEL);
				349	if (!page)
				350	return -ENOMEM;
				351
				352	len = 0;
				353	while (count) {
				354	int got;
				355	size_t size = min_t(size_t, PAGE_SIZE, count);
				356
				357	got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
				358	if (got <= 0)
				359	break;
				360	got -= copy_to_user(buf, page, got);
				361	if (unlikely(!got)) {
				362	if (!len)
				363	len = -EFAULT;
				364	break;
				365	}
				366	pos += got;
				367	buf += got;
				368	len += got;
				369	count -= got;
				370	}
				371
				372	free_page((unsigned long)page);
				373	return len;
				374	}
				375
				376	static ssize_t get_task_cmdline(struct task_struct tsk, char __user buf,
				377	size_t count, loff_t *pos)
				378	{
				379	struct mm_struct *mm;
				380	ssize_t ret;
				381
				382	mm = get_task_mm(tsk);
				383	if (!mm)
				384	return 0;
				385
				386	ret = get_mm_cmdline(mm, buf, count, pos);
				387	mmput(mm);
				388	return ret;
				389	}
				390
				391	static ssize_t proc_pid_cmdline_read(struct file file, char __user buf,
				392	size_t count, loff_t *pos)
				393	{
				394	struct task_struct *tsk;
				395	ssize_t ret;
				396
				397	BUG_ON(*pos < 0);
				398
				399	tsk = get_proc_task(file_inode(file));
				400	if (!tsk)
				401	return -ESRCH;
				402	ret = get_task_cmdline(tsk, buf, count, pos);
				403	put_task_struct(tsk);
				404	if (ret > 0)
				405	*pos += ret;
				406	return ret;
				407	}
				408
				409	static const struct file_operations proc_pid_cmdline_ops = {
				410	.read = proc_pid_cmdline_read,
				411	.llseek = generic_file_llseek,
				412	};
				413
				414	#ifdef CONFIG_KALLSYMS
				415	/*
				416	* Provides a wchan file via kallsyms in a proper one-value-per-file format.
				417	* Returns the resolved symbol. If that fails, simply return the address.
				418	*/
				419	static int proc_pid_wchan(struct seq_file m, struct pid_namespace ns,
				420	struct pid pid, struct task_struct task)
				421	{
				422	unsigned long wchan;
				423	char symname[KSYM_NAME_LEN];
				424
				425	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
				426	goto print0;
				427
				428	wchan = get_wchan(task);
				429	if (wchan && !lookup_symbol_name(wchan, symname)) {
				430	seq_puts(m, symname);
				431	return 0;
				432	}
				433
				434	print0:
				435	seq_putc(m, '0');
				436	return 0;
				437	}
				438	#endif /* CONFIG_KALLSYMS */
				439
				440	static int lock_trace(struct task_struct *task)
				441	{
				442	int err = down_read_killable(&task->signal->exec_update_lock);
				443	if (err)
				444	return err;
				445	if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
				446	up_read(&task->signal->exec_update_lock);
				447	return -EPERM;
				448	}
				449	return 0;
				450	}
				451
				452	static void unlock_trace(struct task_struct *task)
				453	{
				454	up_read(&task->signal->exec_update_lock);
				455	}
				456
				457	#ifdef CONFIG_STACKTRACE
				458
				459	#define MAX_STACK_TRACE_DEPTH 64
				460
				461	static int proc_pid_stack(struct seq_file m, struct pid_namespace ns,
				462	struct pid pid, struct task_struct task)
				463	{
				464	unsigned long *entries;
				465	int err;
				466
				467	/*
				468	* The ability to racily run the kernel stack unwinder on a running task
				469	* and then observe the unwinder output is scary; while it is useful for
				470	* debugging kernel issues, it can also allow an attacker to leak kernel
				471	* stack contents.
				472	* Doing this in a manner that is at least safe from races would require
				473	* some work to ensure that the remote task can not be scheduled; and
				474	* even then, this would still expose the unwinder as local attack
				475	* surface.
				476	* Therefore, this interface is restricted to root.
				477	*/
				478	if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
				479	return -EACCES;
				480
				481	entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
				482	GFP_KERNEL);
				483	if (!entries)
				484	return -ENOMEM;
				485
				486	err = lock_trace(task);
				487	if (!err) {
				488	unsigned int i, nr_entries;
				489
				490	nr_entries = stack_trace_save_tsk(task, entries,
				491	MAX_STACK_TRACE_DEPTH, 0);
				492
				493	for (i = 0; i < nr_entries; i++) {
				494	seq_printf(m, "[<0>] %pB\n", (void *)entries[i]);
				495	}
				496
				497	unlock_trace(task);
				498	}
				499	kfree(entries);
				500
				501	return err;
				502	}
				503	#endif
				504
				505	#ifdef CONFIG_SCHED_INFO
				506	/*
				507	* Provides /proc/PID/schedstat
				508	*/
				509	static int proc_pid_schedstat(struct seq_file m, struct pid_namespace ns,
				510	struct pid pid, struct task_struct task)
				511	{
				512	if (unlikely(!sched_info_on()))
				513	seq_puts(m, "0 0 0\n");
				514	else
				515	seq_printf(m, "%llu %llu %lu\n",
				516	(unsigned long long)task->se.sum_exec_runtime,
				517	(unsigned long long)task->sched_info.run_delay,
				518	task->sched_info.pcount);
				519
				520	return 0;
				521	}
				522	#endif
				523
				524	#ifdef CONFIG_LATENCYTOP
				525	static int lstats_show_proc(struct seq_file m, void v)
				526	{
				527	int i;
				528	struct inode *inode = m->private;
				529	struct task_struct *task = get_proc_task(inode);
				530
				531	if (!task)
				532	return -ESRCH;
				533	seq_puts(m, "Latency Top version : v0.1\n");
				534	for (i = 0; i < LT_SAVECOUNT; i++) {
				535	struct latency_record *lr = &task->latency_record[i];
				536	if (lr->backtrace[0]) {
				537	int q;
				538	seq_printf(m, "%i %li %li",
				539	lr->count, lr->time, lr->max);
				540	for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
				541	unsigned long bt = lr->backtrace[q];
				542
				543	if (!bt)
				544	break;
				545	seq_printf(m, " %ps", (void *)bt);
				546	}
				547	seq_putc(m, '\n');
				548	}
				549
				550	}
				551	put_task_struct(task);
				552	return 0;
				553	}
				554
				555	static int lstats_open(struct inode inode, struct file file)
				556	{
				557	return single_open(file, lstats_show_proc, inode);
				558	}
				559
				560	static ssize_t lstats_write(struct file file, const char __user buf,
				561	size_t count, loff_t *offs)
				562	{
				563	struct task_struct *task = get_proc_task(file_inode(file));
				564
				565	if (!task)
				566	return -ESRCH;
				567	clear_tsk_latency_tracing(task);
				568	put_task_struct(task);
				569
				570	return count;
				571	}
				572
				573	static const struct file_operations proc_lstats_operations = {
				574	.open = lstats_open,
				575	.read = seq_read,
				576	.write = lstats_write,
				577	.llseek = seq_lseek,
				578	.release = single_release,
				579	};
				580
				581	#endif
				582
				583	static int proc_oom_score(struct seq_file m, struct pid_namespace ns,
				584	struct pid pid, struct task_struct task)
				585	{
				586	unsigned long totalpages = totalram_pages() + total_swap_pages;
				587	unsigned long points = 0;
				588	long badness;
				589
				590	badness = oom_badness(task, totalpages);
				591	/*
				592	* Special case OOM_SCORE_ADJ_MIN for all others scale the
				593	* badness value into [0, 2000] range which we have been
				594	* exporting for a long time so userspace might depend on it.
				595	*/
				596	if (badness != LONG_MIN)
				597	points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3;
				598
				599	seq_printf(m, "%lu\n", points);
				600
				601	return 0;
				602	}
				603
				/* Display strings for one resource limit, as printed by proc_pid_limits(). */
				struct limit_names {
					const char *name;	/* text for the "Limit" column */
					const char *unit;	/* text for the "Units" column, NULL for none */
				};
				608
				609	static const struct limit_names lnames[RLIM_NLIMITS] = {
				610	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
				611	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
				612	[RLIMIT_DATA] = {"Max data size", "bytes"},
				613	[RLIMIT_STACK] = {"Max stack size", "bytes"},
				614	[RLIMIT_CORE] = {"Max core file size", "bytes"},
				615	[RLIMIT_RSS] = {"Max resident set", "bytes"},
				616	[RLIMIT_NPROC] = {"Max processes", "processes"},
				617	[RLIMIT_NOFILE] = {"Max open files", "files"},
				618	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
				619	[RLIMIT_AS] = {"Max address space", "bytes"},
				620	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
				621	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
				622	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
				623	[RLIMIT_NICE] = {"Max nice priority", NULL},
				624	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
				625	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
				626	};
				627
				628	/* Display limits for a process */
				629	static int proc_pid_limits(struct seq_file m, struct pid_namespace ns,
				630	struct pid pid, struct task_struct task)
				631	{
				632	unsigned int i;
				633	unsigned long flags;
				634
				635	struct rlimit rlim[RLIM_NLIMITS];
				636
				637	if (!lock_task_sighand(task, &flags))
				638	return 0;
				639	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
				640	unlock_task_sighand(task, &flags);
				641
				642	/*
				643	* print the file header
				644	*/
				645	seq_puts(m, "Limit "
				646	"Soft Limit "
				647	"Hard Limit "
				648	"Units \n");
				649
				650	for (i = 0; i < RLIM_NLIMITS; i++) {
				651	if (rlim[i].rlim_cur == RLIM_INFINITY)
				652	seq_printf(m, "%-25s %-20s ",
				653	lnames[i].name, "unlimited");
				654	else
				655	seq_printf(m, "%-25s %-20lu ",
				656	lnames[i].name, rlim[i].rlim_cur);
				657
				658	if (rlim[i].rlim_max == RLIM_INFINITY)
				659	seq_printf(m, "%-20s ", "unlimited");
				660	else
				661	seq_printf(m, "%-20lu ", rlim[i].rlim_max);
				662
				663	if (lnames[i].unit)
				664	seq_printf(m, "%-10s\n", lnames[i].unit);
				665	else
				666	seq_putc(m, '\n');
				667	}
				668
				669	return 0;
				670	}
				671
				672	#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
				673	static int proc_pid_syscall(struct seq_file m, struct pid_namespace ns,
				674	struct pid pid, struct task_struct task)
				675	{
				676	struct syscall_info info;
				677	u64 *args = &info.data.args[0];
				678	int res;
				679
				680	res = lock_trace(task);
				681	if (res)
				682	return res;
				683
				684	if (task_current_syscall(task, &info))
				685	seq_puts(m, "running\n");
				686	else if (info.data.nr < 0)
				687	seq_printf(m, "%d 0x%llx 0x%llx\n",
				688	info.data.nr, info.sp, info.data.instruction_pointer);
				689	else
				690	seq_printf(m,
				691	"%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n",
				692	info.data.nr,
				693	args[0], args[1], args[2], args[3], args[4], args[5],
				694	info.sp, info.data.instruction_pointer);
				695	unlock_trace(task);
				696
				697	return 0;
				698	}
				699	#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
				700
				701	/************************************************************************/
				702	/* Here the fs part begins */
				703	/************************************************************************/
				704
				705	/* permission checks */
				706	static int proc_fd_access_allowed(struct inode *inode)
				707	{
				708	struct task_struct *task;
				709	int allowed = 0;
				710	/* Allow access to a task's file descriptors if it is us or we
				711	* may use ptrace attach to the process and find out that
				712	* information.
				713	*/
				714	task = get_proc_task(inode);
				715	if (task) {
				716	allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
				717	put_task_struct(task);
				718	}
				719	return allowed;
				720	}
				721
				722	int proc_setattr(struct dentry dentry, struct iattr attr)
				723	{
				724	int error;
				725	struct inode *inode = d_inode(dentry);
				726
				727	if (attr->ia_valid & ATTR_MODE)
				728	return -EPERM;
				729
				730	error = setattr_prepare(dentry, attr);
				731	if (error)
				732	return error;
				733
				734	setattr_copy(inode, attr);
				735	mark_inode_dirty(inode);
				736	return 0;
				737	}
				738
				739	/*
				740	* May current process learn task's sched/cmdline info (for hide_pid_min=1)
				741	* or euid/egid (for hide_pid_min=2)?
				742	*/
				743	static bool has_pid_permissions(struct pid_namespace *pid,
				744	struct task_struct *task,
				745	int hide_pid_min)
				746	{
				747	if (pid->hide_pid < hide_pid_min)
				748	return true;
				749	if (in_group_p(pid->pid_gid))
				750	return true;
				751	return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
				752	}
				753
				754
				755	static int proc_pid_permission(struct inode *inode, int mask)
				756	{
				757	struct pid_namespace *pid = proc_pid_ns(inode);
				758	struct task_struct *task;
				759	bool has_perms;
				760
				761	task = get_proc_task(inode);
				762	if (!task)
				763	return -ESRCH;
				764	has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
				765	put_task_struct(task);
				766
				767	if (!has_perms) {
				768	if (pid->hide_pid == HIDEPID_INVISIBLE) {
				769	/*
				770	* Let's make getdents(), stat(), and open()
				771	* consistent with each other. If a process
				772	* may not stat() a file, it shouldn't be seen
				773	* in procfs at all.
				774	*/
				775	return -ENOENT;
				776	}
				777
				778	return -EPERM;
				779	}
				780	return generic_permission(inode, mask);
				781	}
				782
				783
				784
				785	static const struct inode_operations proc_def_inode_operations = {
				786	.setattr = proc_setattr,
				787	};
				788
				789	static int proc_single_show(struct seq_file m, void v)
				790	{
				791	struct inode *inode = m->private;
				792	struct pid_namespace *ns = proc_pid_ns(inode);
				793	struct pid *pid = proc_pid(inode);
				794	struct task_struct *task;
				795	int ret;
				796
				797	task = get_pid_task(pid, PIDTYPE_PID);
				798	if (!task)
				799	return -ESRCH;
				800
				801	ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
				802
				803	put_task_struct(task);
				804	return ret;
				805	}
				806
				807	static int proc_single_open(struct inode inode, struct file filp)
				808	{
				809	return single_open(filp, proc_single_show, inode);
				810	}
				811
				812	static const struct file_operations proc_single_file_operations = {
				813	.open = proc_single_open,
				814	.read = seq_read,
				815	.llseek = seq_lseek,
				816	.release = single_release,
				817	};
				818
				819
				820	struct mm_struct proc_mem_open(struct inode inode, unsigned int mode)
				821	{
				822	struct task_struct *task = get_proc_task(inode);
				823	struct mm_struct *mm = ERR_PTR(-ESRCH);
				824
				825	if (task) {
				826	mm = mm_access(task, mode \| PTRACE_MODE_FSCREDS);
				827	put_task_struct(task);
				828
				829	if (!IS_ERR_OR_NULL(mm)) {
				830	/* ensure this mm_struct can't be freed */
				831	mmgrab(mm);
				832	/* but do not pin its memory */
				833	mmput(mm);
				834	}
				835	}
				836
				837	return mm;
				838	}
				839
				840	static int __mem_open(struct inode inode, struct file file, unsigned int mode)
				841	{
				842	struct mm_struct *mm = proc_mem_open(inode, mode);
				843
				844	if (IS_ERR(mm))
				845	return PTR_ERR(mm);
				846
				847	file->private_data = mm;
				848	return 0;
				849	}
				850
				851	static int mem_open(struct inode inode, struct file file)
				852	{
				853	int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH);
				854
				855	/* OK to pass negative loff_t, we can catch out-of-range */
				856	file->f_mode \|= FMODE_UNSIGNED_OFFSET;
				857
				858	return ret;
				859	}
				860
				861	static bool proc_mem_foll_force(struct file file, struct mm_struct mm)
				862	{
				863	struct task_struct *task;
				864	bool ptrace_active = false;
				865
				866	switch (proc_mem_force_override) {
				867	case PROC_MEM_FORCE_NEVER:
				868	return false;
				869	case PROC_MEM_FORCE_PTRACE:
				870	task = get_proc_task(file_inode(file));
				871	if (task) {
				872	ptrace_active = READ_ONCE(task->ptrace) &&
				873	READ_ONCE(task->mm) == mm &&
				874	READ_ONCE(task->parent) == current;
				875	put_task_struct(task);
				876	}
				877	return ptrace_active;
				878	default:
				879	return true;
				880	}
				881	}
				882
				883	static ssize_t mem_rw(struct file file, char __user buf,
				884	size_t count, loff_t *ppos, int write)
				885	{
				886	struct mm_struct *mm = file->private_data;
				887	unsigned long addr = *ppos;
				888	ssize_t copied;
				889	char *page;
				890	unsigned int flags;
				891
				892	if (!mm)
				893	return 0;
				894
				895	page = (char *)__get_free_page(GFP_KERNEL);
				896	if (!page)
				897	return -ENOMEM;
				898
				899	copied = 0;
				900	if (!mmget_not_zero(mm))
				901	goto free;
				902
				903	flags = write ? FOLL_WRITE : 0;
				904	if (proc_mem_foll_force(file, mm))
				905	flags \|= FOLL_FORCE;
				906
				907	while (count > 0) {
				908	size_t this_len = min_t(size_t, count, PAGE_SIZE);
				909
				910	if (write && copy_from_user(page, buf, this_len)) {
				911	copied = -EFAULT;
				912	break;
				913	}
				914
				915	this_len = access_remote_vm(mm, addr, page, this_len, flags);
				916	if (!this_len) {
				917	if (!copied)
				918	copied = -EIO;
				919	break;
				920	}
				921
				922	if (!write && copy_to_user(buf, page, this_len)) {
				923	copied = -EFAULT;
				924	break;
				925	}
				926
				927	buf += this_len;
				928	addr += this_len;
				929	copied += this_len;
				930	count -= this_len;
				931	}
				932	*ppos = addr;
				933
				934	mmput(mm);
				935	free:
				936	free_page((unsigned long) page);
				937	return copied;
				938	}
				939
				940	static ssize_t mem_read(struct file file, char __user buf,
				941	size_t count, loff_t *ppos)
				942	{
				943	return mem_rw(file, buf, count, ppos, 0);
				944	}
				945
				946	static ssize_t mem_write(struct file file, const char __user buf,
				947	size_t count, loff_t *ppos)
				948	{
				949	return mem_rw(file, (char __user*)buf, count, ppos, 1);
				950	}
				951
				952	loff_t mem_lseek(struct file *file, loff_t offset, int orig)
				953	{
				954	switch (orig) {
				955	case 0:
				956	file->f_pos = offset;
				957	break;
				958	case 1:
				959	file->f_pos += offset;
				960	break;
				961	default:
				962	return -EINVAL;
				963	}
				964	force_successful_syscall_return();
				965	return file->f_pos;
				966	}
				967
				968	static int mem_release(struct inode inode, struct file file)
				969	{
				970	struct mm_struct *mm = file->private_data;
				971	if (mm)
				972	mmdrop(mm);
				973	return 0;
				974	}
				975
				976	static const struct file_operations proc_mem_operations = {
				977	.llseek = mem_lseek,
				978	.read = mem_read,
				979	.write = mem_write,
				980	.open = mem_open,
				981	.release = mem_release,
				982	};
				983
				984	static int environ_open(struct inode inode, struct file file)
				985	{
				986	return __mem_open(inode, file, PTRACE_MODE_READ);
				987	}
				988
				989	static ssize_t environ_read(struct file file, char __user buf,
				990	size_t count, loff_t *ppos)
				991	{
				992	char *page;
				993	unsigned long src = *ppos;
				994	int ret = 0;
				995	struct mm_struct *mm = file->private_data;
				996	unsigned long env_start, env_end;
				997
				998	/* Ensure the process spawned far enough to have an environment. */
				999	if (!mm \|\| !mm->env_end)
				1000	return 0;
				1001
				1002	page = (char *)__get_free_page(GFP_KERNEL);
				1003	if (!page)
				1004	return -ENOMEM;
				1005
				1006	ret = 0;
				1007	if (!mmget_not_zero(mm))
				1008	goto free;
				1009
				1010	spin_lock(&mm->arg_lock);
				1011	env_start = mm->env_start;
				1012	env_end = mm->env_end;
				1013	spin_unlock(&mm->arg_lock);
				1014
				1015	while (count > 0) {
				1016	size_t this_len, max_len;
				1017	int retval;
				1018
				1019	if (src >= (env_end - env_start))
				1020	break;
				1021
				1022	this_len = env_end - (env_start + src);
				1023
				1024	max_len = min_t(size_t, PAGE_SIZE, count);
				1025	this_len = min(max_len, this_len);
				1026
				1027	retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
				1028
				1029	if (retval <= 0) {
				1030	ret = retval;
				1031	break;
				1032	}
				1033
				1034	if (copy_to_user(buf, page, retval)) {
				1035	ret = -EFAULT;
				1036	break;
				1037	}
				1038
				1039	ret += retval;
				1040	src += retval;
				1041	buf += retval;
				1042	count -= retval;
				1043	}
				1044	*ppos = src;
				1045	mmput(mm);
				1046
				1047	free:
				1048	free_page((unsigned long) page);
				1049	return ret;
				1050	}
				1051
				1052	static const struct file_operations proc_environ_operations = {
				1053	.open = environ_open,
				1054	.read = environ_read,
				1055	.llseek = generic_file_llseek,
				1056	.release = mem_release,
				1057	};
				1058
				1059	static int auxv_open(struct inode inode, struct file file)
				1060	{
				1061	return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
				1062	}
				1063
				1064	static ssize_t auxv_read(struct file file, char __user buf,
				1065	size_t count, loff_t *ppos)
				1066	{
				1067	struct mm_struct *mm = file->private_data;
				1068	unsigned int nwords = 0;
				1069
				1070	if (!mm)
				1071	return 0;
				1072	do {
				1073	nwords += 2;
				1074	} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
				1075	return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
				1076	nwords * sizeof(mm->saved_auxv[0]));
				1077	}
				1078
				1079	static const struct file_operations proc_auxv_operations = {
				1080	.open = auxv_open,
				1081	.read = auxv_read,
				1082	.llseek = generic_file_llseek,
				1083	.release = mem_release,
				1084	};
				1085
				1086	static ssize_t oom_adj_read(struct file file, char __user buf, size_t count,
				1087	loff_t *ppos)
				1088	{
				1089	struct task_struct *task = get_proc_task(file_inode(file));
				1090	char buffer[PROC_NUMBUF];
				1091	int oom_adj = OOM_ADJUST_MIN;
				1092	size_t len;
				1093
				1094	if (!task)
				1095	return -ESRCH;
				1096	if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
				1097	oom_adj = OOM_ADJUST_MAX;
				1098	else
				1099	oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
				1100	OOM_SCORE_ADJ_MAX;
				1101	put_task_struct(task);
				1102	len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
				1103	return simple_read_from_buffer(buf, count, ppos, buffer, len);
				1104	}
				1105
				1106	static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
				1107	{
				1108	struct mm_struct *mm = NULL;
				1109	struct task_struct *task;
				1110	int err = 0;
				1111
				1112	task = get_proc_task(file_inode(file));
				1113	if (!task)
				1114	return -ESRCH;
				1115
				1116	mutex_lock(&oom_adj_mutex);
				1117	if (legacy) {
				1118	if (oom_adj < task->signal->oom_score_adj &&
				1119	!capable(CAP_SYS_RESOURCE)) {
				1120	err = -EACCES;
				1121	goto err_unlock;
				1122	}
				1123	/*
				1124	* /proc/pid/oom_adj is provided for legacy purposes, ask users to use
				1125	* /proc/pid/oom_score_adj instead.
				1126	*/
				1127	pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
				1128	current->comm, task_pid_nr(current), task_pid_nr(task),
				1129	task_pid_nr(task));
				1130	} else {
				1131	if ((short)oom_adj < task->signal->oom_score_adj_min &&
				1132	!capable(CAP_SYS_RESOURCE)) {
				1133	err = -EACCES;
				1134	goto err_unlock;
				1135	}
				1136	}
				1137
				1138	/*
				1139	* Make sure we will check other processes sharing the mm if this is
				1140	* not vfrok which wants its own oom_score_adj.
				1141	* pin the mm so it doesn't go away and get reused after task_unlock
				1142	*/
				1143	if (!task->vfork_done) {
				1144	struct task_struct *p = find_lock_task_mm(task);
				1145
				1146	if (p) {
				1147	if (test_bit(MMF_MULTIPROCESS, &p->mm->flags)) {
				1148	mm = p->mm;
				1149	mmgrab(mm);
				1150	}
				1151	task_unlock(p);
				1152	}
				1153	}
				1154
				1155	task->signal->oom_score_adj = oom_adj;
				1156	if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
				1157	task->signal->oom_score_adj_min = (short)oom_adj;
				1158	trace_oom_score_adj_update(task);
				1159
				1160	if (mm) {
				1161	struct task_struct *p;
				1162
				1163	rcu_read_lock();
				1164	for_each_process(p) {
				1165	if (same_thread_group(task, p))
				1166	continue;
				1167
				1168	/* do not touch kernel threads or the global init */
				1169	if (p->flags & PF_KTHREAD \|\| is_global_init(p))
				1170	continue;
				1171
				1172	task_lock(p);
				1173	if (!p->vfork_done && process_shares_mm(p, mm)) {
				1174	p->signal->oom_score_adj = oom_adj;
				1175	if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
				1176	p->signal->oom_score_adj_min = (short)oom_adj;
				1177	}
				1178	task_unlock(p);
				1179	}
				1180	rcu_read_unlock();
				1181	mmdrop(mm);
				1182	}
				1183	err_unlock:
				1184	mutex_unlock(&oom_adj_mutex);
				1185	put_task_struct(task);
				1186	return err;
				1187	}
				1188
				1189	/*
				1190	* /proc/pid/oom_adj exists solely for backwards compatibility with previous
				1191	* kernels. The effective policy is defined by oom_score_adj, which has a
				1192	* different scale: oom_adj grew exponentially and oom_score_adj grows linearly.
				1193	* Values written to oom_adj are simply mapped linearly to oom_score_adj.
				1194	* Processes that become oom disabled via oom_adj will still be oom disabled
				1195	* with this implementation.
				1196	*
				1197	* oom_adj cannot be removed since existing userspace binaries use it.
				1198	*/
				1199	static ssize_t oom_adj_write(struct file file, const char __user buf,
				1200	size_t count, loff_t *ppos)
				1201	{
				1202	char buffer[PROC_NUMBUF];
				1203	int oom_adj;
				1204	int err;
				1205
				1206	memset(buffer, 0, sizeof(buffer));
				1207	if (count > sizeof(buffer) - 1)
				1208	count = sizeof(buffer) - 1;
				1209	if (copy_from_user(buffer, buf, count)) {
				1210	err = -EFAULT;
				1211	goto out;
				1212	}
				1213
				1214	err = kstrtoint(strstrip(buffer), 0, &oom_adj);
				1215	if (err)
				1216	goto out;
				1217	if ((oom_adj < OOM_ADJUST_MIN \|\| oom_adj > OOM_ADJUST_MAX) &&
				1218	oom_adj != OOM_DISABLE) {
				1219	err = -EINVAL;
				1220	goto out;
				1221	}
				1222
				1223	/*
				1224	* Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
				1225	* value is always attainable.
				1226	*/
				1227	if (oom_adj == OOM_ADJUST_MAX)
				1228	oom_adj = OOM_SCORE_ADJ_MAX;
				1229	else
				1230	oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
				1231
				1232	err = __set_oom_adj(file, oom_adj, true);
				1233	out:
				1234	return err < 0 ? err : count;
				1235	}
				1236
				1237	static const struct file_operations proc_oom_adj_operations = {
				1238	.read = oom_adj_read,
				1239	.write = oom_adj_write,
				1240	.llseek = generic_file_llseek,
				1241	};
				1242
				1243	static ssize_t oom_score_adj_read(struct file file, char __user buf,
				1244	size_t count, loff_t *ppos)
				1245	{
				1246	struct task_struct *task = get_proc_task(file_inode(file));
				1247	char buffer[PROC_NUMBUF];
				1248	short oom_score_adj = OOM_SCORE_ADJ_MIN;
				1249	size_t len;
				1250
				1251	if (!task)
				1252	return -ESRCH;
				1253	oom_score_adj = task->signal->oom_score_adj;
				1254	put_task_struct(task);
				1255	len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
				1256	return simple_read_from_buffer(buf, count, ppos, buffer, len);
				1257	}
				1258
				1259	static ssize_t oom_score_adj_write(struct file file, const char __user buf,
				1260	size_t count, loff_t *ppos)
				1261	{
				1262	char buffer[PROC_NUMBUF];
				1263	int oom_score_adj;
				1264	int err;
				1265
				1266	memset(buffer, 0, sizeof(buffer));
				1267	if (count > sizeof(buffer) - 1)
				1268	count = sizeof(buffer) - 1;
				1269	if (copy_from_user(buffer, buf, count)) {
				1270	err = -EFAULT;
				1271	goto out;
				1272	}
				1273
				1274	err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
				1275	if (err)
				1276	goto out;
				1277	if (oom_score_adj < OOM_SCORE_ADJ_MIN \|\|
				1278	oom_score_adj > OOM_SCORE_ADJ_MAX) {
				1279	err = -EINVAL;
				1280	goto out;
				1281	}
				1282
				1283	err = __set_oom_adj(file, oom_score_adj, false);
				1284	out:
				1285	return err < 0 ? err : count;
				1286	}
				1287
				1288	static const struct file_operations proc_oom_score_adj_operations = {
				1289	.read = oom_score_adj_read,
				1290	.write = oom_score_adj_write,
				1291	.llseek = default_llseek,
				1292	};
				1293
				1294	#ifdef CONFIG_AUDIT
				1295	#define TMPBUFLEN 11
				1296	static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
				1297	size_t count, loff_t *ppos)
				1298	{
				1299	struct inode * inode = file_inode(file);
				1300	struct task_struct *task = get_proc_task(inode);
				1301	ssize_t length;
				1302	char tmpbuf[TMPBUFLEN];
				1303
				1304	if (!task)
				1305	return -ESRCH;
				1306	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
				1307	from_kuid(file->f_cred->user_ns,
				1308	audit_get_loginuid(task)));
				1309	put_task_struct(task);
				1310	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
				1311	}
				1312
				1313	static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
				1314	size_t count, loff_t *ppos)
				1315	{
				1316	struct inode * inode = file_inode(file);
				1317	uid_t loginuid;
				1318	kuid_t kloginuid;
				1319	int rv;
				1320
				1321	rcu_read_lock();
				1322	if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
				1323	rcu_read_unlock();
				1324	return -EPERM;
				1325	}
				1326	rcu_read_unlock();
				1327
				1328	if (*ppos != 0) {
				1329	/* No partial writes. */
				1330	return -EINVAL;
				1331	}
				1332
				1333	rv = kstrtou32_from_user(buf, count, 10, &loginuid);
				1334	if (rv < 0)
				1335	return rv;
				1336
				1337	/* is userspace tring to explicitly UNSET the loginuid? */
				1338	if (loginuid == AUDIT_UID_UNSET) {
				1339	kloginuid = INVALID_UID;
				1340	} else {
				1341	kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
				1342	if (!uid_valid(kloginuid))
				1343	return -EINVAL;
				1344	}
				1345
				1346	rv = audit_set_loginuid(kloginuid);
				1347	if (rv < 0)
				1348	return rv;
				1349	return count;
				1350	}
				1351
				1352	static const struct file_operations proc_loginuid_operations = {
				1353	.read = proc_loginuid_read,
				1354	.write = proc_loginuid_write,
				1355	.llseek = generic_file_llseek,
				1356	};
				1357
				1358	static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
				1359	size_t count, loff_t *ppos)
				1360	{
				1361	struct inode * inode = file_inode(file);
				1362	struct task_struct *task = get_proc_task(inode);
				1363	ssize_t length;
				1364	char tmpbuf[TMPBUFLEN];
				1365
				1366	if (!task)
				1367	return -ESRCH;
				1368	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
				1369	audit_get_sessionid(task));
				1370	put_task_struct(task);
				1371	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
				1372	}
				1373
				1374	static const struct file_operations proc_sessionid_operations = {
				1375	.read = proc_sessionid_read,
				1376	.llseek = generic_file_llseek,
				1377	};
				1378	#endif
				1379
				1380	#ifdef CONFIG_FAULT_INJECTION
				1381	static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
				1382	size_t count, loff_t *ppos)
				1383	{
				1384	struct task_struct *task = get_proc_task(file_inode(file));
				1385	char buffer[PROC_NUMBUF];
				1386	size_t len;
				1387	int make_it_fail;
				1388
				1389	if (!task)
				1390	return -ESRCH;
				1391	make_it_fail = task->make_it_fail;
				1392	put_task_struct(task);
				1393
				1394	len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
				1395
				1396	return simple_read_from_buffer(buf, count, ppos, buffer, len);
				1397	}
				1398
				1399	static ssize_t proc_fault_inject_write(struct file * file,
				1400	const char __user * buf, size_t count, loff_t *ppos)
				1401	{
				1402	struct task_struct *task;
				1403	char buffer[PROC_NUMBUF];
				1404	int make_it_fail;
				1405	int rv;
				1406
				1407	if (!capable(CAP_SYS_RESOURCE))
				1408	return -EPERM;
				1409	memset(buffer, 0, sizeof(buffer));
				1410	if (count > sizeof(buffer) - 1)
				1411	count = sizeof(buffer) - 1;
				1412	if (copy_from_user(buffer, buf, count))
				1413	return -EFAULT;
				1414	rv = kstrtoint(strstrip(buffer), 0, &make_it_fail);
				1415	if (rv < 0)
				1416	return rv;
				1417	if (make_it_fail < 0 \|\| make_it_fail > 1)
				1418	return -EINVAL;
				1419
				1420	task = get_proc_task(file_inode(file));
				1421	if (!task)
				1422	return -ESRCH;
				1423	task->make_it_fail = make_it_fail;
				1424	put_task_struct(task);
				1425
				1426	return count;
				1427	}
				1428
				1429	static const struct file_operations proc_fault_inject_operations = {
				1430	.read = proc_fault_inject_read,
				1431	.write = proc_fault_inject_write,
				1432	.llseek = generic_file_llseek,
				1433	};
				1434
				1435	static ssize_t proc_fail_nth_write(struct file file, const char __user buf,
				1436	size_t count, loff_t *ppos)
				1437	{
				1438	struct task_struct *task;
				1439	int err;
				1440	unsigned int n;
				1441
				1442	err = kstrtouint_from_user(buf, count, 0, &n);
				1443	if (err)
				1444	return err;
				1445
				1446	task = get_proc_task(file_inode(file));
				1447	if (!task)
				1448	return -ESRCH;
				1449	task->fail_nth = n;
				1450	put_task_struct(task);
				1451
				1452	return count;
				1453	}
				1454
				1455	static ssize_t proc_fail_nth_read(struct file file, char __user buf,
				1456	size_t count, loff_t *ppos)
				1457	{
				1458	struct task_struct *task;
				1459	char numbuf[PROC_NUMBUF];
				1460	ssize_t len;
				1461
				1462	task = get_proc_task(file_inode(file));
				1463	if (!task)
				1464	return -ESRCH;
				1465	len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth);
				1466	put_task_struct(task);
				1467	return simple_read_from_buffer(buf, count, ppos, numbuf, len);
				1468	}
				1469
				1470	static const struct file_operations proc_fail_nth_operations = {
				1471	.read = proc_fail_nth_read,
				1472	.write = proc_fail_nth_write,
				1473	};
				1474	#endif
				1475
				1476
				1477	#ifdef CONFIG_SCHED_DEBUG
				1478	/*
				1479	* Print out various scheduling related per-task fields:
				1480	*/
				1481	static int sched_show(struct seq_file m, void v)
				1482	{
				1483	struct inode *inode = m->private;
				1484	struct pid_namespace *ns = proc_pid_ns(inode);
				1485	struct task_struct *p;
				1486
				1487	p = get_proc_task(inode);
				1488	if (!p)
				1489	return -ESRCH;
				1490	proc_sched_show_task(p, ns, m);
				1491
				1492	put_task_struct(p);
				1493
				1494	return 0;
				1495	}
				1496
				1497	static ssize_t
				1498	sched_write(struct file file, const char __user buf,
				1499	size_t count, loff_t *offset)
				1500	{
				1501	struct inode *inode = file_inode(file);
				1502	struct task_struct *p;
				1503
				1504	p = get_proc_task(inode);
				1505	if (!p)
				1506	return -ESRCH;
				1507	proc_sched_set_task(p);
				1508
				1509	put_task_struct(p);
				1510
				1511	return count;
				1512	}
				1513
				1514	static int sched_open(struct inode inode, struct file filp)
				1515	{
				1516	return single_open(filp, sched_show, inode);
				1517	}
				1518
				1519	static const struct file_operations proc_pid_sched_operations = {
				1520	.open = sched_open,
				1521	.read = seq_read,
				1522	.write = sched_write,
				1523	.llseek = seq_lseek,
				1524	.release = single_release,
				1525	};
				1526
				1527	#endif
				1528
				1529	#ifdef CONFIG_SCHED_AUTOGROUP
				1530	/*
				1531	* Print out autogroup related information:
				1532	*/
				1533	static int sched_autogroup_show(struct seq_file m, void v)
				1534	{
				1535	struct inode *inode = m->private;
				1536	struct task_struct *p;
				1537
				1538	p = get_proc_task(inode);
				1539	if (!p)
				1540	return -ESRCH;
				1541	proc_sched_autogroup_show_task(p, m);
				1542
				1543	put_task_struct(p);
				1544
				1545	return 0;
				1546	}
				1547
				1548	static ssize_t
				1549	sched_autogroup_write(struct file file, const char __user buf,
				1550	size_t count, loff_t *offset)
				1551	{
				1552	struct inode *inode = file_inode(file);
				1553	struct task_struct *p;
				1554	char buffer[PROC_NUMBUF];
				1555	int nice;
				1556	int err;
				1557
				1558	memset(buffer, 0, sizeof(buffer));
				1559	if (count > sizeof(buffer) - 1)
				1560	count = sizeof(buffer) - 1;
				1561	if (copy_from_user(buffer, buf, count))
				1562	return -EFAULT;
				1563
				1564	err = kstrtoint(strstrip(buffer), 0, &nice);
				1565	if (err < 0)
				1566	return err;
				1567
				1568	p = get_proc_task(inode);
				1569	if (!p)
				1570	return -ESRCH;
				1571
				1572	err = proc_sched_autogroup_set_nice(p, nice);
				1573	if (err)
				1574	count = err;
				1575
				1576	put_task_struct(p);
				1577
				1578	return count;
				1579	}
				1580
				1581	static int sched_autogroup_open(struct inode inode, struct file filp)
				1582	{
				1583	int ret;
				1584
				1585	ret = single_open(filp, sched_autogroup_show, NULL);
				1586	if (!ret) {
				1587	struct seq_file *m = filp->private_data;
				1588
				1589	m->private = inode;
				1590	}
				1591	return ret;
				1592	}
				1593
				1594	static const struct file_operations proc_pid_sched_autogroup_operations = {
				1595	.open = sched_autogroup_open,
				1596	.read = seq_read,
				1597	.write = sched_autogroup_write,
				1598	.llseek = seq_lseek,
				1599	.release = single_release,
				1600	};
				1601
				1602	#endif /* CONFIG_SCHED_AUTOGROUP */
				1603
				1604	static ssize_t comm_write(struct file file, const char __user buf,
				1605	size_t count, loff_t *offset)
				1606	{
				1607	struct inode *inode = file_inode(file);
				1608	struct task_struct *p;
				1609	char buffer[TASK_COMM_LEN];
				1610	const size_t maxlen = sizeof(buffer) - 1;
				1611
				1612	memset(buffer, 0, sizeof(buffer));
				1613	if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count))
				1614	return -EFAULT;
				1615
				1616	p = get_proc_task(inode);
				1617	if (!p)
				1618	return -ESRCH;
				1619
				1620	if (same_thread_group(current, p))
				1621	set_task_comm(p, buffer);
				1622	else
				1623	count = -EINVAL;
				1624
				1625	put_task_struct(p);
				1626
				1627	return count;
				1628	}
				1629
				1630	static int comm_show(struct seq_file m, void v)
				1631	{
				1632	struct inode *inode = m->private;
				1633	struct task_struct *p;
				1634
				1635	p = get_proc_task(inode);
				1636	if (!p)
				1637	return -ESRCH;
				1638
				1639	proc_task_name(m, p, false);
				1640	seq_putc(m, '\n');
				1641
				1642	put_task_struct(p);
				1643
				1644	return 0;
				1645	}
				1646
				1647	static int comm_open(struct inode inode, struct file filp)
				1648	{
				1649	return single_open(filp, comm_show, inode);
				1650	}
				1651
				1652	static const struct file_operations proc_pid_set_comm_operations = {
				1653	.open = comm_open,
				1654	.read = seq_read,
				1655	.write = comm_write,
				1656	.llseek = seq_lseek,
				1657	.release = single_release,
				1658	};
				1659
				1660	static int proc_exe_link(struct dentry dentry, struct path exe_path)
				1661	{
				1662	struct task_struct *task;
				1663	struct file *exe_file;
				1664
				1665	task = get_proc_task(d_inode(dentry));
				1666	if (!task)
				1667	return -ENOENT;
				1668	exe_file = get_task_exe_file(task);
				1669	put_task_struct(task);
				1670	if (exe_file) {
				1671	*exe_path = exe_file->f_path;
				1672	path_get(&exe_file->f_path);
				1673	fput(exe_file);
				1674	return 0;
				1675	} else
				1676	return -ENOENT;
				1677	}
				1678
				1679	static const char proc_pid_get_link(struct dentry dentry,
				1680	struct inode *inode,
				1681	struct delayed_call *done)
				1682	{
				1683	struct path path;
				1684	int error = -EACCES;
				1685
				1686	if (!dentry)
				1687	return ERR_PTR(-ECHILD);
				1688
				1689	/* Are we allowed to snoop on the tasks file descriptors? */
				1690	if (!proc_fd_access_allowed(inode))
				1691	goto out;
				1692
				1693	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
				1694	if (error)
				1695	goto out;
				1696
				1697	nd_jump_link(&path);
				1698	return NULL;
				1699	out:
				1700	return ERR_PTR(error);
				1701	}
				1702
				1703	static int do_proc_readlink(struct path path, char __user buffer, int buflen)
				1704	{
				1705	char tmp = (char )__get_free_page(GFP_KERNEL);
				1706	char *pathname;
				1707	int len;
				1708
				1709	if (!tmp)
				1710	return -ENOMEM;
				1711
				1712	pathname = d_path(path, tmp, PAGE_SIZE);
				1713	len = PTR_ERR(pathname);
				1714	if (IS_ERR(pathname))
				1715	goto out;
				1716	len = tmp + PAGE_SIZE - 1 - pathname;
				1717
				1718	if (len > buflen)
				1719	len = buflen;
				1720	if (copy_to_user(buffer, pathname, len))
				1721	len = -EFAULT;
				1722	out:
				1723	free_page((unsigned long)tmp);
				1724	return len;
				1725	}
				1726
				1727	static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
				1728	{
				1729	int error = -EACCES;
				1730	struct inode *inode = d_inode(dentry);
				1731	struct path path;
				1732
				1733	/* Are we allowed to snoop on the tasks file descriptors? */
				1734	if (!proc_fd_access_allowed(inode))
				1735	goto out;
				1736
				1737	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
				1738	if (error)
				1739	goto out;
				1740
				1741	error = do_proc_readlink(&path, buffer, buflen);
				1742	path_put(&path);
				1743	out:
				1744	return error;
				1745	}
				1746
				1747	const struct inode_operations proc_pid_link_inode_operations = {
				1748	.readlink = proc_pid_readlink,
				1749	.get_link = proc_pid_get_link,
				1750	.setattr = proc_setattr,
				1751	};
				1752
				1753
				1754	/* building an inode */
				1755
				1756	void task_dump_owner(struct task_struct *task, umode_t mode,
				1757	kuid_t ruid, kgid_t rgid)
				1758	{
				1759	/* Depending on the state of dumpable compute who should own a
				1760	* proc file for a task.
				1761	*/
				1762	const struct cred *cred;
				1763	kuid_t uid;
				1764	kgid_t gid;
				1765
				1766	if (unlikely(task->flags & PF_KTHREAD)) {
				1767	*ruid = GLOBAL_ROOT_UID;
				1768	*rgid = GLOBAL_ROOT_GID;
				1769	return;
				1770	}
				1771
				1772	/* Default to the tasks effective ownership */
				1773	rcu_read_lock();
				1774	cred = __task_cred(task);
				1775	uid = cred->euid;
				1776	gid = cred->egid;
				1777	rcu_read_unlock();
				1778
				1779	/*
				1780	* Before the /proc/pid/status file was created the only way to read
				1781	* the effective uid of a /process was to stat /proc/pid. Reading
				1782	* /proc/pid/status is slow enough that procps and other packages
				1783	* kept stating /proc/pid. To keep the rules in /proc simple I have
				1784	* made this apply to all per process world readable and executable
				1785	* directories.
				1786	*/
				1787	if (mode != (S_IFDIR\|S_IRUGO\|S_IXUGO)) {
				1788	struct mm_struct *mm;
				1789	task_lock(task);
				1790	mm = task->mm;
				1791	/* Make non-dumpable tasks owned by some root */
				1792	if (mm) {
				1793	if (get_dumpable(mm) != SUID_DUMP_USER) {
				1794	struct user_namespace *user_ns = mm->user_ns;
				1795
				1796	uid = make_kuid(user_ns, 0);
				1797	if (!uid_valid(uid))
				1798	uid = GLOBAL_ROOT_UID;
				1799
				1800	gid = make_kgid(user_ns, 0);
				1801	if (!gid_valid(gid))
				1802	gid = GLOBAL_ROOT_GID;
				1803	}
				1804	} else {
				1805	uid = GLOBAL_ROOT_UID;
				1806	gid = GLOBAL_ROOT_GID;
				1807	}
				1808	task_unlock(task);
				1809	}
				1810	*ruid = uid;
				1811	*rgid = gid;
				1812	}
				1813
				1814	struct inode proc_pid_make_inode(struct super_block sb,
				1815	struct task_struct *task, umode_t mode)
				1816	{
				1817	struct inode * inode;
				1818	struct proc_inode *ei;
				1819
				1820	/* We need a new inode */
				1821
				1822	inode = new_inode(sb);
				1823	if (!inode)
				1824	goto out;
				1825
				1826	/* Common stuff */
				1827	ei = PROC_I(inode);
				1828	inode->i_mode = mode;
				1829	inode->i_ino = get_next_ino();
				1830	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
				1831	inode->i_op = &proc_def_inode_operations;
				1832
				1833	/*
				1834	* grab the reference to task.
				1835	*/
				1836	ei->pid = get_task_pid(task, PIDTYPE_PID);
				1837	if (!ei->pid)
				1838	goto out_unlock;
				1839
				1840	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
				1841	security_task_to_inode(task, inode);
				1842
				1843	out:
				1844	return inode;
				1845
				1846	out_unlock:
				1847	iput(inode);
				1848	return NULL;
				1849	}
				1850
				1851	int pid_getattr(const struct path path, struct kstat stat,
				1852	u32 request_mask, unsigned int query_flags)
				1853	{
				1854	struct inode *inode = d_inode(path->dentry);
				1855	struct pid_namespace *pid = proc_pid_ns(inode);
				1856	struct task_struct *task;
				1857
				1858	generic_fillattr(inode, stat);
				1859
				1860	stat->uid = GLOBAL_ROOT_UID;
				1861	stat->gid = GLOBAL_ROOT_GID;
				1862	rcu_read_lock();
				1863	task = pid_task(proc_pid(inode), PIDTYPE_PID);
				1864	if (task) {
				1865	if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
				1866	rcu_read_unlock();
				1867	/*
				1868	* This doesn't prevent learning whether PID exists,
				1869	* it only makes getattr() consistent with readdir().
				1870	*/
				1871	return -ENOENT;
				1872	}
				1873	task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
				1874	}
				1875	rcu_read_unlock();
				1876	return 0;
				1877	}
				1878
				1879	/* dentry stuff */
				1880
				1881	/*
				1882	* Set <pid>/... inode ownership (can change due to setuid(), etc.)
				1883	*/
				1884	void pid_update_inode(struct task_struct task, struct inode inode)
				1885	{
				1886	task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
				1887
				1888	inode->i_mode &= ~(S_ISUID \| S_ISGID);
				1889	security_task_to_inode(task, inode);
				1890	}
				1891
				1892	/*
				1893	* Rewrite the inode's ownerships here because the owning task may have
				1894	* performed a setuid(), etc.
				1895	*
				1896	*/
				1897	static int pid_revalidate(struct dentry *dentry, unsigned int flags)
				1898	{
				1899	struct inode *inode;
				1900	struct task_struct *task;
				1901
				1902	if (flags & LOOKUP_RCU)
				1903	return -ECHILD;
				1904
				1905	inode = d_inode(dentry);
				1906	task = get_proc_task(inode);
				1907
				1908	if (task) {
				1909	pid_update_inode(task, inode);
				1910	put_task_struct(task);
				1911	return 1;
				1912	}
				1913	return 0;
				1914	}
				1915
				1916	static inline bool proc_inode_is_dead(struct inode *inode)
				1917	{
				1918	return !proc_pid(inode)->tasks[PIDTYPE_PID].first;
				1919	}
				1920
				1921	int pid_delete_dentry(const struct dentry *dentry)
				1922	{
				1923	/* Is the task we represent dead?
				1924	* If so, then don't put the dentry on the lru list,
				1925	* kill it immediately.
				1926	*/
				1927	return proc_inode_is_dead(d_inode(dentry));
				1928	}
				1929
				1930	const struct dentry_operations pid_dentry_operations =
				1931	{
				1932	.d_revalidate = pid_revalidate,
				1933	.d_delete = pid_delete_dentry,
				1934	};
				1935
				1936	/* Lookups */
				1937
				1938	/*
				1939	* Fill a directory entry.
				1940	*
				1941	* If possible create the dcache entry and derive our inode number and
				1942	* file type from dcache entry.
				1943	*
				1944	* Since all of the proc inode numbers are dynamically generated, the inode
				1945	* numbers do not exist until the inode is cache. This means creating the
				1946	* the dcache entry in readdir is necessary to keep the inode numbers
				1947	* reported by readdir in sync with the inode numbers reported
				1948	* by stat.
				1949	*/
				1950	bool proc_fill_cache(struct file file, struct dir_context ctx,
				1951	const char *name, unsigned int len,
				1952	instantiate_t instantiate, struct task_struct task, const void ptr)
				1953	{
				1954	struct dentry child, dir = file->f_path.dentry;
				1955	struct qstr qname = QSTR_INIT(name, len);
				1956	struct inode *inode;
				1957	unsigned type = DT_UNKNOWN;
				1958	ino_t ino = 1;
				1959
				1960	child = d_hash_and_lookup(dir, &qname);
				1961	if (!child) {
				1962	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
				1963	child = d_alloc_parallel(dir, &qname, &wq);
				1964	if (IS_ERR(child))
				1965	goto end_instantiate;
				1966	if (d_in_lookup(child)) {
				1967	struct dentry *res;
				1968	res = instantiate(child, task, ptr);
				1969	d_lookup_done(child);
				1970	if (unlikely(res)) {
				1971	dput(child);
				1972	child = res;
				1973	if (IS_ERR(child))
				1974	goto end_instantiate;
				1975	}
				1976	}
				1977	}
				1978	inode = d_inode(child);
				1979	ino = inode->i_ino;
				1980	type = inode->i_mode >> 12;
				1981	dput(child);
				1982	end_instantiate:
				1983	return dir_emit(ctx, name, len, ino, type);
				1984	}
				1985
				1986	/*
				1987	* dname_to_vma_addr - maps a dentry name into two unsigned longs
				1988	* which represent vma start and end addresses.
				1989	*/
				1990	static int dname_to_vma_addr(struct dentry *dentry,
				1991	unsigned long start, unsigned long end)
				1992	{
				1993	const char *str = dentry->d_name.name;
				1994	unsigned long long sval, eval;
				1995	unsigned int len;
				1996
				1997	if (str[0] == '0' && str[1] != '-')
				1998	return -EINVAL;
				1999	len = _parse_integer(str, 16, &sval);
				2000	if (len & KSTRTOX_OVERFLOW)
				2001	return -EINVAL;
				2002	if (sval != (unsigned long)sval)
				2003	return -EINVAL;
				2004	str += len;
				2005
				2006	if (*str != '-')
				2007	return -EINVAL;
				2008	str++;
				2009
				2010	if (str[0] == '0' && str[1])
				2011	return -EINVAL;
				2012	len = _parse_integer(str, 16, &eval);
				2013	if (len & KSTRTOX_OVERFLOW)
				2014	return -EINVAL;
				2015	if (eval != (unsigned long)eval)
				2016	return -EINVAL;
				2017	str += len;
				2018
				2019	if (*str != '\0')
				2020	return -EINVAL;
				2021
				2022	*start = sval;
				2023	*end = eval;
				2024
				2025	return 0;
				2026	}
				2027
				2028	static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
				2029	{
				2030	unsigned long vm_start, vm_end;
				2031	bool exact_vma_exists = false;
				2032	struct mm_struct *mm = NULL;
				2033	struct task_struct *task;
				2034	struct inode *inode;
				2035	int status = 0;
				2036
				2037	if (flags & LOOKUP_RCU)
				2038	return -ECHILD;
				2039
				2040	inode = d_inode(dentry);
				2041	task = get_proc_task(inode);
				2042	if (!task)
				2043	goto out_notask;
				2044
				2045	mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
				2046	if (IS_ERR_OR_NULL(mm))
				2047	goto out;
				2048
				2049	if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
				2050	status = down_read_killable(&mm->mmap_sem);
				2051	if (!status) {
				2052	exact_vma_exists = !!find_exact_vma(mm, vm_start,
				2053	vm_end);
				2054	up_read(&mm->mmap_sem);
				2055	}
				2056	}
				2057
				2058	mmput(mm);
				2059
				2060	if (exact_vma_exists) {
				2061	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
				2062
				2063	security_task_to_inode(task, inode);
				2064	status = 1;
				2065	}
				2066
				2067	out:
				2068	put_task_struct(task);
				2069
				2070	out_notask:
				2071	return status;
				2072	}
				2073
				2074	static const struct dentry_operations tid_map_files_dentry_operations = {
				2075	.d_revalidate = map_files_d_revalidate,
				2076	.d_delete = pid_delete_dentry,
				2077	};
				2078
				2079	static int map_files_get_link(struct dentry dentry, struct path path)
				2080	{
				2081	unsigned long vm_start, vm_end;
				2082	struct vm_area_struct *vma;
				2083	struct task_struct *task;
				2084	struct mm_struct *mm;
				2085	int rc;
				2086
				2087	rc = -ENOENT;
				2088	task = get_proc_task(d_inode(dentry));
				2089	if (!task)
				2090	goto out;
				2091
				2092	mm = get_task_mm(task);
				2093	put_task_struct(task);
				2094	if (!mm)
				2095	goto out;
				2096
				2097	rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
				2098	if (rc)
				2099	goto out_mmput;
				2100
				2101	rc = down_read_killable(&mm->mmap_sem);
				2102	if (rc)
				2103	goto out_mmput;
				2104
				2105	rc = -ENOENT;
				2106	vma = find_exact_vma(mm, vm_start, vm_end);
				2107	if (vma && vma->vm_file) {
				2108	*path = vma->vm_file->f_path;
				2109	path_get(path);
				2110	rc = 0;
				2111	}
				2112	up_read(&mm->mmap_sem);
				2113
				2114	out_mmput:
				2115	mmput(mm);
				2116	out:
				2117	return rc;
				2118	}
				2119
				2120	struct map_files_info {
				2121	unsigned long start;
				2122	unsigned long end;
				2123	fmode_t mode;
				2124	};
				2125
				2126	/*
				2127	* Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the
				2128	* symlinks may be used to bypass permissions on ancestor directories in the
				2129	* path to the file in question.
				2130	*/
				2131	static const char *
				2132	proc_map_files_get_link(struct dentry *dentry,
				2133	struct inode *inode,
				2134	struct delayed_call *done)
				2135	{
				2136	if (!capable(CAP_SYS_ADMIN))
				2137	return ERR_PTR(-EPERM);
				2138
				2139	return proc_pid_get_link(dentry, inode, done);
				2140	}
				2141
				2142	/*
				2143	* Identical to proc_pid_link_inode_operations except for get_link()
				2144	*/
				2145	static const struct inode_operations proc_map_files_link_inode_operations = {
				2146	.readlink = proc_pid_readlink,
				2147	.get_link = proc_map_files_get_link,
				2148	.setattr = proc_setattr,
				2149	};
				2150
				2151	static struct dentry *
				2152	proc_map_files_instantiate(struct dentry *dentry,
				2153	struct task_struct task, const void ptr)
				2154	{
				2155	fmode_t mode = (fmode_t)(unsigned long)ptr;
				2156	struct proc_inode *ei;
				2157	struct inode *inode;
				2158
				2159	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK \|
				2160	((mode & FMODE_READ ) ? S_IRUSR : 0) \|
				2161	((mode & FMODE_WRITE) ? S_IWUSR : 0));
				2162	if (!inode)
				2163	return ERR_PTR(-ENOENT);
				2164
				2165	ei = PROC_I(inode);
				2166	ei->op.proc_get_link = map_files_get_link;
				2167
				2168	inode->i_op = &proc_map_files_link_inode_operations;
				2169	inode->i_size = 64;
				2170
				2171	d_set_d_op(dentry, &tid_map_files_dentry_operations);
				2172	return d_splice_alias(inode, dentry);
				2173	}
				2174
				2175	static struct dentry proc_map_files_lookup(struct inode dir,
				2176	struct dentry *dentry, unsigned int flags)
				2177	{
				2178	unsigned long vm_start, vm_end;
				2179	struct vm_area_struct *vma;
				2180	struct task_struct *task;
				2181	struct dentry *result;
				2182	struct mm_struct *mm;
				2183
				2184	result = ERR_PTR(-ENOENT);
				2185	task = get_proc_task(dir);
				2186	if (!task)
				2187	goto out;
				2188
				2189	result = ERR_PTR(-EACCES);
				2190	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
				2191	goto out_put_task;
				2192
				2193	result = ERR_PTR(-ENOENT);
				2194	if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
				2195	goto out_put_task;
				2196
				2197	mm = get_task_mm(task);
				2198	if (!mm)
				2199	goto out_put_task;
				2200
				2201	result = ERR_PTR(-EINTR);
				2202	if (down_read_killable(&mm->mmap_sem))
				2203	goto out_put_mm;
				2204
				2205	result = ERR_PTR(-ENOENT);
				2206	vma = find_exact_vma(mm, vm_start, vm_end);
				2207	if (!vma)
				2208	goto out_no_vma;
				2209
				2210	if (vma->vm_file)
				2211	result = proc_map_files_instantiate(dentry, task,
				2212	(void *)(unsigned long)vma->vm_file->f_mode);
				2213
				2214	out_no_vma:
				2215	up_read(&mm->mmap_sem);
				2216	out_put_mm:
				2217	mmput(mm);
				2218	out_put_task:
				2219	put_task_struct(task);
				2220	out:
				2221	return result;
				2222	}
				2223
				2224	static const struct inode_operations proc_map_files_inode_operations = {
				2225	.lookup = proc_map_files_lookup,
				2226	.permission = proc_fd_permission,
				2227	.setattr = proc_setattr,
				2228	};
				2229
				2230	static int
				2231	proc_map_files_readdir(struct file file, struct dir_context ctx)
				2232	{
				2233	struct vm_area_struct *vma;
				2234	struct task_struct *task;
				2235	struct mm_struct *mm;
				2236	unsigned long nr_files, pos, i;
				2237	GENRADIX(struct map_files_info) fa;
				2238	struct map_files_info *p;
				2239	int ret;
				2240
				2241	genradix_init(&fa);
				2242
				2243	ret = -ENOENT;
				2244	task = get_proc_task(file_inode(file));
				2245	if (!task)
				2246	goto out;
				2247
				2248	ret = -EACCES;
				2249	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
				2250	goto out_put_task;
				2251
				2252	ret = 0;
				2253	if (!dir_emit_dots(file, ctx))
				2254	goto out_put_task;
				2255
				2256	mm = get_task_mm(task);
				2257	if (!mm)
				2258	goto out_put_task;
				2259
				2260	ret = down_read_killable(&mm->mmap_sem);
				2261	if (ret) {
				2262	mmput(mm);
				2263	goto out_put_task;
				2264	}
				2265
				2266	nr_files = 0;
				2267
				2268	/*
				2269	* We need two passes here:
				2270	*
				2271	* 1) Collect vmas of mapped files with mmap_sem taken
				2272	* 2) Release mmap_sem and instantiate entries
				2273	*
				2274	* otherwise we get lockdep complained, since filldir()
				2275	* routine might require mmap_sem taken in might_fault().
				2276	*/
				2277
				2278	for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
				2279	if (!vma->vm_file)
				2280	continue;
				2281	if (++pos <= ctx->pos)
				2282	continue;
				2283
				2284	p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL);
				2285	if (!p) {
				2286	ret = -ENOMEM;
				2287	up_read(&mm->mmap_sem);
				2288	mmput(mm);
				2289	goto out_put_task;
				2290	}
				2291
				2292	p->start = vma->vm_start;
				2293	p->end = vma->vm_end;
				2294	p->mode = vma->vm_file->f_mode;
				2295	}
				2296	up_read(&mm->mmap_sem);
				2297	mmput(mm);
				2298
				2299	for (i = 0; i < nr_files; i++) {
				2300	char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */
				2301	unsigned int len;
				2302
				2303	p = genradix_ptr(&fa, i);
				2304	len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end);
				2305	if (!proc_fill_cache(file, ctx,
				2306	buf, len,
				2307	proc_map_files_instantiate,
				2308	task,
				2309	(void *)(unsigned long)p->mode))
				2310	break;
				2311	ctx->pos++;
				2312	}
				2313
				2314	out_put_task:
				2315	put_task_struct(task);
				2316	out:
				2317	genradix_free(&fa);
				2318	return ret;
				2319	}
				2320
				2321	static const struct file_operations proc_map_files_operations = {
				2322	.read = generic_read_dir,
				2323	.iterate_shared = proc_map_files_readdir,
				2324	.llseek = generic_file_llseek,
				2325	};
				2326
				2327	#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
				/* Per-open state of the "timers" seq_file. */
				struct timers_private {
					struct pid *pid;		/* set at open from proc_pid(inode) */
					struct task_struct *task;	/* pinned in ->start, dropped in ->stop */
					struct sighand_struct *sighand;	/* locked in ->start, unlocked in ->stop */
					struct pid_namespace *ns;	/* set at open from proc_pid_ns(inode) */
					unsigned long flags;		/* flags word for (un)lock_task_sighand() */
				};
				2335
				2336	static void timers_start(struct seq_file m, loff_t *pos)
				2337	{
				2338	struct timers_private *tp = m->private;
				2339
				2340	tp->task = get_pid_task(tp->pid, PIDTYPE_PID);
				2341	if (!tp->task)
				2342	return ERR_PTR(-ESRCH);
				2343
				2344	tp->sighand = lock_task_sighand(tp->task, &tp->flags);
				2345	if (!tp->sighand)
				2346	return ERR_PTR(-ESRCH);
				2347
				2348	return seq_list_start(&tp->task->signal->posix_timers, *pos);
				2349	}
				2350
				2351	static void timers_next(struct seq_file m, void v, loff_t pos)
				2352	{
				2353	struct timers_private *tp = m->private;
				2354	return seq_list_next(v, &tp->task->signal->posix_timers, pos);
				2355	}
				2356
				2357	static void timers_stop(struct seq_file m, void v)
				2358	{
				2359	struct timers_private *tp = m->private;
				2360
				2361	if (tp->sighand) {
				2362	unlock_task_sighand(tp->task, &tp->flags);
				2363	tp->sighand = NULL;
				2364	}
				2365
				2366	if (tp->task) {
				2367	put_task_struct(tp->task);
				2368	tp->task = NULL;
				2369	}
				2370	}
				2371
				2372	static int show_timer(struct seq_file m, void v)
				2373	{
				2374	struct k_itimer *timer;
				2375	struct timers_private *tp = m->private;
				2376	int notify;
				2377	static const char * const nstr[] = {
				2378	[SIGEV_SIGNAL] = "signal",
				2379	[SIGEV_NONE] = "none",
				2380	[SIGEV_THREAD] = "thread",
				2381	};
				2382
				2383	timer = list_entry((struct list_head *)v, struct k_itimer, list);
				2384	notify = timer->it_sigev_notify;
				2385
				2386	seq_printf(m, "ID: %d\n", timer->it_id);
				2387	seq_printf(m, "signal: %d/%px\n",
				2388	timer->sigq->info.si_signo,
				2389	timer->sigq->info.si_value.sival_ptr);
				2390	seq_printf(m, "notify: %s/%s.%d\n",
				2391	nstr[notify & ~SIGEV_THREAD_ID],
				2392	(notify & SIGEV_THREAD_ID) ? "tid" : "pid",
				2393	pid_nr_ns(timer->it_pid, tp->ns));
				2394	seq_printf(m, "ClockID: %d\n", timer->it_clock);
				2395
				2396	return 0;
				2397	}
				2398
				2399	static const struct seq_operations proc_timers_seq_ops = {
				2400	.start = timers_start,
				2401	.next = timers_next,
				2402	.stop = timers_stop,
				2403	.show = show_timer,
				2404	};
				2405
				2406	static int proc_timers_open(struct inode inode, struct file file)
				2407	{
				2408	struct timers_private *tp;
				2409
				2410	tp = __seq_open_private(file, &proc_timers_seq_ops,
				2411	sizeof(struct timers_private));
				2412	if (!tp)
				2413	return -ENOMEM;
				2414
				2415	tp->pid = proc_pid(inode);
				2416	tp->ns = proc_pid_ns(inode);
				2417	return 0;
				2418	}
				2419
				2420	static const struct file_operations proc_timers_operations = {
				2421	.open = proc_timers_open,
				2422	.read = seq_read,
				2423	.llseek = seq_lseek,
				2424	.release = seq_release_private,
				2425	};
				2426	#endif
				2427
				2428	static ssize_t timerslack_ns_write(struct file file, const char __user buf,
				2429	size_t count, loff_t *offset)
				2430	{
				2431	struct inode *inode = file_inode(file);
				2432	struct task_struct *p;
				2433	u64 slack_ns;
				2434	int err;
				2435
				2436	err = kstrtoull_from_user(buf, count, 10, &slack_ns);
				2437	if (err < 0)
				2438	return err;
				2439
				2440	p = get_proc_task(inode);
				2441	if (!p)
				2442	return -ESRCH;
				2443
				2444	if (p != current) {
				2445	rcu_read_lock();
				2446	if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
				2447	rcu_read_unlock();
				2448	count = -EPERM;
				2449	goto out;
				2450	}
				2451	rcu_read_unlock();
				2452
				2453	err = security_task_setscheduler(p);
				2454	if (err) {
				2455	count = err;
				2456	goto out;
				2457	}
				2458	}
				2459
				2460	task_lock(p);
				2461	if (slack_ns == 0)
				2462	p->timer_slack_ns = p->default_timer_slack_ns;
				2463	else
				2464	p->timer_slack_ns = slack_ns;
				2465	task_unlock(p);
				2466
				2467	out:
				2468	put_task_struct(p);
				2469
				2470	return count;
				2471	}
				2472
				2473	static int timerslack_ns_show(struct seq_file m, void v)
				2474	{
				2475	struct inode *inode = m->private;
				2476	struct task_struct *p;
				2477	int err = 0;
				2478
				2479	p = get_proc_task(inode);
				2480	if (!p)
				2481	return -ESRCH;
				2482
				2483	if (p != current) {
				2484	rcu_read_lock();
				2485	if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
				2486	rcu_read_unlock();
				2487	err = -EPERM;
				2488	goto out;
				2489	}
				2490	rcu_read_unlock();
				2491
				2492	err = security_task_getscheduler(p);
				2493	if (err)
				2494	goto out;
				2495	}
				2496
				2497	task_lock(p);
				2498	seq_printf(m, "%llu\n", p->timer_slack_ns);
				2499	task_unlock(p);
				2500
				2501	out:
				2502	put_task_struct(p);
				2503
				2504	return err;
				2505	}
				2506
				2507	static int timerslack_ns_open(struct inode inode, struct file filp)
				2508	{
				2509	return single_open(filp, timerslack_ns_show, inode);
				2510	}
				2511
				2512	static const struct file_operations proc_pid_set_timerslack_ns_operations = {
				2513	.open = timerslack_ns_open,
				2514	.read = seq_read,
				2515	.write = timerslack_ns_write,
				2516	.llseek = seq_lseek,
				2517	.release = single_release,
				2518	};
				2519
				2520	static struct dentry proc_pident_instantiate(struct dentry dentry,
				2521	struct task_struct task, const void ptr)
				2522	{
				2523	const struct pid_entry *p = ptr;
				2524	struct inode *inode;
				2525	struct proc_inode *ei;
				2526
				2527	inode = proc_pid_make_inode(dentry->d_sb, task, p->mode);
				2528	if (!inode)
				2529	return ERR_PTR(-ENOENT);
				2530
				2531	ei = PROC_I(inode);
				2532	if (S_ISDIR(inode->i_mode))
				2533	set_nlink(inode, 2); /* Use getattr to fix if necessary */
				2534	if (p->iop)
				2535	inode->i_op = p->iop;
				2536	if (p->fop)
				2537	inode->i_fop = p->fop;
				2538	ei->op = p->op;
				2539	pid_update_inode(task, inode);
				2540	d_set_d_op(dentry, &pid_dentry_operations);
				2541	return d_splice_alias(inode, dentry);
				2542	}
				2543
				2544	static struct dentry proc_pident_lookup(struct inode dir,
				2545	struct dentry *dentry,
				2546	const struct pid_entry *p,
				2547	const struct pid_entry *end)
				2548	{
				2549	struct task_struct *task = get_proc_task(dir);
				2550	struct dentry *res = ERR_PTR(-ENOENT);
				2551
				2552	if (!task)
				2553	goto out_no_task;
				2554
				2555	/*
				2556	* Yes, it does not scale. And it should not. Don't add
				2557	* new entries into /proc/<tgid>/ without very good reasons.
				2558	*/
				2559	for (; p < end; p++) {
				2560	if (p->len != dentry->d_name.len)
				2561	continue;
				2562	if (!memcmp(dentry->d_name.name, p->name, p->len)) {
				2563	res = proc_pident_instantiate(dentry, task, p);
				2564	break;
				2565	}
				2566	}
				2567	put_task_struct(task);
				2568	out_no_task:
				2569	return res;
				2570	}
				2571
				2572	static int proc_pident_readdir(struct file file, struct dir_context ctx,
				2573	const struct pid_entry *ents, unsigned int nents)
				2574	{
				2575	struct task_struct *task = get_proc_task(file_inode(file));
				2576	const struct pid_entry *p;
				2577
				2578	if (!task)
				2579	return -ENOENT;
				2580
				2581	if (!dir_emit_dots(file, ctx))
				2582	goto out;
				2583
				2584	if (ctx->pos >= nents + 2)
				2585	goto out;
				2586
				2587	for (p = ents + (ctx->pos - 2); p < ents + nents; p++) {
				2588	if (!proc_fill_cache(file, ctx, p->name, p->len,
				2589	proc_pident_instantiate, task, p))
				2590	break;
				2591	ctx->pos++;
				2592	}
				2593	out:
				2594	put_task_struct(task);
				2595	return 0;
				2596	}
				2597
				2598	#ifdef CONFIG_SECURITY
				2599	static int proc_pid_attr_open(struct inode inode, struct file file)
				2600	{
				2601	file->private_data = NULL;
				2602	__mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
				2603	return 0;
				2604	}
				2605
				2606	static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
				2607	size_t count, loff_t *ppos)
				2608	{
				2609	struct inode * inode = file_inode(file);
				2610	char *p = NULL;
				2611	ssize_t length;
				2612	struct task_struct *task = get_proc_task(inode);
				2613
				2614	if (!task)
				2615	return -ESRCH;
				2616
				2617	length = security_getprocattr(task, PROC_I(inode)->op.lsm,
				2618	(char*)file->f_path.dentry->d_name.name,
				2619	&p);
				2620	put_task_struct(task);
				2621	if (length > 0)
				2622	length = simple_read_from_buffer(buf, count, ppos, p, length);
				2623	kfree(p);
				2624	return length;
				2625	}
				2626
				2627	static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
				2628	size_t count, loff_t *ppos)
				2629	{
				2630	struct inode * inode = file_inode(file);
				2631	struct task_struct *task;
				2632	void *page;
				2633	int rv;
				2634
				2635	/* A task may only write when it was the opener. */
				2636	if (file->private_data != current->mm)
				2637	return -EPERM;
				2638
				2639	rcu_read_lock();
				2640	task = pid_task(proc_pid(inode), PIDTYPE_PID);
				2641	if (!task) {
				2642	rcu_read_unlock();
				2643	return -ESRCH;
				2644	}
				2645	/* A task may only write its own attributes. */
				2646	if (current != task) {
				2647	rcu_read_unlock();
				2648	return -EACCES;
				2649	}
				2650	/* Prevent changes to overridden credentials. */
				2651	if (current_cred() != current_real_cred()) {
				2652	rcu_read_unlock();
				2653	return -EBUSY;
				2654	}
				2655	rcu_read_unlock();
				2656
				2657	if (count > PAGE_SIZE)
				2658	count = PAGE_SIZE;
				2659
				2660	/* No partial writes. */
				2661	if (*ppos != 0)
				2662	return -EINVAL;
				2663
				2664	page = memdup_user(buf, count);
				2665	if (IS_ERR(page)) {
				2666	rv = PTR_ERR(page);
				2667	goto out;
				2668	}
				2669
				2670	/* Guard against adverse ptrace interaction */
				2671	rv = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
				2672	if (rv < 0)
				2673	goto out_free;
				2674
				2675	rv = security_setprocattr(PROC_I(inode)->op.lsm,
				2676	file->f_path.dentry->d_name.name, page,
				2677	count);
				2678	mutex_unlock(&current->signal->cred_guard_mutex);
				2679	out_free:
				2680	kfree(page);
				2681	out:
				2682	return rv;
				2683	}
				2684
				2685	static const struct file_operations proc_pid_attr_operations = {
				2686	.open = proc_pid_attr_open,
				2687	.read = proc_pid_attr_read,
				2688	.write = proc_pid_attr_write,
				2689	.llseek = generic_file_llseek,
				2690	.release = mem_release,
				2691	};
				2692
				/*
				 * LSM_DIR_OPS(LSM) - generate the directory plumbing for an LSM-specific
				 * attr subdirectory backed by the table LSM##_attr_dir_stuff[]:
				 *
				 *   proc_<LSM>_attr_dir_iterate()    readdir over the table
				 *   proc_<LSM>_attr_dir_ops          file_operations using it
				 *   proc_<LSM>_attr_dir_lookup()     lookup over the table
				 *   proc_<LSM>_attr_dir_inode_ops    inode_operations using it
				 *
				 * The expansion ends without a semicolon; the invocation supplies it.
				 */
				#define LSM_DIR_OPS(LSM) \
				static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
								     struct dir_context *ctx) \
				{ \
					return proc_pident_readdir(filp, ctx, \
								   LSM##_attr_dir_stuff, \
								   ARRAY_SIZE(LSM##_attr_dir_stuff)); \
				} \
				\
				static const struct file_operations proc_##LSM##_attr_dir_ops = { \
					.read		= generic_read_dir, \
					.iterate	= proc_##LSM##_attr_dir_iterate, \
					.llseek		= default_llseek, \
				}; \
				\
				static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \
							struct dentry *dentry, unsigned int flags) \
				{ \
					return proc_pident_lookup(dir, dentry, \
								  LSM##_attr_dir_stuff, \
						LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \
				} \
				\
				static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \
					.lookup		= proc_##LSM##_attr_dir_lookup, \
					.getattr	= pid_getattr, \
					.setattr	= proc_setattr, \
				}
				2721
				2722	#ifdef CONFIG_SECURITY_SMACK
				2723	static const struct pid_entry smack_attr_dir_stuff[] = {
				2724	ATTR("smack", "current", 0666),
				2725	};
				2726	LSM_DIR_OPS(smack);
				2727	#endif
				2728
				2729	static const struct pid_entry attr_dir_stuff[] = {
				2730	ATTR(NULL, "current", 0666),
				2731	ATTR(NULL, "prev", 0444),
				2732	ATTR(NULL, "exec", 0666),
				2733	ATTR(NULL, "fscreate", 0666),
				2734	ATTR(NULL, "keycreate", 0666),
				2735	ATTR(NULL, "sockcreate", 0666),
				2736	#ifdef CONFIG_SECURITY_SMACK
				2737	DIR("smack", 0555,
				2738	proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops),
				2739	#endif
				2740	};
				2741
				2742	static int proc_attr_dir_readdir(struct file file, struct dir_context ctx)
				2743	{
				2744	return proc_pident_readdir(file, ctx,
				2745	attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
				2746	}
				2747
				2748	static const struct file_operations proc_attr_dir_operations = {
				2749	.read = generic_read_dir,
				2750	.iterate_shared = proc_attr_dir_readdir,
				2751	.llseek = generic_file_llseek,
				2752	};
				2753
				2754	static struct dentry proc_attr_dir_lookup(struct inode dir,
				2755	struct dentry *dentry, unsigned int flags)
				2756	{
				2757	return proc_pident_lookup(dir, dentry,
				2758	attr_dir_stuff,
				2759	attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff));
				2760	}
				2761
				2762	static const struct inode_operations proc_attr_dir_inode_operations = {
				2763	.lookup = proc_attr_dir_lookup,
				2764	.getattr = pid_getattr,
				2765	.setattr = proc_setattr,
				2766	};
				2767
				2768	#endif
				2769
				2770	#ifdef CONFIG_ELF_CORE
				2771	static ssize_t proc_coredump_filter_read(struct file file, char __user buf,
				2772	size_t count, loff_t *ppos)
				2773	{
				2774	struct task_struct *task = get_proc_task(file_inode(file));
				2775	struct mm_struct *mm;
				2776	char buffer[PROC_NUMBUF];
				2777	size_t len;
				2778	int ret;
				2779
				2780	if (!task)
				2781	return -ESRCH;
				2782
				2783	ret = 0;
				2784	mm = get_task_mm(task);
				2785	if (mm) {
				2786	len = snprintf(buffer, sizeof(buffer), "%08lx\n",
				2787	((mm->flags & MMF_DUMP_FILTER_MASK) >>
				2788	MMF_DUMP_FILTER_SHIFT));
				2789	mmput(mm);
				2790	ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
				2791	}
				2792
				2793	put_task_struct(task);
				2794
				2795	return ret;
				2796	}
				2797
				2798	static ssize_t proc_coredump_filter_write(struct file *file,
				2799	const char __user *buf,
				2800	size_t count,
				2801	loff_t *ppos)
				2802	{
				2803	struct task_struct *task;
				2804	struct mm_struct *mm;
				2805	unsigned int val;
				2806	int ret;
				2807	int i;
				2808	unsigned long mask;
				2809
				2810	ret = kstrtouint_from_user(buf, count, 0, &val);
				2811	if (ret < 0)
				2812	return ret;
				2813
				2814	ret = -ESRCH;
				2815	task = get_proc_task(file_inode(file));
				2816	if (!task)
				2817	goto out_no_task;
				2818
				2819	mm = get_task_mm(task);
				2820	if (!mm)
				2821	goto out_no_mm;
				2822	ret = 0;
				2823
				2824	for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
				2825	if (val & mask)
				2826	set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
				2827	else
				2828	clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
				2829	}
				2830
				2831	mmput(mm);
				2832	out_no_mm:
				2833	put_task_struct(task);
				2834	out_no_task:
				2835	if (ret < 0)
				2836	return ret;
				2837	return count;
				2838	}
				2839
				2840	static const struct file_operations proc_coredump_filter_operations = {
				2841	.read = proc_coredump_filter_read,
				2842	.write = proc_coredump_filter_write,
				2843	.llseek = generic_file_llseek,
				2844	};
				2845	#endif
				2846
				2847	#ifdef CONFIG_TASK_IO_ACCOUNTING
				2848	static int do_io_accounting(struct task_struct task, struct seq_file m, int whole)
				2849	{
				2850	struct task_io_accounting acct = task->ioac;
				2851	unsigned long flags;
				2852	int result;
				2853
				2854	result = down_read_killable(&task->signal->exec_update_lock);
				2855	if (result)
				2856	return result;
				2857
				2858	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
				2859	result = -EACCES;
				2860	goto out_unlock;
				2861	}
				2862
				2863	if (whole && lock_task_sighand(task, &flags)) {
				2864	struct task_struct *t = task;
				2865
				2866	task_io_accounting_add(&acct, &task->signal->ioac);
				2867	while_each_thread(task, t)
				2868	task_io_accounting_add(&acct, &t->ioac);
				2869
				2870	unlock_task_sighand(task, &flags);
				2871	}
				2872	seq_printf(m,
				2873	"rchar: %llu\n"
				2874	"wchar: %llu\n"
				2875	"syscr: %llu\n"
				2876	"syscw: %llu\n"
				2877	"read_bytes: %llu\n"
				2878	"write_bytes: %llu\n"
				2879	"cancelled_write_bytes: %llu\n",
				2880	(unsigned long long)acct.rchar,
				2881	(unsigned long long)acct.wchar,
				2882	(unsigned long long)acct.syscr,
				2883	(unsigned long long)acct.syscw,
				2884	(unsigned long long)acct.read_bytes,
				2885	(unsigned long long)acct.write_bytes,
				2886	(unsigned long long)acct.cancelled_write_bytes);
				2887	result = 0;
				2888
				2889	out_unlock:
				2890	up_read(&task->signal->exec_update_lock);
				2891	return result;
				2892	}
				2893
				/* Show routine reporting the counters of the single thread only. */
				static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
								  struct pid *pid, struct task_struct *task)
				{
					return do_io_accounting(task, m, 0);
				}
				2899
				/* "io" show routine for a thread group: counters summed over the group. */
				static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
								   struct pid *pid, struct task_struct *task)
				{
					return do_io_accounting(task, m, 1);
				}
				2905	#endif /* CONFIG_TASK_IO_ACCOUNTING */
				2906
				2907	#ifdef CONFIG_USER_NS
				2908	static int proc_id_map_open(struct inode inode, struct file file,
				2909	const struct seq_operations *seq_ops)
				2910	{
				2911	struct user_namespace *ns = NULL;
				2912	struct task_struct *task;
				2913	struct seq_file *seq;
				2914	int ret = -EINVAL;
				2915
				2916	task = get_proc_task(inode);
				2917	if (task) {
				2918	rcu_read_lock();
				2919	ns = get_user_ns(task_cred_xxx(task, user_ns));
				2920	rcu_read_unlock();
				2921	put_task_struct(task);
				2922	}
				2923	if (!ns)
				2924	goto err;
				2925
				2926	ret = seq_open(file, seq_ops);
				2927	if (ret)
				2928	goto err_put_ns;
				2929
				2930	seq = file->private_data;
				2931	seq->private = ns;
				2932
				2933	return 0;
				2934	err_put_ns:
				2935	put_user_ns(ns);
				2936	err:
				2937	return ret;
				2938	}
				2939
				2940	static int proc_id_map_release(struct inode inode, struct file file)
				2941	{
				2942	struct seq_file *seq = file->private_data;
				2943	struct user_namespace *ns = seq->private;
				2944	put_user_ns(ns);
				2945	return seq_release(inode, file);
				2946	}
				2947
				2948	static int proc_uid_map_open(struct inode inode, struct file file)
				2949	{
				2950	return proc_id_map_open(inode, file, &proc_uid_seq_operations);
				2951	}
				2952
				2953	static int proc_gid_map_open(struct inode inode, struct file file)
				2954	{
				2955	return proc_id_map_open(inode, file, &proc_gid_seq_operations);
				2956	}
				2957
				2958	static int proc_projid_map_open(struct inode inode, struct file file)
				2959	{
				2960	return proc_id_map_open(inode, file, &proc_projid_seq_operations);
				2961	}
				2962
				2963	static const struct file_operations proc_uid_map_operations = {
				2964	.open = proc_uid_map_open,
				2965	.write = proc_uid_map_write,
				2966	.read = seq_read,
				2967	.llseek = seq_lseek,
				2968	.release = proc_id_map_release,
				2969	};
				2970
				2971	static const struct file_operations proc_gid_map_operations = {
				2972	.open = proc_gid_map_open,
				2973	.write = proc_gid_map_write,
				2974	.read = seq_read,
				2975	.llseek = seq_lseek,
				2976	.release = proc_id_map_release,
				2977	};
				2978
				2979	static const struct file_operations proc_projid_map_operations = {
				2980	.open = proc_projid_map_open,
				2981	.write = proc_projid_map_write,
				2982	.read = seq_read,
				2983	.llseek = seq_lseek,
				2984	.release = proc_id_map_release,
				2985	};
				2986
				2987	static int proc_setgroups_open(struct inode inode, struct file file)
				2988	{
				2989	struct user_namespace *ns = NULL;
				2990	struct task_struct *task;
				2991	int ret;
				2992
				2993	ret = -ESRCH;
				2994	task = get_proc_task(inode);
				2995	if (task) {
				2996	rcu_read_lock();
				2997	ns = get_user_ns(task_cred_xxx(task, user_ns));
				2998	rcu_read_unlock();
				2999	put_task_struct(task);
				3000	}
				3001	if (!ns)
				3002	goto err;
				3003
				3004	if (file->f_mode & FMODE_WRITE) {
				3005	ret = -EACCES;
				3006	if (!ns_capable(ns, CAP_SYS_ADMIN))
				3007	goto err_put_ns;
				3008	}
				3009
				3010	ret = single_open(file, &proc_setgroups_show, ns);
				3011	if (ret)
				3012	goto err_put_ns;
				3013
				3014	return 0;
				3015	err_put_ns:
				3016	put_user_ns(ns);
				3017	err:
				3018	return ret;
				3019	}
				3020
				3021	static int proc_setgroups_release(struct inode inode, struct file file)
				3022	{
				3023	struct seq_file *seq = file->private_data;
				3024	struct user_namespace *ns = seq->private;
				3025	int ret = single_release(inode, file);
				3026	put_user_ns(ns);
				3027	return ret;
				3028	}
				3029
				3030	static const struct file_operations proc_setgroups_operations = {
				3031	.open = proc_setgroups_open,
				3032	.write = proc_setgroups_write,
				3033	.read = seq_read,
				3034	.llseek = seq_lseek,
				3035	.release = proc_setgroups_release,
				3036	};
				3037	#endif /* CONFIG_USER_NS */
				3038
				3039	static int proc_pid_personality(struct seq_file m, struct pid_namespace ns,
				3040	struct pid pid, struct task_struct task)
				3041	{
				3042	int err = lock_trace(task);
				3043	if (!err) {
				3044	seq_printf(m, "%08x\n", task->personality);
				3045	unlock_trace(task);
				3046	}
				3047	return err;
				3048	}
				3049
				3050	#ifdef CONFIG_LIVEPATCH
				3051	static int proc_pid_patch_state(struct seq_file m, struct pid_namespace ns,
				3052	struct pid pid, struct task_struct task)
				3053	{
				3054	seq_printf(m, "%d\n", task->patch_state);
				3055	return 0;
				3056	}
				3057	#endif /* CONFIG_LIVEPATCH */
				3058
				3059	#ifdef CONFIG_STACKLEAK_METRICS
				3060	static int proc_stack_depth(struct seq_file m, struct pid_namespace ns,
				3061	struct pid pid, struct task_struct task)
				3062	{
				3063	unsigned long prev_depth = THREAD_SIZE -
				3064	(task->prev_lowest_stack & (THREAD_SIZE - 1));
				3065	unsigned long depth = THREAD_SIZE -
				3066	(task->lowest_stack & (THREAD_SIZE - 1));
				3067
				3068	seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n",
				3069	prev_depth, depth);
				3070	return 0;
				3071	}
				3072	#endif /* CONFIG_STACKLEAK_METRICS */
				3073
				3074	/*
				3075	* Thread groups
				3076	*/
				3077	static const struct file_operations proc_task_operations;
				3078	static const struct inode_operations proc_task_inode_operations;
				3079
				3080	static const struct pid_entry tgid_base_stuff[] = {
				3081	DIR("task", S_IRUGO\|S_IXUGO, proc_task_inode_operations, proc_task_operations),
				3082	DIR("fd", S_IRUSR\|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
				3083	DIR("map_files", S_IRUSR\|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
				3084	DIR("fdinfo", S_IRUGO\|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations),
				3085	DIR("ns", S_IRUSR\|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
				3086	#ifdef CONFIG_NET
				3087	DIR("net", S_IRUGO\|S_IXUGO, proc_net_inode_operations, proc_net_operations),
				3088	#endif
				3089	REG("environ", S_IRUSR, proc_environ_operations),
				3090	REG("auxv", S_IRUSR, proc_auxv_operations),
				3091	ONE("status", S_IRUGO, proc_pid_status),
				3092	ONE("personality", S_IRUSR, proc_pid_personality),
				3093	ONE("limits", S_IRUGO, proc_pid_limits),
				3094	#ifdef CONFIG_SCHED_DEBUG
				3095	REG("sched", S_IRUGO\|S_IWUSR, proc_pid_sched_operations),
				3096	#endif
				3097	#ifdef CONFIG_SCHED_AUTOGROUP
				3098	REG("autogroup", S_IRUGO\|S_IWUSR, proc_pid_sched_autogroup_operations),
				3099	#endif
				3100	REG("comm", S_IRUGO\|S_IWUSR, proc_pid_set_comm_operations),
				3101	#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
				3102	ONE("syscall", S_IRUSR, proc_pid_syscall),
				3103	#endif
				3104	REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
				3105	ONE("stat", S_IRUGO, proc_tgid_stat),
				3106	ONE("statm", S_IRUGO, proc_pid_statm),
				3107	REG("maps", S_IRUGO, proc_pid_maps_operations),
				3108	#ifdef CONFIG_NUMA
				3109	REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
				3110	#endif
				3111	REG("mem", S_IRUSR\|S_IWUSR, proc_mem_operations),
				3112	LNK("cwd", proc_cwd_link),
				3113	LNK("root", proc_root_link),
				3114	LNK("exe", proc_exe_link),
				3115	REG("mounts", S_IRUGO, proc_mounts_operations),
				3116	REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
				3117	REG("mountstats", S_IRUSR, proc_mountstats_operations),
				3118	#ifdef CONFIG_PROC_PAGE_MONITOR
				3119	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
				3120	REG("smaps", S_IRUGO, proc_pid_smaps_operations),
				3121	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
				3122	REG("pagemap", S_IRUSR, proc_pagemap_operations),
				3123	#endif
				3124	#ifdef CONFIG_SECURITY
				3125	DIR("attr", S_IRUGO\|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
				3126	#endif
				3127	#ifdef CONFIG_KALLSYMS
				3128	ONE("wchan", S_IRUGO, proc_pid_wchan),
				3129	#endif
				3130	#ifdef CONFIG_STACKTRACE
				3131	ONE("stack", S_IRUSR, proc_pid_stack),
				3132	#endif
				3133	#ifdef CONFIG_SCHED_INFO
				3134	ONE("schedstat", S_IRUGO, proc_pid_schedstat),
				3135	#endif
				3136	#ifdef CONFIG_LATENCYTOP
				3137	REG("latency", S_IRUGO, proc_lstats_operations),
				3138	#endif
				3139	#ifdef CONFIG_PROC_PID_CPUSET
				3140	ONE("cpuset", S_IRUGO, proc_cpuset_show),
				3141	#endif
				3142	#ifdef CONFIG_CGROUPS
				3143	ONE("cgroup", S_IRUGO, proc_cgroup_show),
				3144	#endif
				3145	ONE("oom_score", S_IRUGO, proc_oom_score),
				3146	REG("oom_adj", S_IRUGO\|S_IWUSR, proc_oom_adj_operations),
				3147	REG("oom_score_adj", S_IRUGO\|S_IWUSR, proc_oom_score_adj_operations),
				3148	#ifdef CONFIG_AUDIT
				3149	REG("loginuid", S_IWUSR\|S_IRUGO, proc_loginuid_operations),
				3150	REG("sessionid", S_IRUGO, proc_sessionid_operations),
				3151	#endif
				3152	#ifdef CONFIG_FAULT_INJECTION
				3153	REG("make-it-fail", S_IRUGO\|S_IWUSR, proc_fault_inject_operations),
				3154	REG("fail-nth", 0644, proc_fail_nth_operations),
				3155	#endif
				3156	#ifdef CONFIG_ELF_CORE
				3157	REG("coredump_filter", S_IRUGO\|S_IWUSR, proc_coredump_filter_operations),
				3158	#endif
				3159	#ifdef CONFIG_TASK_IO_ACCOUNTING
				3160	ONE("io", S_IRUSR, proc_tgid_io_accounting),
				3161	#endif
				3162	#ifdef CONFIG_USER_NS
				3163	REG("uid_map", S_IRUGO\|S_IWUSR, proc_uid_map_operations),
				3164	REG("gid_map", S_IRUGO\|S_IWUSR, proc_gid_map_operations),
				3165	REG("projid_map", S_IRUGO\|S_IWUSR, proc_projid_map_operations),
				3166	REG("setgroups", S_IRUGO\|S_IWUSR, proc_setgroups_operations),
				3167	#endif
				3168	#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
				3169	REG("timers", S_IRUGO, proc_timers_operations),
				3170	#endif
				3171	REG("timerslack_ns", S_IRUGO\|S_IWUGO, proc_pid_set_timerslack_ns_operations),
				3172	#ifdef CONFIG_LIVEPATCH
				3173	ONE("patch_state", S_IRUSR, proc_pid_patch_state),
				3174	#endif
				3175	#ifdef CONFIG_CPU_FREQ_TIMES
				3176	ONE("time_in_state", 0444, proc_time_in_state_show),
				3177	#endif
				3178	#ifdef CONFIG_STACKLEAK_METRICS
				3179	ONE("stack_depth", S_IRUGO, proc_stack_depth),
				3180	#endif
				3181	#ifdef CONFIG_PROC_PID_ARCH_STATUS
				3182	ONE("arch_status", S_IRUGO, proc_pid_arch_status),
				3183	#endif
				3184	};
				3185
				3186	static int proc_tgid_base_readdir(struct file file, struct dir_context ctx)
				3187	{
				3188	return proc_pident_readdir(file, ctx,
				3189	tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
				3190	}
				3191
				3192	static const struct file_operations proc_tgid_base_operations = {
				3193	.read = generic_read_dir,
				3194	.iterate_shared = proc_tgid_base_readdir,
				3195	.llseek = generic_file_llseek,
				3196	};
				3197
				3198	struct pid tgid_pidfd_to_pid(const struct file file)
				3199	{
				3200	if (file->f_op != &proc_tgid_base_operations)
				3201	return ERR_PTR(-EBADF);
				3202
				3203	return proc_pid(file_inode(file));
				3204	}
				3205
				3206	static struct dentry proc_tgid_base_lookup(struct inode dir, struct dentry *dentry, unsigned int flags)
				3207	{
				3208	return proc_pident_lookup(dir, dentry,
				3209	tgid_base_stuff,
				3210	tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff));
				3211	}
				3212
				3213	static const struct inode_operations proc_tgid_base_inode_operations = {
				3214	.lookup = proc_tgid_base_lookup,
				3215	.getattr = pid_getattr,
				3216	.setattr = proc_setattr,
				3217	.permission = proc_pid_permission,
				3218	};
				3219
				3220	static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
				3221	{
				3222	struct dentry dentry, leader, *dir;
				3223	char buf[10 + 1];
				3224	struct qstr name;
				3225
				3226	name.name = buf;
				3227	name.len = snprintf(buf, sizeof(buf), "%u", pid);
				3228	/* no ->d_hash() rejects on procfs */
				3229	dentry = d_hash_and_lookup(mnt->mnt_root, &name);
				3230	if (dentry) {
				3231	d_invalidate(dentry);
				3232	dput(dentry);
				3233	}
				3234
				3235	if (pid == tgid)
				3236	return;
				3237
				3238	name.name = buf;
				3239	name.len = snprintf(buf, sizeof(buf), "%u", tgid);
				3240	leader = d_hash_and_lookup(mnt->mnt_root, &name);
				3241	if (!leader)
				3242	goto out;
				3243
				3244	name.name = "task";
				3245	name.len = strlen(name.name);
				3246	dir = d_hash_and_lookup(leader, &name);
				3247	if (!dir)
				3248	goto out_put_leader;
				3249
				3250	name.name = buf;
				3251	name.len = snprintf(buf, sizeof(buf), "%u", pid);
				3252	dentry = d_hash_and_lookup(dir, &name);
				3253	if (dentry) {
				3254	d_invalidate(dentry);
				3255	dput(dentry);
				3256	}
				3257
				3258	dput(dir);
				3259	out_put_leader:
				3260	dput(leader);
				3261	out:
				3262	return;
				3263	}
				3264
				3265	/**
				3266	* proc_flush_task - Remove dcache entries for @task from the /proc dcache.
				3267	* @task: task that should be flushed.
				3268	*
				3269	* When flushing dentries from proc, one needs to flush them from global
				3270	* proc (proc_mnt) and from all the namespaces' procs this task was seen
				3271	* in. This call is supposed to do all of this job.
				3272	*
				3273	* Looks in the dcache for
				3274	* /proc/@pid
				3275	* /proc/@tgid/task/@pid
				3276	* if either directory is present flushes it and all of it'ts children
				3277	* from the dcache.
				3278	*
				3279	* It is safe and reasonable to cache /proc entries for a task until
				3280	* that task exits. After that they just clog up the dcache with
				3281	* useless entries, possibly causing useful dcache entries to be
				3282	* flushed instead. This routine is proved to flush those useless
				3283	* dcache entries at process exit time.
				3284	*
				3285	* NOTE: This routine is just an optimization so it does not guarantee
				3286	* that no dcache entries will exist at process exit time it
				3287	* just makes it very unlikely that any will persist.
				3288	*/
				3289
				3290	void proc_flush_task(struct task_struct *task)
				3291	{
				3292	int i;
				3293	struct pid pid, tgid;
				3294	struct upid *upid;
				3295
				3296	pid = task_pid(task);
				3297	tgid = task_tgid(task);
				3298
				3299	for (i = 0; i <= pid->level; i++) {
				3300	upid = &pid->numbers[i];
				3301	proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
				3302	tgid->numbers[i].nr);
				3303	}
				3304	}
				3305
				3306	static struct dentry proc_pid_instantiate(struct dentry dentry,
				3307	struct task_struct task, const void ptr)
				3308	{
				3309	struct inode *inode;
				3310
				3311	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR \| S_IRUGO \| S_IXUGO);
				3312	if (!inode)
				3313	return ERR_PTR(-ENOENT);
				3314
				3315	inode->i_op = &proc_tgid_base_inode_operations;
				3316	inode->i_fop = &proc_tgid_base_operations;
				3317	inode->i_flags\|=S_IMMUTABLE;
				3318
				3319	set_nlink(inode, nlink_tgid);
				3320	pid_update_inode(task, inode);
				3321
				3322	d_set_d_op(dentry, &pid_dentry_operations);
				3323	return d_splice_alias(inode, dentry);
				3324	}
				3325
				3326	struct dentry proc_pid_lookup(struct dentry dentry, unsigned int flags)
				3327	{
				3328	struct task_struct *task;
				3329	unsigned tgid;
				3330	struct pid_namespace *ns;
				3331	struct dentry *result = ERR_PTR(-ENOENT);
				3332
				3333	tgid = name_to_int(&dentry->d_name);
				3334	if (tgid == ~0U)
				3335	goto out;
				3336
				3337	ns = dentry->d_sb->s_fs_info;
				3338	rcu_read_lock();
				3339	task = find_task_by_pid_ns(tgid, ns);
				3340	if (task)
				3341	get_task_struct(task);
				3342	rcu_read_unlock();
				3343	if (!task)
				3344	goto out;
				3345
				3346	result = proc_pid_instantiate(dentry, task, NULL);
				3347	put_task_struct(task);
				3348	out:
				3349	return result;
				3350	}
				3351
				3352	/*
				3353	* Find the first task with tgid >= tgid
				3354	*
				3355	*/
				3356	struct tgid_iter {
				3357	unsigned int tgid;
				3358	struct task_struct *task;
				3359	};
				3360	static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
				3361	{
				3362	struct pid *pid;
				3363
				3364	if (iter.task)
				3365	put_task_struct(iter.task);
				3366	rcu_read_lock();
				3367	retry:
				3368	iter.task = NULL;
				3369	pid = find_ge_pid(iter.tgid, ns);
				3370	if (pid) {
				3371	iter.tgid = pid_nr_ns(pid, ns);
				3372	iter.task = pid_task(pid, PIDTYPE_PID);
				3373	/* What we to know is if the pid we have find is the
				3374	* pid of a thread_group_leader. Testing for task
				3375	* being a thread_group_leader is the obvious thing
				3376	* todo but there is a window when it fails, due to
				3377	* the pid transfer logic in de_thread.
				3378	*
				3379	* So we perform the straight forward test of seeing
				3380	* if the pid we have found is the pid of a thread
				3381	* group leader, and don't worry if the task we have
				3382	* found doesn't happen to be a thread group leader.
				3383	* As we don't care in the case of readdir.
				3384	*/
				3385	if (!iter.task \|\| !has_group_leader_pid(iter.task)) {
				3386	iter.tgid += 1;
				3387	goto retry;
				3388	}
				3389	get_task_struct(iter.task);
				3390	}
				3391	rcu_read_unlock();
				3392	return iter;
				3393	}
				3394
				3395	#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
				3396
				3397	/* for the /proc/ directory itself, after non-process stuff has been done */
				3398	int proc_pid_readdir(struct file file, struct dir_context ctx)
				3399	{
				3400	struct tgid_iter iter;
				3401	struct pid_namespace *ns = proc_pid_ns(file_inode(file));
				3402	loff_t pos = ctx->pos;
				3403
				3404	if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
				3405	return 0;
				3406
				3407	if (pos == TGID_OFFSET - 2) {
				3408	struct inode *inode = d_inode(ns->proc_self);
				3409	if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
				3410	return 0;
				3411	ctx->pos = pos = pos + 1;
				3412	}
				3413	if (pos == TGID_OFFSET - 1) {
				3414	struct inode *inode = d_inode(ns->proc_thread_self);
				3415	if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
				3416	return 0;
				3417	ctx->pos = pos = pos + 1;
				3418	}
				3419	iter.tgid = pos - TGID_OFFSET;
				3420	iter.task = NULL;
				3421	for (iter = next_tgid(ns, iter);
				3422	iter.task;
				3423	iter.tgid += 1, iter = next_tgid(ns, iter)) {
				3424	char name[10 + 1];
				3425	unsigned int len;
				3426
				3427	cond_resched();
				3428	if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
				3429	continue;
				3430
				3431	len = snprintf(name, sizeof(name), "%u", iter.tgid);
				3432	ctx->pos = iter.tgid + TGID_OFFSET;
				3433	if (!proc_fill_cache(file, ctx, name, len,
				3434	proc_pid_instantiate, iter.task, NULL)) {
				3435	put_task_struct(iter.task);
				3436	return 0;
				3437	}
				3438	}
				3439	ctx->pos = PID_MAX_LIMIT + TGID_OFFSET;
				3440	return 0;
				3441	}
				3442
				3443	/*
				3444	* proc_tid_comm_permission is a special permission function exclusively
				3445	* used for the node /proc/<pid>/task/<tid>/comm.
				3446	* It bypasses generic permission checks in the case where a task of the same
				3447	* task group attempts to access the node.
				3448	* The rationale behind this is that glibc and bionic access this node for
				3449	* cross thread naming (pthread_set/getname_np(!self)). However, if
				3450	* PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
				3451	* which locks out the cross thread naming implementation.
				3452	* This function makes sure that the node is always accessible for members of
				3453	* same thread group.
				3454	*/
				3455	static int proc_tid_comm_permission(struct inode *inode, int mask)
				3456	{
				3457	bool is_same_tgroup;
				3458	struct task_struct *task;
				3459
				3460	task = get_proc_task(inode);
				3461	if (!task)
				3462	return -ESRCH;
				3463	is_same_tgroup = same_thread_group(current, task);
				3464	put_task_struct(task);
				3465
				3466	if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
				3467	/* This file (/proc/<pid>/task/<tid>/comm) can always be
				3468	* read or written by the members of the corresponding
				3469	* thread group.
				3470	*/
				3471	return 0;
				3472	}
				3473
				3474	return generic_permission(inode, mask);
				3475	}
				3476
				3477	static const struct inode_operations proc_tid_comm_inode_operations = {
				3478	.setattr = proc_setattr,
				3479	.permission = proc_tid_comm_permission,
				3480	};
				3481
				3482	/*
				3483	* Tasks
				3484	*/
				3485	static const struct pid_entry tid_base_stuff[] = {
				3486	DIR("fd", S_IRUSR\|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
				3487	DIR("fdinfo", S_IRUGO\|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations),
				3488	DIR("ns", S_IRUSR\|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
				3489	#ifdef CONFIG_NET
				3490	DIR("net", S_IRUGO\|S_IXUGO, proc_net_inode_operations, proc_net_operations),
				3491	#endif
				3492	REG("environ", S_IRUSR, proc_environ_operations),
				3493	REG("auxv", S_IRUSR, proc_auxv_operations),
				3494	ONE("status", S_IRUGO, proc_pid_status),
				3495	ONE("personality", S_IRUSR, proc_pid_personality),
				3496	ONE("limits", S_IRUGO, proc_pid_limits),
				3497	#ifdef CONFIG_SCHED_DEBUG
				3498	REG("sched", S_IRUGO\|S_IWUSR, proc_pid_sched_operations),
				3499	#endif
				3500	NOD("comm", S_IFREG\|S_IRUGO\|S_IWUSR,
				3501	&proc_tid_comm_inode_operations,
				3502	&proc_pid_set_comm_operations, {}),
				3503	#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
				3504	ONE("syscall", S_IRUSR, proc_pid_syscall),
				3505	#endif
				3506	REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
				3507	ONE("stat", S_IRUGO, proc_tid_stat),
				3508	ONE("statm", S_IRUGO, proc_pid_statm),
				3509	REG("maps", S_IRUGO, proc_pid_maps_operations),
				3510	#ifdef CONFIG_PROC_CHILDREN
				3511	REG("children", S_IRUGO, proc_tid_children_operations),
				3512	#endif
				3513	#ifdef CONFIG_NUMA
				3514	REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
				3515	#endif
				3516	REG("mem", S_IRUSR\|S_IWUSR, proc_mem_operations),
				3517	LNK("cwd", proc_cwd_link),
				3518	LNK("root", proc_root_link),
				3519	LNK("exe", proc_exe_link),
				3520	REG("mounts", S_IRUGO, proc_mounts_operations),
				3521	REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
				3522	#ifdef CONFIG_PROC_PAGE_MONITOR
				3523	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
				3524	REG("smaps", S_IRUGO, proc_pid_smaps_operations),
				3525	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
				3526	REG("pagemap", S_IRUSR, proc_pagemap_operations),
				3527	#endif
				3528	#ifdef CONFIG_SECURITY
				3529	DIR("attr", S_IRUGO\|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
				3530	#endif
				3531	#ifdef CONFIG_KALLSYMS
				3532	ONE("wchan", S_IRUGO, proc_pid_wchan),
				3533	#endif
				3534	#ifdef CONFIG_STACKTRACE
				3535	ONE("stack", S_IRUSR, proc_pid_stack),
				3536	#endif
				3537	#ifdef CONFIG_SCHED_INFO
				3538	ONE("schedstat", S_IRUGO, proc_pid_schedstat),
				3539	#endif
				3540	#ifdef CONFIG_LATENCYTOP
				3541	REG("latency", S_IRUGO, proc_lstats_operations),
				3542	#endif
				3543	#ifdef CONFIG_PROC_PID_CPUSET
				3544	ONE("cpuset", S_IRUGO, proc_cpuset_show),
				3545	#endif
				3546	#ifdef CONFIG_CGROUPS
				3547	ONE("cgroup", S_IRUGO, proc_cgroup_show),
				3548	#endif
				3549	ONE("oom_score", S_IRUGO, proc_oom_score),
				3550	REG("oom_adj", S_IRUGO\|S_IWUSR, proc_oom_adj_operations),
				3551	REG("oom_score_adj", S_IRUGO\|S_IWUSR, proc_oom_score_adj_operations),
				3552	#ifdef CONFIG_AUDIT
				3553	REG("loginuid", S_IWUSR\|S_IRUGO, proc_loginuid_operations),
				3554	REG("sessionid", S_IRUGO, proc_sessionid_operations),
				3555	#endif
				3556	#ifdef CONFIG_FAULT_INJECTION
				3557	REG("make-it-fail", S_IRUGO\|S_IWUSR, proc_fault_inject_operations),
				3558	REG("fail-nth", 0644, proc_fail_nth_operations),
				3559	#endif
				3560	#ifdef CONFIG_TASK_IO_ACCOUNTING
				3561	ONE("io", S_IRUSR, proc_tid_io_accounting),
				3562	#endif
				3563	#ifdef CONFIG_USER_NS
				3564	REG("uid_map", S_IRUGO\|S_IWUSR, proc_uid_map_operations),
				3565	REG("gid_map", S_IRUGO\|S_IWUSR, proc_gid_map_operations),
				3566	REG("projid_map", S_IRUGO\|S_IWUSR, proc_projid_map_operations),
				3567	REG("setgroups", S_IRUGO\|S_IWUSR, proc_setgroups_operations),
				3568	#endif
				3569	#ifdef CONFIG_LIVEPATCH
				3570	ONE("patch_state", S_IRUSR, proc_pid_patch_state),
				3571	#endif
				3572	#ifdef CONFIG_PROC_PID_ARCH_STATUS
				3573	ONE("arch_status", S_IRUGO, proc_pid_arch_status),
				3574	#endif
				3575	#ifdef CONFIG_CPU_FREQ_TIMES
				3576	ONE("time_in_state", 0444, proc_time_in_state_show),
				3577	#endif
				3578	};
				3579
				3580	static int proc_tid_base_readdir(struct file file, struct dir_context ctx)
				3581	{
				3582	return proc_pident_readdir(file, ctx,
				3583	tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
				3584	}
				3585
				3586	static struct dentry proc_tid_base_lookup(struct inode dir, struct dentry *dentry, unsigned int flags)
				3587	{
				3588	return proc_pident_lookup(dir, dentry,
				3589	tid_base_stuff,
				3590	tid_base_stuff + ARRAY_SIZE(tid_base_stuff));
				3591	}
				3592
				3593	static const struct file_operations proc_tid_base_operations = {
				3594	.read = generic_read_dir,
				3595	.iterate_shared = proc_tid_base_readdir,
				3596	.llseek = generic_file_llseek,
				3597	};
				3598
				3599	static const struct inode_operations proc_tid_base_inode_operations = {
				3600	.lookup = proc_tid_base_lookup,
				3601	.getattr = pid_getattr,
				3602	.setattr = proc_setattr,
				3603	};
				3604
				3605	static struct dentry proc_task_instantiate(struct dentry dentry,
				3606	struct task_struct task, const void ptr)
				3607	{
				3608	struct inode *inode;
				3609	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR \| S_IRUGO \| S_IXUGO);
				3610	if (!inode)
				3611	return ERR_PTR(-ENOENT);
				3612
				3613	inode->i_op = &proc_tid_base_inode_operations;
				3614	inode->i_fop = &proc_tid_base_operations;
				3615	inode->i_flags \|= S_IMMUTABLE;
				3616
				3617	set_nlink(inode, nlink_tid);
				3618	pid_update_inode(task, inode);
				3619
				3620	d_set_d_op(dentry, &pid_dentry_operations);
				3621	return d_splice_alias(inode, dentry);
				3622	}
				3623
				3624	static struct dentry proc_task_lookup(struct inode dir, struct dentry * dentry, unsigned int flags)
				3625	{
				3626	struct task_struct *task;
				3627	struct task_struct *leader = get_proc_task(dir);
				3628	unsigned tid;
				3629	struct pid_namespace *ns;
				3630	struct dentry *result = ERR_PTR(-ENOENT);
				3631
				3632	if (!leader)
				3633	goto out_no_task;
				3634
				3635	tid = name_to_int(&dentry->d_name);
				3636	if (tid == ~0U)
				3637	goto out;
				3638
				3639	ns = dentry->d_sb->s_fs_info;
				3640	rcu_read_lock();
				3641	task = find_task_by_pid_ns(tid, ns);
				3642	if (task)
				3643	get_task_struct(task);
				3644	rcu_read_unlock();
				3645	if (!task)
				3646	goto out;
				3647	if (!same_thread_group(leader, task))
				3648	goto out_drop_task;
				3649
				3650	result = proc_task_instantiate(dentry, task, NULL);
				3651	out_drop_task:
				3652	put_task_struct(task);
				3653	out:
				3654	put_task_struct(leader);
				3655	out_no_task:
				3656	return result;
				3657	}
				3658
				3659	/*
				3660	* Find the first tid of a thread group to return to user space.
				3661	*
				3662	* Usually this is just the thread group leader, but if the users
				3663	* buffer was too small or there was a seek into the middle of the
				3664	* directory we have more work todo.
				3665	*
				3666	* In the case of a short read we start with find_task_by_pid.
				3667	*
				3668	* In the case of a seek we start with the leader and walk nr
				3669	* threads past it.
				3670	*/
				3671	static struct task_struct first_tid(struct pid pid, int tid, loff_t f_pos,
				3672	struct pid_namespace *ns)
				3673	{
				3674	struct task_struct pos, task;
				3675	unsigned long nr = f_pos;
				3676
				3677	if (nr != f_pos) /* 32bit overflow? */
				3678	return NULL;
				3679
				3680	rcu_read_lock();
				3681	task = pid_task(pid, PIDTYPE_PID);
				3682	if (!task)
				3683	goto fail;
				3684
				3685	/* Attempt to start with the tid of a thread */
				3686	if (tid && nr) {
				3687	pos = find_task_by_pid_ns(tid, ns);
				3688	if (pos && same_thread_group(pos, task))
				3689	goto found;
				3690	}
				3691
				3692	/* If nr exceeds the number of threads there is nothing todo */
				3693	if (nr >= get_nr_threads(task))
				3694	goto fail;
				3695
				3696	/* If we haven't found our starting place yet start
				3697	* with the leader and walk nr threads forward.
				3698	*/
				3699	pos = task = task->group_leader;
				3700	do {
				3701	if (!nr--)
				3702	goto found;
				3703	} while_each_thread(task, pos);
				3704	fail:
				3705	pos = NULL;
				3706	goto out;
				3707	found:
				3708	get_task_struct(pos);
				3709	out:
				3710	rcu_read_unlock();
				3711	return pos;
				3712	}
				3713
				3714	/*
				3715	* Find the next thread in the thread list.
				3716	* Return NULL if there is an error or no next thread.
				3717	*
				3718	* The reference to the input task_struct is released.
				3719	*/
				3720	static struct task_struct next_tid(struct task_struct start)
				3721	{
				3722	struct task_struct *pos = NULL;
				3723	rcu_read_lock();
				3724	if (pid_alive(start)) {
				3725	pos = next_thread(start);
				3726	if (thread_group_leader(pos))
				3727	pos = NULL;
				3728	else
				3729	get_task_struct(pos);
				3730	}
				3731	rcu_read_unlock();
				3732	put_task_struct(start);
				3733	return pos;
				3734	}
				3735
				3736	/* for the /proc/TGID/task/ directories */
				3737	static int proc_task_readdir(struct file file, struct dir_context ctx)
				3738	{
				3739	struct inode *inode = file_inode(file);
				3740	struct task_struct *task;
				3741	struct pid_namespace *ns;
				3742	int tid;
				3743
				3744	if (proc_inode_is_dead(inode))
				3745	return -ENOENT;
				3746
				3747	if (!dir_emit_dots(file, ctx))
				3748	return 0;
				3749
				3750	/* f_version caches the tgid value that the last readdir call couldn't
				3751	* return. lseek aka telldir automagically resets f_version to 0.
				3752	*/
				3753	ns = proc_pid_ns(inode);
				3754	tid = (int)file->f_version;
				3755	file->f_version = 0;
				3756	for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
				3757	task;
				3758	task = next_tid(task), ctx->pos++) {
				3759	char name[10 + 1];
				3760	unsigned int len;
				3761	tid = task_pid_nr_ns(task, ns);
				3762	len = snprintf(name, sizeof(name), "%u", tid);
				3763	if (!proc_fill_cache(file, ctx, name, len,
				3764	proc_task_instantiate, task, NULL)) {
				3765	/* returning this tgid failed, save it as the first
				3766	* pid for the next readir call */
				3767	file->f_version = (u64)tid;
				3768	put_task_struct(task);
				3769	break;
				3770	}
				3771	}
				3772
				3773	return 0;
				3774	}
				3775
				3776	static int proc_task_getattr(const struct path path, struct kstat stat,
				3777	u32 request_mask, unsigned int query_flags)
				3778	{
				3779	struct inode *inode = d_inode(path->dentry);
				3780	struct task_struct *p = get_proc_task(inode);
				3781	generic_fillattr(inode, stat);
				3782
				3783	if (p) {
				3784	stat->nlink += get_nr_threads(p);
				3785	put_task_struct(p);
				3786	}
				3787
				3788	return 0;
				3789	}
				3790
				3791	static const struct inode_operations proc_task_inode_operations = {
				3792	.lookup = proc_task_lookup,
				3793	.getattr = proc_task_getattr,
				3794	.setattr = proc_setattr,
				3795	.permission = proc_pid_permission,
				3796	};
				3797
				3798	static const struct file_operations proc_task_operations = {
				3799	.read = generic_read_dir,
				3800	.iterate_shared = proc_task_readdir,
				3801	.llseek = generic_file_llseek,
				3802	};
				3803
				3804	void __init set_proc_pid_nlink(void)
				3805	{
				3806	nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
				3807	nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
				3808	}