Blame - src/kernel/linux/v4.19/fs/proc/base.c - T800

blob: ebaad54f39951686f4c0ce7ec366bbfd1a5edec2 [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* linux/fs/proc/base.c
				4	*
				5	* Copyright (C) 1991, 1992 Linus Torvalds
				6	*
				7	* proc base directory handling functions
				8	*
				9	* 1999, Al Viro. Rewritten. Now it covers the whole per-process part.
				10	* Instead of using magical inumbers to determine the kind of object
				11	* we allocate and fill in-core inodes upon lookup. They don't even
				12	* go into icache. We cache the reference to task_struct upon lookup too.
				13	* Eventually it should become a filesystem in its own. We don't use the
				14	* rest of procfs anymore.
				15	*
				16	*
				17	* Changelog:
				18	* 17-Jan-2005
				19	* Allan Bezerra
				20	* Bruna Moreira <bruna.moreira@indt.org.br>
				21	* Edjard Mota <edjard.mota@indt.org.br>
				22	* Ilias Biris <ilias.biris@indt.org.br>
				23	* Mauricio Lin <mauricio.lin@indt.org.br>
				24	*
				25	* Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
				26	*
				27	* A new process specific entry (smaps) included in /proc. It shows the
				28	* size of rss for each memory area. The maps entry lacks information
				29	* about physical memory size (rss) for each mapped file, i.e.,
				30	* rss information for executables and library files.
				31	* This additional information is useful for any tools that need to know
				32	* about physical memory consumption for a process specific library.
				33	*
				34	* Changelog:
				35	* 21-Feb-2005
				36	* Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
				37	* Pud inclusion in the page table walking.
				38	*
				39	* ChangeLog:
				40	* 10-Mar-2005
				41	* 10LE Instituto Nokia de Tecnologia - INdT:
				42	* A better way to walk through the page table as suggested by Hugh Dickins.
				43	*
				44	* Simo Piiroinen <simo.piiroinen@nokia.com>:
				45	* Smaps information related to shared, private, clean and dirty pages.
				46	*
				47	* Paul Mundt <paul.mundt@nokia.com>:
				48	* Overall revision about smaps.
				49	*/
				50
				51	#include <linux/uaccess.h>
				52
				53	#include <linux/errno.h>
				54	#include <linux/time.h>
				55	#include <linux/proc_fs.h>
				56	#include <linux/stat.h>
				57	#include <linux/task_io_accounting_ops.h>
				58	#include <linux/init.h>
				59	#include <linux/capability.h>
				60	#include <linux/file.h>
				61	#include <linux/fdtable.h>
				62	#include <linux/string.h>
				63	#include <linux/seq_file.h>
				64	#include <linux/namei.h>
				65	#include <linux/mnt_namespace.h>
				66	#include <linux/mm.h>
				67	#include <linux/swap.h>
				68	#include <linux/rcupdate.h>
				69	#include <linux/kallsyms.h>
				70	#include <linux/stacktrace.h>
				71	#include <linux/resource.h>
				72	#include <linux/module.h>
				73	#include <linux/mount.h>
				74	#include <linux/security.h>
				75	#include <linux/ptrace.h>
				76	#include <linux/tracehook.h>
				77	#include <linux/printk.h>
				78	#include <linux/cache.h>
				79	#include <linux/cgroup.h>
				80	#include <linux/cpuset.h>
				81	#include <linux/audit.h>
				82	#include <linux/poll.h>
				83	#include <linux/nsproxy.h>
				84	#include <linux/oom.h>
				85	#include <linux/elf.h>
				86	#include <linux/pid_namespace.h>
				87	#include <linux/user_namespace.h>
				88	#include <linux/fs_struct.h>
				89	#include <linux/slab.h>
				90	#include <linux/sched/autogroup.h>
				91	#include <linux/sched/mm.h>
				92	#include <linux/sched/coredump.h>
				93	#include <linux/sched/debug.h>
				94	#include <linux/sched/stat.h>
				95	#include <linux/flex_array.h>
				96	#include <linux/posix-timers.h>
				97	#include <linux/cpufreq_times.h>
				98	#include <trace/events/oom.h>
				99	#include "internal.h"
				100	#include "fd.h"
				101
				102	#include "../../lib/kstrtox.h"
				103
				104	/* NOTE:
				105	* Implementing inode permission operations in /proc is almost
				106	* certainly an error. Permission checks need to happen during
				107	* each system call not at open time. The reason is that most of
				108	* what we wish to check for permissions in /proc varies at runtime.
				109	*
				110	* The classic example of a problem is opening file descriptors
				111	* in /proc for a task before it execs a suid executable.
				112	*/
				113
				114	static u8 nlink_tid __ro_after_init;
				115	static u8 nlink_tgid __ro_after_init;
				116
				117	struct pid_entry {
				118	const char *name;
				119	unsigned int len;
				120	umode_t mode;
				121	const struct inode_operations *iop;
				122	const struct file_operations *fop;
				123	union proc_op op;
				124	};
				125
				126	#define NOD(NAME, MODE, IOP, FOP, OP) { \
				127	.name = (NAME), \
				128	.len = sizeof(NAME) - 1, \
				129	.mode = MODE, \
				130	.iop = IOP, \
				131	.fop = FOP, \
				132	.op = OP, \
				133	}
				134
				135	#define DIR(NAME, MODE, iops, fops) \
				136	NOD(NAME, (S_IFDIR\|(MODE)), &iops, &fops, {} )
				137	#define LNK(NAME, get_link) \
				138	NOD(NAME, (S_IFLNK\|S_IRWXUGO), \
				139	&proc_pid_link_inode_operations, NULL, \
				140	{ .proc_get_link = get_link } )
				141	#define REG(NAME, MODE, fops) \
				142	NOD(NAME, (S_IFREG\|(MODE)), NULL, &fops, {})
				143	#define ONE(NAME, MODE, show) \
				144	NOD(NAME, (S_IFREG\|(MODE)), \
				145	NULL, &proc_single_file_operations, \
				146	{ .proc_show = show } )
				147
				148	/*
				149	* Count the number of hardlinks for the pid_entry table, excluding the .
				150	* and .. links.
				151	*/
				152	static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
				153	unsigned int n)
				154	{
				155	unsigned int i;
				156	unsigned int count;
				157
				158	count = 2;
				159	for (i = 0; i < n; ++i) {
				160	if (S_ISDIR(entries[i].mode))
				161	++count;
				162	}
				163
				164	return count;
				165	}
				166
				167	static int get_task_root(struct task_struct task, struct path root)
				168	{
				169	int result = -ENOENT;
				170
				171	task_lock(task);
				172	if (task->fs) {
				173	get_fs_root(task->fs, root);
				174	result = 0;
				175	}
				176	task_unlock(task);
				177	return result;
				178	}
				179
				180	static int proc_cwd_link(struct dentry dentry, struct path path)
				181	{
				182	struct task_struct *task = get_proc_task(d_inode(dentry));
				183	int result = -ENOENT;
				184
				185	if (task) {
				186	task_lock(task);
				187	if (task->fs) {
				188	get_fs_pwd(task->fs, path);
				189	result = 0;
				190	}
				191	task_unlock(task);
				192	put_task_struct(task);
				193	}
				194	return result;
				195	}
				196
				197	static int proc_root_link(struct dentry dentry, struct path path)
				198	{
				199	struct task_struct *task = get_proc_task(d_inode(dentry));
				200	int result = -ENOENT;
				201
				202	if (task) {
				203	result = get_task_root(task, path);
				204	put_task_struct(task);
				205	}
				206	return result;
				207	}
				208
				209	/*
				210	* If the user used setproctitle(), we just get the string from
				211	* user space at arg_start, and limit it to a maximum of one page.
				212	*/
				213	static ssize_t get_mm_proctitle(struct mm_struct mm, char __user buf,
				214	size_t count, unsigned long pos,
				215	unsigned long arg_start)
				216	{
				217	char *page;
				218	int ret, got;
				219
				220	if (pos >= PAGE_SIZE)
				221	return 0;
				222
				223	page = (char *)__get_free_page(GFP_KERNEL);
				224	if (!page)
				225	return -ENOMEM;
				226
				227	ret = 0;
				228	got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
				229	if (got > 0) {
				230	int len = strnlen(page, got);
				231
				232	/* Include the NUL character if it was found */
				233	if (len < got)
				234	len++;
				235
				236	if (len > pos) {
				237	len -= pos;
				238	if (len > count)
				239	len = count;
				240	len -= copy_to_user(buf, page+pos, len);
				241	if (!len)
				242	len = -EFAULT;
				243	ret = len;
				244	}
				245	}
				246	free_page((unsigned long)page);
				247	return ret;
				248	}
				249
				250	static ssize_t get_mm_cmdline(struct mm_struct mm, char __user buf,
				251	size_t count, loff_t *ppos)
				252	{
				253	unsigned long arg_start, arg_end, env_start, env_end;
				254	unsigned long pos, len;
				255	char *page, c;
				256
				257	/* Check if process spawned far enough to have cmdline. */
				258	if (!mm->env_end)
				259	return 0;
				260
				261	spin_lock(&mm->arg_lock);
				262	arg_start = mm->arg_start;
				263	arg_end = mm->arg_end;
				264	env_start = mm->env_start;
				265	env_end = mm->env_end;
				266	spin_unlock(&mm->arg_lock);
				267
				268	if (arg_start >= arg_end)
				269	return 0;
				270
				271	/*
				272	* We allow setproctitle() to overwrite the argument
				273	* strings, and overflow past the original end. But
				274	* only when it overflows into the environment area.
				275	*/
				276	if (env_start != arg_end \|\| env_end < env_start)
				277	env_start = env_end = arg_end;
				278	len = env_end - arg_start;
				279
				280	/* We're not going to care if "ppos" has high bits set /
				281	pos = *ppos;
				282	if (pos >= len)
				283	return 0;
				284	if (count > len - pos)
				285	count = len - pos;
				286	if (!count)
				287	return 0;
				288
				289	/*
				290	* Magical special case: if the argv[] end byte is not
				291	* zero, the user has overwritten it with setproctitle(3).
				292	*
				293	* Possible future enhancement: do this only once when
				294	* pos is 0, and set a flag in the 'struct file'.
				295	*/
				296	if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
				297	return get_mm_proctitle(mm, buf, count, pos, arg_start);
				298
				299	/*
				300	* For the non-setproctitle() case we limit things strictly
				301	* to the [arg_start, arg_end[ range.
				302	*/
				303	pos += arg_start;
				304	if (pos < arg_start \|\| pos >= arg_end)
				305	return 0;
				306	if (count > arg_end - pos)
				307	count = arg_end - pos;
				308
				309	page = (char *)__get_free_page(GFP_KERNEL);
				310	if (!page)
				311	return -ENOMEM;
				312
				313	len = 0;
				314	while (count) {
				315	int got;
				316	size_t size = min_t(size_t, PAGE_SIZE, count);
				317
				318	got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
				319	if (got <= 0)
				320	break;
				321	got -= copy_to_user(buf, page, got);
				322	if (unlikely(!got)) {
				323	if (!len)
				324	len = -EFAULT;
				325	break;
				326	}
				327	pos += got;
				328	buf += got;
				329	len += got;
				330	count -= got;
				331	}
				332
				333	free_page((unsigned long)page);
				334	return len;
				335	}
				336
				337	static ssize_t get_task_cmdline(struct task_struct tsk, char __user buf,
				338	size_t count, loff_t *pos)
				339	{
				340	struct mm_struct *mm;
				341	ssize_t ret;
				342
				343	mm = get_task_mm(tsk);
				344	if (!mm)
				345	return 0;
				346
				347	ret = get_mm_cmdline(mm, buf, count, pos);
				348	mmput(mm);
				349	return ret;
				350	}
				351
				352	static ssize_t proc_pid_cmdline_read(struct file file, char __user buf,
				353	size_t count, loff_t *pos)
				354	{
				355	struct task_struct *tsk;
				356	ssize_t ret;
				357
				358	BUG_ON(*pos < 0);
				359
				360	tsk = get_proc_task(file_inode(file));
				361	if (!tsk)
				362	return -ESRCH;
				363	ret = get_task_cmdline(tsk, buf, count, pos);
				364	put_task_struct(tsk);
				365	if (ret > 0)
				366	*pos += ret;
				367	return ret;
				368	}
				369
				370	static const struct file_operations proc_pid_cmdline_ops = {
				371	.read = proc_pid_cmdline_read,
				372	.llseek = generic_file_llseek,
				373	};
				374
				375	#ifdef CONFIG_KALLSYMS
				376	/*
				377	* Provides a wchan file via kallsyms in a proper one-value-per-file format.
				378	* Returns the resolved symbol. If that fails, simply return the address.
				379	*/
				380	static int proc_pid_wchan(struct seq_file m, struct pid_namespace ns,
				381	struct pid pid, struct task_struct task)
				382	{
				383	unsigned long wchan;
				384	char symname[KSYM_NAME_LEN];
				385
				386	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
				387	goto print0;
				388
				389	wchan = get_wchan(task);
				390	if (wchan && !lookup_symbol_name(wchan, symname)) {
				391	seq_puts(m, symname);
				392	return 0;
				393	}
				394
				395	print0:
				396	seq_putc(m, '0');
				397	return 0;
				398	}
				399	#endif /* CONFIG_KALLSYMS */
				400
				401	static int lock_trace(struct task_struct *task)
				402	{
				403	int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
				404	if (err)
				405	return err;
				406	if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
				407	mutex_unlock(&task->signal->cred_guard_mutex);
				408	return -EPERM;
				409	}
				410	return 0;
				411	}
				412
				413	static void unlock_trace(struct task_struct *task)
				414	{
				415	mutex_unlock(&task->signal->cred_guard_mutex);
				416	}
				417
				418	#ifdef CONFIG_STACKTRACE
				419
				420	#define MAX_STACK_TRACE_DEPTH 64
				421
				422	static int proc_pid_stack(struct seq_file m, struct pid_namespace ns,
				423	struct pid pid, struct task_struct task)
				424	{
				425	struct stack_trace trace;
				426	unsigned long *entries;
				427	int err;
				428
				429	/*
				430	* The ability to racily run the kernel stack unwinder on a running task
				431	* and then observe the unwinder output is scary; while it is useful for
				432	* debugging kernel issues, it can also allow an attacker to leak kernel
				433	* stack contents.
				434	* Doing this in a manner that is at least safe from races would require
				435	* some work to ensure that the remote task can not be scheduled; and
				436	* even then, this would still expose the unwinder as local attack
				437	* surface.
				438	* Therefore, this interface is restricted to root.
				439	*/
				440	if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
				441	return -EACCES;
				442
				443	entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
				444	GFP_KERNEL);
				445	if (!entries)
				446	return -ENOMEM;
				447
				448	trace.nr_entries = 0;
				449	trace.max_entries = MAX_STACK_TRACE_DEPTH;
				450	trace.entries = entries;
				451	trace.skip = 0;
				452
				453	err = lock_trace(task);
				454	if (!err) {
				455	unsigned int i;
				456
				457	save_stack_trace_tsk(task, &trace);
				458
				459	for (i = 0; i < trace.nr_entries; i++) {
				460	seq_printf(m, "[<0>] %pB\n", (void *)entries[i]);
				461	}
				462	unlock_trace(task);
				463	}
				464	kfree(entries);
				465
				466	return err;
				467	}
				468	#endif
				469
				470	#ifdef CONFIG_SCHED_INFO
				471	/*
				472	* Provides /proc/PID/schedstat
				473	*/
				474	static int proc_pid_schedstat(struct seq_file m, struct pid_namespace ns,
				475	struct pid pid, struct task_struct task)
				476	{
				477	if (unlikely(!sched_info_on()))
				478	seq_printf(m, "0 0 0\n");
				479	else
				480	seq_printf(m, "%llu %llu %lu\n",
				481	(unsigned long long)task->se.sum_exec_runtime,
				482	(unsigned long long)task->sched_info.run_delay,
				483	task->sched_info.pcount);
				484
				485	return 0;
				486	}
				487	#endif
				488
				489	#ifdef CONFIG_LATENCYTOP
				490	static int lstats_show_proc(struct seq_file m, void v)
				491	{
				492	int i;
				493	struct inode *inode = m->private;
				494	struct task_struct *task = get_proc_task(inode);
				495
				496	if (!task)
				497	return -ESRCH;
				498	seq_puts(m, "Latency Top version : v0.1\n");
				499	for (i = 0; i < LT_SAVECOUNT; i++) {
				500	struct latency_record *lr = &task->latency_record[i];
				501	if (lr->backtrace[0]) {
				502	int q;
				503	seq_printf(m, "%i %li %li",
				504	lr->count, lr->time, lr->max);
				505	for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
				506	unsigned long bt = lr->backtrace[q];
				507	if (!bt)
				508	break;
				509	if (bt == ULONG_MAX)
				510	break;
				511	seq_printf(m, " %ps", (void *)bt);
				512	}
				513	seq_putc(m, '\n');
				514	}
				515
				516	}
				517	put_task_struct(task);
				518	return 0;
				519	}
				520
				521	static int lstats_open(struct inode inode, struct file file)
				522	{
				523	return single_open(file, lstats_show_proc, inode);
				524	}
				525
				526	static ssize_t lstats_write(struct file file, const char __user buf,
				527	size_t count, loff_t *offs)
				528	{
				529	struct task_struct *task = get_proc_task(file_inode(file));
				530
				531	if (!task)
				532	return -ESRCH;
				533	clear_all_latency_tracing(task);
				534	put_task_struct(task);
				535
				536	return count;
				537	}
				538
				539	static const struct file_operations proc_lstats_operations = {
				540	.open = lstats_open,
				541	.read = seq_read,
				542	.write = lstats_write,
				543	.llseek = seq_lseek,
				544	.release = single_release,
				545	};
				546
				547	#endif
				548
				549	static int proc_oom_score(struct seq_file m, struct pid_namespace ns,
				550	struct pid pid, struct task_struct task)
				551	{
				552	unsigned long totalpages = totalram_pages + total_swap_pages;
				553	unsigned long points = 0;
				554
				555	points = oom_badness(task, NULL, NULL, totalpages) *
				556	1000 / totalpages;
				557	seq_printf(m, "%lu\n", points);
				558
				559	return 0;
				560	}
				561
				562	struct limit_names {
				563	const char *name;
				564	const char *unit;
				565	};
				566
				567	static const struct limit_names lnames[RLIM_NLIMITS] = {
				568	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
				569	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
				570	[RLIMIT_DATA] = {"Max data size", "bytes"},
				571	[RLIMIT_STACK] = {"Max stack size", "bytes"},
				572	[RLIMIT_CORE] = {"Max core file size", "bytes"},
				573	[RLIMIT_RSS] = {"Max resident set", "bytes"},
				574	[RLIMIT_NPROC] = {"Max processes", "processes"},
				575	[RLIMIT_NOFILE] = {"Max open files", "files"},
				576	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
				577	[RLIMIT_AS] = {"Max address space", "bytes"},
				578	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
				579	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
				580	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
				581	[RLIMIT_NICE] = {"Max nice priority", NULL},
				582	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
				583	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
				584	};
				585
				586	/* Display limits for a process */
				587	static int proc_pid_limits(struct seq_file m, struct pid_namespace ns,
				588	struct pid pid, struct task_struct task)
				589	{
				590	unsigned int i;
				591	unsigned long flags;
				592
				593	struct rlimit rlim[RLIM_NLIMITS];
				594
				595	if (!lock_task_sighand(task, &flags))
				596	return 0;
				597	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
				598	unlock_task_sighand(task, &flags);
				599
				600	/*
				601	* print the file header
				602	*/
				603	seq_printf(m, "%-25s %-20s %-20s %-10s\n",
				604	"Limit", "Soft Limit", "Hard Limit", "Units");
				605
				606	for (i = 0; i < RLIM_NLIMITS; i++) {
				607	if (rlim[i].rlim_cur == RLIM_INFINITY)
				608	seq_printf(m, "%-25s %-20s ",
				609	lnames[i].name, "unlimited");
				610	else
				611	seq_printf(m, "%-25s %-20lu ",
				612	lnames[i].name, rlim[i].rlim_cur);
				613
				614	if (rlim[i].rlim_max == RLIM_INFINITY)
				615	seq_printf(m, "%-20s ", "unlimited");
				616	else
				617	seq_printf(m, "%-20lu ", rlim[i].rlim_max);
				618
				619	if (lnames[i].unit)
				620	seq_printf(m, "%-10s\n", lnames[i].unit);
				621	else
				622	seq_putc(m, '\n');
				623	}
				624
				625	return 0;
				626	}
				627
				628	#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
				629	static int proc_pid_syscall(struct seq_file m, struct pid_namespace ns,
				630	struct pid pid, struct task_struct task)
				631	{
				632	long nr;
				633	unsigned long args[6], sp, pc;
				634	int res;
				635
				636	res = lock_trace(task);
				637	if (res)
				638	return res;
				639
				640	if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
				641	seq_puts(m, "running\n");
				642	else if (nr < 0)
				643	seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
				644	else
				645	seq_printf(m,
				646	"%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
				647	nr,
				648	args[0], args[1], args[2], args[3], args[4], args[5],
				649	sp, pc);
				650	unlock_trace(task);
				651
				652	return 0;
				653	}
				654	#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
				655
				656	/************************************************************************/
				657	/* Here the fs part begins */
				658	/************************************************************************/
				659
				660	/* permission checks */
				661	static int proc_fd_access_allowed(struct inode *inode)
				662	{
				663	struct task_struct *task;
				664	int allowed = 0;
				665	/* Allow access to a task's file descriptors if it is us or we
				666	* may use ptrace attach to the process and find out that
				667	* information.
				668	*/
				669	task = get_proc_task(inode);
				670	if (task) {
				671	allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
				672	put_task_struct(task);
				673	}
				674	return allowed;
				675	}
				676
				677	int proc_setattr(struct dentry dentry, struct iattr attr)
				678	{
				679	int error;
				680	struct inode *inode = d_inode(dentry);
				681
				682	if (attr->ia_valid & ATTR_MODE)
				683	return -EPERM;
				684
				685	error = setattr_prepare(dentry, attr);
				686	if (error)
				687	return error;
				688
				689	setattr_copy(inode, attr);
				690	mark_inode_dirty(inode);
				691	return 0;
				692	}
				693
				694	/*
				695	* May current process learn task's sched/cmdline info (for hide_pid_min=1)
				696	* or euid/egid (for hide_pid_min=2)?
				697	*/
				698	static bool has_pid_permissions(struct pid_namespace *pid,
				699	struct task_struct *task,
				700	int hide_pid_min)
				701	{
				702	if (pid->hide_pid < hide_pid_min)
				703	return true;
				704	if (in_group_p(pid->pid_gid))
				705	return true;
				706	return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
				707	}
				708
				709
				710	static int proc_pid_permission(struct inode *inode, int mask)
				711	{
				712	struct pid_namespace *pid = proc_pid_ns(inode);
				713	struct task_struct *task;
				714	bool has_perms;
				715
				716	task = get_proc_task(inode);
				717	if (!task)
				718	return -ESRCH;
				719	has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
				720	put_task_struct(task);
				721
				722	if (!has_perms) {
				723	if (pid->hide_pid == HIDEPID_INVISIBLE) {
				724	/*
				725	* Let's make getdents(), stat(), and open()
				726	* consistent with each other. If a process
				727	* may not stat() a file, it shouldn't be seen
				728	* in procfs at all.
				729	*/
				730	return -ENOENT;
				731	}
				732
				733	return -EPERM;
				734	}
				735	return generic_permission(inode, mask);
				736	}
				737
				738
				739
				740	static const struct inode_operations proc_def_inode_operations = {
				741	.setattr = proc_setattr,
				742	};
				743
				744	static int proc_single_show(struct seq_file m, void v)
				745	{
				746	struct inode *inode = m->private;
				747	struct pid_namespace *ns = proc_pid_ns(inode);
				748	struct pid *pid = proc_pid(inode);
				749	struct task_struct *task;
				750	int ret;
				751
				752	task = get_pid_task(pid, PIDTYPE_PID);
				753	if (!task)
				754	return -ESRCH;
				755
				756	ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
				757
				758	put_task_struct(task);
				759	return ret;
				760	}
				761
				762	static int proc_single_open(struct inode inode, struct file filp)
				763	{
				764	return single_open(filp, proc_single_show, inode);
				765	}
				766
				767	static const struct file_operations proc_single_file_operations = {
				768	.open = proc_single_open,
				769	.read = seq_read,
				770	.llseek = seq_lseek,
				771	.release = single_release,
				772	};
				773
				774
				775	struct mm_struct proc_mem_open(struct inode inode, unsigned int mode)
				776	{
				777	struct task_struct *task = get_proc_task(inode);
				778	struct mm_struct *mm = ERR_PTR(-ESRCH);
				779
				780	if (task) {
				781	mm = mm_access(task, mode \| PTRACE_MODE_FSCREDS);
				782	put_task_struct(task);
				783
				784	if (!IS_ERR_OR_NULL(mm)) {
				785	/* ensure this mm_struct can't be freed */
				786	mmgrab(mm);
				787	/* but do not pin its memory */
				788	mmput(mm);
				789	}
				790	}
				791
				792	return mm;
				793	}
				794
				795	static int __mem_open(struct inode inode, struct file file, unsigned int mode)
				796	{
				797	struct mm_struct *mm = proc_mem_open(inode, mode);
				798
				799	if (IS_ERR(mm))
				800	return PTR_ERR(mm);
				801
				802	file->private_data = mm;
				803	return 0;
				804	}
				805
				806	static int mem_open(struct inode inode, struct file file)
				807	{
				808	int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH);
				809
				810	/* OK to pass negative loff_t, we can catch out-of-range */
				811	file->f_mode \|= FMODE_UNSIGNED_OFFSET;
				812
				813	return ret;
				814	}
				815
				816	static ssize_t mem_rw(struct file file, char __user buf,
				817	size_t count, loff_t *ppos, int write)
				818	{
				819	struct mm_struct *mm = file->private_data;
				820	unsigned long addr = *ppos;
				821	ssize_t copied;
				822	char *page;
				823	unsigned int flags;
				824
				825	if (!mm)
				826	return 0;
				827
				828	page = (char *)__get_free_page(GFP_KERNEL);
				829	if (!page)
				830	return -ENOMEM;
				831
				832	copied = 0;
				833	if (!mmget_not_zero(mm))
				834	goto free;
				835
				836	flags = FOLL_FORCE \| (write ? FOLL_WRITE : 0);
				837
				838	while (count > 0) {
				839	int this_len = min_t(int, count, PAGE_SIZE);
				840
				841	if (write && copy_from_user(page, buf, this_len)) {
				842	copied = -EFAULT;
				843	break;
				844	}
				845
				846	this_len = access_remote_vm(mm, addr, page, this_len, flags);
				847	if (!this_len) {
				848	if (!copied)
				849	copied = -EIO;
				850	break;
				851	}
				852
				853	if (!write && copy_to_user(buf, page, this_len)) {
				854	copied = -EFAULT;
				855	break;
				856	}
				857
				858	buf += this_len;
				859	addr += this_len;
				860	copied += this_len;
				861	count -= this_len;
				862	}
				863	*ppos = addr;
				864
				865	mmput(mm);
				866	free:
				867	free_page((unsigned long) page);
				868	return copied;
				869	}
				870
				871	static ssize_t mem_read(struct file file, char __user buf,
				872	size_t count, loff_t *ppos)
				873	{
				874	return mem_rw(file, buf, count, ppos, 0);
				875	}
				876
				877	static ssize_t mem_write(struct file file, const char __user buf,
				878	size_t count, loff_t *ppos)
				879	{
				880	return mem_rw(file, (char __user*)buf, count, ppos, 1);
				881	}
				882
				883	loff_t mem_lseek(struct file *file, loff_t offset, int orig)
				884	{
				885	switch (orig) {
				886	case 0:
				887	file->f_pos = offset;
				888	break;
				889	case 1:
				890	file->f_pos += offset;
				891	break;
				892	default:
				893	return -EINVAL;
				894	}
				895	force_successful_syscall_return();
				896	return file->f_pos;
				897	}
				898
				899	static int mem_release(struct inode inode, struct file file)
				900	{
				901	struct mm_struct *mm = file->private_data;
				902	if (mm)
				903	mmdrop(mm);
				904	return 0;
				905	}
				906
				907	static const struct file_operations proc_mem_operations = {
				908	.llseek = mem_lseek,
				909	.read = mem_read,
				910	.write = mem_write,
				911	.open = mem_open,
				912	.release = mem_release,
				913	};
				914
				915	static int environ_open(struct inode inode, struct file file)
				916	{
				917	return __mem_open(inode, file, PTRACE_MODE_READ);
				918	}
				919
				920	static ssize_t environ_read(struct file file, char __user buf,
				921	size_t count, loff_t *ppos)
				922	{
				923	char *page;
				924	unsigned long src = *ppos;
				925	int ret = 0;
				926	struct mm_struct *mm = file->private_data;
				927	unsigned long env_start, env_end;
				928
				929	/* Ensure the process spawned far enough to have an environment. */
				930	if (!mm \|\| !mm->env_end)
				931	return 0;
				932
				933	page = (char *)__get_free_page(GFP_KERNEL);
				934	if (!page)
				935	return -ENOMEM;
				936
				937	ret = 0;
				938	if (!mmget_not_zero(mm))
				939	goto free;
				940
				941	spin_lock(&mm->arg_lock);
				942	env_start = mm->env_start;
				943	env_end = mm->env_end;
				944	spin_unlock(&mm->arg_lock);
				945
				946	while (count > 0) {
				947	size_t this_len, max_len;
				948	int retval;
				949
				950	if (src >= (env_end - env_start))
				951	break;
				952
				953	this_len = env_end - (env_start + src);
				954
				955	max_len = min_t(size_t, PAGE_SIZE, count);
				956	this_len = min(max_len, this_len);
				957
				958	retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
				959
				960	if (retval <= 0) {
				961	ret = retval;
				962	break;
				963	}
				964
				965	if (copy_to_user(buf, page, retval)) {
				966	ret = -EFAULT;
				967	break;
				968	}
				969
				970	ret += retval;
				971	src += retval;
				972	buf += retval;
				973	count -= retval;
				974	}
				975	*ppos = src;
				976	mmput(mm);
				977
				978	free:
				979	free_page((unsigned long) page);
				980	return ret;
				981	}
				982
				983	static const struct file_operations proc_environ_operations = {
				984	.open = environ_open,
				985	.read = environ_read,
				986	.llseek = generic_file_llseek,
				987	.release = mem_release,
				988	};
				989
				990	static int auxv_open(struct inode inode, struct file file)
				991	{
				992	return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
				993	}
				994
				995	static ssize_t auxv_read(struct file file, char __user buf,
				996	size_t count, loff_t *ppos)
				997	{
				998	struct mm_struct *mm = file->private_data;
				999	unsigned int nwords = 0;
				1000
				1001	if (!mm)
				1002	return 0;
				1003	do {
				1004	nwords += 2;
				1005	} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
				1006	return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
				1007	nwords * sizeof(mm->saved_auxv[0]));
				1008	}
				1009
				1010	static const struct file_operations proc_auxv_operations = {
				1011	.open = auxv_open,
				1012	.read = auxv_read,
				1013	.llseek = generic_file_llseek,
				1014	.release = mem_release,
				1015	};
				1016
				1017	static ssize_t oom_adj_read(struct file file, char __user buf, size_t count,
				1018	loff_t *ppos)
				1019	{
				1020	struct task_struct *task = get_proc_task(file_inode(file));
				1021	char buffer[PROC_NUMBUF];
				1022	int oom_adj = OOM_ADJUST_MIN;
				1023	size_t len;
				1024
				1025	if (!task)
				1026	return -ESRCH;
				1027	if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
				1028	oom_adj = OOM_ADJUST_MAX;
				1029	else
				1030	oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
				1031	OOM_SCORE_ADJ_MAX;
				1032	put_task_struct(task);
				1033	len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
				1034	return simple_read_from_buffer(buf, count, ppos, buffer, len);
				1035	}
				1036
				1037	static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
				1038	{
				1039	static DEFINE_MUTEX(oom_adj_mutex);
				1040	struct mm_struct *mm = NULL;
				1041	struct task_struct *task;
				1042	int err = 0;
				1043
				1044	task = get_proc_task(file_inode(file));
				1045	if (!task)
				1046	return -ESRCH;
				1047
				1048	mutex_lock(&oom_adj_mutex);
				1049	if (legacy) {
				1050	if (oom_adj < task->signal->oom_score_adj &&
				1051	!capable(CAP_SYS_RESOURCE)) {
				1052	err = -EACCES;
				1053	goto err_unlock;
				1054	}
				1055	/*
				1056	* /proc/pid/oom_adj is provided for legacy purposes, ask users to use
				1057	* /proc/pid/oom_score_adj instead.
				1058	*/
				1059	pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
				1060	current->comm, task_pid_nr(current), task_pid_nr(task),
				1061	task_pid_nr(task));
				1062	} else {
				1063	if ((short)oom_adj < task->signal->oom_score_adj_min &&
				1064	!capable(CAP_SYS_RESOURCE)) {
				1065	err = -EACCES;
				1066	goto err_unlock;
				1067	}
				1068	}
				1069
				1070	/*
				1071	* Make sure we will check other processes sharing the mm if this is
				1072	* not vfrok which wants its own oom_score_adj.
				1073	* pin the mm so it doesn't go away and get reused after task_unlock
				1074	*/
				1075	if (!task->vfork_done) {
				1076	struct task_struct *p = find_lock_task_mm(task);
				1077
				1078	if (p) {
				1079	if (atomic_read(&p->mm->mm_users) > 1) {
				1080	mm = p->mm;
				1081	mmgrab(mm);
				1082	}
				1083	task_unlock(p);
				1084	}
				1085	}
				1086
				1087	task->signal->oom_score_adj = oom_adj;
				1088	if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
				1089	task->signal->oom_score_adj_min = (short)oom_adj;
				1090	trace_oom_score_adj_update(task);
				1091
				1092	if (mm) {
				1093	struct task_struct *p;
				1094
				1095	rcu_read_lock();
				1096	for_each_process(p) {
				1097	if (same_thread_group(task, p))
				1098	continue;
				1099
				1100	/* do not touch kernel threads or the global init */
				1101	if (p->flags & PF_KTHREAD \|\| is_global_init(p))
				1102	continue;
				1103
				1104	task_lock(p);
				1105	if (!p->vfork_done && process_shares_mm(p, mm)) {
				1106	p->signal->oom_score_adj = oom_adj;
				1107	if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
				1108	p->signal->oom_score_adj_min = (short)oom_adj;
				1109	}
				1110	task_unlock(p);
				1111	}
				1112	rcu_read_unlock();
				1113	mmdrop(mm);
				1114	}
				1115	err_unlock:
				1116	mutex_unlock(&oom_adj_mutex);
				1117	put_task_struct(task);
				1118	return err;
				1119	}
				1120
				1121	/*
				1122	* /proc/pid/oom_adj exists solely for backwards compatibility with previous
				1123	* kernels. The effective policy is defined by oom_score_adj, which has a
				1124	* different scale: oom_adj grew exponentially and oom_score_adj grows linearly.
				1125	* Values written to oom_adj are simply mapped linearly to oom_score_adj.
				1126	* Processes that become oom disabled via oom_adj will still be oom disabled
				1127	* with this implementation.
				1128	*
				1129	* oom_adj cannot be removed since existing userspace binaries use it.
				1130	*/
				1131	static ssize_t oom_adj_write(struct file file, const char __user buf,
				1132	size_t count, loff_t *ppos)
				1133	{
				1134	char buffer[PROC_NUMBUF];
				1135	int oom_adj;
				1136	int err;
				1137
				1138	memset(buffer, 0, sizeof(buffer));
				1139	if (count > sizeof(buffer) - 1)
				1140	count = sizeof(buffer) - 1;
				1141	if (copy_from_user(buffer, buf, count)) {
				1142	err = -EFAULT;
				1143	goto out;
				1144	}
				1145
				1146	err = kstrtoint(strstrip(buffer), 0, &oom_adj);
				1147	if (err)
				1148	goto out;
				1149	if ((oom_adj < OOM_ADJUST_MIN \|\| oom_adj > OOM_ADJUST_MAX) &&
				1150	oom_adj != OOM_DISABLE) {
				1151	err = -EINVAL;
				1152	goto out;
				1153	}
				1154
				1155	/*
				1156	* Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
				1157	* value is always attainable.
				1158	*/
				1159	if (oom_adj == OOM_ADJUST_MAX)
				1160	oom_adj = OOM_SCORE_ADJ_MAX;
				1161	else
				1162	oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
				1163
				1164	err = __set_oom_adj(file, oom_adj, true);
				1165	out:
				1166	return err < 0 ? err : count;
				1167	}
				1168
				1169	static const struct file_operations proc_oom_adj_operations = {
				1170	.read = oom_adj_read,
				1171	.write = oom_adj_write,
				1172	.llseek = generic_file_llseek,
				1173	};
				1174
				1175	static ssize_t oom_score_adj_read(struct file file, char __user buf,
				1176	size_t count, loff_t *ppos)
				1177	{
				1178	struct task_struct *task = get_proc_task(file_inode(file));
				1179	char buffer[PROC_NUMBUF];
				1180	short oom_score_adj = OOM_SCORE_ADJ_MIN;
				1181	size_t len;
				1182
				1183	if (!task)
				1184	return -ESRCH;
				1185	oom_score_adj = task->signal->oom_score_adj;
				1186	put_task_struct(task);
				1187	len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
				1188	return simple_read_from_buffer(buf, count, ppos, buffer, len);
				1189	}
				1190
				1191	static ssize_t oom_score_adj_write(struct file file, const char __user buf,
				1192	size_t count, loff_t *ppos)
				1193	{
				1194	char buffer[PROC_NUMBUF];
				1195	int oom_score_adj;
				1196	int err;
				1197
				1198	memset(buffer, 0, sizeof(buffer));
				1199	if (count > sizeof(buffer) - 1)
				1200	count = sizeof(buffer) - 1;
				1201	if (copy_from_user(buffer, buf, count)) {
				1202	err = -EFAULT;
				1203	goto out;
				1204	}
				1205
				1206	err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
				1207	if (err)
				1208	goto out;
				1209	if (oom_score_adj < OOM_SCORE_ADJ_MIN \|\|
				1210	oom_score_adj > OOM_SCORE_ADJ_MAX) {
				1211	err = -EINVAL;
				1212	goto out;
				1213	}
				1214
				1215	err = __set_oom_adj(file, oom_score_adj, false);
				1216	out:
				1217	return err < 0 ? err : count;
				1218	}
				1219
				1220	static const struct file_operations proc_oom_score_adj_operations = {
				1221	.read = oom_score_adj_read,
				1222	.write = oom_score_adj_write,
				1223	.llseek = default_llseek,
				1224	};
				1225
				1226	#ifdef CONFIG_AUDITSYSCALL
				1227	#define TMPBUFLEN 11
				1228	static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
				1229	size_t count, loff_t *ppos)
				1230	{
				1231	struct inode * inode = file_inode(file);
				1232	struct task_struct *task = get_proc_task(inode);
				1233	ssize_t length;
				1234	char tmpbuf[TMPBUFLEN];
				1235
				1236	if (!task)
				1237	return -ESRCH;
				1238	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
				1239	from_kuid(file->f_cred->user_ns,
				1240	audit_get_loginuid(task)));
				1241	put_task_struct(task);
				1242	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
				1243	}
				1244
				1245	static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
				1246	size_t count, loff_t *ppos)
				1247	{
				1248	struct inode * inode = file_inode(file);
				1249	uid_t loginuid;
				1250	kuid_t kloginuid;
				1251	int rv;
				1252
				1253	rcu_read_lock();
				1254	if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
				1255	rcu_read_unlock();
				1256	return -EPERM;
				1257	}
				1258	rcu_read_unlock();
				1259
				1260	if (*ppos != 0) {
				1261	/* No partial writes. */
				1262	return -EINVAL;
				1263	}
				1264
				1265	rv = kstrtou32_from_user(buf, count, 10, &loginuid);
				1266	if (rv < 0)
				1267	return rv;
				1268
				1269	/* is userspace tring to explicitly UNSET the loginuid? */
				1270	if (loginuid == AUDIT_UID_UNSET) {
				1271	kloginuid = INVALID_UID;
				1272	} else {
				1273	kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
				1274	if (!uid_valid(kloginuid))
				1275	return -EINVAL;
				1276	}
				1277
				1278	rv = audit_set_loginuid(kloginuid);
				1279	if (rv < 0)
				1280	return rv;
				1281	return count;
				1282	}
				1283
				1284	static const struct file_operations proc_loginuid_operations = {
				1285	.read = proc_loginuid_read,
				1286	.write = proc_loginuid_write,
				1287	.llseek = generic_file_llseek,
				1288	};
				1289
				1290	static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
				1291	size_t count, loff_t *ppos)
				1292	{
				1293	struct inode * inode = file_inode(file);
				1294	struct task_struct *task = get_proc_task(inode);
				1295	ssize_t length;
				1296	char tmpbuf[TMPBUFLEN];
				1297
				1298	if (!task)
				1299	return -ESRCH;
				1300	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
				1301	audit_get_sessionid(task));
				1302	put_task_struct(task);
				1303	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
				1304	}
				1305
				1306	static const struct file_operations proc_sessionid_operations = {
				1307	.read = proc_sessionid_read,
				1308	.llseek = generic_file_llseek,
				1309	};
				1310	#endif
				1311
				1312	#ifdef CONFIG_FAULT_INJECTION
				1313	static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
				1314	size_t count, loff_t *ppos)
				1315	{
				1316	struct task_struct *task = get_proc_task(file_inode(file));
				1317	char buffer[PROC_NUMBUF];
				1318	size_t len;
				1319	int make_it_fail;
				1320
				1321	if (!task)
				1322	return -ESRCH;
				1323	make_it_fail = task->make_it_fail;
				1324	put_task_struct(task);
				1325
				1326	len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
				1327
				1328	return simple_read_from_buffer(buf, count, ppos, buffer, len);
				1329	}
				1330
				1331	static ssize_t proc_fault_inject_write(struct file * file,
				1332	const char __user * buf, size_t count, loff_t *ppos)
				1333	{
				1334	struct task_struct *task;
				1335	char buffer[PROC_NUMBUF];
				1336	int make_it_fail;
				1337	int rv;
				1338
				1339	if (!capable(CAP_SYS_RESOURCE))
				1340	return -EPERM;
				1341	memset(buffer, 0, sizeof(buffer));
				1342	if (count > sizeof(buffer) - 1)
				1343	count = sizeof(buffer) - 1;
				1344	if (copy_from_user(buffer, buf, count))
				1345	return -EFAULT;
				1346	rv = kstrtoint(strstrip(buffer), 0, &make_it_fail);
				1347	if (rv < 0)
				1348	return rv;
				1349	if (make_it_fail < 0 \|\| make_it_fail > 1)
				1350	return -EINVAL;
				1351
				1352	task = get_proc_task(file_inode(file));
				1353	if (!task)
				1354	return -ESRCH;
				1355	task->make_it_fail = make_it_fail;
				1356	put_task_struct(task);
				1357
				1358	return count;
				1359	}
				1360
				1361	static const struct file_operations proc_fault_inject_operations = {
				1362	.read = proc_fault_inject_read,
				1363	.write = proc_fault_inject_write,
				1364	.llseek = generic_file_llseek,
				1365	};
				1366
				1367	static ssize_t proc_fail_nth_write(struct file file, const char __user buf,
				1368	size_t count, loff_t *ppos)
				1369	{
				1370	struct task_struct *task;
				1371	int err;
				1372	unsigned int n;
				1373
				1374	err = kstrtouint_from_user(buf, count, 0, &n);
				1375	if (err)
				1376	return err;
				1377
				1378	task = get_proc_task(file_inode(file));
				1379	if (!task)
				1380	return -ESRCH;
				1381	task->fail_nth = n;
				1382	put_task_struct(task);
				1383
				1384	return count;
				1385	}
				1386
				1387	static ssize_t proc_fail_nth_read(struct file file, char __user buf,
				1388	size_t count, loff_t *ppos)
				1389	{
				1390	struct task_struct *task;
				1391	char numbuf[PROC_NUMBUF];
				1392	ssize_t len;
				1393
				1394	task = get_proc_task(file_inode(file));
				1395	if (!task)
				1396	return -ESRCH;
				1397	len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth);
				1398	put_task_struct(task);
				1399	return simple_read_from_buffer(buf, count, ppos, numbuf, len);
				1400	}
				1401
				1402	static const struct file_operations proc_fail_nth_operations = {
				1403	.read = proc_fail_nth_read,
				1404	.write = proc_fail_nth_write,
				1405	};
				1406	#endif
				1407
				1408
				1409	#ifdef CONFIG_SCHED_DEBUG
				1410	/*
				1411	* Print out various scheduling related per-task fields:
				1412	*/
				1413	static int sched_show(struct seq_file m, void v)
				1414	{
				1415	struct inode *inode = m->private;
				1416	struct pid_namespace *ns = proc_pid_ns(inode);
				1417	struct task_struct *p;
				1418
				1419	p = get_proc_task(inode);
				1420	if (!p)
				1421	return -ESRCH;
				1422	proc_sched_show_task(p, ns, m);
				1423
				1424	put_task_struct(p);
				1425
				1426	return 0;
				1427	}
				1428
				1429	static ssize_t
				1430	sched_write(struct file file, const char __user buf,
				1431	size_t count, loff_t *offset)
				1432	{
				1433	struct inode *inode = file_inode(file);
				1434	struct task_struct *p;
				1435
				1436	p = get_proc_task(inode);
				1437	if (!p)
				1438	return -ESRCH;
				1439	proc_sched_set_task(p);
				1440
				1441	put_task_struct(p);
				1442
				1443	return count;
				1444	}
				1445
				1446	static int sched_open(struct inode inode, struct file filp)
				1447	{
				1448	return single_open(filp, sched_show, inode);
				1449	}
				1450
				1451	static const struct file_operations proc_pid_sched_operations = {
				1452	.open = sched_open,
				1453	.read = seq_read,
				1454	.write = sched_write,
				1455	.llseek = seq_lseek,
				1456	.release = single_release,
				1457	};
				1458
				1459	#endif
				1460
				1461	#ifdef CONFIG_SCHED_AUTOGROUP
				1462	/*
				1463	* Print out autogroup related information:
				1464	*/
				1465	static int sched_autogroup_show(struct seq_file m, void v)
				1466	{
				1467	struct inode *inode = m->private;
				1468	struct task_struct *p;
				1469
				1470	p = get_proc_task(inode);
				1471	if (!p)
				1472	return -ESRCH;
				1473	proc_sched_autogroup_show_task(p, m);
				1474
				1475	put_task_struct(p);
				1476
				1477	return 0;
				1478	}
				1479
				1480	static ssize_t
				1481	sched_autogroup_write(struct file file, const char __user buf,
				1482	size_t count, loff_t *offset)
				1483	{
				1484	struct inode *inode = file_inode(file);
				1485	struct task_struct *p;
				1486	char buffer[PROC_NUMBUF];
				1487	int nice;
				1488	int err;
				1489
				1490	memset(buffer, 0, sizeof(buffer));
				1491	if (count > sizeof(buffer) - 1)
				1492	count = sizeof(buffer) - 1;
				1493	if (copy_from_user(buffer, buf, count))
				1494	return -EFAULT;
				1495
				1496	err = kstrtoint(strstrip(buffer), 0, &nice);
				1497	if (err < 0)
				1498	return err;
				1499
				1500	p = get_proc_task(inode);
				1501	if (!p)
				1502	return -ESRCH;
				1503
				1504	err = proc_sched_autogroup_set_nice(p, nice);
				1505	if (err)
				1506	count = err;
				1507
				1508	put_task_struct(p);
				1509
				1510	return count;
				1511	}
				1512
				1513	static int sched_autogroup_open(struct inode inode, struct file filp)
				1514	{
				1515	int ret;
				1516
				1517	ret = single_open(filp, sched_autogroup_show, NULL);
				1518	if (!ret) {
				1519	struct seq_file *m = filp->private_data;
				1520
				1521	m->private = inode;
				1522	}
				1523	return ret;
				1524	}
				1525
				1526	static const struct file_operations proc_pid_sched_autogroup_operations = {
				1527	.open = sched_autogroup_open,
				1528	.read = seq_read,
				1529	.write = sched_autogroup_write,
				1530	.llseek = seq_lseek,
				1531	.release = single_release,
				1532	};
				1533
				1534	#endif /* CONFIG_SCHED_AUTOGROUP */
				1535
				1536	static ssize_t comm_write(struct file file, const char __user buf,
				1537	size_t count, loff_t *offset)
				1538	{
				1539	struct inode *inode = file_inode(file);
				1540	struct task_struct *p;
				1541	char buffer[TASK_COMM_LEN];
				1542	const size_t maxlen = sizeof(buffer) - 1;
				1543
				1544	memset(buffer, 0, sizeof(buffer));
				1545	if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count))
				1546	return -EFAULT;
				1547
				1548	p = get_proc_task(inode);
				1549	if (!p)
				1550	return -ESRCH;
				1551
				1552	if (same_thread_group(current, p))
				1553	set_task_comm(p, buffer);
				1554	else
				1555	count = -EINVAL;
				1556
				1557	put_task_struct(p);
				1558
				1559	return count;
				1560	}
				1561
				1562	static int comm_show(struct seq_file m, void v)
				1563	{
				1564	struct inode *inode = m->private;
				1565	struct task_struct *p;
				1566
				1567	p = get_proc_task(inode);
				1568	if (!p)
				1569	return -ESRCH;
				1570
				1571	proc_task_name(m, p, false);
				1572	seq_putc(m, '\n');
				1573
				1574	put_task_struct(p);
				1575
				1576	return 0;
				1577	}
				1578
				1579	static int comm_open(struct inode inode, struct file filp)
				1580	{
				1581	return single_open(filp, comm_show, inode);
				1582	}
				1583
				1584	static const struct file_operations proc_pid_set_comm_operations = {
				1585	.open = comm_open,
				1586	.read = seq_read,
				1587	.write = comm_write,
				1588	.llseek = seq_lseek,
				1589	.release = single_release,
				1590	};
				1591
				1592	static int proc_exe_link(struct dentry dentry, struct path exe_path)
				1593	{
				1594	struct task_struct *task;
				1595	struct file *exe_file;
				1596
				1597	task = get_proc_task(d_inode(dentry));
				1598	if (!task)
				1599	return -ENOENT;
				1600	exe_file = get_task_exe_file(task);
				1601	put_task_struct(task);
				1602	if (exe_file) {
				1603	*exe_path = exe_file->f_path;
				1604	path_get(&exe_file->f_path);
				1605	fput(exe_file);
				1606	return 0;
				1607	} else
				1608	return -ENOENT;
				1609	}
				1610
				1611	static const char proc_pid_get_link(struct dentry dentry,
				1612	struct inode *inode,
				1613	struct delayed_call *done)
				1614	{
				1615	struct path path;
				1616	int error = -EACCES;
				1617
				1618	if (!dentry)
				1619	return ERR_PTR(-ECHILD);
				1620
				1621	/* Are we allowed to snoop on the tasks file descriptors? */
				1622	if (!proc_fd_access_allowed(inode))
				1623	goto out;
				1624
				1625	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
				1626	if (error)
				1627	goto out;
				1628
				1629	nd_jump_link(&path);
				1630	return NULL;
				1631	out:
				1632	return ERR_PTR(error);
				1633	}
				1634
				1635	static int do_proc_readlink(struct path path, char __user buffer, int buflen)
				1636	{
				1637	char tmp = (char )__get_free_page(GFP_KERNEL);
				1638	char *pathname;
				1639	int len;
				1640
				1641	if (!tmp)
				1642	return -ENOMEM;
				1643
				1644	pathname = d_path(path, tmp, PAGE_SIZE);
				1645	len = PTR_ERR(pathname);
				1646	if (IS_ERR(pathname))
				1647	goto out;
				1648	len = tmp + PAGE_SIZE - 1 - pathname;
				1649
				1650	if (len > buflen)
				1651	len = buflen;
				1652	if (copy_to_user(buffer, pathname, len))
				1653	len = -EFAULT;
				1654	out:
				1655	free_page((unsigned long)tmp);
				1656	return len;
				1657	}
				1658
				1659	static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
				1660	{
				1661	int error = -EACCES;
				1662	struct inode *inode = d_inode(dentry);
				1663	struct path path;
				1664
				1665	/* Are we allowed to snoop on the tasks file descriptors? */
				1666	if (!proc_fd_access_allowed(inode))
				1667	goto out;
				1668
				1669	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
				1670	if (error)
				1671	goto out;
				1672
				1673	error = do_proc_readlink(&path, buffer, buflen);
				1674	path_put(&path);
				1675	out:
				1676	return error;
				1677	}
				1678
				1679	const struct inode_operations proc_pid_link_inode_operations = {
				1680	.readlink = proc_pid_readlink,
				1681	.get_link = proc_pid_get_link,
				1682	.setattr = proc_setattr,
				1683	};
				1684
				1685
				1686	/* building an inode */
				1687
				1688	void task_dump_owner(struct task_struct *task, umode_t mode,
				1689	kuid_t ruid, kgid_t rgid)
				1690	{
				1691	/* Depending on the state of dumpable compute who should own a
				1692	* proc file for a task.
				1693	*/
				1694	const struct cred *cred;
				1695	kuid_t uid;
				1696	kgid_t gid;
				1697
				1698	if (unlikely(task->flags & PF_KTHREAD)) {
				1699	*ruid = GLOBAL_ROOT_UID;
				1700	*rgid = GLOBAL_ROOT_GID;
				1701	return;
				1702	}
				1703
				1704	/* Default to the tasks effective ownership */
				1705	rcu_read_lock();
				1706	cred = __task_cred(task);
				1707	uid = cred->euid;
				1708	gid = cred->egid;
				1709	rcu_read_unlock();
				1710
				1711	/*
				1712	* Before the /proc/pid/status file was created the only way to read
				1713	* the effective uid of a /process was to stat /proc/pid. Reading
				1714	* /proc/pid/status is slow enough that procps and other packages
				1715	* kept stating /proc/pid. To keep the rules in /proc simple I have
				1716	* made this apply to all per process world readable and executable
				1717	* directories.
				1718	*/
				1719	if (mode != (S_IFDIR\|S_IRUGO\|S_IXUGO)) {
				1720	struct mm_struct *mm;
				1721	task_lock(task);
				1722	mm = task->mm;
				1723	/* Make non-dumpable tasks owned by some root */
				1724	if (mm) {
				1725	if (get_dumpable(mm) != SUID_DUMP_USER) {
				1726	struct user_namespace *user_ns = mm->user_ns;
				1727
				1728	uid = make_kuid(user_ns, 0);
				1729	if (!uid_valid(uid))
				1730	uid = GLOBAL_ROOT_UID;
				1731
				1732	gid = make_kgid(user_ns, 0);
				1733	if (!gid_valid(gid))
				1734	gid = GLOBAL_ROOT_GID;
				1735	}
				1736	} else {
				1737	uid = GLOBAL_ROOT_UID;
				1738	gid = GLOBAL_ROOT_GID;
				1739	}
				1740	task_unlock(task);
				1741	}
				1742	*ruid = uid;
				1743	*rgid = gid;
				1744	}
				1745
				1746	struct inode proc_pid_make_inode(struct super_block sb,
				1747	struct task_struct *task, umode_t mode)
				1748	{
				1749	struct inode * inode;
				1750	struct proc_inode *ei;
				1751
				1752	/* We need a new inode */
				1753
				1754	inode = new_inode(sb);
				1755	if (!inode)
				1756	goto out;
				1757
				1758	/* Common stuff */
				1759	ei = PROC_I(inode);
				1760	inode->i_mode = mode;
				1761	inode->i_ino = get_next_ino();
				1762	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
				1763	inode->i_op = &proc_def_inode_operations;
				1764
				1765	/*
				1766	* grab the reference to task.
				1767	*/
				1768	ei->pid = get_task_pid(task, PIDTYPE_PID);
				1769	if (!ei->pid)
				1770	goto out_unlock;
				1771
				1772	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
				1773	security_task_to_inode(task, inode);
				1774
				1775	out:
				1776	return inode;
				1777
				1778	out_unlock:
				1779	iput(inode);
				1780	return NULL;
				1781	}
				1782
				1783	int pid_getattr(const struct path path, struct kstat stat,
				1784	u32 request_mask, unsigned int query_flags)
				1785	{
				1786	struct inode *inode = d_inode(path->dentry);
				1787	struct pid_namespace *pid = proc_pid_ns(inode);
				1788	struct task_struct *task;
				1789
				1790	generic_fillattr(inode, stat);
				1791
				1792	stat->uid = GLOBAL_ROOT_UID;
				1793	stat->gid = GLOBAL_ROOT_GID;
				1794	rcu_read_lock();
				1795	task = pid_task(proc_pid(inode), PIDTYPE_PID);
				1796	if (task) {
				1797	if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
				1798	rcu_read_unlock();
				1799	/*
				1800	* This doesn't prevent learning whether PID exists,
				1801	* it only makes getattr() consistent with readdir().
				1802	*/
				1803	return -ENOENT;
				1804	}
				1805	task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
				1806	}
				1807	rcu_read_unlock();
				1808	return 0;
				1809	}
				1810
				1811	/* dentry stuff */
				1812
				1813	/*
				1814	* Set <pid>/... inode ownership (can change due to setuid(), etc.)
				1815	*/
				1816	void pid_update_inode(struct task_struct task, struct inode inode)
				1817	{
				1818	task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
				1819
				1820	inode->i_mode &= ~(S_ISUID \| S_ISGID);
				1821	security_task_to_inode(task, inode);
				1822	}
				1823
				1824	/*
				1825	* Rewrite the inode's ownerships here because the owning task may have
				1826	* performed a setuid(), etc.
				1827	*
				1828	*/
				1829	static int pid_revalidate(struct dentry *dentry, unsigned int flags)
				1830	{
				1831	struct inode *inode;
				1832	struct task_struct *task;
				1833
				1834	if (flags & LOOKUP_RCU)
				1835	return -ECHILD;
				1836
				1837	inode = d_inode(dentry);
				1838	task = get_proc_task(inode);
				1839
				1840	if (task) {
				1841	pid_update_inode(task, inode);
				1842	put_task_struct(task);
				1843	return 1;
				1844	}
				1845	return 0;
				1846	}
				1847
				1848	static inline bool proc_inode_is_dead(struct inode *inode)
				1849	{
				1850	return !proc_pid(inode)->tasks[PIDTYPE_PID].first;
				1851	}
				1852
				1853	int pid_delete_dentry(const struct dentry *dentry)
				1854	{
				1855	/* Is the task we represent dead?
				1856	* If so, then don't put the dentry on the lru list,
				1857	* kill it immediately.
				1858	*/
				1859	return proc_inode_is_dead(d_inode(dentry));
				1860	}
				1861
				1862	const struct dentry_operations pid_dentry_operations =
				1863	{
				1864	.d_revalidate = pid_revalidate,
				1865	.d_delete = pid_delete_dentry,
				1866	};
				1867
				1868	/* Lookups */
				1869
				1870	/*
				1871	* Fill a directory entry.
				1872	*
				1873	* If possible create the dcache entry and derive our inode number and
				1874	* file type from dcache entry.
				1875	*
				1876	* Since all of the proc inode numbers are dynamically generated, the inode
				1877	* numbers do not exist until the inode is cache. This means creating the
				1878	* the dcache entry in readdir is necessary to keep the inode numbers
				1879	* reported by readdir in sync with the inode numbers reported
				1880	* by stat.
				1881	*/
				1882	bool proc_fill_cache(struct file file, struct dir_context ctx,
				1883	const char *name, unsigned int len,
				1884	instantiate_t instantiate, struct task_struct task, const void ptr)
				1885	{
				1886	struct dentry child, dir = file->f_path.dentry;
				1887	struct qstr qname = QSTR_INIT(name, len);
				1888	struct inode *inode;
				1889	unsigned type = DT_UNKNOWN;
				1890	ino_t ino = 1;
				1891
				1892	child = d_hash_and_lookup(dir, &qname);
				1893	if (!child) {
				1894	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
				1895	child = d_alloc_parallel(dir, &qname, &wq);
				1896	if (IS_ERR(child))
				1897	goto end_instantiate;
				1898	if (d_in_lookup(child)) {
				1899	struct dentry *res;
				1900	res = instantiate(child, task, ptr);
				1901	d_lookup_done(child);
				1902	if (unlikely(res)) {
				1903	dput(child);
				1904	child = res;
				1905	if (IS_ERR(child))
				1906	goto end_instantiate;
				1907	}
				1908	}
				1909	}
				1910	inode = d_inode(child);
				1911	ino = inode->i_ino;
				1912	type = inode->i_mode >> 12;
				1913	dput(child);
				1914	end_instantiate:
				1915	return dir_emit(ctx, name, len, ino, type);
				1916	}
				1917
				1918	/*
				1919	* dname_to_vma_addr - maps a dentry name into two unsigned longs
				1920	* which represent vma start and end addresses.
				1921	*/
				1922	static int dname_to_vma_addr(struct dentry *dentry,
				1923	unsigned long start, unsigned long end)
				1924	{
				1925	const char *str = dentry->d_name.name;
				1926	unsigned long long sval, eval;
				1927	unsigned int len;
				1928
				1929	if (str[0] == '0' && str[1] != '-')
				1930	return -EINVAL;
				1931	len = _parse_integer(str, 16, &sval);
				1932	if (len & KSTRTOX_OVERFLOW)
				1933	return -EINVAL;
				1934	if (sval != (unsigned long)sval)
				1935	return -EINVAL;
				1936	str += len;
				1937
				1938	if (*str != '-')
				1939	return -EINVAL;
				1940	str++;
				1941
				1942	if (str[0] == '0' && str[1])
				1943	return -EINVAL;
				1944	len = _parse_integer(str, 16, &eval);
				1945	if (len & KSTRTOX_OVERFLOW)
				1946	return -EINVAL;
				1947	if (eval != (unsigned long)eval)
				1948	return -EINVAL;
				1949	str += len;
				1950
				1951	if (*str != '\0')
				1952	return -EINVAL;
				1953
				1954	*start = sval;
				1955	*end = eval;
				1956
				1957	return 0;
				1958	}
				1959
				1960	static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
				1961	{
				1962	unsigned long vm_start, vm_end;
				1963	bool exact_vma_exists = false;
				1964	struct mm_struct *mm = NULL;
				1965	struct task_struct *task;
				1966	struct inode *inode;
				1967	int status = 0;
				1968
				1969	if (flags & LOOKUP_RCU)
				1970	return -ECHILD;
				1971
				1972	inode = d_inode(dentry);
				1973	task = get_proc_task(inode);
				1974	if (!task)
				1975	goto out_notask;
				1976
				1977	mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
				1978	if (IS_ERR_OR_NULL(mm))
				1979	goto out;
				1980
				1981	if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
				1982	status = down_read_killable(&mm->mmap_sem);
				1983	if (!status) {
				1984	exact_vma_exists = !!find_exact_vma(mm, vm_start,
				1985	vm_end);
				1986	up_read(&mm->mmap_sem);
				1987	}
				1988	}
				1989
				1990	mmput(mm);
				1991
				1992	if (exact_vma_exists) {
				1993	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
				1994
				1995	security_task_to_inode(task, inode);
				1996	status = 1;
				1997	}
				1998
				1999	out:
				2000	put_task_struct(task);
				2001
				2002	out_notask:
				2003	return status;
				2004	}
				2005
				2006	static const struct dentry_operations tid_map_files_dentry_operations = {
				2007	.d_revalidate = map_files_d_revalidate,
				2008	.d_delete = pid_delete_dentry,
				2009	};
				2010
				2011	static int map_files_get_link(struct dentry dentry, struct path path)
				2012	{
				2013	unsigned long vm_start, vm_end;
				2014	struct vm_area_struct *vma;
				2015	struct task_struct *task;
				2016	struct mm_struct *mm;
				2017	int rc;
				2018
				2019	rc = -ENOENT;
				2020	task = get_proc_task(d_inode(dentry));
				2021	if (!task)
				2022	goto out;
				2023
				2024	mm = get_task_mm(task);
				2025	put_task_struct(task);
				2026	if (!mm)
				2027	goto out;
				2028
				2029	rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
				2030	if (rc)
				2031	goto out_mmput;
				2032
				2033	rc = down_read_killable(&mm->mmap_sem);
				2034	if (rc)
				2035	goto out_mmput;
				2036
				2037	rc = -ENOENT;
				2038	vma = find_exact_vma(mm, vm_start, vm_end);
				2039	if (vma && vma->vm_file) {
				2040	*path = vma->vm_file->f_path;
				2041	path_get(path);
				2042	rc = 0;
				2043	}
				2044	up_read(&mm->mmap_sem);
				2045
				2046	out_mmput:
				2047	mmput(mm);
				2048	out:
				2049	return rc;
				2050	}
				2051
				2052	struct map_files_info {
				2053	unsigned long start;
				2054	unsigned long end;
				2055	fmode_t mode;
				2056	};
				2057
				2058	/*
				2059	* Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the
				2060	* symlinks may be used to bypass permissions on ancestor directories in the
				2061	* path to the file in question.
				2062	*/
				2063	static const char *
				2064	proc_map_files_get_link(struct dentry *dentry,
				2065	struct inode *inode,
				2066	struct delayed_call *done)
				2067	{
				2068	if (!capable(CAP_SYS_ADMIN))
				2069	return ERR_PTR(-EPERM);
				2070
				2071	return proc_pid_get_link(dentry, inode, done);
				2072	}
				2073
				2074	/*
				2075	* Identical to proc_pid_link_inode_operations except for get_link()
				2076	*/
				2077	static const struct inode_operations proc_map_files_link_inode_operations = {
				2078	.readlink = proc_pid_readlink,
				2079	.get_link = proc_map_files_get_link,
				2080	.setattr = proc_setattr,
				2081	};
				2082
				2083	static struct dentry *
				2084	proc_map_files_instantiate(struct dentry *dentry,
				2085	struct task_struct task, const void ptr)
				2086	{
				2087	fmode_t mode = (fmode_t)(unsigned long)ptr;
				2088	struct proc_inode *ei;
				2089	struct inode *inode;
				2090
				2091	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK \|
				2092	((mode & FMODE_READ ) ? S_IRUSR : 0) \|
				2093	((mode & FMODE_WRITE) ? S_IWUSR : 0));
				2094	if (!inode)
				2095	return ERR_PTR(-ENOENT);
				2096
				2097	ei = PROC_I(inode);
				2098	ei->op.proc_get_link = map_files_get_link;
				2099
				2100	inode->i_op = &proc_map_files_link_inode_operations;
				2101	inode->i_size = 64;
				2102
				2103	d_set_d_op(dentry, &tid_map_files_dentry_operations);
				2104	return d_splice_alias(inode, dentry);
				2105	}
				2106
				2107	static struct dentry proc_map_files_lookup(struct inode dir,
				2108	struct dentry *dentry, unsigned int flags)
				2109	{
				2110	unsigned long vm_start, vm_end;
				2111	struct vm_area_struct *vma;
				2112	struct task_struct *task;
				2113	struct dentry *result;
				2114	struct mm_struct *mm;
				2115
				2116	result = ERR_PTR(-ENOENT);
				2117	task = get_proc_task(dir);
				2118	if (!task)
				2119	goto out;
				2120
				2121	result = ERR_PTR(-EACCES);
				2122	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
				2123	goto out_put_task;
				2124
				2125	result = ERR_PTR(-ENOENT);
				2126	if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
				2127	goto out_put_task;
				2128
				2129	mm = get_task_mm(task);
				2130	if (!mm)
				2131	goto out_put_task;
				2132
				2133	result = ERR_PTR(-EINTR);
				2134	if (down_read_killable(&mm->mmap_sem))
				2135	goto out_put_mm;
				2136
				2137	result = ERR_PTR(-ENOENT);
				2138	vma = find_exact_vma(mm, vm_start, vm_end);
				2139	if (!vma)
				2140	goto out_no_vma;
				2141
				2142	if (vma->vm_file)
				2143	result = proc_map_files_instantiate(dentry, task,
				2144	(void *)(unsigned long)vma->vm_file->f_mode);
				2145
				2146	out_no_vma:
				2147	up_read(&mm->mmap_sem);
				2148	out_put_mm:
				2149	mmput(mm);
				2150	out_put_task:
				2151	put_task_struct(task);
				2152	out:
				2153	return result;
				2154	}
				2155
				2156	static const struct inode_operations proc_map_files_inode_operations = {
				2157	.lookup = proc_map_files_lookup,
				2158	.permission = proc_fd_permission,
				2159	.setattr = proc_setattr,
				2160	};
				2161
				2162	static int
				2163	proc_map_files_readdir(struct file file, struct dir_context ctx)
				2164	{
				2165	struct vm_area_struct *vma;
				2166	struct task_struct *task;
				2167	struct mm_struct *mm;
				2168	unsigned long nr_files, pos, i;
				2169	struct flex_array *fa = NULL;
				2170	struct map_files_info info;
				2171	struct map_files_info *p;
				2172	int ret;
				2173
				2174	ret = -ENOENT;
				2175	task = get_proc_task(file_inode(file));
				2176	if (!task)
				2177	goto out;
				2178
				2179	ret = -EACCES;
				2180	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
				2181	goto out_put_task;
				2182
				2183	ret = 0;
				2184	if (!dir_emit_dots(file, ctx))
				2185	goto out_put_task;
				2186
				2187	mm = get_task_mm(task);
				2188	if (!mm)
				2189	goto out_put_task;
				2190
				2191	ret = down_read_killable(&mm->mmap_sem);
				2192	if (ret) {
				2193	mmput(mm);
				2194	goto out_put_task;
				2195	}
				2196
				2197	nr_files = 0;
				2198
				2199	/*
				2200	* We need two passes here:
				2201	*
				2202	* 1) Collect vmas of mapped files with mmap_sem taken
				2203	* 2) Release mmap_sem and instantiate entries
				2204	*
				2205	* otherwise we get lockdep complained, since filldir()
				2206	* routine might require mmap_sem taken in might_fault().
				2207	*/
				2208
				2209	for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
				2210	if (vma->vm_file && ++pos > ctx->pos)
				2211	nr_files++;
				2212	}
				2213
				2214	if (nr_files) {
				2215	fa = flex_array_alloc(sizeof(info), nr_files,
				2216	GFP_KERNEL);
				2217	if (!fa \|\| flex_array_prealloc(fa, 0, nr_files,
				2218	GFP_KERNEL)) {
				2219	ret = -ENOMEM;
				2220	if (fa)
				2221	flex_array_free(fa);
				2222	up_read(&mm->mmap_sem);
				2223	mmput(mm);
				2224	goto out_put_task;
				2225	}
				2226	for (i = 0, vma = mm->mmap, pos = 2; vma;
				2227	vma = vma->vm_next) {
				2228	if (!vma->vm_file)
				2229	continue;
				2230	if (++pos <= ctx->pos)
				2231	continue;
				2232
				2233	info.start = vma->vm_start;
				2234	info.end = vma->vm_end;
				2235	info.mode = vma->vm_file->f_mode;
				2236	if (flex_array_put(fa, i++, &info, GFP_KERNEL))
				2237	BUG();
				2238	}
				2239	}
				2240	up_read(&mm->mmap_sem);
				2241	mmput(mm);
				2242
				2243	for (i = 0; i < nr_files; i++) {
				2244	char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */
				2245	unsigned int len;
				2246
				2247	p = flex_array_get(fa, i);
				2248	len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end);
				2249	if (!proc_fill_cache(file, ctx,
				2250	buf, len,
				2251	proc_map_files_instantiate,
				2252	task,
				2253	(void *)(unsigned long)p->mode))
				2254	break;
				2255	ctx->pos++;
				2256	}
				2257	if (fa)
				2258	flex_array_free(fa);
				2259
				2260	out_put_task:
				2261	put_task_struct(task);
				2262	out:
				2263	return ret;
				2264	}
				2265
				2266	static const struct file_operations proc_map_files_operations = {
				2267	.read = generic_read_dir,
				2268	.iterate_shared = proc_map_files_readdir,
				2269	.llseek = generic_file_llseek,
				2270	};
				2271
				2272	#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
				/* Per-open state of the "timers" seq_file. */
				struct timers_private {
					struct pid *pid;		/* set at open time from proc_pid() */
					struct task_struct *task;	/* referenced in timers_start(), dropped in timers_stop() */
					struct sighand_struct *sighand;	/* locked in timers_start(), unlocked in timers_stop() */
					struct pid_namespace *ns;	/* set at open time; used by show_timer() for pid_nr_ns() */
					unsigned long flags;		/* passed to lock/unlock_task_sighand() */
				};
				2280
				2281	static void timers_start(struct seq_file m, loff_t *pos)
				2282	{
				2283	struct timers_private *tp = m->private;
				2284
				2285	tp->task = get_pid_task(tp->pid, PIDTYPE_PID);
				2286	if (!tp->task)
				2287	return ERR_PTR(-ESRCH);
				2288
				2289	tp->sighand = lock_task_sighand(tp->task, &tp->flags);
				2290	if (!tp->sighand)
				2291	return ERR_PTR(-ESRCH);
				2292
				2293	return seq_list_start(&tp->task->signal->posix_timers, *pos);
				2294	}
				2295
				2296	static void timers_next(struct seq_file m, void v, loff_t pos)
				2297	{
				2298	struct timers_private *tp = m->private;
				2299	return seq_list_next(v, &tp->task->signal->posix_timers, pos);
				2300	}
				2301
				2302	static void timers_stop(struct seq_file m, void v)
				2303	{
				2304	struct timers_private *tp = m->private;
				2305
				2306	if (tp->sighand) {
				2307	unlock_task_sighand(tp->task, &tp->flags);
				2308	tp->sighand = NULL;
				2309	}
				2310
				2311	if (tp->task) {
				2312	put_task_struct(tp->task);
				2313	tp->task = NULL;
				2314	}
				2315	}
				2316
				2317	static int show_timer(struct seq_file m, void v)
				2318	{
				2319	struct k_itimer *timer;
				2320	struct timers_private *tp = m->private;
				2321	int notify;
				2322	static const char * const nstr[] = {
				2323	[SIGEV_SIGNAL] = "signal",
				2324	[SIGEV_NONE] = "none",
				2325	[SIGEV_THREAD] = "thread",
				2326	};
				2327
				2328	timer = list_entry((struct list_head *)v, struct k_itimer, list);
				2329	notify = timer->it_sigev_notify;
				2330
				2331	seq_printf(m, "ID: %d\n", timer->it_id);
				2332	seq_printf(m, "signal: %d/%px\n",
				2333	timer->sigq->info.si_signo,
				2334	timer->sigq->info.si_value.sival_ptr);
				2335	seq_printf(m, "notify: %s/%s.%d\n",
				2336	nstr[notify & ~SIGEV_THREAD_ID],
				2337	(notify & SIGEV_THREAD_ID) ? "tid" : "pid",
				2338	pid_nr_ns(timer->it_pid, tp->ns));
				2339	seq_printf(m, "ClockID: %d\n", timer->it_clock);
				2340
				2341	return 0;
				2342	}
				2343
				2344	static const struct seq_operations proc_timers_seq_ops = {
				2345	.start = timers_start,
				2346	.next = timers_next,
				2347	.stop = timers_stop,
				2348	.show = show_timer,
				2349	};
				2350
				2351	static int proc_timers_open(struct inode inode, struct file file)
				2352	{
				2353	struct timers_private *tp;
				2354
				2355	tp = __seq_open_private(file, &proc_timers_seq_ops,
				2356	sizeof(struct timers_private));
				2357	if (!tp)
				2358	return -ENOMEM;
				2359
				2360	tp->pid = proc_pid(inode);
				2361	tp->ns = proc_pid_ns(inode);
				2362	return 0;
				2363	}
				2364
				2365	static const struct file_operations proc_timers_operations = {
				2366	.open = proc_timers_open,
				2367	.read = seq_read,
				2368	.llseek = seq_lseek,
				2369	.release = seq_release_private,
				2370	};
				2371	#endif
				2372
				2373	static ssize_t timerslack_ns_write(struct file file, const char __user buf,
				2374	size_t count, loff_t *offset)
				2375	{
				2376	struct inode *inode = file_inode(file);
				2377	struct task_struct *p;
				2378	u64 slack_ns;
				2379	int err;
				2380
				2381	err = kstrtoull_from_user(buf, count, 10, &slack_ns);
				2382	if (err < 0)
				2383	return err;
				2384
				2385	p = get_proc_task(inode);
				2386	if (!p)
				2387	return -ESRCH;
				2388
				2389	if (p != current) {
				2390	if (!capable(CAP_SYS_NICE)) {
				2391	count = -EPERM;
				2392	goto out;
				2393	}
				2394
				2395	err = security_task_setscheduler(p);
				2396	if (err) {
				2397	count = err;
				2398	goto out;
				2399	}
				2400	}
				2401
				2402	task_lock(p);
				2403	if (slack_ns == 0)
				2404	p->timer_slack_ns = p->default_timer_slack_ns;
				2405	else
				2406	p->timer_slack_ns = slack_ns;
				2407	task_unlock(p);
				2408
				2409	out:
				2410	put_task_struct(p);
				2411
				2412	return count;
				2413	}
				2414
				2415	static int timerslack_ns_show(struct seq_file m, void v)
				2416	{
				2417	struct inode *inode = m->private;
				2418	struct task_struct *p;
				2419	int err = 0;
				2420
				2421	p = get_proc_task(inode);
				2422	if (!p)
				2423	return -ESRCH;
				2424
				2425	if (p != current) {
				2426
				2427	if (!capable(CAP_SYS_NICE)) {
				2428	err = -EPERM;
				2429	goto out;
				2430	}
				2431	err = security_task_getscheduler(p);
				2432	if (err)
				2433	goto out;
				2434	}
				2435
				2436	task_lock(p);
				2437	seq_printf(m, "%llu\n", p->timer_slack_ns);
				2438	task_unlock(p);
				2439
				2440	out:
				2441	put_task_struct(p);
				2442
				2443	return err;
				2444	}
				2445
				2446	static int timerslack_ns_open(struct inode inode, struct file filp)
				2447	{
				2448	return single_open(filp, timerslack_ns_show, inode);
				2449	}
				2450
				2451	static const struct file_operations proc_pid_set_timerslack_ns_operations = {
				2452	.open = timerslack_ns_open,
				2453	.read = seq_read,
				2454	.write = timerslack_ns_write,
				2455	.llseek = seq_lseek,
				2456	.release = single_release,
				2457	};
				2458
				2459	static struct dentry proc_pident_instantiate(struct dentry dentry,
				2460	struct task_struct task, const void ptr)
				2461	{
				2462	const struct pid_entry *p = ptr;
				2463	struct inode *inode;
				2464	struct proc_inode *ei;
				2465
				2466	inode = proc_pid_make_inode(dentry->d_sb, task, p->mode);
				2467	if (!inode)
				2468	return ERR_PTR(-ENOENT);
				2469
				2470	ei = PROC_I(inode);
				2471	if (S_ISDIR(inode->i_mode))
				2472	set_nlink(inode, 2); /* Use getattr to fix if necessary */
				2473	if (p->iop)
				2474	inode->i_op = p->iop;
				2475	if (p->fop)
				2476	inode->i_fop = p->fop;
				2477	ei->op = p->op;
				2478	pid_update_inode(task, inode);
				2479	d_set_d_op(dentry, &pid_dentry_operations);
				2480	return d_splice_alias(inode, dentry);
				2481	}
				2482
				2483	static struct dentry proc_pident_lookup(struct inode dir,
				2484	struct dentry *dentry,
				2485	const struct pid_entry *ents,
				2486	unsigned int nents)
				2487	{
				2488	struct task_struct *task = get_proc_task(dir);
				2489	const struct pid_entry p, last;
				2490	struct dentry *res = ERR_PTR(-ENOENT);
				2491
				2492	if (!task)
				2493	goto out_no_task;
				2494
				2495	/*
				2496	* Yes, it does not scale. And it should not. Don't add
				2497	* new entries into /proc/<tgid>/ without very good reasons.
				2498	*/
				2499	last = &ents[nents];
				2500	for (p = ents; p < last; p++) {
				2501	if (p->len != dentry->d_name.len)
				2502	continue;
				2503	if (!memcmp(dentry->d_name.name, p->name, p->len)) {
				2504	res = proc_pident_instantiate(dentry, task, p);
				2505	break;
				2506	}
				2507	}
				2508	put_task_struct(task);
				2509	out_no_task:
				2510	return res;
				2511	}
				2512
				2513	static int proc_pident_readdir(struct file file, struct dir_context ctx,
				2514	const struct pid_entry *ents, unsigned int nents)
				2515	{
				2516	struct task_struct *task = get_proc_task(file_inode(file));
				2517	const struct pid_entry *p;
				2518
				2519	if (!task)
				2520	return -ENOENT;
				2521
				2522	if (!dir_emit_dots(file, ctx))
				2523	goto out;
				2524
				2525	if (ctx->pos >= nents + 2)
				2526	goto out;
				2527
				2528	for (p = ents + (ctx->pos - 2); p < ents + nents; p++) {
				2529	if (!proc_fill_cache(file, ctx, p->name, p->len,
				2530	proc_pident_instantiate, task, p))
				2531	break;
				2532	ctx->pos++;
				2533	}
				2534	out:
				2535	put_task_struct(task);
				2536	return 0;
				2537	}
				2538
				2539	#ifdef CONFIG_SECURITY
				2540	static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
				2541	size_t count, loff_t *ppos)
				2542	{
				2543	struct inode * inode = file_inode(file);
				2544	char *p = NULL;
				2545	ssize_t length;
				2546	struct task_struct *task = get_proc_task(inode);
				2547
				2548	if (!task)
				2549	return -ESRCH;
				2550
				2551	length = security_getprocattr(task,
				2552	(char*)file->f_path.dentry->d_name.name,
				2553	&p);
				2554	put_task_struct(task);
				2555	if (length > 0)
				2556	length = simple_read_from_buffer(buf, count, ppos, p, length);
				2557	kfree(p);
				2558	return length;
				2559	}
				2560
				2561	static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
				2562	size_t count, loff_t *ppos)
				2563	{
				2564	struct inode * inode = file_inode(file);
				2565	struct task_struct *task;
				2566	void *page;
				2567	int rv;
				2568
				2569	rcu_read_lock();
				2570	task = pid_task(proc_pid(inode), PIDTYPE_PID);
				2571	if (!task) {
				2572	rcu_read_unlock();
				2573	return -ESRCH;
				2574	}
				2575	/* A task may only write its own attributes. */
				2576	if (current != task) {
				2577	rcu_read_unlock();
				2578	return -EACCES;
				2579	}
				2580	/* Prevent changes to overridden credentials. */
				2581	if (current_cred() != current_real_cred()) {
				2582	rcu_read_unlock();
				2583	return -EBUSY;
				2584	}
				2585	rcu_read_unlock();
				2586
				2587	if (count > PAGE_SIZE)
				2588	count = PAGE_SIZE;
				2589
				2590	/* No partial writes. */
				2591	if (*ppos != 0)
				2592	return -EINVAL;
				2593
				2594	page = memdup_user(buf, count);
				2595	if (IS_ERR(page)) {
				2596	rv = PTR_ERR(page);
				2597	goto out;
				2598	}
				2599
				2600	/* Guard against adverse ptrace interaction */
				2601	rv = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
				2602	if (rv < 0)
				2603	goto out_free;
				2604
				2605	rv = security_setprocattr(file->f_path.dentry->d_name.name, page, count);
				2606	mutex_unlock(&current->signal->cred_guard_mutex);
				2607	out_free:
				2608	kfree(page);
				2609	out:
				2610	return rv;
				2611	}
				2612
				2613	static const struct file_operations proc_pid_attr_operations = {
				2614	.read = proc_pid_attr_read,
				2615	.write = proc_pid_attr_write,
				2616	.llseek = generic_file_llseek,
				2617	};
				2618
				2619	static const struct pid_entry attr_dir_stuff[] = {
				2620	REG("current", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2621	REG("prev", S_IRUGO, proc_pid_attr_operations),
				2622	REG("exec", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2623	REG("fscreate", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2624	REG("keycreate", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2625	REG("sockcreate", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2626	};
				2627
				2628	static int proc_attr_dir_readdir(struct file file, struct dir_context ctx)
				2629	{
				2630	return proc_pident_readdir(file, ctx,
				2631	attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
				2632	}
				2633
				2634	static const struct file_operations proc_attr_dir_operations = {
				2635	.read = generic_read_dir,
				2636	.iterate_shared = proc_attr_dir_readdir,
				2637	.llseek = generic_file_llseek,
				2638	};
				2639
				2640	static struct dentry proc_attr_dir_lookup(struct inode dir,
				2641	struct dentry *dentry, unsigned int flags)
				2642	{
				2643	return proc_pident_lookup(dir, dentry,
				2644	attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
				2645	}
				2646
				2647	static const struct inode_operations proc_attr_dir_inode_operations = {
				2648	.lookup = proc_attr_dir_lookup,
				2649	.getattr = pid_getattr,
				2650	.setattr = proc_setattr,
				2651	};
				2652
				2653	#endif
				2654
				2655	#ifdef CONFIG_ELF_CORE
				2656	static ssize_t proc_coredump_filter_read(struct file file, char __user buf,
				2657	size_t count, loff_t *ppos)
				2658	{
				2659	struct task_struct *task = get_proc_task(file_inode(file));
				2660	struct mm_struct *mm;
				2661	char buffer[PROC_NUMBUF];
				2662	size_t len;
				2663	int ret;
				2664
				2665	if (!task)
				2666	return -ESRCH;
				2667
				2668	ret = 0;
				2669	mm = get_task_mm(task);
				2670	if (mm) {
				2671	len = snprintf(buffer, sizeof(buffer), "%08lx\n",
				2672	((mm->flags & MMF_DUMP_FILTER_MASK) >>
				2673	MMF_DUMP_FILTER_SHIFT));
				2674	mmput(mm);
				2675	ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
				2676	}
				2677
				2678	put_task_struct(task);
				2679
				2680	return ret;
				2681	}
				2682
				2683	static ssize_t proc_coredump_filter_write(struct file *file,
				2684	const char __user *buf,
				2685	size_t count,
				2686	loff_t *ppos)
				2687	{
				2688	struct task_struct *task;
				2689	struct mm_struct *mm;
				2690	unsigned int val;
				2691	int ret;
				2692	int i;
				2693	unsigned long mask;
				2694
				2695	ret = kstrtouint_from_user(buf, count, 0, &val);
				2696	if (ret < 0)
				2697	return ret;
				2698
				2699	ret = -ESRCH;
				2700	task = get_proc_task(file_inode(file));
				2701	if (!task)
				2702	goto out_no_task;
				2703
				2704	mm = get_task_mm(task);
				2705	if (!mm)
				2706	goto out_no_mm;
				2707	ret = 0;
				2708
				2709	for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
				2710	if (val & mask)
				2711	set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
				2712	else
				2713	clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
				2714	}
				2715
				2716	mmput(mm);
				2717	out_no_mm:
				2718	put_task_struct(task);
				2719	out_no_task:
				2720	if (ret < 0)
				2721	return ret;
				2722	return count;
				2723	}
				2724
				2725	static const struct file_operations proc_coredump_filter_operations = {
				2726	.read = proc_coredump_filter_read,
				2727	.write = proc_coredump_filter_write,
				2728	.llseek = generic_file_llseek,
				2729	};
				2730	#endif
				2731
				2732	#ifdef CONFIG_TASK_IO_ACCOUNTING
				2733	static int do_io_accounting(struct task_struct task, struct seq_file m, int whole)
				2734	{
				2735	struct task_io_accounting acct = task->ioac;
				2736	unsigned long flags;
				2737	int result;
				2738
				2739	result = mutex_lock_killable(&task->signal->cred_guard_mutex);
				2740	if (result)
				2741	return result;
				2742
				2743	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
				2744	result = -EACCES;
				2745	goto out_unlock;
				2746	}
				2747
				2748	if (whole && lock_task_sighand(task, &flags)) {
				2749	struct task_struct *t = task;
				2750
				2751	task_io_accounting_add(&acct, &task->signal->ioac);
				2752	while_each_thread(task, t)
				2753	task_io_accounting_add(&acct, &t->ioac);
				2754
				2755	unlock_task_sighand(task, &flags);
				2756	}
				2757	seq_printf(m,
				2758	"rchar: %llu\n"
				2759	"wchar: %llu\n"
				2760	"syscr: %llu\n"
				2761	"syscw: %llu\n"
				2762	"read_bytes: %llu\n"
				2763	"write_bytes: %llu\n"
				2764	"cancelled_write_bytes: %llu\n",
				2765	(unsigned long long)acct.rchar,
				2766	(unsigned long long)acct.wchar,
				2767	(unsigned long long)acct.syscr,
				2768	(unsigned long long)acct.syscw,
				2769	(unsigned long long)acct.read_bytes,
				2770	(unsigned long long)acct.write_bytes,
				2771	(unsigned long long)acct.cancelled_write_bytes);
				2772	result = 0;
				2773
				2774	out_unlock:
				2775	mutex_unlock(&task->signal->cred_guard_mutex);
				2776	return result;
				2777	}
				2778
				/* Per-thread "io" file: counters of @task only. */
				static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
								  struct pid *pid, struct task_struct *task)
				{
					return do_io_accounting(task, m, 0);
				}
				2784
				/* Per-process "io" file: counters summed over the whole thread group. */
				static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
								   struct pid *pid, struct task_struct *task)
				{
					return do_io_accounting(task, m, 1);
				}
				2790	#endif /* CONFIG_TASK_IO_ACCOUNTING */
				2791
				2792	#ifdef CONFIG_USER_NS
				2793	static int proc_id_map_open(struct inode inode, struct file file,
				2794	const struct seq_operations *seq_ops)
				2795	{
				2796	struct user_namespace *ns = NULL;
				2797	struct task_struct *task;
				2798	struct seq_file *seq;
				2799	int ret = -EINVAL;
				2800
				2801	task = get_proc_task(inode);
				2802	if (task) {
				2803	rcu_read_lock();
				2804	ns = get_user_ns(task_cred_xxx(task, user_ns));
				2805	rcu_read_unlock();
				2806	put_task_struct(task);
				2807	}
				2808	if (!ns)
				2809	goto err;
				2810
				2811	ret = seq_open(file, seq_ops);
				2812	if (ret)
				2813	goto err_put_ns;
				2814
				2815	seq = file->private_data;
				2816	seq->private = ns;
				2817
				2818	return 0;
				2819	err_put_ns:
				2820	put_user_ns(ns);
				2821	err:
				2822	return ret;
				2823	}
				2824
				2825	static int proc_id_map_release(struct inode inode, struct file file)
				2826	{
				2827	struct seq_file *seq = file->private_data;
				2828	struct user_namespace *ns = seq->private;
				2829	put_user_ns(ns);
				2830	return seq_release(inode, file);
				2831	}
				2832
				2833	static int proc_uid_map_open(struct inode inode, struct file file)
				2834	{
				2835	return proc_id_map_open(inode, file, &proc_uid_seq_operations);
				2836	}
				2837
				2838	static int proc_gid_map_open(struct inode inode, struct file file)
				2839	{
				2840	return proc_id_map_open(inode, file, &proc_gid_seq_operations);
				2841	}
				2842
				2843	static int proc_projid_map_open(struct inode inode, struct file file)
				2844	{
				2845	return proc_id_map_open(inode, file, &proc_projid_seq_operations);
				2846	}
				2847
				2848	static const struct file_operations proc_uid_map_operations = {
				2849	.open = proc_uid_map_open,
				2850	.write = proc_uid_map_write,
				2851	.read = seq_read,
				2852	.llseek = seq_lseek,
				2853	.release = proc_id_map_release,
				2854	};
				2855
				2856	static const struct file_operations proc_gid_map_operations = {
				2857	.open = proc_gid_map_open,
				2858	.write = proc_gid_map_write,
				2859	.read = seq_read,
				2860	.llseek = seq_lseek,
				2861	.release = proc_id_map_release,
				2862	};
				2863
				2864	static const struct file_operations proc_projid_map_operations = {
				2865	.open = proc_projid_map_open,
				2866	.write = proc_projid_map_write,
				2867	.read = seq_read,
				2868	.llseek = seq_lseek,
				2869	.release = proc_id_map_release,
				2870	};
				2871
				2872	static int proc_setgroups_open(struct inode inode, struct file file)
				2873	{
				2874	struct user_namespace *ns = NULL;
				2875	struct task_struct *task;
				2876	int ret;
				2877
				2878	ret = -ESRCH;
				2879	task = get_proc_task(inode);
				2880	if (task) {
				2881	rcu_read_lock();
				2882	ns = get_user_ns(task_cred_xxx(task, user_ns));
				2883	rcu_read_unlock();
				2884	put_task_struct(task);
				2885	}
				2886	if (!ns)
				2887	goto err;
				2888
				2889	if (file->f_mode & FMODE_WRITE) {
				2890	ret = -EACCES;
				2891	if (!ns_capable(ns, CAP_SYS_ADMIN))
				2892	goto err_put_ns;
				2893	}
				2894
				2895	ret = single_open(file, &proc_setgroups_show, ns);
				2896	if (ret)
				2897	goto err_put_ns;
				2898
				2899	return 0;
				2900	err_put_ns:
				2901	put_user_ns(ns);
				2902	err:
				2903	return ret;
				2904	}
				2905
				2906	static int proc_setgroups_release(struct inode inode, struct file file)
				2907	{
				2908	struct seq_file *seq = file->private_data;
				2909	struct user_namespace *ns = seq->private;
				2910	int ret = single_release(inode, file);
				2911	put_user_ns(ns);
				2912	return ret;
				2913	}
				2914
				2915	static const struct file_operations proc_setgroups_operations = {
				2916	.open = proc_setgroups_open,
				2917	.write = proc_setgroups_write,
				2918	.read = seq_read,
				2919	.llseek = seq_lseek,
				2920	.release = proc_setgroups_release,
				2921	};
				2922	#endif /* CONFIG_USER_NS */
				2923
				2924	static int proc_pid_personality(struct seq_file m, struct pid_namespace ns,
				2925	struct pid pid, struct task_struct task)
				2926	{
				2927	int err = lock_trace(task);
				2928	if (!err) {
				2929	seq_printf(m, "%08x\n", task->personality);
				2930	unlock_trace(task);
				2931	}
				2932	return err;
				2933	}
				2934
				2935	#ifdef CONFIG_LIVEPATCH
				2936	static int proc_pid_patch_state(struct seq_file m, struct pid_namespace ns,
				2937	struct pid pid, struct task_struct task)
				2938	{
				2939	seq_printf(m, "%d\n", task->patch_state);
				2940	return 0;
				2941	}
				2942	#endif /* CONFIG_LIVEPATCH */
				2943
				2944	/*
				2945	* Thread groups
				2946	*/
				2947	static const struct file_operations proc_task_operations;
				2948	static const struct inode_operations proc_task_inode_operations;
				2949
				2950	static const struct pid_entry tgid_base_stuff[] = {
				2951	DIR("task", S_IRUGO\|S_IXUGO, proc_task_inode_operations, proc_task_operations),
				2952	DIR("fd", S_IRUSR\|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
				2953	DIR("map_files", S_IRUSR\|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
				2954	DIR("fdinfo", S_IRUSR\|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
				2955	DIR("ns", S_IRUSR\|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
				2956	#ifdef CONFIG_NET
				2957	DIR("net", S_IRUGO\|S_IXUGO, proc_net_inode_operations, proc_net_operations),
				2958	#endif
				2959	REG("environ", S_IRUSR, proc_environ_operations),
				2960	REG("auxv", S_IRUSR, proc_auxv_operations),
				2961	ONE("status", S_IRUGO, proc_pid_status),
				2962	ONE("personality", S_IRUSR, proc_pid_personality),
				2963	ONE("limits", S_IRUGO, proc_pid_limits),
				2964	#ifdef CONFIG_SCHED_DEBUG
				2965	REG("sched", S_IRUGO\|S_IWUSR, proc_pid_sched_operations),
				2966	#endif
				2967	#ifdef CONFIG_SCHED_AUTOGROUP
				2968	REG("autogroup", S_IRUGO\|S_IWUSR, proc_pid_sched_autogroup_operations),
				2969	#endif
				2970	REG("comm", S_IRUGO\|S_IWUSR, proc_pid_set_comm_operations),
				2971	#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
				2972	ONE("syscall", S_IRUSR, proc_pid_syscall),
				2973	#endif
				2974	REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
				2975	ONE("stat", S_IRUGO, proc_tgid_stat),
				2976	ONE("statm", S_IRUGO, proc_pid_statm),
				2977	REG("maps", S_IRUGO, proc_pid_maps_operations),
				2978	#ifdef CONFIG_NUMA
				2979	REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
				2980	#endif
				2981	REG("mem", S_IRUSR\|S_IWUSR, proc_mem_operations),
				2982	LNK("cwd", proc_cwd_link),
				2983	LNK("root", proc_root_link),
				2984	LNK("exe", proc_exe_link),
				2985	REG("mounts", S_IRUGO, proc_mounts_operations),
				2986	REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
				2987	REG("mountstats", S_IRUSR, proc_mountstats_operations),
				2988	#ifdef CONFIG_PROC_PAGE_MONITOR
				2989	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
				2990	REG("smaps", S_IRUGO, proc_pid_smaps_operations),
				2991	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
				2992	REG("pagemap", S_IRUSR, proc_pagemap_operations),
				2993	#endif
				2994	#ifdef CONFIG_SECURITY
				2995	DIR("attr", S_IRUGO\|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
				2996	#endif
				2997	#ifdef CONFIG_KALLSYMS
				2998	ONE("wchan", S_IRUGO, proc_pid_wchan),
				2999	#endif
				3000	#ifdef CONFIG_STACKTRACE
				3001	ONE("stack", S_IRUSR, proc_pid_stack),
				3002	#endif
				3003	#ifdef CONFIG_SCHED_INFO
				3004	ONE("schedstat", S_IRUGO, proc_pid_schedstat),
				3005	#endif
				3006	#ifdef CONFIG_LATENCYTOP
				3007	REG("latency", S_IRUGO, proc_lstats_operations),
				3008	#endif
				3009	#ifdef CONFIG_PROC_PID_CPUSET
				3010	ONE("cpuset", S_IRUGO, proc_cpuset_show),
				3011	#endif
				3012	#ifdef CONFIG_CGROUPS
				3013	ONE("cgroup", S_IRUGO, proc_cgroup_show),
				3014	#endif
				3015	ONE("oom_score", S_IRUGO, proc_oom_score),
				3016	REG("oom_adj", S_IRUGO\|S_IWUSR, proc_oom_adj_operations),
				3017	REG("oom_score_adj", S_IRUGO\|S_IWUSR, proc_oom_score_adj_operations),
				3018	#ifdef CONFIG_AUDITSYSCALL
				3019	REG("loginuid", S_IWUSR\|S_IRUGO, proc_loginuid_operations),
				3020	REG("sessionid", S_IRUGO, proc_sessionid_operations),
				3021	#endif
				3022	#ifdef CONFIG_FAULT_INJECTION
				3023	REG("make-it-fail", S_IRUGO\|S_IWUSR, proc_fault_inject_operations),
				3024	REG("fail-nth", 0644, proc_fail_nth_operations),
				3025	#endif
				3026	#ifdef CONFIG_ELF_CORE
				3027	REG("coredump_filter", S_IRUGO\|S_IWUSR, proc_coredump_filter_operations),
				3028	#endif
				3029	#ifdef CONFIG_TASK_IO_ACCOUNTING
				3030	ONE("io", S_IRUSR, proc_tgid_io_accounting),
				3031	#endif
				3032	#ifdef CONFIG_USER_NS
				3033	REG("uid_map", S_IRUGO\|S_IWUSR, proc_uid_map_operations),
				3034	REG("gid_map", S_IRUGO\|S_IWUSR, proc_gid_map_operations),
				3035	REG("projid_map", S_IRUGO\|S_IWUSR, proc_projid_map_operations),
				3036	REG("setgroups", S_IRUGO\|S_IWUSR, proc_setgroups_operations),
				3037	#endif
				3038	#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
				3039	REG("timers", S_IRUGO, proc_timers_operations),
				3040	#endif
				3041	REG("timerslack_ns", S_IRUGO\|S_IWUGO, proc_pid_set_timerslack_ns_operations),
				3042	#ifdef CONFIG_LIVEPATCH
				3043	ONE("patch_state", S_IRUSR, proc_pid_patch_state),
				3044	#endif
				3045	#ifdef CONFIG_CPU_FREQ_TIMES
				3046	ONE("time_in_state", 0444, proc_time_in_state_show),
				3047	#endif
				3048	};
				3049
				3050	static int proc_tgid_base_readdir(struct file file, struct dir_context ctx)
				3051	{
				3052	return proc_pident_readdir(file, ctx,
				3053	tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
				3054	}
				3055
				3056	static const struct file_operations proc_tgid_base_operations = {
				3057	.read = generic_read_dir,
				3058	.iterate_shared = proc_tgid_base_readdir,
				3059	.llseek = generic_file_llseek,
				3060	};
				3061
				3062	struct pid tgid_pidfd_to_pid(const struct file file)
				3063	{
				3064	if (!d_is_dir(file->f_path.dentry) \|\|
				3065	(file->f_op != &proc_tgid_base_operations))
				3066	return ERR_PTR(-EBADF);
				3067
				3068	return proc_pid(file_inode(file));
				3069	}
				3070
				3071	static struct dentry proc_tgid_base_lookup(struct inode dir, struct dentry *dentry, unsigned int flags)
				3072	{
				3073	return proc_pident_lookup(dir, dentry,
				3074	tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
				3075	}
				3076
				3077	static const struct inode_operations proc_tgid_base_inode_operations = {
				3078	.lookup = proc_tgid_base_lookup,
				3079	.getattr = pid_getattr,
				3080	.setattr = proc_setattr,
				3081	.permission = proc_pid_permission,
				3082	};
				3083
				3084	static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
				3085	{
				3086	struct dentry dentry, leader, *dir;
				3087	char buf[10 + 1];
				3088	struct qstr name;
				3089
				3090	name.name = buf;
				3091	name.len = snprintf(buf, sizeof(buf), "%u", pid);
				3092	/* no ->d_hash() rejects on procfs */
				3093	dentry = d_hash_and_lookup(mnt->mnt_root, &name);
				3094	if (dentry) {
				3095	d_invalidate(dentry);
				3096	dput(dentry);
				3097	}
				3098
				3099	if (pid == tgid)
				3100	return;
				3101
				3102	name.name = buf;
				3103	name.len = snprintf(buf, sizeof(buf), "%u", tgid);
				3104	leader = d_hash_and_lookup(mnt->mnt_root, &name);
				3105	if (!leader)
				3106	goto out;
				3107
				3108	name.name = "task";
				3109	name.len = strlen(name.name);
				3110	dir = d_hash_and_lookup(leader, &name);
				3111	if (!dir)
				3112	goto out_put_leader;
				3113
				3114	name.name = buf;
				3115	name.len = snprintf(buf, sizeof(buf), "%u", pid);
				3116	dentry = d_hash_and_lookup(dir, &name);
				3117	if (dentry) {
				3118	d_invalidate(dentry);
				3119	dput(dentry);
				3120	}
				3121
				3122	dput(dir);
				3123	out_put_leader:
				3124	dput(leader);
				3125	out:
				3126	return;
				3127	}
				3128
				3129	/**
				3130	* proc_flush_task - Remove dcache entries for @task from the /proc dcache.
				3131	* @task: task that should be flushed.
				3132	*
				3133	* When flushing dentries from proc, one needs to flush them from global
				3134	* proc (proc_mnt) and from all the namespaces' procs this task was seen
				3135	* in. This call is supposed to do all of this job.
				3136	*
				3137	* Looks in the dcache for
				3138	* /proc/@pid
				3139	* /proc/@tgid/task/@pid
				3140	* if either directory is present flushes it and all of it'ts children
				3141	* from the dcache.
				3142	*
				3143	* It is safe and reasonable to cache /proc entries for a task until
				3144	* that task exits. After that they just clog up the dcache with
				3145	* useless entries, possibly causing useful dcache entries to be
				3146	* flushed instead. This routine is proved to flush those useless
				3147	* dcache entries at process exit time.
				3148	*
				3149	* NOTE: This routine is just an optimization so it does not guarantee
				3150	* that no dcache entries will exist at process exit time it
				3151	* just makes it very unlikely that any will persist.
				3152	*/
				3153
				3154	void proc_flush_task(struct task_struct *task)
				3155	{
				3156	int i;
				3157	struct pid pid, tgid;
				3158	struct upid *upid;
				3159
				3160	pid = task_pid(task);
				3161	tgid = task_tgid(task);
				3162
				3163	for (i = 0; i <= pid->level; i++) {
				3164	upid = &pid->numbers[i];
				3165	proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
				3166	tgid->numbers[i].nr);
				3167	}
				3168	}
				3169
				3170	static struct dentry proc_pid_instantiate(struct dentry dentry,
				3171	struct task_struct task, const void ptr)
				3172	{
				3173	struct inode *inode;
				3174
				3175	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR \| S_IRUGO \| S_IXUGO);
				3176	if (!inode)
				3177	return ERR_PTR(-ENOENT);
				3178
				3179	inode->i_op = &proc_tgid_base_inode_operations;
				3180	inode->i_fop = &proc_tgid_base_operations;
				3181	inode->i_flags\|=S_IMMUTABLE;
				3182
				3183	set_nlink(inode, nlink_tgid);
				3184	pid_update_inode(task, inode);
				3185
				3186	d_set_d_op(dentry, &pid_dentry_operations);
				3187	return d_splice_alias(inode, dentry);
				3188	}
				3189
				3190	struct dentry proc_pid_lookup(struct inode dir, struct dentry * dentry, unsigned int flags)
				3191	{
				3192	struct task_struct *task;
				3193	unsigned tgid;
				3194	struct pid_namespace *ns;
				3195	struct dentry *result = ERR_PTR(-ENOENT);
				3196
				3197	tgid = name_to_int(&dentry->d_name);
				3198	if (tgid == ~0U)
				3199	goto out;
				3200
				3201	ns = dentry->d_sb->s_fs_info;
				3202	rcu_read_lock();
				3203	task = find_task_by_pid_ns(tgid, ns);
				3204	if (task)
				3205	get_task_struct(task);
				3206	rcu_read_unlock();
				3207	if (!task)
				3208	goto out;
				3209
				3210	result = proc_pid_instantiate(dentry, task, NULL);
				3211	put_task_struct(task);
				3212	out:
				3213	return result;
				3214	}
				3215
				3216	/*
				3217	* Find the first task with tgid >= tgid
				3218	*
				3219	*/
				3220	struct tgid_iter {
				3221	unsigned int tgid;
				3222	struct task_struct *task;
				3223	};
				3224	static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
				3225	{
				3226	struct pid *pid;
				3227
				3228	if (iter.task)
				3229	put_task_struct(iter.task);
				3230	rcu_read_lock();
				3231	retry:
				3232	iter.task = NULL;
				3233	pid = find_ge_pid(iter.tgid, ns);
				3234	if (pid) {
				3235	iter.tgid = pid_nr_ns(pid, ns);
				3236	iter.task = pid_task(pid, PIDTYPE_PID);
				3237	/* What we to know is if the pid we have find is the
				3238	* pid of a thread_group_leader. Testing for task
				3239	* being a thread_group_leader is the obvious thing
				3240	* todo but there is a window when it fails, due to
				3241	* the pid transfer logic in de_thread.
				3242	*
				3243	* So we perform the straight forward test of seeing
				3244	* if the pid we have found is the pid of a thread
				3245	* group leader, and don't worry if the task we have
				3246	* found doesn't happen to be a thread group leader.
				3247	* As we don't care in the case of readdir.
				3248	*/
				3249	if (!iter.task \|\| !has_group_leader_pid(iter.task)) {
				3250	iter.tgid += 1;
				3251	goto retry;
				3252	}
				3253	get_task_struct(iter.task);
				3254	}
				3255	rcu_read_unlock();
				3256	return iter;
				3257	}
				3258
				3259	#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
				3260
				3261	/* for the /proc/ directory itself, after non-process stuff has been done */
				3262	int proc_pid_readdir(struct file file, struct dir_context ctx)
				3263	{
				3264	struct tgid_iter iter;
				3265	struct pid_namespace *ns = proc_pid_ns(file_inode(file));
				3266	loff_t pos = ctx->pos;
				3267
				3268	if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
				3269	return 0;
				3270
				3271	if (pos == TGID_OFFSET - 2) {
				3272	struct inode *inode = d_inode(ns->proc_self);
				3273	if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
				3274	return 0;
				3275	ctx->pos = pos = pos + 1;
				3276	}
				3277	if (pos == TGID_OFFSET - 1) {
				3278	struct inode *inode = d_inode(ns->proc_thread_self);
				3279	if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
				3280	return 0;
				3281	ctx->pos = pos = pos + 1;
				3282	}
				3283	iter.tgid = pos - TGID_OFFSET;
				3284	iter.task = NULL;
				3285	for (iter = next_tgid(ns, iter);
				3286	iter.task;
				3287	iter.tgid += 1, iter = next_tgid(ns, iter)) {
				3288	char name[10 + 1];
				3289	unsigned int len;
				3290
				3291	cond_resched();
				3292	if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
				3293	continue;
				3294
				3295	len = snprintf(name, sizeof(name), "%u", iter.tgid);
				3296	ctx->pos = iter.tgid + TGID_OFFSET;
				3297	if (!proc_fill_cache(file, ctx, name, len,
				3298	proc_pid_instantiate, iter.task, NULL)) {
				3299	put_task_struct(iter.task);
				3300	return 0;
				3301	}
				3302	}
				3303	ctx->pos = PID_MAX_LIMIT + TGID_OFFSET;
				3304	return 0;
				3305	}
				3306
				3307	/*
				3308	* proc_tid_comm_permission is a special permission function exclusively
				3309	* used for the node /proc/<pid>/task/<tid>/comm.
				3310	* It bypasses generic permission checks in the case where a task of the same
				3311	* task group attempts to access the node.
				3312	* The rationale behind this is that glibc and bionic access this node for
				3313	* cross thread naming (pthread_set/getname_np(!self)). However, if
				3314	* PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
				3315	* which locks out the cross thread naming implementation.
				3316	* This function makes sure that the node is always accessible for members of
				3317	* same thread group.
				3318	*/
				3319	static int proc_tid_comm_permission(struct inode *inode, int mask)
				3320	{
				3321	bool is_same_tgroup;
				3322	struct task_struct *task;
				3323
				3324	task = get_proc_task(inode);
				3325	if (!task)
				3326	return -ESRCH;
				3327	is_same_tgroup = same_thread_group(current, task);
				3328	put_task_struct(task);
				3329
				3330	if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
				3331	/* This file (/proc/<pid>/task/<tid>/comm) can always be
				3332	* read or written by the members of the corresponding
				3333	* thread group.
				3334	*/
				3335	return 0;
				3336	}
				3337
				3338	return generic_permission(inode, mask);
				3339	}
				3340
				3341	static const struct inode_operations proc_tid_comm_inode_operations = {
				3342	.permission = proc_tid_comm_permission,
				3343	};
				3344
				3345	/*
				3346	* Tasks
				3347	*/
				3348	static const struct pid_entry tid_base_stuff[] = {
				3349	DIR("fd", S_IRUSR\|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
				3350	DIR("fdinfo", S_IRUSR\|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
				3351	DIR("ns", S_IRUSR\|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
				3352	#ifdef CONFIG_NET
				3353	DIR("net", S_IRUGO\|S_IXUGO, proc_net_inode_operations, proc_net_operations),
				3354	#endif
				3355	REG("environ", S_IRUSR, proc_environ_operations),
				3356	REG("auxv", S_IRUSR, proc_auxv_operations),
				3357	ONE("status", S_IRUGO, proc_pid_status),
				3358	ONE("personality", S_IRUSR, proc_pid_personality),
				3359	ONE("limits", S_IRUGO, proc_pid_limits),
				3360	#ifdef CONFIG_SCHED_DEBUG
				3361	REG("sched", S_IRUGO\|S_IWUSR, proc_pid_sched_operations),
				3362	#endif
				3363	NOD("comm", S_IFREG\|S_IRUGO\|S_IWUSR,
				3364	&proc_tid_comm_inode_operations,
				3365	&proc_pid_set_comm_operations, {}),
				3366	#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
				3367	ONE("syscall", S_IRUSR, proc_pid_syscall),
				3368	#endif
				3369	REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
				3370	ONE("stat", S_IRUGO, proc_tid_stat),
				3371	ONE("statm", S_IRUGO, proc_pid_statm),
				3372	REG("maps", S_IRUGO, proc_pid_maps_operations),
				3373	#ifdef CONFIG_PROC_CHILDREN
				3374	REG("children", S_IRUGO, proc_tid_children_operations),
				3375	#endif
				3376	#ifdef CONFIG_NUMA
				3377	REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
				3378	#endif
				3379	REG("mem", S_IRUSR\|S_IWUSR, proc_mem_operations),
				3380	LNK("cwd", proc_cwd_link),
				3381	LNK("root", proc_root_link),
				3382	LNK("exe", proc_exe_link),
				3383	REG("mounts", S_IRUGO, proc_mounts_operations),
				3384	REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
				3385	#ifdef CONFIG_PROC_PAGE_MONITOR
				3386	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
				3387	REG("smaps", S_IRUGO, proc_pid_smaps_operations),
				3388	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
				3389	REG("pagemap", S_IRUSR, proc_pagemap_operations),
				3390	#endif
				3391	#ifdef CONFIG_SECURITY
				3392	DIR("attr", S_IRUGO\|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
				3393	#endif
				3394	#ifdef CONFIG_KALLSYMS
				3395	ONE("wchan", S_IRUGO, proc_pid_wchan),
				3396	#endif
				3397	#ifdef CONFIG_STACKTRACE
				3398	ONE("stack", S_IRUSR, proc_pid_stack),
				3399	#endif
				3400	#ifdef CONFIG_SCHED_INFO
				3401	ONE("schedstat", S_IRUGO, proc_pid_schedstat),
				3402	#endif
				3403	#ifdef CONFIG_LATENCYTOP
				3404	REG("latency", S_IRUGO, proc_lstats_operations),
				3405	#endif
				3406	#ifdef CONFIG_PROC_PID_CPUSET
				3407	ONE("cpuset", S_IRUGO, proc_cpuset_show),
				3408	#endif
				3409	#ifdef CONFIG_CGROUPS
				3410	ONE("cgroup", S_IRUGO, proc_cgroup_show),
				3411	#endif
				3412	ONE("oom_score", S_IRUGO, proc_oom_score),
				3413	REG("oom_adj", S_IRUGO\|S_IWUSR, proc_oom_adj_operations),
				3414	REG("oom_score_adj", S_IRUGO\|S_IWUSR, proc_oom_score_adj_operations),
				3415	#ifdef CONFIG_AUDITSYSCALL
				3416	REG("loginuid", S_IWUSR\|S_IRUGO, proc_loginuid_operations),
				3417	REG("sessionid", S_IRUGO, proc_sessionid_operations),
				3418	#endif
				3419	#ifdef CONFIG_FAULT_INJECTION
				3420	REG("make-it-fail", S_IRUGO\|S_IWUSR, proc_fault_inject_operations),
				3421	REG("fail-nth", 0644, proc_fail_nth_operations),
				3422	#endif
				3423	#ifdef CONFIG_TASK_IO_ACCOUNTING
				3424	ONE("io", S_IRUSR, proc_tid_io_accounting),
				3425	#endif
				3426	#ifdef CONFIG_USER_NS
				3427	REG("uid_map", S_IRUGO\|S_IWUSR, proc_uid_map_operations),
				3428	REG("gid_map", S_IRUGO\|S_IWUSR, proc_gid_map_operations),
				3429	REG("projid_map", S_IRUGO\|S_IWUSR, proc_projid_map_operations),
				3430	REG("setgroups", S_IRUGO\|S_IWUSR, proc_setgroups_operations),
				3431	#endif
				3432	#ifdef CONFIG_LIVEPATCH
				3433	ONE("patch_state", S_IRUSR, proc_pid_patch_state),
				3434	#endif
				3435	#ifdef CONFIG_CPU_FREQ_TIMES
				3436	ONE("time_in_state", 0444, proc_time_in_state_show),
				3437	#endif
				3438	};
				3439
				3440	static int proc_tid_base_readdir(struct file file, struct dir_context ctx)
				3441	{
				3442	return proc_pident_readdir(file, ctx,
				3443	tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
				3444	}
				3445
				3446	static struct dentry proc_tid_base_lookup(struct inode dir, struct dentry *dentry, unsigned int flags)
				3447	{
				3448	return proc_pident_lookup(dir, dentry,
				3449	tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
				3450	}
				3451
				3452	static const struct file_operations proc_tid_base_operations = {
				3453	.read = generic_read_dir,
				3454	.iterate_shared = proc_tid_base_readdir,
				3455	.llseek = generic_file_llseek,
				3456	};
				3457
				3458	static const struct inode_operations proc_tid_base_inode_operations = {
				3459	.lookup = proc_tid_base_lookup,
				3460	.getattr = pid_getattr,
				3461	.setattr = proc_setattr,
				3462	};
				3463
				3464	static struct dentry proc_task_instantiate(struct dentry dentry,
				3465	struct task_struct task, const void ptr)
				3466	{
				3467	struct inode *inode;
				3468	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR \| S_IRUGO \| S_IXUGO);
				3469	if (!inode)
				3470	return ERR_PTR(-ENOENT);
				3471
				3472	inode->i_op = &proc_tid_base_inode_operations;
				3473	inode->i_fop = &proc_tid_base_operations;
				3474	inode->i_flags \|= S_IMMUTABLE;
				3475
				3476	set_nlink(inode, nlink_tid);
				3477	pid_update_inode(task, inode);
				3478
				3479	d_set_d_op(dentry, &pid_dentry_operations);
				3480	return d_splice_alias(inode, dentry);
				3481	}
				3482
				3483	static struct dentry proc_task_lookup(struct inode dir, struct dentry * dentry, unsigned int flags)
				3484	{
				3485	struct task_struct *task;
				3486	struct task_struct *leader = get_proc_task(dir);
				3487	unsigned tid;
				3488	struct pid_namespace *ns;
				3489	struct dentry *result = ERR_PTR(-ENOENT);
				3490
				3491	if (!leader)
				3492	goto out_no_task;
				3493
				3494	tid = name_to_int(&dentry->d_name);
				3495	if (tid == ~0U)
				3496	goto out;
				3497
				3498	ns = dentry->d_sb->s_fs_info;
				3499	rcu_read_lock();
				3500	task = find_task_by_pid_ns(tid, ns);
				3501	if (task)
				3502	get_task_struct(task);
				3503	rcu_read_unlock();
				3504	if (!task)
				3505	goto out;
				3506	if (!same_thread_group(leader, task))
				3507	goto out_drop_task;
				3508
				3509	result = proc_task_instantiate(dentry, task, NULL);
				3510	out_drop_task:
				3511	put_task_struct(task);
				3512	out:
				3513	put_task_struct(leader);
				3514	out_no_task:
				3515	return result;
				3516	}
				3517
				3518	/*
				3519	* Find the first tid of a thread group to return to user space.
				3520	*
				3521	* Usually this is just the thread group leader, but if the users
				3522	* buffer was too small or there was a seek into the middle of the
				3523	* directory we have more work todo.
				3524	*
				3525	* In the case of a short read we start with find_task_by_pid.
				3526	*
				3527	* In the case of a seek we start with the leader and walk nr
				3528	* threads past it.
				3529	*/
				3530	static struct task_struct first_tid(struct pid pid, int tid, loff_t f_pos,
				3531	struct pid_namespace *ns)
				3532	{
				3533	struct task_struct pos, task;
				3534	unsigned long nr = f_pos;
				3535
				3536	if (nr != f_pos) /* 32bit overflow? */
				3537	return NULL;
				3538
				3539	rcu_read_lock();
				3540	task = pid_task(pid, PIDTYPE_PID);
				3541	if (!task)
				3542	goto fail;
				3543
				3544	/* Attempt to start with the tid of a thread */
				3545	if (tid && nr) {
				3546	pos = find_task_by_pid_ns(tid, ns);
				3547	if (pos && same_thread_group(pos, task))
				3548	goto found;
				3549	}
				3550
				3551	/* If nr exceeds the number of threads there is nothing todo */
				3552	if (nr >= get_nr_threads(task))
				3553	goto fail;
				3554
				3555	/* If we haven't found our starting place yet start
				3556	* with the leader and walk nr threads forward.
				3557	*/
				3558	pos = task = task->group_leader;
				3559	do {
				3560	if (!nr--)
				3561	goto found;
				3562	} while_each_thread(task, pos);
				3563	fail:
				3564	pos = NULL;
				3565	goto out;
				3566	found:
				3567	get_task_struct(pos);
				3568	out:
				3569	rcu_read_unlock();
				3570	return pos;
				3571	}
				3572
				3573	/*
				3574	* Find the next thread in the thread list.
				3575	* Return NULL if there is an error or no next thread.
				3576	*
				3577	* The reference to the input task_struct is released.
				3578	*/
				3579	static struct task_struct next_tid(struct task_struct start)
				3580	{
				3581	struct task_struct *pos = NULL;
				3582	rcu_read_lock();
				3583	if (pid_alive(start)) {
				3584	pos = next_thread(start);
				3585	if (thread_group_leader(pos))
				3586	pos = NULL;
				3587	else
				3588	get_task_struct(pos);
				3589	}
				3590	rcu_read_unlock();
				3591	put_task_struct(start);
				3592	return pos;
				3593	}
				3594
				3595	/* for the /proc/TGID/task/ directories */
				3596	static int proc_task_readdir(struct file file, struct dir_context ctx)
				3597	{
				3598	struct inode *inode = file_inode(file);
				3599	struct task_struct *task;
				3600	struct pid_namespace *ns;
				3601	int tid;
				3602
				3603	if (proc_inode_is_dead(inode))
				3604	return -ENOENT;
				3605
				3606	if (!dir_emit_dots(file, ctx))
				3607	return 0;
				3608
				3609	/* f_version caches the tgid value that the last readdir call couldn't
				3610	* return. lseek aka telldir automagically resets f_version to 0.
				3611	*/
				3612	ns = proc_pid_ns(inode);
				3613	tid = (int)file->f_version;
				3614	file->f_version = 0;
				3615	for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
				3616	task;
				3617	task = next_tid(task), ctx->pos++) {
				3618	char name[10 + 1];
				3619	unsigned int len;
				3620	tid = task_pid_nr_ns(task, ns);
				3621	len = snprintf(name, sizeof(name), "%u", tid);
				3622	if (!proc_fill_cache(file, ctx, name, len,
				3623	proc_task_instantiate, task, NULL)) {
				3624	/* returning this tgid failed, save it as the first
				3625	* pid for the next readir call */
				3626	file->f_version = (u64)tid;
				3627	put_task_struct(task);
				3628	break;
				3629	}
				3630	}
				3631
				3632	return 0;
				3633	}
				3634
				3635	static int proc_task_getattr(const struct path path, struct kstat stat,
				3636	u32 request_mask, unsigned int query_flags)
				3637	{
				3638	struct inode *inode = d_inode(path->dentry);
				3639	struct task_struct *p = get_proc_task(inode);
				3640	generic_fillattr(inode, stat);
				3641
				3642	if (p) {
				3643	stat->nlink += get_nr_threads(p);
				3644	put_task_struct(p);
				3645	}
				3646
				3647	return 0;
				3648	}
				3649
				3650	static const struct inode_operations proc_task_inode_operations = {
				3651	.lookup = proc_task_lookup,
				3652	.getattr = proc_task_getattr,
				3653	.setattr = proc_setattr,
				3654	.permission = proc_pid_permission,
				3655	};
				3656
				3657	static const struct file_operations proc_task_operations = {
				3658	.read = generic_read_dir,
				3659	.iterate_shared = proc_task_readdir,
				3660	.llseek = generic_file_llseek,
				3661	};
				3662
				3663	void __init set_proc_pid_nlink(void)
				3664	{
				3665	nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
				3666	nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
				3667	}