Blame - src/kernel/linux/v4.14/fs/proc/base.c - T103

blob: 3b593d1bafaf0bcba433ebd69d72f79811e9b551 [file] [log] [blame]

rjw	1f88458	2022-01-06 17:20:42 +0800	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* linux/fs/proc/base.c
				4	*
				5	* Copyright (C) 1991, 1992 Linus Torvalds
				6	*
				7	* proc base directory handling functions
				8	*
				9	* 1999, Al Viro. Rewritten. Now it covers the whole per-process part.
				10	* Instead of using magical inumbers to determine the kind of object
				11	* we allocate and fill in-core inodes upon lookup. They don't even
				12	* go into icache. We cache the reference to task_struct upon lookup too.
				13	* Eventually it should become a filesystem in its own. We don't use the
				14	* rest of procfs anymore.
				15	*
				16	*
				17	* Changelog:
				18	* 17-Jan-2005
				19	* Allan Bezerra
				20	* Bruna Moreira <bruna.moreira@indt.org.br>
				21	* Edjard Mota <edjard.mota@indt.org.br>
				22	* Ilias Biris <ilias.biris@indt.org.br>
				23	* Mauricio Lin <mauricio.lin@indt.org.br>
				24	*
				25	* Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
				26	*
				27	* A new process specific entry (smaps) included in /proc. It shows the
				28	* size of rss for each memory area. The maps entry lacks information
				29	* about physical memory size (rss) for each mapped file, i.e.,
				30	* rss information for executables and library files.
				31	* This additional information is useful for any tools that need to know
				32	* about physical memory consumption for a process specific library.
				33	*
				34	* Changelog:
				35	* 21-Feb-2005
				36	* Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
				37	* Pud inclusion in the page table walking.
				38	*
				39	* ChangeLog:
				40	* 10-Mar-2005
				41	* 10LE Instituto Nokia de Tecnologia - INdT:
				42	* A better way to walk through the page table as suggested by Hugh Dickins.
				43	*
				44	* Simo Piiroinen <simo.piiroinen@nokia.com>:
				45	* Smaps information related to shared, private, clean and dirty pages.
				46	*
				47	* Paul Mundt <paul.mundt@nokia.com>:
				48	* Overall revision about smaps.
				49	*/
				50
				51	#include <linux/uaccess.h>
				52
				53	#include <linux/errno.h>
				54	#include <linux/time.h>
				55	#include <linux/proc_fs.h>
				56	#include <linux/stat.h>
				57	#include <linux/task_io_accounting_ops.h>
				58	#include <linux/init.h>
				59	#include <linux/capability.h>
				60	#include <linux/file.h>
				61	#include <linux/fdtable.h>
				62	#include <linux/string.h>
				63	#include <linux/seq_file.h>
				64	#include <linux/namei.h>
				65	#include <linux/mnt_namespace.h>
				66	#include <linux/mm.h>
				67	#include <linux/swap.h>
				68	#include <linux/rcupdate.h>
				69	#include <linux/kallsyms.h>
				70	#include <linux/stacktrace.h>
				71	#include <linux/resource.h>
				72	#include <linux/module.h>
				73	#include <linux/mount.h>
				74	#include <linux/security.h>
				75	#include <linux/ptrace.h>
				76	#include <linux/tracehook.h>
				77	#include <linux/printk.h>
				78	#include <linux/cgroup.h>
				79	#include <linux/cpuset.h>
				80	#include <linux/audit.h>
				81	#include <linux/poll.h>
				82	#include <linux/nsproxy.h>
				83	#include <linux/oom.h>
				84	#include <linux/elf.h>
				85	#include <linux/pid_namespace.h>
				86	#include <linux/user_namespace.h>
				87	#include <linux/fs_struct.h>
				88	#include <linux/slab.h>
				89	#include <linux/sched/autogroup.h>
				90	#include <linux/sched/mm.h>
				91	#include <linux/sched/coredump.h>
				92	#include <linux/sched/debug.h>
				93	#include <linux/sched/stat.h>
				94	#include <linux/flex_array.h>
				95	#include <linux/posix-timers.h>
				96	#include <linux/cpufreq_times.h>
				97	#ifdef CONFIG_HARDWALL
				98	#include <asm/hardwall.h>
				99	#endif
				100	#include <trace/events/oom.h>
				101	#include "internal.h"
				102	#include "fd.h"
				103
				104	#include "../../lib/kstrtox.h"
				105
				106	/* NOTE:
				107	* Implementing inode permission operations in /proc is almost
				108	* certainly an error. Permission checks need to happen during
				109	* each system call not at open time. The reason is that most of
				110	* what we wish to check for permissions in /proc varies at runtime.
				111	*
				112	* The classic example of a problem is opening file descriptors
				113	* in /proc for a task before it execs a suid executable.
				114	*/
				115
				116	static u8 nlink_tid;
				117	static u8 nlink_tgid;
				118
				119	struct pid_entry {
				120	const char *name;
				121	unsigned int len;
				122	umode_t mode;
				123	const struct inode_operations *iop;
				124	const struct file_operations *fop;
				125	union proc_op op;
				126	};
				127
				/*
				 * Initialiser for one struct pid_entry. @NAME must be a string literal,
				 * because .len is computed with sizeof(NAME) - 1.
				 */
				#define NOD(NAME, MODE, IOP, FOP, OP) {			\
					.name = (NAME),					\
					.len  = sizeof(NAME) - 1,			\
					.mode = MODE,					\
					.iop  = IOP,					\
					.fop  = FOP,					\
					.op   = OP,					\
				}

				/* Directory entry: S_IFDIR plus @MODE, with both inode and file ops. */
				#define DIR(NAME, MODE, iops, fops)	\
					NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
				/* Symlink entry: rwx for everyone, resolved through @get_link. */
				#define LNK(NAME, get_link)					\
					NOD(NAME, (S_IFLNK|S_IRWXUGO),				\
						&proc_pid_link_inode_operations, NULL,		\
						{ .proc_get_link = get_link } )
				/* Regular file entry served by the caller-supplied file ops. */
				#define REG(NAME, MODE, fops)				\
					NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
				/* Regular file entry rendered by a single seq_file @show callback. */
				#define ONE(NAME, MODE, show)				\
					NOD(NAME, (S_IFREG|(MODE)), 			\
						NULL, &proc_single_file_operations,	\
						{ .proc_show = show } )
				149
				150	/*
				151	* Count the number of hardlinks for the pid_entry table, excluding the .
				152	* and .. links.
				153	*/
				154	static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
				155	unsigned int n)
				156	{
				157	unsigned int i;
				158	unsigned int count;
				159
				160	count = 2;
				161	for (i = 0; i < n; ++i) {
				162	if (S_ISDIR(entries[i].mode))
				163	++count;
				164	}
				165
				166	return count;
				167	}
				168
				169	static int get_task_root(struct task_struct task, struct path root)
				170	{
				171	int result = -ENOENT;
				172
				173	task_lock(task);
				174	if (task->fs) {
				175	get_fs_root(task->fs, root);
				176	result = 0;
				177	}
				178	task_unlock(task);
				179	return result;
				180	}
				181
				182	static int proc_cwd_link(struct dentry dentry, struct path path)
				183	{
				184	struct task_struct *task = get_proc_task(d_inode(dentry));
				185	int result = -ENOENT;
				186
				187	if (task) {
				188	task_lock(task);
				189	if (task->fs) {
				190	get_fs_pwd(task->fs, path);
				191	result = 0;
				192	}
				193	task_unlock(task);
				194	put_task_struct(task);
				195	}
				196	return result;
				197	}
				198
				199	static int proc_root_link(struct dentry dentry, struct path path)
				200	{
				201	struct task_struct *task = get_proc_task(d_inode(dentry));
				202	int result = -ENOENT;
				203
				204	if (task) {
				205	result = get_task_root(task, path);
				206	put_task_struct(task);
				207	}
				208	return result;
				209	}
				210
				211	static ssize_t proc_pid_cmdline_read(struct file file, char __user buf,
				212	size_t _count, loff_t *pos)
				213	{
				214	struct task_struct *tsk;
				215	struct mm_struct *mm;
				216	char *page;
				217	unsigned long count = _count;
				218	unsigned long arg_start, arg_end, env_start, env_end;
				219	unsigned long len1, len2, len;
				220	unsigned long p;
				221	char c;
				222	ssize_t rv;
				223
				224	BUG_ON(*pos < 0);
				225
				226	tsk = get_proc_task(file_inode(file));
				227	if (!tsk)
				228	return -ESRCH;
				229	mm = get_task_mm(tsk);
				230	put_task_struct(tsk);
				231	if (!mm)
				232	return 0;
				233	/* Check if process spawned far enough to have cmdline. */
				234	if (!mm->env_end) {
				235	rv = 0;
				236	goto out_mmput;
				237	}
				238
				239	page = (char *)__get_free_page(GFP_KERNEL);
				240	if (!page) {
				241	rv = -ENOMEM;
				242	goto out_mmput;
				243	}
				244
				245	down_read(&mm->mmap_sem);
				246	arg_start = mm->arg_start;
				247	arg_end = mm->arg_end;
				248	env_start = mm->env_start;
				249	env_end = mm->env_end;
				250	up_read(&mm->mmap_sem);
				251
				252	BUG_ON(arg_start > arg_end);
				253	BUG_ON(env_start > env_end);
				254
				255	len1 = arg_end - arg_start;
				256	len2 = env_end - env_start;
				257
				258	/* Empty ARGV. */
				259	if (len1 == 0) {
				260	rv = 0;
				261	goto out_free_page;
				262	}
				263	/*
				264	* Inherently racy -- command line shares address space
				265	* with code and data.
				266	*/
				267	rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON);
				268	if (rv <= 0)
				269	goto out_free_page;
				270
				271	rv = 0;
				272
				273	if (c == '\0') {
				274	/* Command line (set of strings) occupies whole ARGV. */
				275	if (len1 <= *pos)
				276	goto out_free_page;
				277
				278	p = arg_start + *pos;
				279	len = len1 - *pos;
				280	while (count > 0 && len > 0) {
				281	unsigned int _count;
				282	int nr_read;
				283
				284	_count = min3(count, len, PAGE_SIZE);
				285	nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
				286	if (nr_read < 0)
				287	rv = nr_read;
				288	if (nr_read <= 0)
				289	goto out_free_page;
				290
				291	if (copy_to_user(buf, page, nr_read)) {
				292	rv = -EFAULT;
				293	goto out_free_page;
				294	}
				295
				296	p += nr_read;
				297	len -= nr_read;
				298	buf += nr_read;
				299	count -= nr_read;
				300	rv += nr_read;
				301	}
				302	} else {
				303	/*
				304	* Command line (1 string) occupies ARGV and
				305	* extends into ENVP.
				306	*/
				307	struct {
				308	unsigned long p;
				309	unsigned long len;
				310	} cmdline[2] = {
				311	{ .p = arg_start, .len = len1 },
				312	{ .p = env_start, .len = len2 },
				313	};
				314	loff_t pos1 = *pos;
				315	unsigned int i;
				316
				317	i = 0;
				318	while (i < 2 && pos1 >= cmdline[i].len) {
				319	pos1 -= cmdline[i].len;
				320	i++;
				321	}
				322	while (i < 2) {
				323	p = cmdline[i].p + pos1;
				324	len = cmdline[i].len - pos1;
				325	while (count > 0 && len > 0) {
				326	unsigned int _count, l;
				327	int nr_read;
				328	bool final;
				329
				330	_count = min3(count, len, PAGE_SIZE);
				331	nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
				332	if (nr_read < 0)
				333	rv = nr_read;
				334	if (nr_read <= 0)
				335	goto out_free_page;
				336
				337	/*
				338	* Command line can be shorter than whole ARGV
				339	* even if last "marker" byte says it is not.
				340	*/
				341	final = false;
				342	l = strnlen(page, nr_read);
				343	if (l < nr_read) {
				344	nr_read = l;
				345	final = true;
				346	}
				347
				348	if (copy_to_user(buf, page, nr_read)) {
				349	rv = -EFAULT;
				350	goto out_free_page;
				351	}
				352
				353	p += nr_read;
				354	len -= nr_read;
				355	buf += nr_read;
				356	count -= nr_read;
				357	rv += nr_read;
				358
				359	if (final)
				360	goto out_free_page;
				361	}
				362
				363	/* Only first chunk can be read partially. */
				364	pos1 = 0;
				365	i++;
				366	}
				367	}
				368
				369	out_free_page:
				370	free_page((unsigned long)page);
				371	out_mmput:
				372	mmput(mm);
				373	if (rv > 0)
				374	*pos += rv;
				375	return rv;
				376	}
				377
				378	static const struct file_operations proc_pid_cmdline_ops = {
				379	.read = proc_pid_cmdline_read,
				380	.llseek = generic_file_llseek,
				381	};
				382
				383	#ifdef CONFIG_KALLSYMS
				384	/*
				385	* Provides a wchan file via kallsyms in a proper one-value-per-file format.
				386	* Returns the resolved symbol. If that fails, simply return the address.
				387	*/
				388	static int proc_pid_wchan(struct seq_file m, struct pid_namespace ns,
				389	struct pid pid, struct task_struct task)
				390	{
				391	unsigned long wchan;
				392	char symname[KSYM_NAME_LEN];
				393
				394	wchan = get_wchan(task);
				395
				396	if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)
				397	&& !lookup_symbol_name(wchan, symname))
				398	seq_printf(m, "%s", symname);
				399	else
				400	seq_putc(m, '0');
				401
				402	return 0;
				403	}
				404	#endif /* CONFIG_KALLSYMS */
				405
				406	static int lock_trace(struct task_struct *task)
				407	{
				408	int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
				409	if (err)
				410	return err;
				411	if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
				412	mutex_unlock(&task->signal->cred_guard_mutex);
				413	return -EPERM;
				414	}
				415	return 0;
				416	}
				417
				418	static void unlock_trace(struct task_struct *task)
				419	{
				420	mutex_unlock(&task->signal->cred_guard_mutex);
				421	}
				422
				423	#ifdef CONFIG_STACKTRACE
				424
				425	#define MAX_STACK_TRACE_DEPTH 64
				426
				427	static int proc_pid_stack(struct seq_file m, struct pid_namespace ns,
				428	struct pid pid, struct task_struct task)
				429	{
				430	struct stack_trace trace;
				431	unsigned long *entries;
				432	int err;
				433	int i;
				434
				435	/*
				436	* The ability to racily run the kernel stack unwinder on a running task
				437	* and then observe the unwinder output is scary; while it is useful for
				438	* debugging kernel issues, it can also allow an attacker to leak kernel
				439	* stack contents.
				440	* Doing this in a manner that is at least safe from races would require
				441	* some work to ensure that the remote task can not be scheduled; and
				442	* even then, this would still expose the unwinder as local attack
				443	* surface.
				444	* Therefore, this interface is restricted to root.
				445	*/
				446	if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
				447	return -EACCES;
				448
				449	entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
				450	if (!entries)
				451	return -ENOMEM;
				452
				453	trace.nr_entries = 0;
				454	trace.max_entries = MAX_STACK_TRACE_DEPTH;
				455	trace.entries = entries;
				456	trace.skip = 0;
				457
				458	err = lock_trace(task);
				459	if (!err) {
				460	save_stack_trace_tsk(task, &trace);
				461
				462	for (i = 0; i < trace.nr_entries; i++) {
				463	seq_printf(m, "[<%pK>] %pB\n",
				464	(void )entries[i], (void )entries[i]);
				465	}
				466	unlock_trace(task);
				467	}
				468	kfree(entries);
				469
				470	return err;
				471	}
				472	#endif
				473
				474	#ifdef CONFIG_SCHED_INFO
				475	/*
				476	* Provides /proc/PID/schedstat
				477	*/
				478	static int proc_pid_schedstat(struct seq_file m, struct pid_namespace ns,
				479	struct pid pid, struct task_struct task)
				480	{
				481	if (unlikely(!sched_info_on()))
				482	seq_printf(m, "0 0 0\n");
				483	else
				484	seq_printf(m, "%llu %llu %lu\n",
				485	(unsigned long long)task->se.sum_exec_runtime,
				486	(unsigned long long)task->sched_info.run_delay,
				487	task->sched_info.pcount);
				488
				489	return 0;
				490	}
				491	#endif
				492
				493	#ifdef CONFIG_LATENCYTOP
				494	static int lstats_show_proc(struct seq_file m, void v)
				495	{
				496	int i;
				497	struct inode *inode = m->private;
				498	struct task_struct *task = get_proc_task(inode);
				499
				500	if (!task)
				501	return -ESRCH;
				502	seq_puts(m, "Latency Top version : v0.1\n");
				503	for (i = 0; i < 32; i++) {
				504	struct latency_record *lr = &task->latency_record[i];
				505	if (lr->backtrace[0]) {
				506	int q;
				507	seq_printf(m, "%i %li %li",
				508	lr->count, lr->time, lr->max);
				509	for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
				510	unsigned long bt = lr->backtrace[q];
				511	if (!bt)
				512	break;
				513	if (bt == ULONG_MAX)
				514	break;
				515	seq_printf(m, " %ps", (void *)bt);
				516	}
				517	seq_putc(m, '\n');
				518	}
				519
				520	}
				521	put_task_struct(task);
				522	return 0;
				523	}
				524
				525	static int lstats_open(struct inode inode, struct file file)
				526	{
				527	return single_open(file, lstats_show_proc, inode);
				528	}
				529
				530	static ssize_t lstats_write(struct file file, const char __user buf,
				531	size_t count, loff_t *offs)
				532	{
				533	struct task_struct *task = get_proc_task(file_inode(file));
				534
				535	if (!task)
				536	return -ESRCH;
				537	clear_all_latency_tracing(task);
				538	put_task_struct(task);
				539
				540	return count;
				541	}
				542
				543	static const struct file_operations proc_lstats_operations = {
				544	.open = lstats_open,
				545	.read = seq_read,
				546	.write = lstats_write,
				547	.llseek = seq_lseek,
				548	.release = single_release,
				549	};
				550
				551	#endif
				552
				553	static int proc_oom_score(struct seq_file m, struct pid_namespace ns,
				554	struct pid pid, struct task_struct task)
				555	{
				556	unsigned long totalpages = totalram_pages + total_swap_pages;
				557	unsigned long points = 0;
				558
				559	points = oom_badness(task, NULL, NULL, totalpages) *
				560	1000 / totalpages;
				561	seq_printf(m, "%lu\n", points);
				562
				563	return 0;
				564	}
				565
				/* Display strings for one resource limit row. */
				struct limit_names {
					const char *name;	/* label printed in the "Limit" column */
					const char *unit;	/* text for the "Units" column, NULL for none */
				};
				570
				571	static const struct limit_names lnames[RLIM_NLIMITS] = {
				572	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
				573	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
				574	[RLIMIT_DATA] = {"Max data size", "bytes"},
				575	[RLIMIT_STACK] = {"Max stack size", "bytes"},
				576	[RLIMIT_CORE] = {"Max core file size", "bytes"},
				577	[RLIMIT_RSS] = {"Max resident set", "bytes"},
				578	[RLIMIT_NPROC] = {"Max processes", "processes"},
				579	[RLIMIT_NOFILE] = {"Max open files", "files"},
				580	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
				581	[RLIMIT_AS] = {"Max address space", "bytes"},
				582	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
				583	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
				584	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
				585	[RLIMIT_NICE] = {"Max nice priority", NULL},
				586	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
				587	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
				588	};
				589
				590	/* Display limits for a process */
				591	static int proc_pid_limits(struct seq_file m, struct pid_namespace ns,
				592	struct pid pid, struct task_struct task)
				593	{
				594	unsigned int i;
				595	unsigned long flags;
				596
				597	struct rlimit rlim[RLIM_NLIMITS];
				598
				599	if (!lock_task_sighand(task, &flags))
				600	return 0;
				601	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
				602	unlock_task_sighand(task, &flags);
				603
				604	/*
				605	* print the file header
				606	*/
				607	seq_printf(m, "%-25s %-20s %-20s %-10s\n",
				608	"Limit", "Soft Limit", "Hard Limit", "Units");
				609
				610	for (i = 0; i < RLIM_NLIMITS; i++) {
				611	if (rlim[i].rlim_cur == RLIM_INFINITY)
				612	seq_printf(m, "%-25s %-20s ",
				613	lnames[i].name, "unlimited");
				614	else
				615	seq_printf(m, "%-25s %-20lu ",
				616	lnames[i].name, rlim[i].rlim_cur);
				617
				618	if (rlim[i].rlim_max == RLIM_INFINITY)
				619	seq_printf(m, "%-20s ", "unlimited");
				620	else
				621	seq_printf(m, "%-20lu ", rlim[i].rlim_max);
				622
				623	if (lnames[i].unit)
				624	seq_printf(m, "%-10s\n", lnames[i].unit);
				625	else
				626	seq_putc(m, '\n');
				627	}
				628
				629	return 0;
				630	}
				631
				632	#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
				/*
				 * seq_file show callback describing the system call @task is currently in.
				 *
				 * Output is "running" when task_current_syscall() reports failure,
				 * "nr sp pc" when the syscall number is negative, and otherwise
				 * "nr arg0..arg5 sp pc".
				 *
				 * Returns 0, or the error from lock_trace() if tracing access is refused.
				 */
				static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
							    struct pid *pid, struct task_struct *task)
				{
					long nr;
					unsigned long args[6], sp, pc;
					int res;

					res = lock_trace(task);
					if (res)
						return res;

					if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
						seq_puts(m, "running\n");
					else if (nr < 0)
						seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
					else
						seq_printf(m,
						       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
						       nr,
						       args[0], args[1], args[2], args[3], args[4], args[5],
						       sp, pc);
					unlock_trace(task);

					return 0;
				}
				658	#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
				659
				660	/************************************************************************/
				661	/* Here the fs part begins */
				662	/************************************************************************/
				663
				664	/* permission checks */
				665	static int proc_fd_access_allowed(struct inode *inode)
				666	{
				667	struct task_struct *task;
				668	int allowed = 0;
				669	/* Allow access to a task's file descriptors if it is us or we
				670	* may use ptrace attach to the process and find out that
				671	* information.
				672	*/
				673	task = get_proc_task(inode);
				674	if (task) {
				675	allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
				676	put_task_struct(task);
				677	}
				678	return allowed;
				679	}
				680
				681	int proc_setattr(struct dentry dentry, struct iattr attr)
				682	{
				683	int error;
				684	struct inode *inode = d_inode(dentry);
				685
				686	if (attr->ia_valid & ATTR_MODE)
				687	return -EPERM;
				688
				689	error = setattr_prepare(dentry, attr);
				690	if (error)
				691	return error;
				692
				693	setattr_copy(inode, attr);
				694	mark_inode_dirty(inode);
				695	return 0;
				696	}
				697
				698	/*
				699	* May current process learn task's sched/cmdline info (for hide_pid_min=1)
				700	* or euid/egid (for hide_pid_min=2)?
				701	*/
				702	static bool has_pid_permissions(struct pid_namespace *pid,
				703	struct task_struct *task,
				704	int hide_pid_min)
				705	{
				706	if (pid->hide_pid < hide_pid_min)
				707	return true;
				708	if (in_group_p(pid->pid_gid))
				709	return true;
				710	return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
				711	}
				712
				713
				714	static int proc_pid_permission(struct inode *inode, int mask)
				715	{
				716	struct pid_namespace *pid = inode->i_sb->s_fs_info;
				717	struct task_struct *task;
				718	bool has_perms;
				719
				720	task = get_proc_task(inode);
				721	if (!task)
				722	return -ESRCH;
				723	has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
				724	put_task_struct(task);
				725
				726	if (!has_perms) {
				727	if (pid->hide_pid == HIDEPID_INVISIBLE) {
				728	/*
				729	* Let's make getdents(), stat(), and open()
				730	* consistent with each other. If a process
				731	* may not stat() a file, it shouldn't be seen
				732	* in procfs at all.
				733	*/
				734	return -ENOENT;
				735	}
				736
				737	return -EPERM;
				738	}
				739	return generic_permission(inode, mask);
				740	}
				741
				742
				743
				744	static const struct inode_operations proc_def_inode_operations = {
				745	.setattr = proc_setattr,
				746	};
				747
				748	static int proc_single_show(struct seq_file m, void v)
				749	{
				750	struct inode *inode = m->private;
				751	struct pid_namespace *ns;
				752	struct pid *pid;
				753	struct task_struct *task;
				754	int ret;
				755
				756	ns = inode->i_sb->s_fs_info;
				757	pid = proc_pid(inode);
				758	task = get_pid_task(pid, PIDTYPE_PID);
				759	if (!task)
				760	return -ESRCH;
				761
				762	ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
				763
				764	put_task_struct(task);
				765	return ret;
				766	}
				767
				768	static int proc_single_open(struct inode inode, struct file filp)
				769	{
				770	return single_open(filp, proc_single_show, inode);
				771	}
				772
				773	static const struct file_operations proc_single_file_operations = {
				774	.open = proc_single_open,
				775	.read = seq_read,
				776	.llseek = seq_lseek,
				777	.release = single_release,
				778	};
				779
				780
				781	struct mm_struct proc_mem_open(struct inode inode, unsigned int mode)
				782	{
				783	struct task_struct *task = get_proc_task(inode);
				784	struct mm_struct *mm = ERR_PTR(-ESRCH);
				785
				786	if (task) {
				787	mm = mm_access(task, mode \| PTRACE_MODE_FSCREDS);
				788	put_task_struct(task);
				789
				790	if (!IS_ERR_OR_NULL(mm)) {
				791	/* ensure this mm_struct can't be freed */
				792	mmgrab(mm);
				793	/* but do not pin its memory */
				794	mmput(mm);
				795	}
				796	}
				797
				798	return mm;
				799	}
				800
				801	static int __mem_open(struct inode inode, struct file file, unsigned int mode)
				802	{
				803	struct mm_struct *mm = proc_mem_open(inode, mode);
				804
				805	if (IS_ERR(mm))
				806	return PTR_ERR(mm);
				807
				808	file->private_data = mm;
				809	return 0;
				810	}
				811
				812	static int mem_open(struct inode inode, struct file file)
				813	{
				814	int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH);
				815
				816	/* OK to pass negative loff_t, we can catch out-of-range */
				817	file->f_mode \|= FMODE_UNSIGNED_OFFSET;
				818
				819	return ret;
				820	}
				821
				822	static ssize_t mem_rw(struct file file, char __user buf,
				823	size_t count, loff_t *ppos, int write)
				824	{
				825	struct mm_struct *mm = file->private_data;
				826	unsigned long addr = *ppos;
				827	ssize_t copied;
				828	char *page;
				829	unsigned int flags;
				830
				831	if (!mm)
				832	return 0;
				833
				834	page = (char *)__get_free_page(GFP_KERNEL);
				835	if (!page)
				836	return -ENOMEM;
				837
				838	copied = 0;
				839	if (!mmget_not_zero(mm))
				840	goto free;
				841
				842	flags = FOLL_FORCE \| (write ? FOLL_WRITE : 0);
				843
				844	while (count > 0) {
				845	int this_len = min_t(int, count, PAGE_SIZE);
				846
				847	if (write && copy_from_user(page, buf, this_len)) {
				848	copied = -EFAULT;
				849	break;
				850	}
				851
				852	this_len = access_remote_vm(mm, addr, page, this_len, flags);
				853	if (!this_len) {
				854	if (!copied)
				855	copied = -EIO;
				856	break;
				857	}
				858
				859	if (!write && copy_to_user(buf, page, this_len)) {
				860	copied = -EFAULT;
				861	break;
				862	}
				863
				864	buf += this_len;
				865	addr += this_len;
				866	copied += this_len;
				867	count -= this_len;
				868	}
				869	*ppos = addr;
				870
				871	mmput(mm);
				872	free:
				873	free_page((unsigned long) page);
				874	return copied;
				875	}
				876
				877	static ssize_t mem_read(struct file file, char __user buf,
				878	size_t count, loff_t *ppos)
				879	{
				880	return mem_rw(file, buf, count, ppos, 0);
				881	}
				882
				883	static ssize_t mem_write(struct file file, const char __user buf,
				884	size_t count, loff_t *ppos)
				885	{
				886	return mem_rw(file, (char __user*)buf, count, ppos, 1);
				887	}
				888
				889	loff_t mem_lseek(struct file *file, loff_t offset, int orig)
				890	{
				891	switch (orig) {
				892	case 0:
				893	file->f_pos = offset;
				894	break;
				895	case 1:
				896	file->f_pos += offset;
				897	break;
				898	default:
				899	return -EINVAL;
				900	}
				901	force_successful_syscall_return();
				902	return file->f_pos;
				903	}
				904
				905	static int mem_release(struct inode inode, struct file file)
				906	{
				907	struct mm_struct *mm = file->private_data;
				908	if (mm)
				909	mmdrop(mm);
				910	return 0;
				911	}
				912
				913	static const struct file_operations proc_mem_operations = {
				914	.llseek = mem_lseek,
				915	.read = mem_read,
				916	.write = mem_write,
				917	.open = mem_open,
				918	.release = mem_release,
				919	};
				920
				921	static int environ_open(struct inode inode, struct file file)
				922	{
				923	return __mem_open(inode, file, PTRACE_MODE_READ);
				924	}
				925
				926	static ssize_t environ_read(struct file file, char __user buf,
				927	size_t count, loff_t *ppos)
				928	{
				929	char *page;
				930	unsigned long src = *ppos;
				931	int ret = 0;
				932	struct mm_struct *mm = file->private_data;
				933	unsigned long env_start, env_end;
				934
				935	/* Ensure the process spawned far enough to have an environment. */
				936	if (!mm \|\| !mm->env_end)
				937	return 0;
				938
				939	page = (char *)__get_free_page(GFP_KERNEL);
				940	if (!page)
				941	return -ENOMEM;
				942
				943	ret = 0;
				944	if (!mmget_not_zero(mm))
				945	goto free;
				946
				947	down_read(&mm->mmap_sem);
				948	env_start = mm->env_start;
				949	env_end = mm->env_end;
				950	up_read(&mm->mmap_sem);
				951
				952	while (count > 0) {
				953	size_t this_len, max_len;
				954	int retval;
				955
				956	if (src >= (env_end - env_start))
				957	break;
				958
				959	this_len = env_end - (env_start + src);
				960
				961	max_len = min_t(size_t, PAGE_SIZE, count);
				962	this_len = min(max_len, this_len);
				963
				964	retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
				965
				966	if (retval <= 0) {
				967	ret = retval;
				968	break;
				969	}
				970
				971	if (copy_to_user(buf, page, retval)) {
				972	ret = -EFAULT;
				973	break;
				974	}
				975
				976	ret += retval;
				977	src += retval;
				978	buf += retval;
				979	count -= retval;
				980	}
				981	*ppos = src;
				982	mmput(mm);
				983
				984	free:
				985	free_page((unsigned long) page);
				986	return ret;
				987	}
				988
				989	static const struct file_operations proc_environ_operations = {
				990	.open = environ_open,
				991	.read = environ_read,
				992	.llseek = generic_file_llseek,
				993	.release = mem_release,
				994	};
				995
				996	static int auxv_open(struct inode inode, struct file file)
				997	{
				998	return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
				999	}
				1000
				1001	static ssize_t auxv_read(struct file file, char __user buf,
				1002	size_t count, loff_t *ppos)
				1003	{
				1004	struct mm_struct *mm = file->private_data;
				1005	unsigned int nwords = 0;
				1006
				1007	if (!mm)
				1008	return 0;
				1009	do {
				1010	nwords += 2;
				1011	} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
				1012	return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
				1013	nwords * sizeof(mm->saved_auxv[0]));
				1014	}
				1015
				1016	static const struct file_operations proc_auxv_operations = {
				1017	.open = auxv_open,
				1018	.read = auxv_read,
				1019	.llseek = generic_file_llseek,
				1020	.release = mem_release,
				1021	};
				1022
				1023	static ssize_t oom_adj_read(struct file file, char __user buf, size_t count,
				1024	loff_t *ppos)
				1025	{
				1026	struct task_struct *task = get_proc_task(file_inode(file));
				1027	char buffer[PROC_NUMBUF];
				1028	int oom_adj = OOM_ADJUST_MIN;
				1029	size_t len;
				1030
				1031	if (!task)
				1032	return -ESRCH;
				1033	if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
				1034	oom_adj = OOM_ADJUST_MAX;
				1035	else
				1036	oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
				1037	OOM_SCORE_ADJ_MAX;
				1038	put_task_struct(task);
				1039	len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
				1040	return simple_read_from_buffer(buf, count, ppos, buffer, len);
				1041	}
				1042
				1043	static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
				1044	{
				1045	static DEFINE_MUTEX(oom_adj_mutex);
				1046	struct mm_struct *mm = NULL;
				1047	struct task_struct *task;
				1048	int err = 0;
				1049
				1050	task = get_proc_task(file_inode(file));
				1051	if (!task)
				1052	return -ESRCH;
				1053
				1054	mutex_lock(&oom_adj_mutex);
				1055	if (legacy) {
				1056	if (oom_adj < task->signal->oom_score_adj &&
				1057	!capable(CAP_SYS_RESOURCE)) {
				1058	err = -EACCES;
				1059	goto err_unlock;
				1060	}
				1061	/*
				1062	* /proc/pid/oom_adj is provided for legacy purposes, ask users to use
				1063	* /proc/pid/oom_score_adj instead.
				1064	*/
				1065	pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
				1066	current->comm, task_pid_nr(current), task_pid_nr(task),
				1067	task_pid_nr(task));
				1068	} else {
				1069	if ((short)oom_adj < task->signal->oom_score_adj_min &&
				1070	!capable(CAP_SYS_RESOURCE)) {
				1071	err = -EACCES;
				1072	goto err_unlock;
				1073	}
				1074	}
				1075
				1076	/*
				1077	* Make sure we will check other processes sharing the mm if this is
				1078	* not vfrok which wants its own oom_score_adj.
				1079	* pin the mm so it doesn't go away and get reused after task_unlock
				1080	*/
				1081	if (!task->vfork_done) {
				1082	struct task_struct *p = find_lock_task_mm(task);
				1083
				1084	if (p) {
				1085	if (atomic_read(&p->mm->mm_users) > 1) {
				1086	mm = p->mm;
				1087	mmgrab(mm);
				1088	}
				1089	task_unlock(p);
				1090	}
				1091	}
				1092
				1093	task->signal->oom_score_adj = oom_adj;
				1094	if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
				1095	task->signal->oom_score_adj_min = (short)oom_adj;
				1096	trace_oom_score_adj_update(task);
				1097
				1098	if (mm) {
				1099	struct task_struct *p;
				1100
				1101	rcu_read_lock();
				1102	for_each_process(p) {
				1103	if (same_thread_group(task, p))
				1104	continue;
				1105
				1106	/* do not touch kernel threads or the global init */
				1107	if (p->flags & PF_KTHREAD \|\| is_global_init(p))
				1108	continue;
				1109
				1110	task_lock(p);
				1111	if (!p->vfork_done && process_shares_mm(p, mm)) {
				1112	p->signal->oom_score_adj = oom_adj;
				1113	if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
				1114	p->signal->oom_score_adj_min = (short)oom_adj;
				1115	}
				1116	task_unlock(p);
				1117	}
				1118	rcu_read_unlock();
				1119	mmdrop(mm);
				1120	}
				1121	err_unlock:
				1122	mutex_unlock(&oom_adj_mutex);
				1123	put_task_struct(task);
				1124	return err;
				1125	}
				1126
				1127	/*
				1128	* /proc/pid/oom_adj exists solely for backwards compatibility with previous
				1129	* kernels. The effective policy is defined by oom_score_adj, which has a
				1130	* different scale: oom_adj grew exponentially and oom_score_adj grows linearly.
				1131	* Values written to oom_adj are simply mapped linearly to oom_score_adj.
				1132	* Processes that become oom disabled via oom_adj will still be oom disabled
				1133	* with this implementation.
				1134	*
				1135	* oom_adj cannot be removed since existing userspace binaries use it.
				1136	*/
				1137	static ssize_t oom_adj_write(struct file file, const char __user buf,
				1138	size_t count, loff_t *ppos)
				1139	{
				1140	char buffer[PROC_NUMBUF];
				1141	int oom_adj;
				1142	int err;
				1143
				1144	memset(buffer, 0, sizeof(buffer));
				1145	if (count > sizeof(buffer) - 1)
				1146	count = sizeof(buffer) - 1;
				1147	if (copy_from_user(buffer, buf, count)) {
				1148	err = -EFAULT;
				1149	goto out;
				1150	}
				1151
				1152	err = kstrtoint(strstrip(buffer), 0, &oom_adj);
				1153	if (err)
				1154	goto out;
				1155	if ((oom_adj < OOM_ADJUST_MIN \|\| oom_adj > OOM_ADJUST_MAX) &&
				1156	oom_adj != OOM_DISABLE) {
				1157	err = -EINVAL;
				1158	goto out;
				1159	}
				1160
				1161	/*
				1162	* Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
				1163	* value is always attainable.
				1164	*/
				1165	if (oom_adj == OOM_ADJUST_MAX)
				1166	oom_adj = OOM_SCORE_ADJ_MAX;
				1167	else
				1168	oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
				1169
				1170	err = __set_oom_adj(file, oom_adj, true);
				1171	out:
				1172	return err < 0 ? err : count;
				1173	}
				1174
				1175	static const struct file_operations proc_oom_adj_operations = {
				1176	.read = oom_adj_read,
				1177	.write = oom_adj_write,
				1178	.llseek = generic_file_llseek,
				1179	};
				1180
				1181	static ssize_t oom_score_adj_read(struct file file, char __user buf,
				1182	size_t count, loff_t *ppos)
				1183	{
				1184	struct task_struct *task = get_proc_task(file_inode(file));
				1185	char buffer[PROC_NUMBUF];
				1186	short oom_score_adj = OOM_SCORE_ADJ_MIN;
				1187	size_t len;
				1188
				1189	if (!task)
				1190	return -ESRCH;
				1191	oom_score_adj = task->signal->oom_score_adj;
				1192	put_task_struct(task);
				1193	len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
				1194	return simple_read_from_buffer(buf, count, ppos, buffer, len);
				1195	}
				1196
				1197	static ssize_t oom_score_adj_write(struct file file, const char __user buf,
				1198	size_t count, loff_t *ppos)
				1199	{
				1200	char buffer[PROC_NUMBUF];
				1201	int oom_score_adj;
				1202	int err;
				1203
				1204	memset(buffer, 0, sizeof(buffer));
				1205	if (count > sizeof(buffer) - 1)
				1206	count = sizeof(buffer) - 1;
				1207	if (copy_from_user(buffer, buf, count)) {
				1208	err = -EFAULT;
				1209	goto out;
				1210	}
				1211
				1212	err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
				1213	if (err)
				1214	goto out;
				1215	if (oom_score_adj < OOM_SCORE_ADJ_MIN \|\|
				1216	oom_score_adj > OOM_SCORE_ADJ_MAX) {
				1217	err = -EINVAL;
				1218	goto out;
				1219	}
				1220
				1221	err = __set_oom_adj(file, oom_score_adj, false);
				1222	out:
				1223	return err < 0 ? err : count;
				1224	}
				1225
				1226	static const struct file_operations proc_oom_score_adj_operations = {
				1227	.read = oom_score_adj_read,
				1228	.write = oom_score_adj_write,
				1229	.llseek = default_llseek,
				1230	};
				1231
				1232	#ifdef CONFIG_AUDITSYSCALL
				1233	#define TMPBUFLEN 11
				1234	static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
				1235	size_t count, loff_t *ppos)
				1236	{
				1237	struct inode * inode = file_inode(file);
				1238	struct task_struct *task = get_proc_task(inode);
				1239	ssize_t length;
				1240	char tmpbuf[TMPBUFLEN];
				1241
				1242	if (!task)
				1243	return -ESRCH;
				1244	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
				1245	from_kuid(file->f_cred->user_ns,
				1246	audit_get_loginuid(task)));
				1247	put_task_struct(task);
				1248	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
				1249	}
				1250
				1251	static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
				1252	size_t count, loff_t *ppos)
				1253	{
				1254	struct inode * inode = file_inode(file);
				1255	uid_t loginuid;
				1256	kuid_t kloginuid;
				1257	int rv;
				1258
				1259	rcu_read_lock();
				1260	if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
				1261	rcu_read_unlock();
				1262	return -EPERM;
				1263	}
				1264	rcu_read_unlock();
				1265
				1266	if (*ppos != 0) {
				1267	/* No partial writes. */
				1268	return -EINVAL;
				1269	}
				1270
				1271	rv = kstrtou32_from_user(buf, count, 10, &loginuid);
				1272	if (rv < 0)
				1273	return rv;
				1274
				1275	/* is userspace tring to explicitly UNSET the loginuid? */
				1276	if (loginuid == AUDIT_UID_UNSET) {
				1277	kloginuid = INVALID_UID;
				1278	} else {
				1279	kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
				1280	if (!uid_valid(kloginuid))
				1281	return -EINVAL;
				1282	}
				1283
				1284	rv = audit_set_loginuid(kloginuid);
				1285	if (rv < 0)
				1286	return rv;
				1287	return count;
				1288	}
				1289
				1290	static const struct file_operations proc_loginuid_operations = {
				1291	.read = proc_loginuid_read,
				1292	.write = proc_loginuid_write,
				1293	.llseek = generic_file_llseek,
				1294	};
				1295
				1296	static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
				1297	size_t count, loff_t *ppos)
				1298	{
				1299	struct inode * inode = file_inode(file);
				1300	struct task_struct *task = get_proc_task(inode);
				1301	ssize_t length;
				1302	char tmpbuf[TMPBUFLEN];
				1303
				1304	if (!task)
				1305	return -ESRCH;
				1306	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
				1307	audit_get_sessionid(task));
				1308	put_task_struct(task);
				1309	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
				1310	}
				1311
				1312	static const struct file_operations proc_sessionid_operations = {
				1313	.read = proc_sessionid_read,
				1314	.llseek = generic_file_llseek,
				1315	};
				1316	#endif
				1317
				1318	#ifdef CONFIG_FAULT_INJECTION
				1319	static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
				1320	size_t count, loff_t *ppos)
				1321	{
				1322	struct task_struct *task = get_proc_task(file_inode(file));
				1323	char buffer[PROC_NUMBUF];
				1324	size_t len;
				1325	int make_it_fail;
				1326
				1327	if (!task)
				1328	return -ESRCH;
				1329	make_it_fail = task->make_it_fail;
				1330	put_task_struct(task);
				1331
				1332	len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
				1333
				1334	return simple_read_from_buffer(buf, count, ppos, buffer, len);
				1335	}
				1336
				1337	static ssize_t proc_fault_inject_write(struct file * file,
				1338	const char __user * buf, size_t count, loff_t *ppos)
				1339	{
				1340	struct task_struct *task;
				1341	char buffer[PROC_NUMBUF];
				1342	int make_it_fail;
				1343	int rv;
				1344
				1345	if (!capable(CAP_SYS_RESOURCE))
				1346	return -EPERM;
				1347	memset(buffer, 0, sizeof(buffer));
				1348	if (count > sizeof(buffer) - 1)
				1349	count = sizeof(buffer) - 1;
				1350	if (copy_from_user(buffer, buf, count))
				1351	return -EFAULT;
				1352	rv = kstrtoint(strstrip(buffer), 0, &make_it_fail);
				1353	if (rv < 0)
				1354	return rv;
				1355	if (make_it_fail < 0 \|\| make_it_fail > 1)
				1356	return -EINVAL;
				1357
				1358	task = get_proc_task(file_inode(file));
				1359	if (!task)
				1360	return -ESRCH;
				1361	task->make_it_fail = make_it_fail;
				1362	put_task_struct(task);
				1363
				1364	return count;
				1365	}
				1366
				1367	static const struct file_operations proc_fault_inject_operations = {
				1368	.read = proc_fault_inject_read,
				1369	.write = proc_fault_inject_write,
				1370	.llseek = generic_file_llseek,
				1371	};
				1372
				1373	static ssize_t proc_fail_nth_write(struct file file, const char __user buf,
				1374	size_t count, loff_t *ppos)
				1375	{
				1376	struct task_struct *task;
				1377	int err;
				1378	unsigned int n;
				1379
				1380	err = kstrtouint_from_user(buf, count, 0, &n);
				1381	if (err)
				1382	return err;
				1383
				1384	task = get_proc_task(file_inode(file));
				1385	if (!task)
				1386	return -ESRCH;
				1387	WRITE_ONCE(task->fail_nth, n);
				1388	put_task_struct(task);
				1389
				1390	return count;
				1391	}
				1392
				1393	static ssize_t proc_fail_nth_read(struct file file, char __user buf,
				1394	size_t count, loff_t *ppos)
				1395	{
				1396	struct task_struct *task;
				1397	char numbuf[PROC_NUMBUF];
				1398	ssize_t len;
				1399
				1400	task = get_proc_task(file_inode(file));
				1401	if (!task)
				1402	return -ESRCH;
				1403	len = snprintf(numbuf, sizeof(numbuf), "%u\n",
				1404	READ_ONCE(task->fail_nth));
				1405	len = simple_read_from_buffer(buf, count, ppos, numbuf, len);
				1406	put_task_struct(task);
				1407
				1408	return len;
				1409	}
				1410
				1411	static const struct file_operations proc_fail_nth_operations = {
				1412	.read = proc_fail_nth_read,
				1413	.write = proc_fail_nth_write,
				1414	};
				1415	#endif
				1416
				1417
				1418	#ifdef CONFIG_SCHED_DEBUG
				1419	/*
				1420	* Print out various scheduling related per-task fields:
				1421	*/
				1422	static int sched_show(struct seq_file m, void v)
				1423	{
				1424	struct inode *inode = m->private;
				1425	struct pid_namespace *ns = inode->i_sb->s_fs_info;
				1426	struct task_struct *p;
				1427
				1428	p = get_proc_task(inode);
				1429	if (!p)
				1430	return -ESRCH;
				1431	proc_sched_show_task(p, ns, m);
				1432
				1433	put_task_struct(p);
				1434
				1435	return 0;
				1436	}
				1437
				1438	static ssize_t
				1439	sched_write(struct file file, const char __user buf,
				1440	size_t count, loff_t *offset)
				1441	{
				1442	struct inode *inode = file_inode(file);
				1443	struct task_struct *p;
				1444
				1445	p = get_proc_task(inode);
				1446	if (!p)
				1447	return -ESRCH;
				1448	proc_sched_set_task(p);
				1449
				1450	put_task_struct(p);
				1451
				1452	return count;
				1453	}
				1454
				1455	static int sched_open(struct inode inode, struct file filp)
				1456	{
				1457	return single_open(filp, sched_show, inode);
				1458	}
				1459
				1460	static const struct file_operations proc_pid_sched_operations = {
				1461	.open = sched_open,
				1462	.read = seq_read,
				1463	.write = sched_write,
				1464	.llseek = seq_lseek,
				1465	.release = single_release,
				1466	};
				1467
				1468	#endif
				1469
				1470	#ifdef CONFIG_SCHED_AUTOGROUP
				1471	/*
				1472	* Print out autogroup related information:
				1473	*/
				1474	static int sched_autogroup_show(struct seq_file m, void v)
				1475	{
				1476	struct inode *inode = m->private;
				1477	struct task_struct *p;
				1478
				1479	p = get_proc_task(inode);
				1480	if (!p)
				1481	return -ESRCH;
				1482	proc_sched_autogroup_show_task(p, m);
				1483
				1484	put_task_struct(p);
				1485
				1486	return 0;
				1487	}
				1488
				1489	static ssize_t
				1490	sched_autogroup_write(struct file file, const char __user buf,
				1491	size_t count, loff_t *offset)
				1492	{
				1493	struct inode *inode = file_inode(file);
				1494	struct task_struct *p;
				1495	char buffer[PROC_NUMBUF];
				1496	int nice;
				1497	int err;
				1498
				1499	memset(buffer, 0, sizeof(buffer));
				1500	if (count > sizeof(buffer) - 1)
				1501	count = sizeof(buffer) - 1;
				1502	if (copy_from_user(buffer, buf, count))
				1503	return -EFAULT;
				1504
				1505	err = kstrtoint(strstrip(buffer), 0, &nice);
				1506	if (err < 0)
				1507	return err;
				1508
				1509	p = get_proc_task(inode);
				1510	if (!p)
				1511	return -ESRCH;
				1512
				1513	err = proc_sched_autogroup_set_nice(p, nice);
				1514	if (err)
				1515	count = err;
				1516
				1517	put_task_struct(p);
				1518
				1519	return count;
				1520	}
				1521
				1522	static int sched_autogroup_open(struct inode inode, struct file filp)
				1523	{
				1524	int ret;
				1525
				1526	ret = single_open(filp, sched_autogroup_show, NULL);
				1527	if (!ret) {
				1528	struct seq_file *m = filp->private_data;
				1529
				1530	m->private = inode;
				1531	}
				1532	return ret;
				1533	}
				1534
				1535	static const struct file_operations proc_pid_sched_autogroup_operations = {
				1536	.open = sched_autogroup_open,
				1537	.read = seq_read,
				1538	.write = sched_autogroup_write,
				1539	.llseek = seq_lseek,
				1540	.release = single_release,
				1541	};
				1542
				1543	#endif /* CONFIG_SCHED_AUTOGROUP */
				1544
				1545	static ssize_t comm_write(struct file file, const char __user buf,
				1546	size_t count, loff_t *offset)
				1547	{
				1548	struct inode *inode = file_inode(file);
				1549	struct task_struct *p;
				1550	char buffer[TASK_COMM_LEN];
				1551	const size_t maxlen = sizeof(buffer) - 1;
				1552
				1553	memset(buffer, 0, sizeof(buffer));
				1554	if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count))
				1555	return -EFAULT;
				1556
				1557	p = get_proc_task(inode);
				1558	if (!p)
				1559	return -ESRCH;
				1560
				1561	if (same_thread_group(current, p))
				1562	set_task_comm(p, buffer);
				1563	else
				1564	count = -EINVAL;
				1565
				1566	put_task_struct(p);
				1567
				1568	return count;
				1569	}
				1570
				1571	static int comm_show(struct seq_file m, void v)
				1572	{
				1573	struct inode *inode = m->private;
				1574	struct task_struct *p;
				1575
				1576	p = get_proc_task(inode);
				1577	if (!p)
				1578	return -ESRCH;
				1579
				1580	task_lock(p);
				1581	seq_printf(m, "%s\n", p->comm);
				1582	task_unlock(p);
				1583
				1584	put_task_struct(p);
				1585
				1586	return 0;
				1587	}
				1588
				1589	static int comm_open(struct inode inode, struct file filp)
				1590	{
				1591	return single_open(filp, comm_show, inode);
				1592	}
				1593
				1594	static const struct file_operations proc_pid_set_comm_operations = {
				1595	.open = comm_open,
				1596	.read = seq_read,
				1597	.write = comm_write,
				1598	.llseek = seq_lseek,
				1599	.release = single_release,
				1600	};
				1601
				1602	static int proc_exe_link(struct dentry dentry, struct path exe_path)
				1603	{
				1604	struct task_struct *task;
				1605	struct file *exe_file;
				1606
				1607	task = get_proc_task(d_inode(dentry));
				1608	if (!task)
				1609	return -ENOENT;
				1610	exe_file = get_task_exe_file(task);
				1611	put_task_struct(task);
				1612	if (exe_file) {
				1613	*exe_path = exe_file->f_path;
				1614	path_get(&exe_file->f_path);
				1615	fput(exe_file);
				1616	return 0;
				1617	} else
				1618	return -ENOENT;
				1619	}
				1620
				1621	static const char proc_pid_get_link(struct dentry dentry,
				1622	struct inode *inode,
				1623	struct delayed_call *done)
				1624	{
				1625	struct path path;
				1626	int error = -EACCES;
				1627
				1628	if (!dentry)
				1629	return ERR_PTR(-ECHILD);
				1630
				1631	/* Are we allowed to snoop on the tasks file descriptors? */
				1632	if (!proc_fd_access_allowed(inode))
				1633	goto out;
				1634
				1635	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
				1636	if (error)
				1637	goto out;
				1638
				1639	nd_jump_link(&path);
				1640	return NULL;
				1641	out:
				1642	return ERR_PTR(error);
				1643	}
				1644
				1645	static int do_proc_readlink(struct path path, char __user buffer, int buflen)
				1646	{
				1647	char tmp = (char )__get_free_page(GFP_KERNEL);
				1648	char *pathname;
				1649	int len;
				1650
				1651	if (!tmp)
				1652	return -ENOMEM;
				1653
				1654	pathname = d_path(path, tmp, PAGE_SIZE);
				1655	len = PTR_ERR(pathname);
				1656	if (IS_ERR(pathname))
				1657	goto out;
				1658	len = tmp + PAGE_SIZE - 1 - pathname;
				1659
				1660	if (len > buflen)
				1661	len = buflen;
				1662	if (copy_to_user(buffer, pathname, len))
				1663	len = -EFAULT;
				1664	out:
				1665	free_page((unsigned long)tmp);
				1666	return len;
				1667	}
				1668
				1669	static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
				1670	{
				1671	int error = -EACCES;
				1672	struct inode *inode = d_inode(dentry);
				1673	struct path path;
				1674
				1675	/* Are we allowed to snoop on the tasks file descriptors? */
				1676	if (!proc_fd_access_allowed(inode))
				1677	goto out;
				1678
				1679	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
				1680	if (error)
				1681	goto out;
				1682
				1683	error = do_proc_readlink(&path, buffer, buflen);
				1684	path_put(&path);
				1685	out:
				1686	return error;
				1687	}
				1688
				1689	const struct inode_operations proc_pid_link_inode_operations = {
				1690	.readlink = proc_pid_readlink,
				1691	.get_link = proc_pid_get_link,
				1692	.setattr = proc_setattr,
				1693	};
				1694
				1695
				1696	/* building an inode */
				1697
				1698	void task_dump_owner(struct task_struct *task, mode_t mode,
				1699	kuid_t ruid, kgid_t rgid)
				1700	{
				1701	/* Depending on the state of dumpable compute who should own a
				1702	* proc file for a task.
				1703	*/
				1704	const struct cred *cred;
				1705	kuid_t uid;
				1706	kgid_t gid;
				1707
				1708	if (unlikely(task->flags & PF_KTHREAD)) {
				1709	*ruid = GLOBAL_ROOT_UID;
				1710	*rgid = GLOBAL_ROOT_GID;
				1711	return;
				1712	}
				1713
				1714	/* Default to the tasks effective ownership */
				1715	rcu_read_lock();
				1716	cred = __task_cred(task);
				1717	uid = cred->euid;
				1718	gid = cred->egid;
				1719	rcu_read_unlock();
				1720
				1721	/*
				1722	* Before the /proc/pid/status file was created the only way to read
				1723	* the effective uid of a /process was to stat /proc/pid. Reading
				1724	* /proc/pid/status is slow enough that procps and other packages
				1725	* kept stating /proc/pid. To keep the rules in /proc simple I have
				1726	* made this apply to all per process world readable and executable
				1727	* directories.
				1728	*/
				1729	if (mode != (S_IFDIR\|S_IRUGO\|S_IXUGO)) {
				1730	struct mm_struct *mm;
				1731	task_lock(task);
				1732	mm = task->mm;
				1733	/* Make non-dumpable tasks owned by some root */
				1734	if (mm) {
				1735	if (get_dumpable(mm) != SUID_DUMP_USER) {
				1736	struct user_namespace *user_ns = mm->user_ns;
				1737
				1738	uid = make_kuid(user_ns, 0);
				1739	if (!uid_valid(uid))
				1740	uid = GLOBAL_ROOT_UID;
				1741
				1742	gid = make_kgid(user_ns, 0);
				1743	if (!gid_valid(gid))
				1744	gid = GLOBAL_ROOT_GID;
				1745	}
				1746	} else {
				1747	uid = GLOBAL_ROOT_UID;
				1748	gid = GLOBAL_ROOT_GID;
				1749	}
				1750	task_unlock(task);
				1751	}
				1752	*ruid = uid;
				1753	*rgid = gid;
				1754	}
				1755
				1756	struct inode proc_pid_make_inode(struct super_block sb,
				1757	struct task_struct *task, umode_t mode)
				1758	{
				1759	struct inode * inode;
				1760	struct proc_inode *ei;
				1761
				1762	/* We need a new inode */
				1763
				1764	inode = new_inode(sb);
				1765	if (!inode)
				1766	goto out;
				1767
				1768	/* Common stuff */
				1769	ei = PROC_I(inode);
				1770	inode->i_mode = mode;
				1771	inode->i_ino = get_next_ino();
				1772	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
				1773	inode->i_op = &proc_def_inode_operations;
				1774
				1775	/*
				1776	* grab the reference to task.
				1777	*/
				1778	ei->pid = get_task_pid(task, PIDTYPE_PID);
				1779	if (!ei->pid)
				1780	goto out_unlock;
				1781
				1782	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
				1783	security_task_to_inode(task, inode);
				1784
				1785	out:
				1786	return inode;
				1787
				1788	out_unlock:
				1789	iput(inode);
				1790	return NULL;
				1791	}
				1792
				1793	int pid_getattr(const struct path path, struct kstat stat,
				1794	u32 request_mask, unsigned int query_flags)
				1795	{
				1796	struct inode *inode = d_inode(path->dentry);
				1797	struct task_struct *task;
				1798	struct pid_namespace *pid = path->dentry->d_sb->s_fs_info;
				1799
				1800	generic_fillattr(inode, stat);
				1801
				1802	rcu_read_lock();
				1803	stat->uid = GLOBAL_ROOT_UID;
				1804	stat->gid = GLOBAL_ROOT_GID;
				1805	task = pid_task(proc_pid(inode), PIDTYPE_PID);
				1806	if (task) {
				1807	if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
				1808	rcu_read_unlock();
				1809	/*
				1810	* This doesn't prevent learning whether PID exists,
				1811	* it only makes getattr() consistent with readdir().
				1812	*/
				1813	return -ENOENT;
				1814	}
				1815	task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
				1816	}
				1817	rcu_read_unlock();
				1818	return 0;
				1819	}
				1820
				1821	/* dentry stuff */
				1822
				1823	/*
				1824	* Exceptional case: normally we are not allowed to unhash a busy
				1825	* directory. In this case, however, we can do it - no aliasing problems
				1826	* due to the way we treat inodes.
				1827	*
				1828	* Rewrite the inode's ownerships here because the owning task may have
				1829	* performed a setuid(), etc.
				1830	*
				1831	*/
				1832	int pid_revalidate(struct dentry *dentry, unsigned int flags)
				1833	{
				1834	struct inode *inode;
				1835	struct task_struct *task;
				1836
				1837	if (flags & LOOKUP_RCU)
				1838	return -ECHILD;
				1839
				1840	inode = d_inode(dentry);
				1841	task = get_proc_task(inode);
				1842
				1843	if (task) {
				1844	task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
				1845
				1846	inode->i_mode &= ~(S_ISUID \| S_ISGID);
				1847	security_task_to_inode(task, inode);
				1848	put_task_struct(task);
				1849	return 1;
				1850	}
				1851	return 0;
				1852	}
				1853
				1854	static inline bool proc_inode_is_dead(struct inode *inode)
				1855	{
				1856	return !proc_pid(inode)->tasks[PIDTYPE_PID].first;
				1857	}
				1858
				/*
				 * ->d_delete(): returns non-zero when the task this dentry represents
				 * is dead, so the dentry is killed immediately instead of being put
				 * on the lru list.
				 */
				int pid_delete_dentry(const struct dentry *dentry)
				{
					struct inode *inode = d_inode(dentry);

					return proc_inode_is_dead(inode);
				}
				1867
				1868	const struct dentry_operations pid_dentry_operations =
				1869	{
				1870	.d_revalidate = pid_revalidate,
				1871	.d_delete = pid_delete_dentry,
				1872	};
				1873
				1874	/* Lookups */
				1875
				1876	/*
				1877	* Fill a directory entry.
				1878	*
				1879	* If possible create the dcache entry and derive our inode number and
				1880	* file type from dcache entry.
				1881	*
				1882	* Since all of the proc inode numbers are dynamically generated, the inode
				1883	* numbers do not exist until the inode is cache. This means creating the
				1884	* the dcache entry in readdir is necessary to keep the inode numbers
				1885	* reported by readdir in sync with the inode numbers reported
				1886	* by stat.
				1887	*/
				1888	bool proc_fill_cache(struct file file, struct dir_context ctx,
				1889	const char *name, int len,
				1890	instantiate_t instantiate, struct task_struct task, const void ptr)
				1891	{
				1892	struct dentry child, dir = file->f_path.dentry;
				1893	struct qstr qname = QSTR_INIT(name, len);
				1894	struct inode *inode;
				1895	unsigned type;
				1896	ino_t ino;
				1897
				1898	child = d_hash_and_lookup(dir, &qname);
				1899	if (!child) {
				1900	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
				1901	child = d_alloc_parallel(dir, &qname, &wq);
				1902	if (IS_ERR(child))
				1903	goto end_instantiate;
				1904	if (d_in_lookup(child)) {
				1905	int err = instantiate(d_inode(dir), child, task, ptr);
				1906	d_lookup_done(child);
				1907	if (err < 0) {
				1908	dput(child);
				1909	goto end_instantiate;
				1910	}
				1911	}
				1912	}
				1913	inode = d_inode(child);
				1914	ino = inode->i_ino;
				1915	type = inode->i_mode >> 12;
				1916	dput(child);
				1917	return dir_emit(ctx, name, len, ino, type);
				1918
				1919	end_instantiate:
				1920	return dir_emit(ctx, name, len, 1, DT_UNKNOWN);
				1921	}
				1922
				1923	/*
				1924	* dname_to_vma_addr - maps a dentry name into two unsigned longs
				1925	* which represent vma start and end addresses.
				1926	*/
				1927	static int dname_to_vma_addr(struct dentry *dentry,
				1928	unsigned long start, unsigned long end)
				1929	{
				1930	const char *str = dentry->d_name.name;
				1931	unsigned long long sval, eval;
				1932	unsigned int len;
				1933
				1934	len = _parse_integer(str, 16, &sval);
				1935	if (len & KSTRTOX_OVERFLOW)
				1936	return -EINVAL;
				1937	if (sval != (unsigned long)sval)
				1938	return -EINVAL;
				1939	str += len;
				1940
				1941	if (*str != '-')
				1942	return -EINVAL;
				1943	str++;
				1944
				1945	len = _parse_integer(str, 16, &eval);
				1946	if (len & KSTRTOX_OVERFLOW)
				1947	return -EINVAL;
				1948	if (eval != (unsigned long)eval)
				1949	return -EINVAL;
				1950	str += len;
				1951
				1952	if (*str != '\0')
				1953	return -EINVAL;
				1954
				1955	*start = sval;
				1956	*end = eval;
				1957
				1958	return 0;
				1959	}
				1960
				1961	static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
				1962	{
				1963	unsigned long vm_start, vm_end;
				1964	bool exact_vma_exists = false;
				1965	struct mm_struct *mm = NULL;
				1966	struct task_struct *task;
				1967	struct inode *inode;
				1968	int status = 0;
				1969
				1970	if (flags & LOOKUP_RCU)
				1971	return -ECHILD;
				1972
				1973	inode = d_inode(dentry);
				1974	task = get_proc_task(inode);
				1975	if (!task)
				1976	goto out_notask;
				1977
				1978	mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
				1979	if (IS_ERR_OR_NULL(mm))
				1980	goto out;
				1981
				1982	if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
				1983	down_read(&mm->mmap_sem);
				1984	exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
				1985	up_read(&mm->mmap_sem);
				1986	}
				1987
				1988	mmput(mm);
				1989
				1990	if (exact_vma_exists) {
				1991	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
				1992
				1993	security_task_to_inode(task, inode);
				1994	status = 1;
				1995	}
				1996
				1997	out:
				1998	put_task_struct(task);
				1999
				2000	out_notask:
				2001	return status;
				2002	}
				2003
				2004	static const struct dentry_operations tid_map_files_dentry_operations = {
				2005	.d_revalidate = map_files_d_revalidate,
				2006	.d_delete = pid_delete_dentry,
				2007	};
				2008
				2009	static int map_files_get_link(struct dentry dentry, struct path path)
				2010	{
				2011	unsigned long vm_start, vm_end;
				2012	struct vm_area_struct *vma;
				2013	struct task_struct *task;
				2014	struct mm_struct *mm;
				2015	int rc;
				2016
				2017	rc = -ENOENT;
				2018	task = get_proc_task(d_inode(dentry));
				2019	if (!task)
				2020	goto out;
				2021
				2022	mm = get_task_mm(task);
				2023	put_task_struct(task);
				2024	if (!mm)
				2025	goto out;
				2026
				2027	rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
				2028	if (rc)
				2029	goto out_mmput;
				2030
				2031	rc = -ENOENT;
				2032	down_read(&mm->mmap_sem);
				2033	vma = find_exact_vma(mm, vm_start, vm_end);
				2034	if (vma && vma->vm_file) {
				2035	*path = vma->vm_file->f_path;
				2036	path_get(path);
				2037	rc = 0;
				2038	}
				2039	up_read(&mm->mmap_sem);
				2040
				2041	out_mmput:
				2042	mmput(mm);
				2043	out:
				2044	return rc;
				2045	}
				2046
				2047	struct map_files_info {
				2048	fmode_t mode;
				2049	unsigned int len;
				2050	unsigned char name[4sizeof(long)+2]; / max: %lx-%lx\0 */
				2051	};
				2052
				2053	/*
				2054	* Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the
				2055	* symlinks may be used to bypass permissions on ancestor directories in the
				2056	* path to the file in question.
				2057	*/
				2058	static const char *
				2059	proc_map_files_get_link(struct dentry *dentry,
				2060	struct inode *inode,
				2061	struct delayed_call *done)
				2062	{
				2063	if (!capable(CAP_SYS_ADMIN))
				2064	return ERR_PTR(-EPERM);
				2065
				2066	return proc_pid_get_link(dentry, inode, done);
				2067	}
				2068
				2069	/*
				2070	* Identical to proc_pid_link_inode_operations except for get_link()
				2071	*/
				2072	static const struct inode_operations proc_map_files_link_inode_operations = {
				2073	.readlink = proc_pid_readlink,
				2074	.get_link = proc_map_files_get_link,
				2075	.setattr = proc_setattr,
				2076	};
				2077
				2078	static int
				2079	proc_map_files_instantiate(struct inode dir, struct dentry dentry,
				2080	struct task_struct task, const void ptr)
				2081	{
				2082	fmode_t mode = (fmode_t)(unsigned long)ptr;
				2083	struct proc_inode *ei;
				2084	struct inode *inode;
				2085
				2086	inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK \|
				2087	((mode & FMODE_READ ) ? S_IRUSR : 0) \|
				2088	((mode & FMODE_WRITE) ? S_IWUSR : 0));
				2089	if (!inode)
				2090	return -ENOENT;
				2091
				2092	ei = PROC_I(inode);
				2093	ei->op.proc_get_link = map_files_get_link;
				2094
				2095	inode->i_op = &proc_map_files_link_inode_operations;
				2096	inode->i_size = 64;
				2097
				2098	d_set_d_op(dentry, &tid_map_files_dentry_operations);
				2099	d_add(dentry, inode);
				2100
				2101	return 0;
				2102	}
				2103
				2104	static struct dentry proc_map_files_lookup(struct inode dir,
				2105	struct dentry *dentry, unsigned int flags)
				2106	{
				2107	unsigned long vm_start, vm_end;
				2108	struct vm_area_struct *vma;
				2109	struct task_struct *task;
				2110	int result;
				2111	struct mm_struct *mm;
				2112
				2113	result = -ENOENT;
				2114	task = get_proc_task(dir);
				2115	if (!task)
				2116	goto out;
				2117
				2118	result = -EACCES;
				2119	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
				2120	goto out_put_task;
				2121
				2122	result = -ENOENT;
				2123	if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
				2124	goto out_put_task;
				2125
				2126	mm = get_task_mm(task);
				2127	if (!mm)
				2128	goto out_put_task;
				2129
				2130	down_read(&mm->mmap_sem);
				2131	vma = find_exact_vma(mm, vm_start, vm_end);
				2132	if (!vma)
				2133	goto out_no_vma;
				2134
				2135	if (vma->vm_file)
				2136	result = proc_map_files_instantiate(dir, dentry, task,
				2137	(void *)(unsigned long)vma->vm_file->f_mode);
				2138
				2139	out_no_vma:
				2140	up_read(&mm->mmap_sem);
				2141	mmput(mm);
				2142	out_put_task:
				2143	put_task_struct(task);
				2144	out:
				2145	return ERR_PTR(result);
				2146	}
				2147
				2148	static const struct inode_operations proc_map_files_inode_operations = {
				2149	.lookup = proc_map_files_lookup,
				2150	.permission = proc_fd_permission,
				2151	.setattr = proc_setattr,
				2152	};
				2153
				2154	static int
				2155	proc_map_files_readdir(struct file file, struct dir_context ctx)
				2156	{
				2157	struct vm_area_struct *vma;
				2158	struct task_struct *task;
				2159	struct mm_struct *mm;
				2160	unsigned long nr_files, pos, i;
				2161	struct flex_array *fa = NULL;
				2162	struct map_files_info info;
				2163	struct map_files_info *p;
				2164	int ret;
				2165
				2166	ret = -ENOENT;
				2167	task = get_proc_task(file_inode(file));
				2168	if (!task)
				2169	goto out;
				2170
				2171	ret = -EACCES;
				2172	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
				2173	goto out_put_task;
				2174
				2175	ret = 0;
				2176	if (!dir_emit_dots(file, ctx))
				2177	goto out_put_task;
				2178
				2179	mm = get_task_mm(task);
				2180	if (!mm)
				2181	goto out_put_task;
				2182	down_read(&mm->mmap_sem);
				2183
				2184	nr_files = 0;
				2185
				2186	/*
				2187	* We need two passes here:
				2188	*
				2189	* 1) Collect vmas of mapped files with mmap_sem taken
				2190	* 2) Release mmap_sem and instantiate entries
				2191	*
				2192	* otherwise we get lockdep complained, since filldir()
				2193	* routine might require mmap_sem taken in might_fault().
				2194	*/
				2195
				2196	for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
				2197	if (vma->vm_file && ++pos > ctx->pos)
				2198	nr_files++;
				2199	}
				2200
				2201	if (nr_files) {
				2202	fa = flex_array_alloc(sizeof(info), nr_files,
				2203	GFP_KERNEL);
				2204	if (!fa \|\| flex_array_prealloc(fa, 0, nr_files,
				2205	GFP_KERNEL)) {
				2206	ret = -ENOMEM;
				2207	if (fa)
				2208	flex_array_free(fa);
				2209	up_read(&mm->mmap_sem);
				2210	mmput(mm);
				2211	goto out_put_task;
				2212	}
				2213	for (i = 0, vma = mm->mmap, pos = 2; vma;
				2214	vma = vma->vm_next) {
				2215	if (!vma->vm_file)
				2216	continue;
				2217	if (++pos <= ctx->pos)
				2218	continue;
				2219
				2220	info.mode = vma->vm_file->f_mode;
				2221	info.len = snprintf(info.name,
				2222	sizeof(info.name), "%lx-%lx",
				2223	vma->vm_start, vma->vm_end);
				2224	if (flex_array_put(fa, i++, &info, GFP_KERNEL))
				2225	BUG();
				2226	}
				2227	}
				2228	up_read(&mm->mmap_sem);
				2229
				2230	for (i = 0; i < nr_files; i++) {
				2231	p = flex_array_get(fa, i);
				2232	if (!proc_fill_cache(file, ctx,
				2233	p->name, p->len,
				2234	proc_map_files_instantiate,
				2235	task,
				2236	(void *)(unsigned long)p->mode))
				2237	break;
				2238	ctx->pos++;
				2239	}
				2240	if (fa)
				2241	flex_array_free(fa);
				2242	mmput(mm);
				2243
				2244	out_put_task:
				2245	put_task_struct(task);
				2246	out:
				2247	return ret;
				2248	}
				2249
				2250	static const struct file_operations proc_map_files_operations = {
				2251	.read = generic_read_dir,
				2252	.iterate_shared = proc_map_files_readdir,
				2253	.llseek = generic_file_llseek,
				2254	};
				2255
				2256	#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
				/* Per-open state of the "timers" seq_file. */
				struct timers_private {
					struct pid *pid;		/* set at open time from proc_pid(inode) */
					struct task_struct *task;	/* held between timers_start() and timers_stop() */
					struct sighand_struct *sighand;	/* non-NULL while the sighand lock is held */
					struct pid_namespace *ns;	/* set at open time from the superblock's s_fs_info */
					unsigned long flags;		/* flags word for lock/unlock_task_sighand() */
				};
				2264
				2265	static void timers_start(struct seq_file m, loff_t *pos)
				2266	{
				2267	struct timers_private *tp = m->private;
				2268
				2269	tp->task = get_pid_task(tp->pid, PIDTYPE_PID);
				2270	if (!tp->task)
				2271	return ERR_PTR(-ESRCH);
				2272
				2273	tp->sighand = lock_task_sighand(tp->task, &tp->flags);
				2274	if (!tp->sighand)
				2275	return ERR_PTR(-ESRCH);
				2276
				2277	return seq_list_start(&tp->task->signal->posix_timers, *pos);
				2278	}
				2279
				2280	static void timers_next(struct seq_file m, void v, loff_t pos)
				2281	{
				2282	struct timers_private *tp = m->private;
				2283	return seq_list_next(v, &tp->task->signal->posix_timers, pos);
				2284	}
				2285
				2286	static void timers_stop(struct seq_file m, void v)
				2287	{
				2288	struct timers_private *tp = m->private;
				2289
				2290	if (tp->sighand) {
				2291	unlock_task_sighand(tp->task, &tp->flags);
				2292	tp->sighand = NULL;
				2293	}
				2294
				2295	if (tp->task) {
				2296	put_task_struct(tp->task);
				2297	tp->task = NULL;
				2298	}
				2299	}
				2300
				2301	static int show_timer(struct seq_file m, void v)
				2302	{
				2303	struct k_itimer *timer;
				2304	struct timers_private *tp = m->private;
				2305	int notify;
				2306	static const char * const nstr[] = {
				2307	[SIGEV_SIGNAL] = "signal",
				2308	[SIGEV_NONE] = "none",
				2309	[SIGEV_THREAD] = "thread",
				2310	};
				2311
				2312	timer = list_entry((struct list_head *)v, struct k_itimer, list);
				2313	notify = timer->it_sigev_notify;
				2314
				2315	seq_printf(m, "ID: %d\n", timer->it_id);
				2316	seq_printf(m, "signal: %d/%p\n",
				2317	timer->sigq->info.si_signo,
				2318	timer->sigq->info.si_value.sival_ptr);
				2319	seq_printf(m, "notify: %s/%s.%d\n",
				2320	nstr[notify & ~SIGEV_THREAD_ID],
				2321	(notify & SIGEV_THREAD_ID) ? "tid" : "pid",
				2322	pid_nr_ns(timer->it_pid, tp->ns));
				2323	seq_printf(m, "ClockID: %d\n", timer->it_clock);
				2324
				2325	return 0;
				2326	}
				2327
				2328	static const struct seq_operations proc_timers_seq_ops = {
				2329	.start = timers_start,
				2330	.next = timers_next,
				2331	.stop = timers_stop,
				2332	.show = show_timer,
				2333	};
				2334
				2335	static int proc_timers_open(struct inode inode, struct file file)
				2336	{
				2337	struct timers_private *tp;
				2338
				2339	tp = __seq_open_private(file, &proc_timers_seq_ops,
				2340	sizeof(struct timers_private));
				2341	if (!tp)
				2342	return -ENOMEM;
				2343
				2344	tp->pid = proc_pid(inode);
				2345	tp->ns = inode->i_sb->s_fs_info;
				2346	return 0;
				2347	}
				2348
				2349	static const struct file_operations proc_timers_operations = {
				2350	.open = proc_timers_open,
				2351	.read = seq_read,
				2352	.llseek = seq_lseek,
				2353	.release = seq_release_private,
				2354	};
				2355	#endif
				2356
				2357	static ssize_t timerslack_ns_write(struct file file, const char __user buf,
				2358	size_t count, loff_t *offset)
				2359	{
				2360	struct inode *inode = file_inode(file);
				2361	struct task_struct *p;
				2362	u64 slack_ns;
				2363	int err;
				2364
				2365	err = kstrtoull_from_user(buf, count, 10, &slack_ns);
				2366	if (err < 0)
				2367	return err;
				2368
				2369	p = get_proc_task(inode);
				2370	if (!p)
				2371	return -ESRCH;
				2372
				2373	if (p != current) {
				2374	if (!capable(CAP_SYS_NICE)) {
				2375	count = -EPERM;
				2376	goto out;
				2377	}
				2378
				2379	err = security_task_setscheduler(p);
				2380	if (err) {
				2381	count = err;
				2382	goto out;
				2383	}
				2384	}
				2385
				2386	task_lock(p);
				2387	if (slack_ns == 0)
				2388	p->timer_slack_ns = p->default_timer_slack_ns;
				2389	else
				2390	p->timer_slack_ns = slack_ns;
				2391	task_unlock(p);
				2392
				2393	out:
				2394	put_task_struct(p);
				2395
				2396	return count;
				2397	}
				2398
				2399	static int timerslack_ns_show(struct seq_file m, void v)
				2400	{
				2401	struct inode *inode = m->private;
				2402	struct task_struct *p;
				2403	int err = 0;
				2404
				2405	p = get_proc_task(inode);
				2406	if (!p)
				2407	return -ESRCH;
				2408
				2409	if (p != current) {
				2410
				2411	if (!capable(CAP_SYS_NICE)) {
				2412	err = -EPERM;
				2413	goto out;
				2414	}
				2415	err = security_task_getscheduler(p);
				2416	if (err)
				2417	goto out;
				2418	}
				2419
				2420	task_lock(p);
				2421	seq_printf(m, "%llu\n", p->timer_slack_ns);
				2422	task_unlock(p);
				2423
				2424	out:
				2425	put_task_struct(p);
				2426
				2427	return err;
				2428	}
				2429
				2430	static int timerslack_ns_open(struct inode inode, struct file filp)
				2431	{
				2432	return single_open(filp, timerslack_ns_show, inode);
				2433	}
				2434
				2435	static const struct file_operations proc_pid_set_timerslack_ns_operations = {
				2436	.open = timerslack_ns_open,
				2437	.read = seq_read,
				2438	.write = timerslack_ns_write,
				2439	.llseek = seq_lseek,
				2440	.release = single_release,
				2441	};
				2442
				2443	static int proc_pident_instantiate(struct inode *dir,
				2444	struct dentry dentry, struct task_struct task, const void *ptr)
				2445	{
				2446	const struct pid_entry *p = ptr;
				2447	struct inode *inode;
				2448	struct proc_inode *ei;
				2449
				2450	inode = proc_pid_make_inode(dir->i_sb, task, p->mode);
				2451	if (!inode)
				2452	goto out;
				2453
				2454	ei = PROC_I(inode);
				2455	if (S_ISDIR(inode->i_mode))
				2456	set_nlink(inode, 2); /* Use getattr to fix if necessary */
				2457	if (p->iop)
				2458	inode->i_op = p->iop;
				2459	if (p->fop)
				2460	inode->i_fop = p->fop;
				2461	ei->op = p->op;
				2462	d_set_d_op(dentry, &pid_dentry_operations);
				2463	d_add(dentry, inode);
				2464	/* Close the race of the process dying before we return the dentry */
				2465	if (pid_revalidate(dentry, 0))
				2466	return 0;
				2467	out:
				2468	return -ENOENT;
				2469	}
				2470
				2471	static struct dentry proc_pident_lookup(struct inode dir,
				2472	struct dentry *dentry,
				2473	const struct pid_entry *ents,
				2474	unsigned int nents)
				2475	{
				2476	int error;
				2477	struct task_struct *task = get_proc_task(dir);
				2478	const struct pid_entry p, last;
				2479
				2480	error = -ENOENT;
				2481
				2482	if (!task)
				2483	goto out_no_task;
				2484
				2485	/*
				2486	* Yes, it does not scale. And it should not. Don't add
				2487	* new entries into /proc/<tgid>/ without very good reasons.
				2488	*/
				2489	last = &ents[nents];
				2490	for (p = ents; p < last; p++) {
				2491	if (p->len != dentry->d_name.len)
				2492	continue;
				2493	if (!memcmp(dentry->d_name.name, p->name, p->len))
				2494	break;
				2495	}
				2496	if (p >= last)
				2497	goto out;
				2498
				2499	error = proc_pident_instantiate(dir, dentry, task, p);
				2500	out:
				2501	put_task_struct(task);
				2502	out_no_task:
				2503	return ERR_PTR(error);
				2504	}
				2505
				2506	static int proc_pident_readdir(struct file file, struct dir_context ctx,
				2507	const struct pid_entry *ents, unsigned int nents)
				2508	{
				2509	struct task_struct *task = get_proc_task(file_inode(file));
				2510	const struct pid_entry *p;
				2511
				2512	if (!task)
				2513	return -ENOENT;
				2514
				2515	if (!dir_emit_dots(file, ctx))
				2516	goto out;
				2517
				2518	if (ctx->pos >= nents + 2)
				2519	goto out;
				2520
				2521	for (p = ents + (ctx->pos - 2); p < ents + nents; p++) {
				2522	if (!proc_fill_cache(file, ctx, p->name, p->len,
				2523	proc_pident_instantiate, task, p))
				2524	break;
				2525	ctx->pos++;
				2526	}
				2527	out:
				2528	put_task_struct(task);
				2529	return 0;
				2530	}
				2531
				2532	#ifdef CONFIG_SECURITY
				2533	static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
				2534	size_t count, loff_t *ppos)
				2535	{
				2536	struct inode * inode = file_inode(file);
				2537	char *p = NULL;
				2538	ssize_t length;
				2539	struct task_struct *task = get_proc_task(inode);
				2540
				2541	if (!task)
				2542	return -ESRCH;
				2543
				2544	length = security_getprocattr(task,
				2545	(char*)file->f_path.dentry->d_name.name,
				2546	&p);
				2547	put_task_struct(task);
				2548	if (length > 0)
				2549	length = simple_read_from_buffer(buf, count, ppos, p, length);
				2550	kfree(p);
				2551	return length;
				2552	}
				2553
				2554	static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
				2555	size_t count, loff_t *ppos)
				2556	{
				2557	struct inode * inode = file_inode(file);
				2558	void *page;
				2559	ssize_t length;
				2560	struct task_struct *task = get_proc_task(inode);
				2561
				2562	length = -ESRCH;
				2563	if (!task)
				2564	goto out_no_task;
				2565
				2566	/* A task may only write its own attributes. */
				2567	length = -EACCES;
				2568	if (current != task)
				2569	goto out;
				2570
				2571	if (count > PAGE_SIZE)
				2572	count = PAGE_SIZE;
				2573
				2574	/* No partial writes. */
				2575	length = -EINVAL;
				2576	if (*ppos != 0)
				2577	goto out;
				2578
				2579	page = memdup_user(buf, count);
				2580	if (IS_ERR(page)) {
				2581	length = PTR_ERR(page);
				2582	goto out;
				2583	}
				2584
				2585	/* Guard against adverse ptrace interaction */
				2586	length = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
				2587	if (length < 0)
				2588	goto out_free;
				2589
				2590	length = security_setprocattr(file->f_path.dentry->d_name.name,
				2591	page, count);
				2592	mutex_unlock(&current->signal->cred_guard_mutex);
				2593	out_free:
				2594	kfree(page);
				2595	out:
				2596	put_task_struct(task);
				2597	out_no_task:
				2598	return length;
				2599	}
				2600
				2601	static const struct file_operations proc_pid_attr_operations = {
				2602	.read = proc_pid_attr_read,
				2603	.write = proc_pid_attr_write,
				2604	.llseek = generic_file_llseek,
				2605	};
				2606
				2607	static const struct pid_entry attr_dir_stuff[] = {
				2608	REG("current", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2609	REG("prev", S_IRUGO, proc_pid_attr_operations),
				2610	REG("exec", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2611	REG("fscreate", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2612	REG("keycreate", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2613	REG("sockcreate", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2614	};
				2615
				2616	static int proc_attr_dir_readdir(struct file file, struct dir_context ctx)
				2617	{
				2618	return proc_pident_readdir(file, ctx,
				2619	attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
				2620	}
				2621
				2622	static const struct file_operations proc_attr_dir_operations = {
				2623	.read = generic_read_dir,
				2624	.iterate_shared = proc_attr_dir_readdir,
				2625	.llseek = generic_file_llseek,
				2626	};
				2627
				2628	static struct dentry proc_attr_dir_lookup(struct inode dir,
				2629	struct dentry *dentry, unsigned int flags)
				2630	{
				2631	return proc_pident_lookup(dir, dentry,
				2632	attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
				2633	}
				2634
				2635	static const struct inode_operations proc_attr_dir_inode_operations = {
				2636	.lookup = proc_attr_dir_lookup,
				2637	.getattr = pid_getattr,
				2638	.setattr = proc_setattr,
				2639	};
				2640
				2641	#endif
				2642
				2643	#ifdef CONFIG_ELF_CORE
				2644	static ssize_t proc_coredump_filter_read(struct file file, char __user buf,
				2645	size_t count, loff_t *ppos)
				2646	{
				2647	struct task_struct *task = get_proc_task(file_inode(file));
				2648	struct mm_struct *mm;
				2649	char buffer[PROC_NUMBUF];
				2650	size_t len;
				2651	int ret;
				2652
				2653	if (!task)
				2654	return -ESRCH;
				2655
				2656	ret = 0;
				2657	mm = get_task_mm(task);
				2658	if (mm) {
				2659	len = snprintf(buffer, sizeof(buffer), "%08lx\n",
				2660	((mm->flags & MMF_DUMP_FILTER_MASK) >>
				2661	MMF_DUMP_FILTER_SHIFT));
				2662	mmput(mm);
				2663	ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
				2664	}
				2665
				2666	put_task_struct(task);
				2667
				2668	return ret;
				2669	}
				2670
				2671	static ssize_t proc_coredump_filter_write(struct file *file,
				2672	const char __user *buf,
				2673	size_t count,
				2674	loff_t *ppos)
				2675	{
				2676	struct task_struct *task;
				2677	struct mm_struct *mm;
				2678	unsigned int val;
				2679	int ret;
				2680	int i;
				2681	unsigned long mask;
				2682
				2683	ret = kstrtouint_from_user(buf, count, 0, &val);
				2684	if (ret < 0)
				2685	return ret;
				2686
				2687	ret = -ESRCH;
				2688	task = get_proc_task(file_inode(file));
				2689	if (!task)
				2690	goto out_no_task;
				2691
				2692	mm = get_task_mm(task);
				2693	if (!mm)
				2694	goto out_no_mm;
				2695	ret = 0;
				2696
				2697	for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
				2698	if (val & mask)
				2699	set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
				2700	else
				2701	clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
				2702	}
				2703
				2704	mmput(mm);
				2705	out_no_mm:
				2706	put_task_struct(task);
				2707	out_no_task:
				2708	if (ret < 0)
				2709	return ret;
				2710	return count;
				2711	}
				2712
				2713	static const struct file_operations proc_coredump_filter_operations = {
				2714	.read = proc_coredump_filter_read,
				2715	.write = proc_coredump_filter_write,
				2716	.llseek = generic_file_llseek,
				2717	};
				2718	#endif
				2719
				2720	#ifdef CONFIG_TASK_IO_ACCOUNTING
				2721	static int do_io_accounting(struct task_struct task, struct seq_file m, int whole)
				2722	{
				2723	struct task_io_accounting acct = task->ioac;
				2724	unsigned long flags;
				2725	int result;
				2726
				2727	result = mutex_lock_killable(&task->signal->cred_guard_mutex);
				2728	if (result)
				2729	return result;
				2730
				2731	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
				2732	result = -EACCES;
				2733	goto out_unlock;
				2734	}
				2735
				2736	if (whole && lock_task_sighand(task, &flags)) {
				2737	struct task_struct *t = task;
				2738
				2739	task_io_accounting_add(&acct, &task->signal->ioac);
				2740	while_each_thread(task, t)
				2741	task_io_accounting_add(&acct, &t->ioac);
				2742
				2743	unlock_task_sighand(task, &flags);
				2744	}
				2745	seq_printf(m,
				2746	"rchar: %llu\n"
				2747	"wchar: %llu\n"
				2748	"syscr: %llu\n"
				2749	"syscw: %llu\n"
				2750	"read_bytes: %llu\n"
				2751	"write_bytes: %llu\n"
				2752	"cancelled_write_bytes: %llu\n",
				2753	(unsigned long long)acct.rchar,
				2754	(unsigned long long)acct.wchar,
				2755	(unsigned long long)acct.syscr,
				2756	(unsigned long long)acct.syscw,
				2757	(unsigned long long)acct.read_bytes,
				2758	(unsigned long long)acct.write_bytes,
				2759	(unsigned long long)acct.cancelled_write_bytes);
				2760	result = 0;
				2761
				2762	out_unlock:
				2763	mutex_unlock(&task->signal->cred_guard_mutex);
				2764	return result;
				2765	}
				2766
				/* Per-thread "io" file: counters of @task alone (whole = 0). */
				static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
								  struct pid *pid, struct task_struct *task)
				{
					return do_io_accounting(task, m, 0);
				}
				2772
				/* Per-process "io" file: counters summed over the thread group (whole = 1). */
				static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
								   struct pid *pid, struct task_struct *task)
				{
					return do_io_accounting(task, m, 1);
				}
				2778	#endif /* CONFIG_TASK_IO_ACCOUNTING */
				2779
				2780	#ifdef CONFIG_USER_NS
				2781	static int proc_id_map_open(struct inode inode, struct file file,
				2782	const struct seq_operations *seq_ops)
				2783	{
				2784	struct user_namespace *ns = NULL;
				2785	struct task_struct *task;
				2786	struct seq_file *seq;
				2787	int ret = -EINVAL;
				2788
				2789	task = get_proc_task(inode);
				2790	if (task) {
				2791	rcu_read_lock();
				2792	ns = get_user_ns(task_cred_xxx(task, user_ns));
				2793	rcu_read_unlock();
				2794	put_task_struct(task);
				2795	}
				2796	if (!ns)
				2797	goto err;
				2798
				2799	ret = seq_open(file, seq_ops);
				2800	if (ret)
				2801	goto err_put_ns;
				2802
				2803	seq = file->private_data;
				2804	seq->private = ns;
				2805
				2806	return 0;
				2807	err_put_ns:
				2808	put_user_ns(ns);
				2809	err:
				2810	return ret;
				2811	}
				2812
				2813	static int proc_id_map_release(struct inode inode, struct file file)
				2814	{
				2815	struct seq_file *seq = file->private_data;
				2816	struct user_namespace *ns = seq->private;
				2817	put_user_ns(ns);
				2818	return seq_release(inode, file);
				2819	}
				2820
				2821	static int proc_uid_map_open(struct inode inode, struct file file)
				2822	{
				2823	return proc_id_map_open(inode, file, &proc_uid_seq_operations);
				2824	}
				2825
				2826	static int proc_gid_map_open(struct inode inode, struct file file)
				2827	{
				2828	return proc_id_map_open(inode, file, &proc_gid_seq_operations);
				2829	}
				2830
				2831	static int proc_projid_map_open(struct inode inode, struct file file)
				2832	{
				2833	return proc_id_map_open(inode, file, &proc_projid_seq_operations);
				2834	}
				2835
				2836	static const struct file_operations proc_uid_map_operations = {
				2837	.open = proc_uid_map_open,
				2838	.write = proc_uid_map_write,
				2839	.read = seq_read,
				2840	.llseek = seq_lseek,
				2841	.release = proc_id_map_release,
				2842	};
				2843
				2844	static const struct file_operations proc_gid_map_operations = {
				2845	.open = proc_gid_map_open,
				2846	.write = proc_gid_map_write,
				2847	.read = seq_read,
				2848	.llseek = seq_lseek,
				2849	.release = proc_id_map_release,
				2850	};
				2851
				2852	static const struct file_operations proc_projid_map_operations = {
				2853	.open = proc_projid_map_open,
				2854	.write = proc_projid_map_write,
				2855	.read = seq_read,
				2856	.llseek = seq_lseek,
				2857	.release = proc_id_map_release,
				2858	};
				2859
				2860	static int proc_setgroups_open(struct inode inode, struct file file)
				2861	{
				2862	struct user_namespace *ns = NULL;
				2863	struct task_struct *task;
				2864	int ret;
				2865
				2866	ret = -ESRCH;
				2867	task = get_proc_task(inode);
				2868	if (task) {
				2869	rcu_read_lock();
				2870	ns = get_user_ns(task_cred_xxx(task, user_ns));
				2871	rcu_read_unlock();
				2872	put_task_struct(task);
				2873	}
				2874	if (!ns)
				2875	goto err;
				2876
				2877	if (file->f_mode & FMODE_WRITE) {
				2878	ret = -EACCES;
				2879	if (!ns_capable(ns, CAP_SYS_ADMIN))
				2880	goto err_put_ns;
				2881	}
				2882
				2883	ret = single_open(file, &proc_setgroups_show, ns);
				2884	if (ret)
				2885	goto err_put_ns;
				2886
				2887	return 0;
				2888	err_put_ns:
				2889	put_user_ns(ns);
				2890	err:
				2891	return ret;
				2892	}
				2893
				2894	static int proc_setgroups_release(struct inode inode, struct file file)
				2895	{
				2896	struct seq_file *seq = file->private_data;
				2897	struct user_namespace *ns = seq->private;
				2898	int ret = single_release(inode, file);
				2899	put_user_ns(ns);
				2900	return ret;
				2901	}
				2902
				2903	static const struct file_operations proc_setgroups_operations = {
				2904	.open = proc_setgroups_open,
				2905	.write = proc_setgroups_write,
				2906	.read = seq_read,
				2907	.llseek = seq_lseek,
				2908	.release = proc_setgroups_release,
				2909	};
				2910	#endif /* CONFIG_USER_NS */
				2911
				2912	static int proc_pid_personality(struct seq_file m, struct pid_namespace ns,
				2913	struct pid pid, struct task_struct task)
				2914	{
				2915	int err = lock_trace(task);
				2916	if (!err) {
				2917	seq_printf(m, "%08x\n", task->personality);
				2918	unlock_trace(task);
				2919	}
				2920	return err;
				2921	}
				2922
				2923	#ifdef CONFIG_LIVEPATCH
				2924	static int proc_pid_patch_state(struct seq_file m, struct pid_namespace ns,
				2925	struct pid pid, struct task_struct task)
				2926	{
				2927	seq_printf(m, "%d\n", task->patch_state);
				2928	return 0;
				2929	}
				2930	#endif /* CONFIG_LIVEPATCH */
				2931
				2932	/*
				2933	* Thread groups
				2934	*/
				2935	static const struct file_operations proc_task_operations;
				2936	static const struct inode_operations proc_task_inode_operations;
				2937
				2938	static const struct pid_entry tgid_base_stuff[] = {
				2939	DIR("task", S_IRUGO\|S_IXUGO, proc_task_inode_operations, proc_task_operations),
				2940	DIR("fd", S_IRUSR\|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
				2941	DIR("map_files", S_IRUSR\|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
				2942	DIR("fdinfo", S_IRUSR\|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
				2943	DIR("ns", S_IRUSR\|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
				2944	#ifdef CONFIG_NET
				2945	DIR("net", S_IRUGO\|S_IXUGO, proc_net_inode_operations, proc_net_operations),
				2946	#endif
				2947	REG("environ", S_IRUSR, proc_environ_operations),
				2948	REG("auxv", S_IRUSR, proc_auxv_operations),
				2949	ONE("status", S_IRUGO, proc_pid_status),
				2950	ONE("personality", S_IRUSR, proc_pid_personality),
				2951	ONE("limits", S_IRUGO, proc_pid_limits),
				2952	#ifdef CONFIG_SCHED_DEBUG
				2953	REG("sched", S_IRUGO\|S_IWUSR, proc_pid_sched_operations),
				2954	#endif
				2955	#ifdef CONFIG_SCHED_AUTOGROUP
				2956	REG("autogroup", S_IRUGO\|S_IWUSR, proc_pid_sched_autogroup_operations),
				2957	#endif
				2958	REG("comm", S_IRUGO\|S_IWUSR, proc_pid_set_comm_operations),
				2959	#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
				2960	ONE("syscall", S_IRUSR, proc_pid_syscall),
				2961	#endif
				2962	REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
				2963	ONE("stat", S_IRUGO, proc_tgid_stat),
				2964	ONE("statm", S_IRUGO, proc_pid_statm),
				2965	REG("maps", S_IRUGO, proc_pid_maps_operations),
				2966	#ifdef CONFIG_NUMA
				2967	REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
				2968	#endif
				2969	REG("mem", S_IRUSR\|S_IWUSR, proc_mem_operations),
				2970	LNK("cwd", proc_cwd_link),
				2971	LNK("root", proc_root_link),
				2972	LNK("exe", proc_exe_link),
				2973	REG("mounts", S_IRUGO, proc_mounts_operations),
				2974	REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
				2975	REG("mountstats", S_IRUSR, proc_mountstats_operations),
				2976	#ifdef CONFIG_PROC_PAGE_MONITOR
				2977	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
				2978	REG("smaps", S_IRUGO, proc_pid_smaps_operations),
				2979	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
				2980	REG("pagemap", S_IRUSR, proc_pagemap_operations),
				2981	#endif
				2982	#ifdef CONFIG_SECURITY
				2983	DIR("attr", S_IRUGO\|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
				2984	#endif
				2985	#ifdef CONFIG_KALLSYMS
				2986	ONE("wchan", S_IRUGO, proc_pid_wchan),
				2987	#endif
				2988	#ifdef CONFIG_STACKTRACE
				2989	ONE("stack", S_IRUSR, proc_pid_stack),
				2990	#endif
				2991	#ifdef CONFIG_SCHED_INFO
				2992	ONE("schedstat", S_IRUGO, proc_pid_schedstat),
				2993	#endif
				2994	#ifdef CONFIG_LATENCYTOP
				2995	REG("latency", S_IRUGO, proc_lstats_operations),
				2996	#endif
				2997	#ifdef CONFIG_PROC_PID_CPUSET
				2998	ONE("cpuset", S_IRUGO, proc_cpuset_show),
				2999	#endif
				3000	#ifdef CONFIG_CGROUPS
				3001	ONE("cgroup", S_IRUGO, proc_cgroup_show),
				3002	#endif
				3003	ONE("oom_score", S_IRUGO, proc_oom_score),
				3004	REG("oom_adj", S_IRUGO\|S_IWUSR, proc_oom_adj_operations),
				3005	REG("oom_score_adj", S_IRUGO\|S_IWUSR, proc_oom_score_adj_operations),
				3006	#ifdef CONFIG_AUDITSYSCALL
				3007	REG("loginuid", S_IWUSR\|S_IRUGO, proc_loginuid_operations),
				3008	REG("sessionid", S_IRUGO, proc_sessionid_operations),
				3009	#endif
				3010	#ifdef CONFIG_FAULT_INJECTION
				3011	REG("make-it-fail", S_IRUGO\|S_IWUSR, proc_fault_inject_operations),
				3012	REG("fail-nth", 0644, proc_fail_nth_operations),
				3013	#endif
				3014	#ifdef CONFIG_ELF_CORE
				3015	REG("coredump_filter", S_IRUGO\|S_IWUSR, proc_coredump_filter_operations),
				3016	#endif
				3017	#ifdef CONFIG_TASK_IO_ACCOUNTING
				3018	ONE("io", S_IRUSR, proc_tgid_io_accounting),
				3019	#endif
				3020	#ifdef CONFIG_HARDWALL
				3021	ONE("hardwall", S_IRUGO, proc_pid_hardwall),
				3022	#endif
				3023	#ifdef CONFIG_USER_NS
				3024	REG("uid_map", S_IRUGO\|S_IWUSR, proc_uid_map_operations),
				3025	REG("gid_map", S_IRUGO\|S_IWUSR, proc_gid_map_operations),
				3026	REG("projid_map", S_IRUGO\|S_IWUSR, proc_projid_map_operations),
				3027	REG("setgroups", S_IRUGO\|S_IWUSR, proc_setgroups_operations),
				3028	#endif
				3029	#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
				3030	REG("timers", S_IRUGO, proc_timers_operations),
				3031	#endif
				3032	REG("timerslack_ns", S_IRUGO\|S_IWUGO, proc_pid_set_timerslack_ns_operations),
				3033	#ifdef CONFIG_LIVEPATCH
				3034	ONE("patch_state", S_IRUSR, proc_pid_patch_state),
				3035	#endif
				3036	#ifdef CONFIG_CPU_FREQ_TIMES
				3037	ONE("time_in_state", 0444, proc_time_in_state_show),
				3038	#endif
				3039	};
				3040
				3041	static int proc_tgid_base_readdir(struct file file, struct dir_context ctx)
				3042	{
				3043	return proc_pident_readdir(file, ctx,
				3044	tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
				3045	}
				3046
				3047	static const struct file_operations proc_tgid_base_operations = {
				3048	.read = generic_read_dir,
				3049	.iterate_shared = proc_tgid_base_readdir,
				3050	.llseek = generic_file_llseek,
				3051	};
				3052
				3053	static struct dentry proc_tgid_base_lookup(struct inode dir, struct dentry *dentry, unsigned int flags)
				3054	{
				3055	return proc_pident_lookup(dir, dentry,
				3056	tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
				3057	}
				3058
				3059	static const struct inode_operations proc_tgid_base_inode_operations = {
				3060	.lookup = proc_tgid_base_lookup,
				3061	.getattr = pid_getattr,
				3062	.setattr = proc_setattr,
				3063	.permission = proc_pid_permission,
				3064	};
				3065
				3066	static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
				3067	{
				3068	struct dentry dentry, leader, *dir;
				3069	char buf[PROC_NUMBUF];
				3070	struct qstr name;
				3071
				3072	name.name = buf;
				3073	name.len = snprintf(buf, sizeof(buf), "%d", pid);
				3074	/* no ->d_hash() rejects on procfs */
				3075	dentry = d_hash_and_lookup(mnt->mnt_root, &name);
				3076	if (dentry) {
				3077	d_invalidate(dentry);
				3078	dput(dentry);
				3079	}
				3080
				3081	if (pid == tgid)
				3082	return;
				3083
				3084	name.name = buf;
				3085	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
				3086	leader = d_hash_and_lookup(mnt->mnt_root, &name);
				3087	if (!leader)
				3088	goto out;
				3089
				3090	name.name = "task";
				3091	name.len = strlen(name.name);
				3092	dir = d_hash_and_lookup(leader, &name);
				3093	if (!dir)
				3094	goto out_put_leader;
				3095
				3096	name.name = buf;
				3097	name.len = snprintf(buf, sizeof(buf), "%d", pid);
				3098	dentry = d_hash_and_lookup(dir, &name);
				3099	if (dentry) {
				3100	d_invalidate(dentry);
				3101	dput(dentry);
				3102	}
				3103
				3104	dput(dir);
				3105	out_put_leader:
				3106	dput(leader);
				3107	out:
				3108	return;
				3109	}
				3110
				3111	/**
				3112	* proc_flush_task - Remove dcache entries for @task from the /proc dcache.
				3113	* @task: task that should be flushed.
				3114	*
				3115	* When flushing dentries from proc, one needs to flush them from global
				3116	* proc (proc_mnt) and from all the namespaces' procs this task was seen
				3117	* in. This call is supposed to do all of this job.
				3118	*
				3119	* Looks in the dcache for
				3120	* /proc/@pid
				3121	* /proc/@tgid/task/@pid
				3122	* if either directory is present flushes it and all of it'ts children
				3123	* from the dcache.
				3124	*
				3125	* It is safe and reasonable to cache /proc entries for a task until
				3126	* that task exits. After that they just clog up the dcache with
				3127	* useless entries, possibly causing useful dcache entries to be
				3128	* flushed instead. This routine is proved to flush those useless
				3129	* dcache entries at process exit time.
				3130	*
				3131	* NOTE: This routine is just an optimization so it does not guarantee
				3132	* that no dcache entries will exist at process exit time it
				3133	* just makes it very unlikely that any will persist.
				3134	*/
				3135
				3136	void proc_flush_task(struct task_struct *task)
				3137	{
				3138	int i;
				3139	struct pid pid, tgid;
				3140	struct upid *upid;
				3141
				3142	pid = task_pid(task);
				3143	tgid = task_tgid(task);
				3144
				3145	for (i = 0; i <= pid->level; i++) {
				3146	upid = &pid->numbers[i];
				3147	proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
				3148	tgid->numbers[i].nr);
				3149	}
				3150	}
				3151
				3152	static int proc_pid_instantiate(struct inode *dir,
				3153	struct dentry * dentry,
				3154	struct task_struct task, const void ptr)
				3155	{
				3156	struct inode *inode;
				3157
				3158	inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR \| S_IRUGO \| S_IXUGO);
				3159	if (!inode)
				3160	goto out;
				3161
				3162	inode->i_op = &proc_tgid_base_inode_operations;
				3163	inode->i_fop = &proc_tgid_base_operations;
				3164	inode->i_flags\|=S_IMMUTABLE;
				3165
				3166	set_nlink(inode, nlink_tgid);
				3167
				3168	d_set_d_op(dentry, &pid_dentry_operations);
				3169
				3170	d_add(dentry, inode);
				3171	/* Close the race of the process dying before we return the dentry */
				3172	if (pid_revalidate(dentry, 0))
				3173	return 0;
				3174	out:
				3175	return -ENOENT;
				3176	}
				3177
				3178	struct dentry proc_pid_lookup(struct inode dir, struct dentry * dentry, unsigned int flags)
				3179	{
				3180	int result = -ENOENT;
				3181	struct task_struct *task;
				3182	unsigned tgid;
				3183	struct pid_namespace *ns;
				3184
				3185	tgid = name_to_int(&dentry->d_name);
				3186	if (tgid == ~0U)
				3187	goto out;
				3188
				3189	ns = dentry->d_sb->s_fs_info;
				3190	rcu_read_lock();
				3191	task = find_task_by_pid_ns(tgid, ns);
				3192	if (task)
				3193	get_task_struct(task);
				3194	rcu_read_unlock();
				3195	if (!task)
				3196	goto out;
				3197
				3198	result = proc_pid_instantiate(dir, dentry, task, NULL);
				3199	put_task_struct(task);
				3200	out:
				3201	return ERR_PTR(result);
				3202	}
				3203
				3204	/*
				3205	* Find the first task with tgid >= tgid
				3206	*
				3207	*/
				3208	struct tgid_iter {
				3209	unsigned int tgid;
				3210	struct task_struct *task;
				3211	};
				3212	static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
				3213	{
				3214	struct pid *pid;
				3215
				3216	if (iter.task)
				3217	put_task_struct(iter.task);
				3218	rcu_read_lock();
				3219	retry:
				3220	iter.task = NULL;
				3221	pid = find_ge_pid(iter.tgid, ns);
				3222	if (pid) {
				3223	iter.tgid = pid_nr_ns(pid, ns);
				3224	iter.task = pid_task(pid, PIDTYPE_PID);
				3225	/* What we to know is if the pid we have find is the
				3226	* pid of a thread_group_leader. Testing for task
				3227	* being a thread_group_leader is the obvious thing
				3228	* todo but there is a window when it fails, due to
				3229	* the pid transfer logic in de_thread.
				3230	*
				3231	* So we perform the straight forward test of seeing
				3232	* if the pid we have found is the pid of a thread
				3233	* group leader, and don't worry if the task we have
				3234	* found doesn't happen to be a thread group leader.
				3235	* As we don't care in the case of readdir.
				3236	*/
				3237	if (!iter.task \|\| !has_group_leader_pid(iter.task)) {
				3238	iter.tgid += 1;
				3239	goto retry;
				3240	}
				3241	get_task_struct(iter.task);
				3242	}
				3243	rcu_read_unlock();
				3244	return iter;
				3245	}
				3246
				3247	#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
				3248
				3249	/* for the /proc/ directory itself, after non-process stuff has been done */
				3250	int proc_pid_readdir(struct file file, struct dir_context ctx)
				3251	{
				3252	struct tgid_iter iter;
				3253	struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info;
				3254	loff_t pos = ctx->pos;
				3255
				3256	if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
				3257	return 0;
				3258
				3259	if (pos == TGID_OFFSET - 2) {
				3260	struct inode *inode = d_inode(ns->proc_self);
				3261	if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
				3262	return 0;
				3263	ctx->pos = pos = pos + 1;
				3264	}
				3265	if (pos == TGID_OFFSET - 1) {
				3266	struct inode *inode = d_inode(ns->proc_thread_self);
				3267	if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
				3268	return 0;
				3269	ctx->pos = pos = pos + 1;
				3270	}
				3271	iter.tgid = pos - TGID_OFFSET;
				3272	iter.task = NULL;
				3273	for (iter = next_tgid(ns, iter);
				3274	iter.task;
				3275	iter.tgid += 1, iter = next_tgid(ns, iter)) {
				3276	char name[PROC_NUMBUF];
				3277	int len;
				3278
				3279	cond_resched();
				3280	if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
				3281	continue;
				3282
				3283	len = snprintf(name, sizeof(name), "%d", iter.tgid);
				3284	ctx->pos = iter.tgid + TGID_OFFSET;
				3285	if (!proc_fill_cache(file, ctx, name, len,
				3286	proc_pid_instantiate, iter.task, NULL)) {
				3287	put_task_struct(iter.task);
				3288	return 0;
				3289	}
				3290	}
				3291	ctx->pos = PID_MAX_LIMIT + TGID_OFFSET;
				3292	return 0;
				3293	}
				3294
				3295	/*
				3296	* proc_tid_comm_permission is a special permission function exclusively
				3297	* used for the node /proc/<pid>/task/<tid>/comm.
				3298	* It bypasses generic permission checks in the case where a task of the same
				3299	* task group attempts to access the node.
				3300	* The rationale behind this is that glibc and bionic access this node for
				3301	* cross thread naming (pthread_set/getname_np(!self)). However, if
				3302	* PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
				3303	* which locks out the cross thread naming implementation.
				3304	* This function makes sure that the node is always accessible for members of
				3305	* same thread group.
				3306	*/
				3307	static int proc_tid_comm_permission(struct inode *inode, int mask)
				3308	{
				3309	bool is_same_tgroup;
				3310	struct task_struct *task;
				3311
				3312	task = get_proc_task(inode);
				3313	if (!task)
				3314	return -ESRCH;
				3315	is_same_tgroup = same_thread_group(current, task);
				3316	put_task_struct(task);
				3317
				3318	if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
				3319	/* This file (/proc/<pid>/task/<tid>/comm) can always be
				3320	* read or written by the members of the corresponding
				3321	* thread group.
				3322	*/
				3323	return 0;
				3324	}
				3325
				3326	return generic_permission(inode, mask);
				3327	}
				3328
				3329	static const struct inode_operations proc_tid_comm_inode_operations = {
				3330	.permission = proc_tid_comm_permission,
				3331	};
				3332
				3333	/*
				3334	* Tasks
				3335	*/
				3336	static const struct pid_entry tid_base_stuff[] = {
				3337	DIR("fd", S_IRUSR\|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
				3338	DIR("fdinfo", S_IRUSR\|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
				3339	DIR("ns", S_IRUSR\|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
				3340	#ifdef CONFIG_NET
				3341	DIR("net", S_IRUGO\|S_IXUGO, proc_net_inode_operations, proc_net_operations),
				3342	#endif
				3343	REG("environ", S_IRUSR, proc_environ_operations),
				3344	REG("auxv", S_IRUSR, proc_auxv_operations),
				3345	ONE("status", S_IRUGO, proc_pid_status),
				3346	ONE("personality", S_IRUSR, proc_pid_personality),
				3347	ONE("limits", S_IRUGO, proc_pid_limits),
				3348	#ifdef CONFIG_SCHED_DEBUG
				3349	REG("sched", S_IRUGO\|S_IWUSR, proc_pid_sched_operations),
				3350	#endif
				3351	NOD("comm", S_IFREG\|S_IRUGO\|S_IWUSR,
				3352	&proc_tid_comm_inode_operations,
				3353	&proc_pid_set_comm_operations, {}),
				3354	#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
				3355	ONE("syscall", S_IRUSR, proc_pid_syscall),
				3356	#endif
				3357	REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
				3358	ONE("stat", S_IRUGO, proc_tid_stat),
				3359	ONE("statm", S_IRUGO, proc_pid_statm),
				3360	REG("maps", S_IRUGO, proc_tid_maps_operations),
				3361	#ifdef CONFIG_PROC_CHILDREN
				3362	REG("children", S_IRUGO, proc_tid_children_operations),
				3363	#endif
				3364	#ifdef CONFIG_NUMA
				3365	REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
				3366	#endif
				3367	REG("mem", S_IRUSR\|S_IWUSR, proc_mem_operations),
				3368	LNK("cwd", proc_cwd_link),
				3369	LNK("root", proc_root_link),
				3370	LNK("exe", proc_exe_link),
				3371	REG("mounts", S_IRUGO, proc_mounts_operations),
				3372	REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
				3373	#ifdef CONFIG_PROC_PAGE_MONITOR
				3374	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
				3375	REG("smaps", S_IRUGO, proc_tid_smaps_operations),
				3376	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
				3377	REG("pagemap", S_IRUSR, proc_pagemap_operations),
				3378	#endif
				3379	#ifdef CONFIG_SECURITY
				3380	DIR("attr", S_IRUGO\|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
				3381	#endif
				3382	#ifdef CONFIG_KALLSYMS
				3383	ONE("wchan", S_IRUGO, proc_pid_wchan),
				3384	#endif
				3385	#ifdef CONFIG_STACKTRACE
				3386	ONE("stack", S_IRUSR, proc_pid_stack),
				3387	#endif
				3388	#ifdef CONFIG_SCHED_INFO
				3389	ONE("schedstat", S_IRUGO, proc_pid_schedstat),
				3390	#endif
				3391	#ifdef CONFIG_LATENCYTOP
				3392	REG("latency", S_IRUGO, proc_lstats_operations),
				3393	#endif
				3394	#ifdef CONFIG_PROC_PID_CPUSET
				3395	ONE("cpuset", S_IRUGO, proc_cpuset_show),
				3396	#endif
				3397	#ifdef CONFIG_CGROUPS
				3398	ONE("cgroup", S_IRUGO, proc_cgroup_show),
				3399	#endif
				3400	ONE("oom_score", S_IRUGO, proc_oom_score),
				3401	REG("oom_adj", S_IRUGO\|S_IWUSR, proc_oom_adj_operations),
				3402	REG("oom_score_adj", S_IRUGO\|S_IWUSR, proc_oom_score_adj_operations),
				3403	#ifdef CONFIG_AUDITSYSCALL
				3404	REG("loginuid", S_IWUSR\|S_IRUGO, proc_loginuid_operations),
				3405	REG("sessionid", S_IRUGO, proc_sessionid_operations),
				3406	#endif
				3407	#ifdef CONFIG_FAULT_INJECTION
				3408	REG("make-it-fail", S_IRUGO\|S_IWUSR, proc_fault_inject_operations),
				3409	REG("fail-nth", 0644, proc_fail_nth_operations),
				3410	#endif
				3411	#ifdef CONFIG_TASK_IO_ACCOUNTING
				3412	ONE("io", S_IRUSR, proc_tid_io_accounting),
				3413	#endif
				3414	#ifdef CONFIG_HARDWALL
				3415	ONE("hardwall", S_IRUGO, proc_pid_hardwall),
				3416	#endif
				3417	#ifdef CONFIG_USER_NS
				3418	REG("uid_map", S_IRUGO\|S_IWUSR, proc_uid_map_operations),
				3419	REG("gid_map", S_IRUGO\|S_IWUSR, proc_gid_map_operations),
				3420	REG("projid_map", S_IRUGO\|S_IWUSR, proc_projid_map_operations),
				3421	REG("setgroups", S_IRUGO\|S_IWUSR, proc_setgroups_operations),
				3422	#endif
				3423	#ifdef CONFIG_LIVEPATCH
				3424	ONE("patch_state", S_IRUSR, proc_pid_patch_state),
				3425	#endif
				3426	#ifdef CONFIG_CPU_FREQ_TIMES
				3427	ONE("time_in_state", 0444, proc_time_in_state_show),
				3428	#endif
				3429	};
				3430
				3431	static int proc_tid_base_readdir(struct file file, struct dir_context ctx)
				3432	{
				3433	return proc_pident_readdir(file, ctx,
				3434	tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
				3435	}
				3436
				3437	static struct dentry proc_tid_base_lookup(struct inode dir, struct dentry *dentry, unsigned int flags)
				3438	{
				3439	return proc_pident_lookup(dir, dentry,
				3440	tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
				3441	}
				3442
				3443	static const struct file_operations proc_tid_base_operations = {
				3444	.read = generic_read_dir,
				3445	.iterate_shared = proc_tid_base_readdir,
				3446	.llseek = generic_file_llseek,
				3447	};
				3448
				3449	static const struct inode_operations proc_tid_base_inode_operations = {
				3450	.lookup = proc_tid_base_lookup,
				3451	.getattr = pid_getattr,
				3452	.setattr = proc_setattr,
				3453	};
				3454
				3455	static int proc_task_instantiate(struct inode *dir,
				3456	struct dentry dentry, struct task_struct task, const void *ptr)
				3457	{
				3458	struct inode *inode;
				3459	inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR \| S_IRUGO \| S_IXUGO);
				3460
				3461	if (!inode)
				3462	goto out;
				3463	inode->i_op = &proc_tid_base_inode_operations;
				3464	inode->i_fop = &proc_tid_base_operations;
				3465	inode->i_flags\|=S_IMMUTABLE;
				3466
				3467	set_nlink(inode, nlink_tid);
				3468
				3469	d_set_d_op(dentry, &pid_dentry_operations);
				3470
				3471	d_add(dentry, inode);
				3472	/* Close the race of the process dying before we return the dentry */
				3473	if (pid_revalidate(dentry, 0))
				3474	return 0;
				3475	out:
				3476	return -ENOENT;
				3477	}
				3478
				3479	static struct dentry proc_task_lookup(struct inode dir, struct dentry * dentry, unsigned int flags)
				3480	{
				3481	int result = -ENOENT;
				3482	struct task_struct *task;
				3483	struct task_struct *leader = get_proc_task(dir);
				3484	unsigned tid;
				3485	struct pid_namespace *ns;
				3486
				3487	if (!leader)
				3488	goto out_no_task;
				3489
				3490	tid = name_to_int(&dentry->d_name);
				3491	if (tid == ~0U)
				3492	goto out;
				3493
				3494	ns = dentry->d_sb->s_fs_info;
				3495	rcu_read_lock();
				3496	task = find_task_by_pid_ns(tid, ns);
				3497	if (task)
				3498	get_task_struct(task);
				3499	rcu_read_unlock();
				3500	if (!task)
				3501	goto out;
				3502	if (!same_thread_group(leader, task))
				3503	goto out_drop_task;
				3504
				3505	result = proc_task_instantiate(dir, dentry, task, NULL);
				3506	out_drop_task:
				3507	put_task_struct(task);
				3508	out:
				3509	put_task_struct(leader);
				3510	out_no_task:
				3511	return ERR_PTR(result);
				3512	}
				3513
				3514	/*
				3515	* Find the first tid of a thread group to return to user space.
				3516	*
				3517	* Usually this is just the thread group leader, but if the users
				3518	* buffer was too small or there was a seek into the middle of the
				3519	* directory we have more work todo.
				3520	*
				3521	* In the case of a short read we start with find_task_by_pid.
				3522	*
				3523	* In the case of a seek we start with the leader and walk nr
				3524	* threads past it.
				3525	*/
				3526	static struct task_struct first_tid(struct pid pid, int tid, loff_t f_pos,
				3527	struct pid_namespace *ns)
				3528	{
				3529	struct task_struct pos, task;
				3530	unsigned long nr = f_pos;
				3531
				3532	if (nr != f_pos) /* 32bit overflow? */
				3533	return NULL;
				3534
				3535	rcu_read_lock();
				3536	task = pid_task(pid, PIDTYPE_PID);
				3537	if (!task)
				3538	goto fail;
				3539
				3540	/* Attempt to start with the tid of a thread */
				3541	if (tid && nr) {
				3542	pos = find_task_by_pid_ns(tid, ns);
				3543	if (pos && same_thread_group(pos, task))
				3544	goto found;
				3545	}
				3546
				3547	/* If nr exceeds the number of threads there is nothing todo */
				3548	if (nr >= get_nr_threads(task))
				3549	goto fail;
				3550
				3551	/* If we haven't found our starting place yet start
				3552	* with the leader and walk nr threads forward.
				3553	*/
				3554	pos = task = task->group_leader;
				3555	do {
				3556	if (!nr--)
				3557	goto found;
				3558	} while_each_thread(task, pos);
				3559	fail:
				3560	pos = NULL;
				3561	goto out;
				3562	found:
				3563	get_task_struct(pos);
				3564	out:
				3565	rcu_read_unlock();
				3566	return pos;
				3567	}
				3568
				3569	/*
				3570	* Find the next thread in the thread list.
				3571	* Return NULL if there is an error or no next thread.
				3572	*
				3573	* The reference to the input task_struct is released.
				3574	*/
				3575	static struct task_struct next_tid(struct task_struct start)
				3576	{
				3577	struct task_struct *pos = NULL;
				3578	rcu_read_lock();
				3579	if (pid_alive(start)) {
				3580	pos = next_thread(start);
				3581	if (thread_group_leader(pos))
				3582	pos = NULL;
				3583	else
				3584	get_task_struct(pos);
				3585	}
				3586	rcu_read_unlock();
				3587	put_task_struct(start);
				3588	return pos;
				3589	}
				3590
				3591	/* for the /proc/TGID/task/ directories */
				3592	static int proc_task_readdir(struct file file, struct dir_context ctx)
				3593	{
				3594	struct inode *inode = file_inode(file);
				3595	struct task_struct *task;
				3596	struct pid_namespace *ns;
				3597	int tid;
				3598
				3599	if (proc_inode_is_dead(inode))
				3600	return -ENOENT;
				3601
				3602	if (!dir_emit_dots(file, ctx))
				3603	return 0;
				3604
				3605	/* f_version caches the tgid value that the last readdir call couldn't
				3606	* return. lseek aka telldir automagically resets f_version to 0.
				3607	*/
				3608	ns = inode->i_sb->s_fs_info;
				3609	tid = (int)file->f_version;
				3610	file->f_version = 0;
				3611	for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
				3612	task;
				3613	task = next_tid(task), ctx->pos++) {
				3614	char name[PROC_NUMBUF];
				3615	int len;
				3616	tid = task_pid_nr_ns(task, ns);
				3617	len = snprintf(name, sizeof(name), "%d", tid);
				3618	if (!proc_fill_cache(file, ctx, name, len,
				3619	proc_task_instantiate, task, NULL)) {
				3620	/* returning this tgid failed, save it as the first
				3621	* pid for the next readir call */
				3622	file->f_version = (u64)tid;
				3623	put_task_struct(task);
				3624	break;
				3625	}
				3626	}
				3627
				3628	return 0;
				3629	}
				3630
				3631	static int proc_task_getattr(const struct path path, struct kstat stat,
				3632	u32 request_mask, unsigned int query_flags)
				3633	{
				3634	struct inode *inode = d_inode(path->dentry);
				3635	struct task_struct *p = get_proc_task(inode);
				3636	generic_fillattr(inode, stat);
				3637
				3638	if (p) {
				3639	stat->nlink += get_nr_threads(p);
				3640	put_task_struct(p);
				3641	}
				3642
				3643	return 0;
				3644	}
				3645
				3646	static const struct inode_operations proc_task_inode_operations = {
				3647	.lookup = proc_task_lookup,
				3648	.getattr = proc_task_getattr,
				3649	.setattr = proc_setattr,
				3650	.permission = proc_pid_permission,
				3651	};
				3652
				3653	static const struct file_operations proc_task_operations = {
				3654	.read = generic_read_dir,
				3655	.iterate_shared = proc_task_readdir,
				3656	.llseek = generic_file_llseek,
				3657	};
				3658
				3659	void __init set_proc_pid_nlink(void)
				3660	{
				3661	nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
				3662	nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
				3663	}