Blame - ap/os/linux/linux-3.4.x/fs/proc/base.c - T106_DC

blob: dbf2283e67e6697f2f237c1420c52f40fd95a67d [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/*
				2	* linux/fs/proc/base.c
				3	*
				4	* Copyright (C) 1991, 1992 Linus Torvalds
				5	*
				6	* proc base directory handling functions
				7	*
				8	* 1999, Al Viro. Rewritten. Now it covers the whole per-process part.
				9	* Instead of using magical inumbers to determine the kind of object
				10	* we allocate and fill in-core inodes upon lookup. They don't even
				11	* go into icache. We cache the reference to task_struct upon lookup too.
				12	* Eventually it should become a filesystem in its own. We don't use the
				13	* rest of procfs anymore.
				14	*
				15	*
				16	* Changelog:
				17	* 17-Jan-2005
				18	* Allan Bezerra
				19	* Bruna Moreira <bruna.moreira@indt.org.br>
				20	* Edjard Mota <edjard.mota@indt.org.br>
				21	* Ilias Biris <ilias.biris@indt.org.br>
				22	* Mauricio Lin <mauricio.lin@indt.org.br>
				23	*
				24	* Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
				25	*
				26	* A new process specific entry (smaps) included in /proc. It shows the
				27	* size of rss for each memory area. The maps entry lacks information
				28	* about physical memory size (rss) for each mapped file, i.e.,
				29	* rss information for executables and library files.
				30	* This additional information is useful for any tools that need to know
				31	* about physical memory consumption for a process specific library.
				32	*
				33	* Changelog:
				34	* 21-Feb-2005
				35	* Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
				36	* Pud inclusion in the page table walking.
				37	*
				38	* ChangeLog:
				39	* 10-Mar-2005
				40	* 10LE Instituto Nokia de Tecnologia - INdT:
				41	* A better way to walks through the page table as suggested by Hugh Dickins.
				42	*
				43	* Simo Piiroinen <simo.piiroinen@nokia.com>:
				44	* Smaps information related to shared, private, clean and dirty pages.
				45	*
				46	* Paul Mundt <paul.mundt@nokia.com>:
				47	* Overall revision about smaps.
				48	*/
				49
				50	#include <asm/uaccess.h>
				51
				52	#include <linux/errno.h>
				53	#include <linux/time.h>
				54	#include <linux/proc_fs.h>
				55	#include <linux/stat.h>
				56	#include <linux/task_io_accounting_ops.h>
				57	#include <linux/init.h>
				58	#include <linux/capability.h>
				59	#include <linux/file.h>
				60	#include <linux/fdtable.h>
				61	#include <linux/string.h>
				62	#include <linux/seq_file.h>
				63	#include <linux/namei.h>
				64	#include <linux/mnt_namespace.h>
				65	#include <linux/mm.h>
				66	#include <linux/swap.h>
				67	#include <linux/rcupdate.h>
				68	#include <linux/kallsyms.h>
				69	#include <linux/stacktrace.h>
				70	#include <linux/resource.h>
				71	#include <linux/module.h>
				72	#include <linux/mount.h>
				73	#include <linux/security.h>
				74	#include <linux/ptrace.h>
				75	#include <linux/tracehook.h>
				76	#include <linux/cgroup.h>
				77	#include <linux/cpuset.h>
				78	#include <linux/audit.h>
				79	#include <linux/poll.h>
				80	#include <linux/nsproxy.h>
				81	#include <linux/oom.h>
				82	#include <linux/elf.h>
				83	#include <linux/pid_namespace.h>
				84	#include <linux/fs_struct.h>
				85	#include <linux/slab.h>
				86	#include <linux/flex_array.h>
				87	#ifdef CONFIG_HARDWALL
				88	#include <asm/hardwall.h>
				89	#endif
				90	#include <trace/events/oom.h>
				91	#include "internal.h"
				92
				93	/* NOTE:
				94	* Implementing inode permission operations in /proc is almost
				95	* certainly an error. Permission checks need to happen during
				96	* each system call not at open time. The reason is that most of
				97	* what we wish to check for permissions in /proc varies at runtime.
				98	*
				99	* The classic example of a problem is opening file descriptors
				100	* in /proc for a task before it execs a suid executable.
				101	*/
				102
				103	struct pid_entry {
				104	char *name;
				105	int len;
				106	umode_t mode;
				107	const struct inode_operations *iop;
				108	const struct file_operations *fop;
				109	union proc_op op;
				110	};
				111
				112	#define NOD(NAME, MODE, IOP, FOP, OP) { \
				113	.name = (NAME), \
				114	.len = sizeof(NAME) - 1, \
				115	.mode = MODE, \
				116	.iop = IOP, \
				117	.fop = FOP, \
				118	.op = OP, \
				119	}
				120
				121	#define DIR(NAME, MODE, iops, fops) \
				122	NOD(NAME, (S_IFDIR\|(MODE)), &iops, &fops, {} )
				123	#define LNK(NAME, get_link) \
				124	NOD(NAME, (S_IFLNK\|S_IRWXUGO), \
				125	&proc_pid_link_inode_operations, NULL, \
				126	{ .proc_get_link = get_link } )
				127	#define REG(NAME, MODE, fops) \
				128	NOD(NAME, (S_IFREG\|(MODE)), NULL, &fops, {})
				129	#define INF(NAME, MODE, read) \
				130	NOD(NAME, (S_IFREG\|(MODE)), \
				131	NULL, &proc_info_file_operations, \
				132	{ .proc_read = read } )
				133	#define ONE(NAME, MODE, show) \
				134	NOD(NAME, (S_IFREG\|(MODE)), \
				135	NULL, &proc_single_file_operations, \
				136	{ .proc_show = show } )
				137
				138	static int proc_fd_permission(struct inode *inode, int mask);
				139
				140	/* ANDROID is for special files in /proc. */
				141	#define ANDROID(NAME, MODE, OTYPE) \
				142	NOD(NAME, (S_IFREG\|(MODE)), \
				143	&proc_##OTYPE##_inode_operations, \
				144	&proc_##OTYPE##_operations, {})
				145
				146	/*
				147	* Count the number of hardlinks for the pid_entry table, excluding the .
				148	* and .. links.
				149	*/
				150	static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
				151	unsigned int n)
				152	{
				153	unsigned int i;
				154	unsigned int count;
				155
				156	count = 0;
				157	for (i = 0; i < n; ++i) {
				158	if (S_ISDIR(entries[i].mode))
				159	++count;
				160	}
				161
				162	return count;
				163	}
				164
				165	static int get_task_root(struct task_struct task, struct path root)
				166	{
				167	int result = -ENOENT;
				168
				169	task_lock(task);
				170	if (task->fs) {
				171	get_fs_root(task->fs, root);
				172	result = 0;
				173	}
				174	task_unlock(task);
				175	return result;
				176	}
				177
				178	static int proc_cwd_link(struct dentry dentry, struct path path)
				179	{
				180	struct task_struct *task = get_proc_task(dentry->d_inode);
				181	int result = -ENOENT;
				182
				183	if (task) {
				184	task_lock(task);
				185	if (task->fs) {
				186	get_fs_pwd(task->fs, path);
				187	result = 0;
				188	}
				189	task_unlock(task);
				190	put_task_struct(task);
				191	}
				192	return result;
				193	}
				194
				195	static int proc_root_link(struct dentry dentry, struct path path)
				196	{
				197	struct task_struct *task = get_proc_task(dentry->d_inode);
				198	int result = -ENOENT;
				199
				200	if (task) {
				201	result = get_task_root(task, path);
				202	put_task_struct(task);
				203	}
				204	return result;
				205	}
				206
				207	struct mm_struct mm_for_maps(struct task_struct task)
				208	{
				209	return mm_access(task, PTRACE_MODE_READ);
				210	}
				211
				212	static int proc_pid_cmdline(struct task_struct task, char buffer)
				213	{
				214	int res = 0;
				215	unsigned int len;
				216	struct mm_struct *mm = get_task_mm(task);
				217	if (!mm)
				218	goto out;
				219	if (!mm->arg_end)
				220	goto out_mm; /* Shh! No looking before we're done */
				221
				222	len = mm->arg_end - mm->arg_start;
				223
				224	if (len > PAGE_SIZE)
				225	len = PAGE_SIZE;
				226
				227	res = access_process_vm(task, mm->arg_start, buffer, len, 0);
				228
				229	// If the nul at the end of args has been overwritten, then
				230	// assume application is using setproctitle(3).
				231	if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
				232	len = strnlen(buffer, res);
				233	if (len < res) {
				234	res = len;
				235	} else {
				236	len = mm->env_end - mm->env_start;
				237	if (len > PAGE_SIZE - res)
				238	len = PAGE_SIZE - res;
				239	res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
				240	res = strnlen(buffer, res);
				241	}
				242	}
				243	out_mm:
				244	mmput(mm);
				245	out:
				246	return res;
				247	}
				248
				249	static int proc_pid_auxv(struct task_struct task, char buffer)
				250	{
				251	struct mm_struct *mm = mm_for_maps(task);
				252	int res = PTR_ERR(mm);
				253	if (mm && !IS_ERR(mm)) {
				254	unsigned int nwords = 0;
				255	do {
				256	nwords += 2;
				257	} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
				258	res = nwords * sizeof(mm->saved_auxv[0]);
				259	if (res > PAGE_SIZE)
				260	res = PAGE_SIZE;
				261	memcpy(buffer, mm->saved_auxv, res);
				262	mmput(mm);
				263	}
				264	return res;
				265	}
				266
				267
				268	#ifdef CONFIG_KALLSYMS
				269	/*
				270	* Provides a wchan file via kallsyms in a proper one-value-per-file format.
				271	* Returns the resolved symbol. If that fails, simply return the address.
				272	*/
				273	static int proc_pid_wchan(struct task_struct task, char buffer)
				274	{
				275	unsigned long wchan;
				276	char symname[KSYM_NAME_LEN];
				277
				278	wchan = get_wchan(task);
				279
				280	if (lookup_symbol_name(wchan, symname) < 0)
				281	if (!ptrace_may_access(task, PTRACE_MODE_READ))
				282	return 0;
				283	else
				284	return sprintf(buffer, "%lu", wchan);
				285	else
				286	return sprintf(buffer, "%s", symname);
				287	}
				288	#endif /* CONFIG_KALLSYMS */
				289
				290	static int lock_trace(struct task_struct *task)
				291	{
				292	int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
				293	if (err)
				294	return err;
				295	if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
				296	mutex_unlock(&task->signal->cred_guard_mutex);
				297	return -EPERM;
				298	}
				299	return 0;
				300	}
				301
				302	static void unlock_trace(struct task_struct *task)
				303	{
				304	mutex_unlock(&task->signal->cred_guard_mutex);
				305	}
				306
				307	#ifdef CONFIG_STACKTRACE
				308
				309	#define MAX_STACK_TRACE_DEPTH 64
				310
				311	static int proc_pid_stack(struct seq_file m, struct pid_namespace ns,
				312	struct pid pid, struct task_struct task)
				313	{
				314	struct stack_trace trace;
				315	unsigned long *entries;
				316	int err;
				317	int i;
				318
				319	entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
				320	if (!entries)
				321	return -ENOMEM;
				322
				323	trace.nr_entries = 0;
				324	trace.max_entries = MAX_STACK_TRACE_DEPTH;
				325	trace.entries = entries;
				326	trace.skip = 0;
				327
				328	err = lock_trace(task);
				329	if (!err) {
				330	save_stack_trace_tsk(task, &trace);
				331
				332	for (i = 0; i < trace.nr_entries; i++) {
				333	seq_printf(m, "[<%pK>] %pS\n",
				334	(void )entries[i], (void )entries[i]);
				335	}
				336	unlock_trace(task);
				337	}
				338	kfree(entries);
				339
				340	return err;
				341	}
				342	#endif
				343
				344	#ifdef CONFIG_SCHEDSTATS
				345	/*
				346	* Provides /proc/PID/schedstat
				347	*/
				348	static int proc_pid_schedstat(struct task_struct task, char buffer)
				349	{
				350	return sprintf(buffer, "%llu %llu %lu\n",
				351	(unsigned long long)task->se.sum_exec_runtime,
				352	(unsigned long long)task->sched_info.run_delay,
				353	task->sched_info.pcount);
				354	}
				355	#endif
				356
				357	#ifdef CONFIG_LATENCYTOP
				358	static int lstats_show_proc(struct seq_file m, void v)
				359	{
				360	int i;
				361	struct inode *inode = m->private;
				362	struct task_struct *task = get_proc_task(inode);
				363
				364	if (!task)
				365	return -ESRCH;
				366	seq_puts(m, "Latency Top version : v0.1\n");
				367	for (i = 0; i < 32; i++) {
				368	struct latency_record *lr = &task->latency_record[i];
				369	if (lr->backtrace[0]) {
				370	int q;
				371	seq_printf(m, "%i %li %li",
				372	lr->count, lr->time, lr->max);
				373	for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
				374	unsigned long bt = lr->backtrace[q];
				375	if (!bt)
				376	break;
				377	if (bt == ULONG_MAX)
				378	break;
				379	seq_printf(m, " %ps", (void *)bt);
				380	}
				381	seq_putc(m, '\n');
				382	}
				383
				384	}
				385	put_task_struct(task);
				386	return 0;
				387	}
				388
				389	static int lstats_open(struct inode inode, struct file file)
				390	{
				391	return single_open(file, lstats_show_proc, inode);
				392	}
				393
				394	static ssize_t lstats_write(struct file file, const char __user buf,
				395	size_t count, loff_t *offs)
				396	{
				397	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
				398
				399	if (!task)
				400	return -ESRCH;
				401	clear_all_latency_tracing(task);
				402	put_task_struct(task);
				403
				404	return count;
				405	}
				406
				407	static const struct file_operations proc_lstats_operations = {
				408	.open = lstats_open,
				409	.read = seq_read,
				410	.write = lstats_write,
				411	.llseek = seq_lseek,
				412	.release = single_release,
				413	};
				414
				415	#endif
				416
				417	static int proc_oom_score(struct task_struct task, char buffer)
				418	{
				419	unsigned long points = 0;
				420
				421	read_lock(&tasklist_lock);
				422	if (pid_alive(task))
				423	points = oom_badness(task, NULL, NULL,
				424	totalram_pages + total_swap_pages);
				425	read_unlock(&tasklist_lock);
				426	return sprintf(buffer, "%lu\n", points);
				427	}
				428
				429	struct limit_names {
				430	char *name;
				431	char *unit;
				432	};
				433
				434	static const struct limit_names lnames[RLIM_NLIMITS] = {
				435	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
				436	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
				437	[RLIMIT_DATA] = {"Max data size", "bytes"},
				438	[RLIMIT_STACK] = {"Max stack size", "bytes"},
				439	[RLIMIT_CORE] = {"Max core file size", "bytes"},
				440	[RLIMIT_RSS] = {"Max resident set", "bytes"},
				441	[RLIMIT_NPROC] = {"Max processes", "processes"},
				442	[RLIMIT_NOFILE] = {"Max open files", "files"},
				443	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
				444	[RLIMIT_AS] = {"Max address space", "bytes"},
				445	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
				446	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
				447	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
				448	[RLIMIT_NICE] = {"Max nice priority", NULL},
				449	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
				450	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
				451	};
				452
				453	/* Display limits for a process */
				454	static int proc_pid_limits(struct task_struct task, char buffer)
				455	{
				456	unsigned int i;
				457	int count = 0;
				458	unsigned long flags;
				459	char *bufptr = buffer;
				460
				461	struct rlimit rlim[RLIM_NLIMITS];
				462
				463	if (!lock_task_sighand(task, &flags))
				464	return 0;
				465	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
				466	unlock_task_sighand(task, &flags);
				467
				468	/*
				469	* print the file header
				470	*/
				471	count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
				472	"Limit", "Soft Limit", "Hard Limit", "Units");
				473
				474	for (i = 0; i < RLIM_NLIMITS; i++) {
				475	if (rlim[i].rlim_cur == RLIM_INFINITY)
				476	count += sprintf(&bufptr[count], "%-25s %-20s ",
				477	lnames[i].name, "unlimited");
				478	else
				479	count += sprintf(&bufptr[count], "%-25s %-20lu ",
				480	lnames[i].name, rlim[i].rlim_cur);
				481
				482	if (rlim[i].rlim_max == RLIM_INFINITY)
				483	count += sprintf(&bufptr[count], "%-20s ", "unlimited");
				484	else
				485	count += sprintf(&bufptr[count], "%-20lu ",
				486	rlim[i].rlim_max);
				487
				488	if (lnames[i].unit)
				489	count += sprintf(&bufptr[count], "%-10s\n",
				490	lnames[i].unit);
				491	else
				492	count += sprintf(&bufptr[count], "\n");
				493	}
				494
				495	return count;
				496	}
				497
				498	#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
				499	static int proc_pid_syscall(struct task_struct task, char buffer)
				500	{
				501	long nr;
				502	unsigned long args[6], sp, pc;
				503	int res = lock_trace(task);
				504	if (res)
				505	return res;
				506
				507	if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
				508	res = sprintf(buffer, "running\n");
				509	else if (nr < 0)
				510	res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
				511	else
				512	res = sprintf(buffer,
				513	"%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
				514	nr,
				515	args[0], args[1], args[2], args[3], args[4], args[5],
				516	sp, pc);
				517	unlock_trace(task);
				518	return res;
				519	}
				520	#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
				521
				522	/************************************************************************/
				523	/* Here the fs part begins */
				524	/************************************************************************/
				525
				526	/* permission checks */
				527	static int proc_fd_access_allowed(struct inode *inode)
				528	{
				529	struct task_struct *task;
				530	int allowed = 0;
				531	/* Allow access to a task's file descriptors if it is us or we
				532	* may use ptrace attach to the process and find out that
				533	* information.
				534	*/
				535	task = get_proc_task(inode);
				536	if (task) {
				537	allowed = ptrace_may_access(task, PTRACE_MODE_READ);
				538	put_task_struct(task);
				539	}
				540	return allowed;
				541	}
				542
				543	int proc_setattr(struct dentry dentry, struct iattr attr)
				544	{
				545	int error;
				546	struct inode *inode = dentry->d_inode;
				547
				548	if (attr->ia_valid & ATTR_MODE)
				549	return -EPERM;
				550
				551	error = inode_change_ok(inode, attr);
				552	if (error)
				553	return error;
				554
				555	if ((attr->ia_valid & ATTR_SIZE) &&
				556	attr->ia_size != i_size_read(inode)) {
				557	error = vmtruncate(inode, attr->ia_size);
				558	if (error)
				559	return error;
				560	}
				561
				562	setattr_copy(inode, attr);
				563	mark_inode_dirty(inode);
				564	return 0;
				565	}
				566
				567	/*
				568	* May current process learn task's sched/cmdline info (for hide_pid_min=1)
				569	* or euid/egid (for hide_pid_min=2)?
				570	*/
				571	static bool has_pid_permissions(struct pid_namespace *pid,
				572	struct task_struct *task,
				573	int hide_pid_min)
				574	{
				575	if (pid->hide_pid < hide_pid_min)
				576	return true;
				577	if (in_group_p(pid->pid_gid))
				578	return true;
				579	return ptrace_may_access(task, PTRACE_MODE_READ);
				580	}
				581
				582
				583	static int proc_pid_permission(struct inode *inode, int mask)
				584	{
				585	struct pid_namespace *pid = inode->i_sb->s_fs_info;
				586	struct task_struct *task;
				587	bool has_perms;
				588
				589	task = get_proc_task(inode);
				590	if (!task)
				591	return -ESRCH;
				592	has_perms = has_pid_permissions(pid, task, 1);
				593	put_task_struct(task);
				594
				595	if (!has_perms) {
				596	if (pid->hide_pid == 2) {
				597	/*
				598	* Let's make getdents(), stat(), and open()
				599	* consistent with each other. If a process
				600	* may not stat() a file, it shouldn't be seen
				601	* in procfs at all.
				602	*/
				603	return -ENOENT;
				604	}
				605
				606	return -EPERM;
				607	}
				608	return generic_permission(inode, mask);
				609	}
				610
				611
				612
				613	static const struct inode_operations proc_def_inode_operations = {
				614	.setattr = proc_setattr,
				615	};
				616
				617	#define PROC_BLOCK_SIZE (31024) / 4K page size but our output routines use some slack for overruns */
				618
				619	static ssize_t proc_info_read(struct file * file, char __user * buf,
				620	size_t count, loff_t *ppos)
				621	{
				622	struct inode * inode = file->f_path.dentry->d_inode;
				623	unsigned long page;
				624	ssize_t length;
				625	struct task_struct *task = get_proc_task(inode);
				626
				627	length = -ESRCH;
				628	if (!task)
				629	goto out_no_task;
				630
				631	if (count > PROC_BLOCK_SIZE)
				632	count = PROC_BLOCK_SIZE;
				633
				634	length = -ENOMEM;
				635	if (!(page = __get_free_page(GFP_TEMPORARY)))
				636	goto out;
				637
				638	length = PROC_I(inode)->op.proc_read(task, (char*)page);
				639
				640	if (length >= 0)
				641	length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
				642	free_page(page);
				643	out:
				644	put_task_struct(task);
				645	out_no_task:
				646	return length;
				647	}
				648
				649	static const struct file_operations proc_info_file_operations = {
				650	.read = proc_info_read,
				651	.llseek = generic_file_llseek,
				652	};
				653
				654	static int proc_single_show(struct seq_file m, void v)
				655	{
				656	struct inode *inode = m->private;
				657	struct pid_namespace *ns;
				658	struct pid *pid;
				659	struct task_struct *task;
				660	int ret;
				661
				662	ns = inode->i_sb->s_fs_info;
				663	pid = proc_pid(inode);
				664	task = get_pid_task(pid, PIDTYPE_PID);
				665	if (!task)
				666	return -ESRCH;
				667
				668	ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
				669
				670	put_task_struct(task);
				671	return ret;
				672	}
				673
				674	static int proc_single_open(struct inode inode, struct file filp)
				675	{
				676	return single_open(filp, proc_single_show, inode);
				677	}
				678
				679	static const struct file_operations proc_single_file_operations = {
				680	.open = proc_single_open,
				681	.read = seq_read,
				682	.llseek = seq_lseek,
				683	.release = single_release,
				684	};
				685
				686	static int mem_open(struct inode* inode, struct file* file)
				687	{
				688	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
				689	struct mm_struct *mm;
				690
				691	if (!task)
				692	return -ESRCH;
				693
				694	mm = mm_access(task, PTRACE_MODE_ATTACH);
				695	put_task_struct(task);
				696
				697	if (IS_ERR(mm))
				698	return PTR_ERR(mm);
				699
				700	if (mm) {
				701	/* ensure this mm_struct can't be freed */
				702	atomic_inc(&mm->mm_count);
				703	/* but do not pin its memory */
				704	mmput(mm);
				705	}
				706
				707	/* OK to pass negative loff_t, we can catch out-of-range */
				708	file->f_mode \|= FMODE_UNSIGNED_OFFSET;
				709	file->private_data = mm;
				710
				711	return 0;
				712	}
				713
				714	static ssize_t mem_rw(struct file file, char __user buf,
				715	size_t count, loff_t *ppos, int write)
				716	{
				717	struct mm_struct *mm = file->private_data;
				718	unsigned long addr = *ppos;
				719	ssize_t copied;
				720	char *page;
				721
				722	if (!mm)
				723	return 0;
				724
				725	page = (char *)__get_free_page(GFP_TEMPORARY);
				726	if (!page)
				727	return -ENOMEM;
				728
				729	copied = 0;
				730	if (!atomic_inc_not_zero(&mm->mm_users))
				731	goto free;
				732
				733	while (count > 0) {
				734	int this_len = min_t(int, count, PAGE_SIZE);
				735
				736	if (write && copy_from_user(page, buf, this_len)) {
				737	copied = -EFAULT;
				738	break;
				739	}
				740
				741	this_len = access_remote_vm(mm, addr, page, this_len, write);
				742	if (!this_len) {
				743	if (!copied)
				744	copied = -EIO;
				745	break;
				746	}
				747
				748	if (!write && copy_to_user(buf, page, this_len)) {
				749	copied = -EFAULT;
				750	break;
				751	}
				752
				753	buf += this_len;
				754	addr += this_len;
				755	copied += this_len;
				756	count -= this_len;
				757	}
				758	*ppos = addr;
				759
				760	mmput(mm);
				761	free:
				762	free_page((unsigned long) page);
				763	return copied;
				764	}
				765
				766	static ssize_t mem_read(struct file file, char __user buf,
				767	size_t count, loff_t *ppos)
				768	{
				769	return mem_rw(file, buf, count, ppos, 0);
				770	}
				771
				772	static ssize_t mem_write(struct file file, const char __user buf,
				773	size_t count, loff_t *ppos)
				774	{
				775	return mem_rw(file, (char __user*)buf, count, ppos, 1);
				776	}
				777
				778	loff_t mem_lseek(struct file *file, loff_t offset, int orig)
				779	{
				780	switch (orig) {
				781	case 0:
				782	file->f_pos = offset;
				783	break;
				784	case 1:
				785	file->f_pos += offset;
				786	break;
				787	default:
				788	return -EINVAL;
				789	}
				790	force_successful_syscall_return();
				791	return file->f_pos;
				792	}
				793
				794	static int mem_release(struct inode inode, struct file file)
				795	{
				796	struct mm_struct *mm = file->private_data;
				797	if (mm)
				798	mmdrop(mm);
				799	return 0;
				800	}
				801
				802	static const struct file_operations proc_mem_operations = {
				803	.llseek = mem_lseek,
				804	.read = mem_read,
				805	.write = mem_write,
				806	.open = mem_open,
				807	.release = mem_release,
				808	};
				809
				810	static ssize_t environ_read(struct file file, char __user buf,
				811	size_t count, loff_t *ppos)
				812	{
				813	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
				814	char *page;
				815	unsigned long src = *ppos;
				816	int ret = -ESRCH;
				817	struct mm_struct *mm;
				818
				819	if (!task)
				820	goto out_no_task;
				821
				822	ret = -ENOMEM;
				823	page = (char *)__get_free_page(GFP_TEMPORARY);
				824	if (!page)
				825	goto out;
				826
				827
				828	mm = mm_for_maps(task);
				829	ret = PTR_ERR(mm);
				830	if (!mm \|\| IS_ERR(mm))
				831	goto out_free;
				832
				833	ret = 0;
				834	while (count > 0) {
				835	int this_len, retval, max_len;
				836
				837	this_len = mm->env_end - (mm->env_start + src);
				838
				839	if (this_len <= 0)
				840	break;
				841
				842	max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
				843	this_len = (this_len > max_len) ? max_len : this_len;
				844
				845	retval = access_process_vm(task, (mm->env_start + src),
				846	page, this_len, 0);
				847
				848	if (retval <= 0) {
				849	ret = retval;
				850	break;
				851	}
				852
				853	if (copy_to_user(buf, page, retval)) {
				854	ret = -EFAULT;
				855	break;
				856	}
				857
				858	ret += retval;
				859	src += retval;
				860	buf += retval;
				861	count -= retval;
				862	}
				863	*ppos = src;
				864
				865	mmput(mm);
				866	out_free:
				867	free_page((unsigned long) page);
				868	out:
				869	put_task_struct(task);
				870	out_no_task:
				871	return ret;
				872	}
				873
				874	static const struct file_operations proc_environ_operations = {
				875	.read = environ_read,
				876	.llseek = generic_file_llseek,
				877	};
				878
				879	static ssize_t oom_adjust_read(struct file file, char __user buf,
				880	size_t count, loff_t *ppos)
				881	{
				882	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
				883	char buffer[PROC_NUMBUF];
				884	size_t len;
				885	int oom_adjust = OOM_DISABLE;
				886	unsigned long flags;
				887
				888	if (!task)
				889	return -ESRCH;
				890
				891	if (lock_task_sighand(task, &flags)) {
				892	oom_adjust = task->signal->oom_adj;
				893	unlock_task_sighand(task, &flags);
				894	}
				895
				896	put_task_struct(task);
				897
				898	len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
				899
				900	return simple_read_from_buffer(buf, count, ppos, buffer, len);
				901	}
				902
				903	static ssize_t oom_adjust_write(struct file file, const char __user buf,
				904	size_t count, loff_t *ppos)
				905	{
				906	struct task_struct *task;
				907	char buffer[PROC_NUMBUF];
				908	int oom_adjust;
				909	unsigned long flags;
				910	int err;
				911
				912	memset(buffer, 0, sizeof(buffer));
				913	if (count > sizeof(buffer) - 1)
				914	count = sizeof(buffer) - 1;
				915	if (copy_from_user(buffer, buf, count)) {
				916	err = -EFAULT;
				917	goto out;
				918	}
				919
				920	err = kstrtoint(strstrip(buffer), 0, &oom_adjust);
				921	if (err)
				922	goto out;
				923	if ((oom_adjust < OOM_ADJUST_MIN \|\| oom_adjust > OOM_ADJUST_MAX) &&
				924	oom_adjust != OOM_DISABLE) {
				925	err = -EINVAL;
				926	goto out;
				927	}
				928
				929	task = get_proc_task(file->f_path.dentry->d_inode);
				930	if (!task) {
				931	err = -ESRCH;
				932	goto out;
				933	}
				934
				935	task_lock(task);
				936	if (!task->mm) {
				937	err = -EINVAL;
				938	goto err_task_lock;
				939	}
				940
				941	if (!lock_task_sighand(task, &flags)) {
				942	err = -ESRCH;
				943	goto err_task_lock;
				944	}
				945
				946	if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
				947	err = -EACCES;
				948	goto err_sighand;
				949	}
				950
				951	/*
				952	* Warn that /proc/pid/oom_adj is deprecated, see
				953	* Documentation/feature-removal-schedule.txt.
				954	*/
				955	printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
				956	current->comm, task_pid_nr(current), task_pid_nr(task),
				957	task_pid_nr(task));
				958	task->signal->oom_adj = oom_adjust;
				959	/*
				960	* Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
				961	* value is always attainable.
				962	*/
				963	if (task->signal->oom_adj == OOM_ADJUST_MAX)
				964	task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX;
				965	else
				966	task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
				967	-OOM_DISABLE;
				968	trace_oom_score_adj_update(task);
				969	err_sighand:
				970	unlock_task_sighand(task, &flags);
				971	err_task_lock:
				972	task_unlock(task);
				973	put_task_struct(task);
				974	out:
				975	return err < 0 ? err : count;
				976	}
				977
				978	static int oom_adjust_permission(struct inode *inode, int mask)
				979	{
				980	uid_t uid;
				981	struct task_struct *p;
				982
				983	p = get_proc_task(inode);
				984	if(p) {
				985	uid = task_uid(p);
				986	put_task_struct(p);
				987	}
				988
				989	/*
				990	* System Server (uid == 1000) is granted access to oom_adj of all
				991	* android applications (uid > 10000) as and services (uid >= 1000)
				992	*/
				993	if (p && (current_fsuid() == 1000) && (uid >= 1000)) {
				994	if (inode->i_mode >> 6 & mask) {
				995	return 0;
				996	}
				997	}
				998
				999	/* Fall back to default. */
				1000	return generic_permission(inode, mask);
				1001	}
				1002
				1003	static const struct inode_operations proc_oom_adjust_inode_operations = {
				1004	.permission = oom_adjust_permission,
				1005	};
				1006
				1007	static const struct file_operations proc_oom_adjust_operations = {
				1008	.read = oom_adjust_read,
				1009	.write = oom_adjust_write,
				1010	.llseek = generic_file_llseek,
				1011	};
				1012
				1013	static ssize_t oom_score_adj_read(struct file file, char __user buf,
				1014	size_t count, loff_t *ppos)
				1015	{
				1016	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
				1017	char buffer[PROC_NUMBUF];
				1018	int oom_score_adj = OOM_SCORE_ADJ_MIN;
				1019	unsigned long flags;
				1020	size_t len;
				1021
				1022	if (!task)
				1023	return -ESRCH;
				1024	if (lock_task_sighand(task, &flags)) {
				1025	oom_score_adj = task->signal->oom_score_adj;
				1026	unlock_task_sighand(task, &flags);
				1027	}
				1028	put_task_struct(task);
				1029	len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj);
				1030	return simple_read_from_buffer(buf, count, ppos, buffer, len);
				1031	}
				1032
				1033	static ssize_t oom_score_adj_write(struct file file, const char __user buf,
				1034	size_t count, loff_t *ppos)
				1035	{
				1036	struct task_struct *task;
				1037	char buffer[PROC_NUMBUF];
				1038	unsigned long flags;
				1039	int oom_score_adj;
				1040	int err;
				1041
				1042	memset(buffer, 0, sizeof(buffer));
				1043	if (count > sizeof(buffer) - 1)
				1044	count = sizeof(buffer) - 1;
				1045	if (copy_from_user(buffer, buf, count)) {
				1046	err = -EFAULT;
				1047	goto out;
				1048	}
				1049
				1050	err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
				1051	if (err)
				1052	goto out;
				1053	if (oom_score_adj < OOM_SCORE_ADJ_MIN \|\|
				1054	oom_score_adj > OOM_SCORE_ADJ_MAX) {
				1055	err = -EINVAL;
				1056	goto out;
				1057	}
				1058
				1059	task = get_proc_task(file->f_path.dentry->d_inode);
				1060	if (!task) {
				1061	err = -ESRCH;
				1062	goto out;
				1063	}
				1064
				1065	task_lock(task);
				1066	if (!task->mm) {
				1067	err = -EINVAL;
				1068	goto err_task_lock;
				1069	}
				1070
				1071	if (!lock_task_sighand(task, &flags)) {
				1072	err = -ESRCH;
				1073	goto err_task_lock;
				1074	}
				1075
				1076	if (oom_score_adj < task->signal->oom_score_adj_min &&
				1077	!capable(CAP_SYS_RESOURCE)) {
				1078	err = -EACCES;
				1079	goto err_sighand;
				1080	}
				1081
				1082	task->signal->oom_score_adj = oom_score_adj;
				1083	if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
				1084	task->signal->oom_score_adj_min = oom_score_adj;
				1085	trace_oom_score_adj_update(task);
				1086	/*
				1087	* Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
				1088	* always attainable.
				1089	*/
				1090	if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
				1091	task->signal->oom_adj = OOM_DISABLE;
				1092	else
				1093	task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) /
				1094	OOM_SCORE_ADJ_MAX;
				1095	err_sighand:
				1096	unlock_task_sighand(task, &flags);
				1097	err_task_lock:
				1098	task_unlock(task);
				1099	put_task_struct(task);
				1100	out:
				1101	return err < 0 ? err : count;
				1102	}
				1103
				1104	static const struct file_operations proc_oom_score_adj_operations = {
				1105	.read = oom_score_adj_read,
				1106	.write = oom_score_adj_write,
				1107	.llseek = default_llseek,
				1108	};
				1109
				1110	#ifdef CONFIG_AUDITSYSCALL
				1111	#define TMPBUFLEN 21
				1112	static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
				1113	size_t count, loff_t *ppos)
				1114	{
				1115	struct inode * inode = file->f_path.dentry->d_inode;
				1116	struct task_struct *task = get_proc_task(inode);
				1117	ssize_t length;
				1118	char tmpbuf[TMPBUFLEN];
				1119
				1120	if (!task)
				1121	return -ESRCH;
				1122	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
				1123	audit_get_loginuid(task));
				1124	put_task_struct(task);
				1125	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
				1126	}
				1127
				1128	static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
				1129	size_t count, loff_t *ppos)
				1130	{
				1131	struct inode * inode = file->f_path.dentry->d_inode;
				1132	char page, tmp;
				1133	ssize_t length;
				1134	uid_t loginuid;
				1135
				1136	rcu_read_lock();
				1137	if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
				1138	rcu_read_unlock();
				1139	return -EPERM;
				1140	}
				1141	rcu_read_unlock();
				1142
				1143	if (count >= PAGE_SIZE)
				1144	count = PAGE_SIZE - 1;
				1145
				1146	if (*ppos != 0) {
				1147	/* No partial writes. */
				1148	return -EINVAL;
				1149	}
				1150	page = (char*)__get_free_page(GFP_TEMPORARY);
				1151	if (!page)
				1152	return -ENOMEM;
				1153	length = -EFAULT;
				1154	if (copy_from_user(page, buf, count))
				1155	goto out_free_page;
				1156
				1157	page[count] = '\0';
				1158	loginuid = simple_strtoul(page, &tmp, 10);
				1159	if (tmp == page) {
				1160	length = -EINVAL;
				1161	goto out_free_page;
				1162
				1163	}
				1164	length = audit_set_loginuid(loginuid);
				1165	if (likely(length == 0))
				1166	length = count;
				1167
				1168	out_free_page:
				1169	free_page((unsigned long) page);
				1170	return length;
				1171	}
				1172
				1173	static const struct file_operations proc_loginuid_operations = {
				1174	.read = proc_loginuid_read,
				1175	.write = proc_loginuid_write,
				1176	.llseek = generic_file_llseek,
				1177	};
				1178
				1179	static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
				1180	size_t count, loff_t *ppos)
				1181	{
				1182	struct inode * inode = file->f_path.dentry->d_inode;
				1183	struct task_struct *task = get_proc_task(inode);
				1184	ssize_t length;
				1185	char tmpbuf[TMPBUFLEN];
				1186
				1187	if (!task)
				1188	return -ESRCH;
				1189	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
				1190	audit_get_sessionid(task));
				1191	put_task_struct(task);
				1192	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
				1193	}
				1194
				1195	static const struct file_operations proc_sessionid_operations = {
				1196	.read = proc_sessionid_read,
				1197	.llseek = generic_file_llseek,
				1198	};
				1199	#endif
				1200
				1201	#ifdef CONFIG_FAULT_INJECTION
				1202	static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
				1203	size_t count, loff_t *ppos)
				1204	{
				1205	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
				1206	char buffer[PROC_NUMBUF];
				1207	size_t len;
				1208	int make_it_fail;
				1209
				1210	if (!task)
				1211	return -ESRCH;
				1212	make_it_fail = task->make_it_fail;
				1213	put_task_struct(task);
				1214
				1215	len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
				1216
				1217	return simple_read_from_buffer(buf, count, ppos, buffer, len);
				1218	}
				1219
				1220	static ssize_t proc_fault_inject_write(struct file * file,
				1221	const char __user * buf, size_t count, loff_t *ppos)
				1222	{
				1223	struct task_struct *task;
				1224	char buffer[PROC_NUMBUF], *end;
				1225	int make_it_fail;
				1226
				1227	if (!capable(CAP_SYS_RESOURCE))
				1228	return -EPERM;
				1229	memset(buffer, 0, sizeof(buffer));
				1230	if (count > sizeof(buffer) - 1)
				1231	count = sizeof(buffer) - 1;
				1232	if (copy_from_user(buffer, buf, count))
				1233	return -EFAULT;
				1234	make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
				1235	if (*end)
				1236	return -EINVAL;
				1237	task = get_proc_task(file->f_dentry->d_inode);
				1238	if (!task)
				1239	return -ESRCH;
				1240	task->make_it_fail = make_it_fail;
				1241	put_task_struct(task);
				1242
				1243	return count;
				1244	}
				1245
				1246	static const struct file_operations proc_fault_inject_operations = {
				1247	.read = proc_fault_inject_read,
				1248	.write = proc_fault_inject_write,
				1249	.llseek = generic_file_llseek,
				1250	};
				1251	#endif
				1252
				1253
				1254	#ifdef CONFIG_SCHED_DEBUG
				1255	/*
				1256	* Print out various scheduling related per-task fields:
				1257	*/
				1258	static int sched_show(struct seq_file m, void v)
				1259	{
				1260	struct inode *inode = m->private;
				1261	struct task_struct *p;
				1262
				1263	p = get_proc_task(inode);
				1264	if (!p)
				1265	return -ESRCH;
				1266	proc_sched_show_task(p, m);
				1267
				1268	put_task_struct(p);
				1269
				1270	return 0;
				1271	}
				1272
				1273	static ssize_t
				1274	sched_write(struct file file, const char __user buf,
				1275	size_t count, loff_t *offset)
				1276	{
				1277	struct inode *inode = file->f_path.dentry->d_inode;
				1278	struct task_struct *p;
				1279
				1280	p = get_proc_task(inode);
				1281	if (!p)
				1282	return -ESRCH;
				1283	proc_sched_set_task(p);
				1284
				1285	put_task_struct(p);
				1286
				1287	return count;
				1288	}
				1289
				1290	static int sched_open(struct inode inode, struct file filp)
				1291	{
				1292	return single_open(filp, sched_show, inode);
				1293	}
				1294
				1295	static const struct file_operations proc_pid_sched_operations = {
				1296	.open = sched_open,
				1297	.read = seq_read,
				1298	.write = sched_write,
				1299	.llseek = seq_lseek,
				1300	.release = single_release,
				1301	};
				1302
				1303	#endif
				1304
				1305	#ifdef CONFIG_SCHED_AUTOGROUP
				1306	/*
				1307	* Print out autogroup related information:
				1308	*/
				1309	static int sched_autogroup_show(struct seq_file m, void v)
				1310	{
				1311	struct inode *inode = m->private;
				1312	struct task_struct *p;
				1313
				1314	p = get_proc_task(inode);
				1315	if (!p)
				1316	return -ESRCH;
				1317	proc_sched_autogroup_show_task(p, m);
				1318
				1319	put_task_struct(p);
				1320
				1321	return 0;
				1322	}
				1323
				1324	static ssize_t
				1325	sched_autogroup_write(struct file file, const char __user buf,
				1326	size_t count, loff_t *offset)
				1327	{
				1328	struct inode *inode = file->f_path.dentry->d_inode;
				1329	struct task_struct *p;
				1330	char buffer[PROC_NUMBUF];
				1331	int nice;
				1332	int err;
				1333
				1334	memset(buffer, 0, sizeof(buffer));
				1335	if (count > sizeof(buffer) - 1)
				1336	count = sizeof(buffer) - 1;
				1337	if (copy_from_user(buffer, buf, count))
				1338	return -EFAULT;
				1339
				1340	err = kstrtoint(strstrip(buffer), 0, &nice);
				1341	if (err < 0)
				1342	return err;
				1343
				1344	p = get_proc_task(inode);
				1345	if (!p)
				1346	return -ESRCH;
				1347
				1348	err = proc_sched_autogroup_set_nice(p, nice);
				1349	if (err)
				1350	count = err;
				1351
				1352	put_task_struct(p);
				1353
				1354	return count;
				1355	}
				1356
				1357	static int sched_autogroup_open(struct inode inode, struct file filp)
				1358	{
				1359	int ret;
				1360
				1361	ret = single_open(filp, sched_autogroup_show, NULL);
				1362	if (!ret) {
				1363	struct seq_file *m = filp->private_data;
				1364
				1365	m->private = inode;
				1366	}
				1367	return ret;
				1368	}
				1369
				1370	static const struct file_operations proc_pid_sched_autogroup_operations = {
				1371	.open = sched_autogroup_open,
				1372	.read = seq_read,
				1373	.write = sched_autogroup_write,
				1374	.llseek = seq_lseek,
				1375	.release = single_release,
				1376	};
				1377
				1378	#endif /* CONFIG_SCHED_AUTOGROUP */
				1379
				1380	static ssize_t comm_write(struct file file, const char __user buf,
				1381	size_t count, loff_t *offset)
				1382	{
				1383	struct inode *inode = file->f_path.dentry->d_inode;
				1384	struct task_struct *p;
				1385	char buffer[TASK_COMM_LEN];
				1386
				1387	memset(buffer, 0, sizeof(buffer));
				1388	if (count > sizeof(buffer) - 1)
				1389	count = sizeof(buffer) - 1;
				1390	if (copy_from_user(buffer, buf, count))
				1391	return -EFAULT;
				1392
				1393	p = get_proc_task(inode);
				1394	if (!p)
				1395	return -ESRCH;
				1396
				1397	if (same_thread_group(current, p))
				1398	set_task_comm(p, buffer);
				1399	else
				1400	count = -EINVAL;
				1401
				1402	put_task_struct(p);
				1403
				1404	return count;
				1405	}
				1406
				1407	static int comm_show(struct seq_file m, void v)
				1408	{
				1409	struct inode *inode = m->private;
				1410	struct task_struct *p;
				1411
				1412	p = get_proc_task(inode);
				1413	if (!p)
				1414	return -ESRCH;
				1415
				1416	task_lock(p);
				1417	seq_printf(m, "%s\n", p->comm);
				1418	task_unlock(p);
				1419
				1420	put_task_struct(p);
				1421
				1422	return 0;
				1423	}
				1424
				1425	static int comm_open(struct inode inode, struct file filp)
				1426	{
				1427	return single_open(filp, comm_show, inode);
				1428	}
				1429
				1430	static const struct file_operations proc_pid_set_comm_operations = {
				1431	.open = comm_open,
				1432	.read = seq_read,
				1433	.write = comm_write,
				1434	.llseek = seq_lseek,
				1435	.release = single_release,
				1436	};
				1437
				1438	static int proc_exe_link(struct dentry dentry, struct path exe_path)
				1439	{
				1440	struct task_struct *task;
				1441	struct mm_struct *mm;
				1442	struct file *exe_file;
				1443
				1444	task = get_proc_task(dentry->d_inode);
				1445	if (!task)
				1446	return -ENOENT;
				1447	mm = get_task_mm(task);
				1448	put_task_struct(task);
				1449	if (!mm)
				1450	return -ENOENT;
				1451	exe_file = get_mm_exe_file(mm);
				1452	mmput(mm);
				1453	if (exe_file) {
				1454	*exe_path = exe_file->f_path;
				1455	path_get(&exe_file->f_path);
				1456	fput(exe_file);
				1457	return 0;
				1458	} else
				1459	return -ENOENT;
				1460	}
				1461
				1462	static void proc_pid_follow_link(struct dentry dentry, struct nameidata *nd)
				1463	{
				1464	struct inode *inode = dentry->d_inode;
				1465	int error = -EACCES;
				1466
				1467	/* We don't need a base pointer in the /proc filesystem */
				1468	path_put(&nd->path);
				1469
				1470	/* Are we allowed to snoop on the tasks file descriptors? */
				1471	if (!proc_fd_access_allowed(inode))
				1472	goto out;
				1473
				1474	error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path);
				1475	out:
				1476	return ERR_PTR(error);
				1477	}
				1478
				1479	static int do_proc_readlink(struct path path, char __user buffer, int buflen)
				1480	{
				1481	char tmp = (char)__get_free_page(GFP_TEMPORARY);
				1482	char *pathname;
				1483	int len;
				1484
				1485	if (!tmp)
				1486	return -ENOMEM;
				1487
				1488	pathname = d_path(path, tmp, PAGE_SIZE);
				1489	len = PTR_ERR(pathname);
				1490	if (IS_ERR(pathname))
				1491	goto out;
				1492	len = tmp + PAGE_SIZE - 1 - pathname;
				1493
				1494	if (len > buflen)
				1495	len = buflen;
				1496	if (copy_to_user(buffer, pathname, len))
				1497	len = -EFAULT;
				1498	out:
				1499	free_page((unsigned long)tmp);
				1500	return len;
				1501	}
				1502
				1503	static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
				1504	{
				1505	int error = -EACCES;
				1506	struct inode *inode = dentry->d_inode;
				1507	struct path path;
				1508
				1509	/* Are we allowed to snoop on the tasks file descriptors? */
				1510	if (!proc_fd_access_allowed(inode))
				1511	goto out;
				1512
				1513	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
				1514	if (error)
				1515	goto out;
				1516
				1517	error = do_proc_readlink(&path, buffer, buflen);
				1518	path_put(&path);
				1519	out:
				1520	return error;
				1521	}
				1522
				1523	static const struct inode_operations proc_pid_link_inode_operations = {
				1524	.readlink = proc_pid_readlink,
				1525	.follow_link = proc_pid_follow_link,
				1526	.setattr = proc_setattr,
				1527	};
				1528
				1529
				1530	/* building an inode */
				1531
				1532	static int task_dumpable(struct task_struct *task)
				1533	{
				1534	int dumpable = 0;
				1535	struct mm_struct *mm;
				1536
				1537	task_lock(task);
				1538	mm = task->mm;
				1539	if (mm)
				1540	dumpable = get_dumpable(mm);
				1541	task_unlock(task);
				1542	if(dumpable == 1)
				1543	return 1;
				1544	return 0;
				1545	}
				1546
				1547	struct inode proc_pid_make_inode(struct super_block sb, struct task_struct *task)
				1548	{
				1549	struct inode * inode;
				1550	struct proc_inode *ei;
				1551	const struct cred *cred;
				1552
				1553	/* We need a new inode */
				1554
				1555	inode = new_inode(sb);
				1556	if (!inode)
				1557	goto out;
				1558
				1559	/* Common stuff */
				1560	ei = PROC_I(inode);
				1561	inode->i_ino = get_next_ino();
				1562	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
				1563	inode->i_op = &proc_def_inode_operations;
				1564
				1565	/*
				1566	* grab the reference to task.
				1567	*/
				1568	ei->pid = get_task_pid(task, PIDTYPE_PID);
				1569	if (!ei->pid)
				1570	goto out_unlock;
				1571
				1572	if (task_dumpable(task)) {
				1573	rcu_read_lock();
				1574	cred = __task_cred(task);
				1575	inode->i_uid = cred->euid;
				1576	inode->i_gid = cred->egid;
				1577	rcu_read_unlock();
				1578	}
				1579	security_task_to_inode(task, inode);
				1580
				1581	out:
				1582	return inode;
				1583
				1584	out_unlock:
				1585	iput(inode);
				1586	return NULL;
				1587	}
				1588
				1589	int pid_getattr(struct vfsmount mnt, struct dentry dentry, struct kstat *stat)
				1590	{
				1591	struct inode *inode = dentry->d_inode;
				1592	struct task_struct *task;
				1593	const struct cred *cred;
				1594	struct pid_namespace *pid = dentry->d_sb->s_fs_info;
				1595
				1596	generic_fillattr(inode, stat);
				1597
				1598	rcu_read_lock();
				1599	stat->uid = 0;
				1600	stat->gid = 0;
				1601	task = pid_task(proc_pid(inode), PIDTYPE_PID);
				1602	if (task) {
				1603	if (!has_pid_permissions(pid, task, 2)) {
				1604	rcu_read_unlock();
				1605	/*
				1606	* This doesn't prevent learning whether PID exists,
				1607	* it only makes getattr() consistent with readdir().
				1608	*/
				1609	return -ENOENT;
				1610	}
				1611	if ((inode->i_mode == (S_IFDIR\|S_IRUGO\|S_IXUGO)) \|\|
				1612	task_dumpable(task)) {
				1613	cred = __task_cred(task);
				1614	stat->uid = cred->euid;
				1615	stat->gid = cred->egid;
				1616	}
				1617	}
				1618	rcu_read_unlock();
				1619	return 0;
				1620	}
				1621
				1622	/* dentry stuff */
				1623
				1624	/*
				1625	* Exceptional case: normally we are not allowed to unhash a busy
				1626	* directory. In this case, however, we can do it - no aliasing problems
				1627	* due to the way we treat inodes.
				1628	*
				1629	* Rewrite the inode's ownerships here because the owning task may have
				1630	* performed a setuid(), etc.
				1631	*
				1632	* Before the /proc/pid/status file was created the only way to read
				1633	* the effective uid of a /process was to stat /proc/pid. Reading
				1634	* /proc/pid/status is slow enough that procps and other packages
				1635	* kept stating /proc/pid. To keep the rules in /proc simple I have
				1636	* made this apply to all per process world readable and executable
				1637	* directories.
				1638	*/
				1639	int pid_revalidate(struct dentry dentry, struct nameidata nd)
				1640	{
				1641	struct inode *inode;
				1642	struct task_struct *task;
				1643	const struct cred *cred;
				1644
				1645	if (nd && nd->flags & LOOKUP_RCU)
				1646	return -ECHILD;
				1647
				1648	inode = dentry->d_inode;
				1649	task = get_proc_task(inode);
				1650
				1651	if (task) {
				1652	if ((inode->i_mode == (S_IFDIR\|S_IRUGO\|S_IXUGO)) \|\|
				1653	task_dumpable(task)) {
				1654	rcu_read_lock();
				1655	cred = __task_cred(task);
				1656	inode->i_uid = cred->euid;
				1657	inode->i_gid = cred->egid;
				1658	rcu_read_unlock();
				1659	} else {
				1660	inode->i_uid = 0;
				1661	inode->i_gid = 0;
				1662	}
				1663	inode->i_mode &= ~(S_ISUID \| S_ISGID);
				1664	security_task_to_inode(task, inode);
				1665	put_task_struct(task);
				1666	return 1;
				1667	}
				1668	d_drop(dentry);
				1669	return 0;
				1670	}
				1671
				1672	static int pid_delete_dentry(const struct dentry * dentry)
				1673	{
				1674	/* Is the task we represent dead?
				1675	* If so, then don't put the dentry on the lru list,
				1676	* kill it immediately.
				1677	*/
				1678	return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
				1679	}
				1680
				1681	const struct dentry_operations pid_dentry_operations =
				1682	{
				1683	.d_revalidate = pid_revalidate,
				1684	.d_delete = pid_delete_dentry,
				1685	};
				1686
				1687	/* Lookups */
				1688
				1689	/*
				1690	* Fill a directory entry.
				1691	*
				1692	* If possible create the dcache entry and derive our inode number and
				1693	* file type from dcache entry.
				1694	*
				1695	* Since all of the proc inode numbers are dynamically generated, the inode
				1696	* numbers do not exist until the inode is cache. This means creating the
				1697	* the dcache entry in readdir is necessary to keep the inode numbers
				1698	* reported by readdir in sync with the inode numbers reported
				1699	* by stat.
				1700	*/
				1701	int proc_fill_cache(struct file filp, void dirent, filldir_t filldir,
				1702	const char *name, int len,
				1703	instantiate_t instantiate, struct task_struct task, const void ptr)
				1704	{
				1705	struct dentry child, dir = filp->f_path.dentry;
				1706	struct inode *inode;
				1707	struct qstr qname;
				1708	ino_t ino = 0;
				1709	unsigned type = DT_UNKNOWN;
				1710
				1711	qname.name = name;
				1712	qname.len = len;
				1713	qname.hash = full_name_hash(name, len);
				1714
				1715	child = d_lookup(dir, &qname);
				1716	if (!child) {
				1717	struct dentry *new;
				1718	new = d_alloc(dir, &qname);
				1719	if (new) {
				1720	child = instantiate(dir->d_inode, new, task, ptr);
				1721	if (child)
				1722	dput(new);
				1723	else
				1724	child = new;
				1725	}
				1726	}
				1727	if (!child \|\| IS_ERR(child) \|\| !child->d_inode)
				1728	goto end_instantiate;
				1729	inode = child->d_inode;
				1730	if (inode) {
				1731	ino = inode->i_ino;
				1732	type = inode->i_mode >> 12;
				1733	}
				1734	dput(child);
				1735	end_instantiate:
				1736	if (!ino)
				1737	ino = find_inode_number(dir, &qname);
				1738	if (!ino)
				1739	ino = 1;
				1740	return filldir(dirent, name, len, filp->f_pos, ino, type);
				1741	}
				1742
				1743	static unsigned name_to_int(struct dentry *dentry)
				1744	{
				1745	const char *name = dentry->d_name.name;
				1746	int len = dentry->d_name.len;
				1747	unsigned n = 0;
				1748
				1749	if (len > 1 && *name == '0')
				1750	goto out;
				1751	while (len-- > 0) {
				1752	unsigned c = *name++ - '0';
				1753	if (c > 9)
				1754	goto out;
				1755	if (n >= (~0U-9)/10)
				1756	goto out;
				1757	n *= 10;
				1758	n += c;
				1759	}
				1760	return n;
				1761	out:
				1762	return ~0U;
				1763	}
				1764
				1765	#define PROC_FDINFO_MAX 64
				1766
				1767	static int proc_fd_info(struct inode inode, struct path path, char *info)
				1768	{
				1769	struct task_struct *task = get_proc_task(inode);
				1770	struct files_struct *files = NULL;
				1771	struct file *file;
				1772	int fd = proc_fd(inode);
				1773
				1774	if (task) {
				1775	files = get_files_struct(task);
				1776	put_task_struct(task);
				1777	}
				1778	if (files) {
				1779	/*
				1780	* We are not taking a ref to the file structure, so we must
				1781	* hold ->file_lock.
				1782	*/
				1783	spin_lock(&files->file_lock);
				1784	file = fcheck_files(files, fd);
				1785	if (file) {
				1786	unsigned int f_flags;
				1787	struct fdtable *fdt;
				1788
				1789	fdt = files_fdtable(files);
				1790	f_flags = file->f_flags & ~O_CLOEXEC;
				1791	if (close_on_exec(fd, fdt))
				1792	f_flags \|= O_CLOEXEC;
				1793
				1794	if (path) {
				1795	*path = file->f_path;
				1796	path_get(&file->f_path);
				1797	}
				1798	if (info)
				1799	snprintf(info, PROC_FDINFO_MAX,
				1800	"pos:\t%lli\n"
				1801	"flags:\t0%o\n",
				1802	(long long) file->f_pos,
				1803	f_flags);
				1804	spin_unlock(&files->file_lock);
				1805	put_files_struct(files);
				1806	return 0;
				1807	}
				1808	spin_unlock(&files->file_lock);
				1809	put_files_struct(files);
				1810	}
				1811	return -ENOENT;
				1812	}
				1813
				1814	static int proc_fd_link(struct dentry dentry, struct path path)
				1815	{
				1816	return proc_fd_info(dentry->d_inode, path, NULL);
				1817	}
				1818
				1819	static int tid_fd_revalidate(struct dentry dentry, struct nameidata nd)
				1820	{
				1821	struct inode *inode;
				1822	struct task_struct *task;
				1823	int fd;
				1824	struct files_struct *files;
				1825	const struct cred *cred;
				1826
				1827	if (nd && nd->flags & LOOKUP_RCU)
				1828	return -ECHILD;
				1829
				1830	inode = dentry->d_inode;
				1831	task = get_proc_task(inode);
				1832	fd = proc_fd(inode);
				1833
				1834	if (task) {
				1835	files = get_files_struct(task);
				1836	if (files) {
				1837	struct file *file;
				1838	rcu_read_lock();
				1839	file = fcheck_files(files, fd);
				1840	if (file) {
				1841	unsigned f_mode = file->f_mode;
				1842
				1843	rcu_read_unlock();
				1844	put_files_struct(files);
				1845
				1846	if (task_dumpable(task)) {
				1847	rcu_read_lock();
				1848	cred = __task_cred(task);
				1849	inode->i_uid = cred->euid;
				1850	inode->i_gid = cred->egid;
				1851	rcu_read_unlock();
				1852	} else {
				1853	inode->i_uid = 0;
				1854	inode->i_gid = 0;
				1855	}
				1856
				1857	if (S_ISLNK(inode->i_mode)) {
				1858	unsigned i_mode = S_IFLNK;
				1859	if (f_mode & FMODE_READ)
				1860	i_mode \|= S_IRUSR \| S_IXUSR;
				1861	if (f_mode & FMODE_WRITE)
				1862	i_mode \|= S_IWUSR \| S_IXUSR;
				1863	inode->i_mode = i_mode;
				1864	}
				1865
				1866	security_task_to_inode(task, inode);
				1867	put_task_struct(task);
				1868	return 1;
				1869	}
				1870	rcu_read_unlock();
				1871	put_files_struct(files);
				1872	}
				1873	put_task_struct(task);
				1874	}
				1875	d_drop(dentry);
				1876	return 0;
				1877	}
				1878
				1879	static const struct dentry_operations tid_fd_dentry_operations =
				1880	{
				1881	.d_revalidate = tid_fd_revalidate,
				1882	.d_delete = pid_delete_dentry,
				1883	};
				1884
				1885	static struct dentry proc_fd_instantiate(struct inode dir,
				1886	struct dentry dentry, struct task_struct task, const void *ptr)
				1887	{
				1888	unsigned fd = (const unsigned )ptr;
				1889	struct inode *inode;
				1890	struct proc_inode *ei;
				1891	struct dentry *error = ERR_PTR(-ENOENT);
				1892
				1893	inode = proc_pid_make_inode(dir->i_sb, task);
				1894	if (!inode)
				1895	goto out;
				1896	ei = PROC_I(inode);
				1897	ei->fd = fd;
				1898
				1899	inode->i_mode = S_IFLNK;
				1900	inode->i_op = &proc_pid_link_inode_operations;
				1901	inode->i_size = 64;
				1902	ei->op.proc_get_link = proc_fd_link;
				1903	d_set_d_op(dentry, &tid_fd_dentry_operations);
				1904	d_add(dentry, inode);
				1905	/* Close the race of the process dying before we return the dentry */
				1906	if (tid_fd_revalidate(dentry, NULL))
				1907	error = NULL;
				1908
				1909	out:
				1910	return error;
				1911	}
				1912
				1913	static struct dentry proc_lookupfd_common(struct inode dir,
				1914	struct dentry *dentry,
				1915	instantiate_t instantiate)
				1916	{
				1917	struct task_struct *task = get_proc_task(dir);
				1918	unsigned fd = name_to_int(dentry);
				1919	struct dentry *result = ERR_PTR(-ENOENT);
				1920
				1921	if (!task)
				1922	goto out_no_task;
				1923	if (fd == ~0U)
				1924	goto out;
				1925
				1926	result = instantiate(dir, dentry, task, &fd);
				1927	out:
				1928	put_task_struct(task);
				1929	out_no_task:
				1930	return result;
				1931	}
				1932
				1933	static int proc_readfd_common(struct file * filp, void * dirent,
				1934	filldir_t filldir, instantiate_t instantiate)
				1935	{
				1936	struct dentry *dentry = filp->f_path.dentry;
				1937	struct inode *inode = dentry->d_inode;
				1938	struct task_struct *p = get_proc_task(inode);
				1939	unsigned int fd, ino;
				1940	int retval;
				1941	struct files_struct * files;
				1942
				1943	retval = -ENOENT;
				1944	if (!p)
				1945	goto out_no_task;
				1946	retval = 0;
				1947
				1948	fd = filp->f_pos;
				1949	switch (fd) {
				1950	case 0:
				1951	if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
				1952	goto out;
				1953	filp->f_pos++;
				1954	case 1:
				1955	ino = parent_ino(dentry);
				1956	if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
				1957	goto out;
				1958	filp->f_pos++;
				1959	default:
				1960	files = get_files_struct(p);
				1961	if (!files)
				1962	goto out;
				1963	rcu_read_lock();
				1964	for (fd = filp->f_pos-2;
				1965	fd < files_fdtable(files)->max_fds;
				1966	fd++, filp->f_pos++) {
				1967	char name[PROC_NUMBUF];
				1968	int len;
				1969
				1970	if (!fcheck_files(files, fd))
				1971	continue;
				1972	rcu_read_unlock();
				1973
				1974	len = snprintf(name, sizeof(name), "%d", fd);
				1975	if (proc_fill_cache(filp, dirent, filldir,
				1976	name, len, instantiate,
				1977	p, &fd) < 0) {
				1978	rcu_read_lock();
				1979	break;
				1980	}
				1981	rcu_read_lock();
				1982	}
				1983	rcu_read_unlock();
				1984	put_files_struct(files);
				1985	}
				1986	out:
				1987	put_task_struct(p);
				1988	out_no_task:
				1989	return retval;
				1990	}
				1991
				1992	static struct dentry proc_lookupfd(struct inode dir, struct dentry *dentry,
				1993	struct nameidata *nd)
				1994	{
				1995	return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
				1996	}
				1997
				1998	static int proc_readfd(struct file filp, void dirent, filldir_t filldir)
				1999	{
				2000	return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
				2001	}
				2002
				2003	static ssize_t proc_fdinfo_read(struct file file, char __user buf,
				2004	size_t len, loff_t *ppos)
				2005	{
				2006	char tmp[PROC_FDINFO_MAX];
				2007	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
				2008	if (!err)
				2009	err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
				2010	return err;
				2011	}
				2012
				2013	static const struct file_operations proc_fdinfo_file_operations = {
				2014	.open = nonseekable_open,
				2015	.read = proc_fdinfo_read,
				2016	.llseek = no_llseek,
				2017	};
				2018
				2019	static const struct file_operations proc_fd_operations = {
				2020	.read = generic_read_dir,
				2021	.readdir = proc_readfd,
				2022	.llseek = default_llseek,
				2023	};
				2024
				2025	#ifdef CONFIG_CHECKPOINT_RESTORE
				2026
				2027	/*
				2028	* dname_to_vma_addr - maps a dentry name into two unsigned longs
				2029	* which represent vma start and end addresses.
				2030	*/
				2031	static int dname_to_vma_addr(struct dentry *dentry,
				2032	unsigned long start, unsigned long end)
				2033	{
				2034	if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
				2035	return -EINVAL;
				2036
				2037	return 0;
				2038	}
				2039
				2040	static int map_files_d_revalidate(struct dentry dentry, struct nameidata nd)
				2041	{
				2042	unsigned long vm_start, vm_end;
				2043	bool exact_vma_exists = false;
				2044	struct mm_struct *mm = NULL;
				2045	struct task_struct *task;
				2046	const struct cred *cred;
				2047	struct inode *inode;
				2048	int status = 0;
				2049
				2050	if (nd && nd->flags & LOOKUP_RCU)
				2051	return -ECHILD;
				2052
				2053	if (!capable(CAP_SYS_ADMIN)) {
				2054	status = -EACCES;
				2055	goto out_notask;
				2056	}
				2057
				2058	inode = dentry->d_inode;
				2059	task = get_proc_task(inode);
				2060	if (!task)
				2061	goto out_notask;
				2062
				2063	if (!ptrace_may_access(task, PTRACE_MODE_READ))
				2064	goto out;
				2065
				2066	mm = get_task_mm(task);
				2067	if (!mm)
				2068	goto out;
				2069
				2070	if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
				2071	down_read(&mm->mmap_sem);
				2072	exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
				2073	up_read(&mm->mmap_sem);
				2074	}
				2075
				2076	mmput(mm);
				2077
				2078	if (exact_vma_exists) {
				2079	if (task_dumpable(task)) {
				2080	rcu_read_lock();
				2081	cred = __task_cred(task);
				2082	inode->i_uid = cred->euid;
				2083	inode->i_gid = cred->egid;
				2084	rcu_read_unlock();
				2085	} else {
				2086	inode->i_uid = 0;
				2087	inode->i_gid = 0;
				2088	}
				2089	security_task_to_inode(task, inode);
				2090	status = 1;
				2091	}
				2092
				2093	out:
				2094	put_task_struct(task);
				2095
				2096	out_notask:
				2097	if (status <= 0)
				2098	d_drop(dentry);
				2099
				2100	return status;
				2101	}
				2102
				2103	static const struct dentry_operations tid_map_files_dentry_operations = {
				2104	.d_revalidate = map_files_d_revalidate,
				2105	.d_delete = pid_delete_dentry,
				2106	};
				2107
				2108	static int proc_map_files_get_link(struct dentry dentry, struct path path)
				2109	{
				2110	unsigned long vm_start, vm_end;
				2111	struct vm_area_struct *vma;
				2112	struct task_struct *task;
				2113	struct mm_struct *mm;
				2114	int rc;
				2115
				2116	rc = -ENOENT;
				2117	task = get_proc_task(dentry->d_inode);
				2118	if (!task)
				2119	goto out;
				2120
				2121	mm = get_task_mm(task);
				2122	put_task_struct(task);
				2123	if (!mm)
				2124	goto out;
				2125
				2126	rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
				2127	if (rc)
				2128	goto out_mmput;
				2129
				2130	rc = -ENOENT;
				2131	down_read(&mm->mmap_sem);
				2132	vma = find_exact_vma(mm, vm_start, vm_end);
				2133	if (vma && vma->vm_file) {
				2134	*path = vma->vm_file->f_path;
				2135	path_get(path);
				2136	rc = 0;
				2137	}
				2138	up_read(&mm->mmap_sem);
				2139
				2140	out_mmput:
				2141	mmput(mm);
				2142	out:
				2143	return rc;
				2144	}
				2145
				/*
				 * One collected map_files entry: the mapped file, plus the entry name
				 * ("<start>-<end>" in hex) and its length as returned by snprintf().
				 */
				struct map_files_info {
					struct file	*file;
					unsigned long	len;
					/*
					 * Two longs in hex take at most 2 * sizeof(long) digits each; the
					 * extra two bytes hold the '-' separator and the terminating NUL.
					 */
					unsigned char	name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
				};
				2151
				2152	static struct dentry *
				2153	proc_map_files_instantiate(struct inode dir, struct dentry dentry,
				2154	struct task_struct task, const void ptr)
				2155	{
				2156	const struct file *file = ptr;
				2157	struct proc_inode *ei;
				2158	struct inode *inode;
				2159
				2160	if (!file)
				2161	return ERR_PTR(-ENOENT);
				2162
				2163	inode = proc_pid_make_inode(dir->i_sb, task);
				2164	if (!inode)
				2165	return ERR_PTR(-ENOENT);
				2166
				2167	ei = PROC_I(inode);
				2168	ei->op.proc_get_link = proc_map_files_get_link;
				2169
				2170	inode->i_op = &proc_pid_link_inode_operations;
				2171	inode->i_size = 64;
				2172	inode->i_mode = S_IFLNK;
				2173
				2174	if (file->f_mode & FMODE_READ)
				2175	inode->i_mode \|= S_IRUSR;
				2176	if (file->f_mode & FMODE_WRITE)
				2177	inode->i_mode \|= S_IWUSR;
				2178
				2179	d_set_d_op(dentry, &tid_map_files_dentry_operations);
				2180	d_add(dentry, inode);
				2181
				2182	return NULL;
				2183	}
				2184
				2185	static struct dentry proc_map_files_lookup(struct inode dir,
				2186	struct dentry dentry, struct nameidata nd)
				2187	{
				2188	unsigned long vm_start, vm_end;
				2189	struct vm_area_struct *vma;
				2190	struct task_struct *task;
				2191	struct dentry *result;
				2192	struct mm_struct *mm;
				2193
				2194	result = ERR_PTR(-EACCES);
				2195	if (!capable(CAP_SYS_ADMIN))
				2196	goto out;
				2197
				2198	result = ERR_PTR(-ENOENT);
				2199	task = get_proc_task(dir);
				2200	if (!task)
				2201	goto out;
				2202
				2203	result = ERR_PTR(-EACCES);
				2204	if (!ptrace_may_access(task, PTRACE_MODE_READ))
				2205	goto out_put_task;
				2206
				2207	result = ERR_PTR(-ENOENT);
				2208	if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
				2209	goto out_put_task;
				2210
				2211	mm = get_task_mm(task);
				2212	if (!mm)
				2213	goto out_put_task;
				2214
				2215	down_read(&mm->mmap_sem);
				2216	vma = find_exact_vma(mm, vm_start, vm_end);
				2217	if (!vma)
				2218	goto out_no_vma;
				2219
				2220	result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file);
				2221
				2222	out_no_vma:
				2223	up_read(&mm->mmap_sem);
				2224	mmput(mm);
				2225	out_put_task:
				2226	put_task_struct(task);
				2227	out:
				2228	return result;
				2229	}
				2230
				2231	static const struct inode_operations proc_map_files_inode_operations = {
				2232	.lookup = proc_map_files_lookup,
				2233	.permission = proc_fd_permission,
				2234	.setattr = proc_setattr,
				2235	};
				2236
				2237	static int
				2238	proc_map_files_readdir(struct file filp, void dirent, filldir_t filldir)
				2239	{
				2240	struct dentry *dentry = filp->f_path.dentry;
				2241	struct inode *inode = dentry->d_inode;
				2242	struct vm_area_struct *vma;
				2243	struct task_struct *task;
				2244	struct mm_struct *mm;
				2245	ino_t ino;
				2246	int ret;
				2247
				2248	ret = -EACCES;
				2249	if (!capable(CAP_SYS_ADMIN))
				2250	goto out;
				2251
				2252	ret = -ENOENT;
				2253	task = get_proc_task(inode);
				2254	if (!task)
				2255	goto out;
				2256
				2257	ret = -EACCES;
				2258	if (!ptrace_may_access(task, PTRACE_MODE_READ))
				2259	goto out_put_task;
				2260
				2261	ret = 0;
				2262	switch (filp->f_pos) {
				2263	case 0:
				2264	ino = inode->i_ino;
				2265	if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0)
				2266	goto out_put_task;
				2267	filp->f_pos++;
				2268	case 1:
				2269	ino = parent_ino(dentry);
				2270	if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
				2271	goto out_put_task;
				2272	filp->f_pos++;
				2273	default:
				2274	{
				2275	unsigned long nr_files, pos, i;
				2276	struct flex_array *fa = NULL;
				2277	struct map_files_info info;
				2278	struct map_files_info *p;
				2279
				2280	mm = get_task_mm(task);
				2281	if (!mm)
				2282	goto out_put_task;
				2283	down_read(&mm->mmap_sem);
				2284
				2285	nr_files = 0;
				2286
				2287	/*
				2288	* We need two passes here:
				2289	*
				2290	* 1) Collect vmas of mapped files with mmap_sem taken
				2291	* 2) Release mmap_sem and instantiate entries
				2292	*
				2293	* otherwise we get lockdep complained, since filldir()
				2294	* routine might require mmap_sem taken in might_fault().
				2295	*/
				2296
				2297	for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
				2298	if (vma->vm_file && ++pos > filp->f_pos)
				2299	nr_files++;
				2300	}
				2301
				2302	if (nr_files) {
				2303	fa = flex_array_alloc(sizeof(info), nr_files,
				2304	GFP_KERNEL);
				2305	if (!fa \|\| flex_array_prealloc(fa, 0, nr_files,
				2306	GFP_KERNEL)) {
				2307	ret = -ENOMEM;
				2308	if (fa)
				2309	flex_array_free(fa);
				2310	up_read(&mm->mmap_sem);
				2311	mmput(mm);
				2312	goto out_put_task;
				2313	}
				2314	for (i = 0, vma = mm->mmap, pos = 2; vma;
				2315	vma = vma->vm_next) {
				2316	if (!vma->vm_file)
				2317	continue;
				2318	if (++pos <= filp->f_pos)
				2319	continue;
				2320
				2321	get_file(vma->vm_file);
				2322	info.file = vma->vm_file;
				2323	info.len = snprintf(info.name,
				2324	sizeof(info.name), "%lx-%lx",
				2325	vma->vm_start, vma->vm_end);
				2326	if (flex_array_put(fa, i++, &info, GFP_KERNEL))
				2327	BUG();
				2328	}
				2329	}
				2330	up_read(&mm->mmap_sem);
				2331
				2332	for (i = 0; i < nr_files; i++) {
				2333	p = flex_array_get(fa, i);
				2334	ret = proc_fill_cache(filp, dirent, filldir,
				2335	p->name, p->len,
				2336	proc_map_files_instantiate,
				2337	task, p->file);
				2338	if (ret)
				2339	break;
				2340	filp->f_pos++;
				2341	fput(p->file);
				2342	}
				2343	for (; i < nr_files; i++) {
				2344	/*
				2345	* In case of error don't forget
				2346	* to put rest of file refs.
				2347	*/
				2348	p = flex_array_get(fa, i);
				2349	fput(p->file);
				2350	}
				2351	if (fa)
				2352	flex_array_free(fa);
				2353	mmput(mm);
				2354	}
				2355	}
				2356
				2357	out_put_task:
				2358	put_task_struct(task);
				2359	out:
				2360	return ret;
				2361	}
				2362
				2363	static const struct file_operations proc_map_files_operations = {
				2364	.read = generic_read_dir,
				2365	.readdir = proc_map_files_readdir,
				2366	.llseek = default_llseek,
				2367	};
				2368
				2369	#endif /* CONFIG_CHECKPOINT_RESTORE */
				2370
				2371	/*
				2372	* /proc/pid/fd needs a special permission handler so that a process can still
				2373	* access /proc/self/fd after it has executed a setuid().
				2374	*/
				2375	static int proc_fd_permission(struct inode *inode, int mask)
				2376	{
				2377	int rv = generic_permission(inode, mask);
				2378	if (rv == 0)
				2379	return 0;
				2380	if (task_pid(current) == proc_pid(inode))
				2381	rv = 0;
				2382	return rv;
				2383	}
				2384
				2385	/*
				2386	* proc directories can do almost nothing..
				2387	*/
				2388	static const struct inode_operations proc_fd_inode_operations = {
				2389	.lookup = proc_lookupfd,
				2390	.permission = proc_fd_permission,
				2391	.setattr = proc_setattr,
				2392	};
				2393
				2394	static struct dentry proc_fdinfo_instantiate(struct inode dir,
				2395	struct dentry dentry, struct task_struct task, const void *ptr)
				2396	{
				2397	unsigned fd = (unsigned )ptr;
				2398	struct inode *inode;
				2399	struct proc_inode *ei;
				2400	struct dentry *error = ERR_PTR(-ENOENT);
				2401
				2402	inode = proc_pid_make_inode(dir->i_sb, task);
				2403	if (!inode)
				2404	goto out;
				2405	ei = PROC_I(inode);
				2406	ei->fd = fd;
				2407	inode->i_mode = S_IFREG \| S_IRUSR;
				2408	inode->i_fop = &proc_fdinfo_file_operations;
				2409	d_set_d_op(dentry, &tid_fd_dentry_operations);
				2410	d_add(dentry, inode);
				2411	/* Close the race of the process dying before we return the dentry */
				2412	if (tid_fd_revalidate(dentry, NULL))
				2413	error = NULL;
				2414
				2415	out:
				2416	return error;
				2417	}
				2418
				2419	static struct dentry proc_lookupfdinfo(struct inode dir,
				2420	struct dentry *dentry,
				2421	struct nameidata *nd)
				2422	{
				2423	return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
				2424	}
				2425
				2426	static int proc_readfdinfo(struct file filp, void dirent, filldir_t filldir)
				2427	{
				2428	return proc_readfd_common(filp, dirent, filldir,
				2429	proc_fdinfo_instantiate);
				2430	}
				2431
				2432	static const struct file_operations proc_fdinfo_operations = {
				2433	.read = generic_read_dir,
				2434	.readdir = proc_readfdinfo,
				2435	.llseek = default_llseek,
				2436	};
				2437
				2438	/*
				2439	* proc directories can do almost nothing..
				2440	*/
				2441	static const struct inode_operations proc_fdinfo_inode_operations = {
				2442	.lookup = proc_lookupfdinfo,
				2443	.setattr = proc_setattr,
				2444	};
				2445
				2446
				2447	static struct dentry proc_pident_instantiate(struct inode dir,
				2448	struct dentry dentry, struct task_struct task, const void *ptr)
				2449	{
				2450	const struct pid_entry *p = ptr;
				2451	struct inode *inode;
				2452	struct proc_inode *ei;
				2453	struct dentry *error = ERR_PTR(-ENOENT);
				2454
				2455	inode = proc_pid_make_inode(dir->i_sb, task);
				2456	if (!inode)
				2457	goto out;
				2458
				2459	ei = PROC_I(inode);
				2460	inode->i_mode = p->mode;
				2461	if (S_ISDIR(inode->i_mode))
				2462	set_nlink(inode, 2); /* Use getattr to fix if necessary */
				2463	if (p->iop)
				2464	inode->i_op = p->iop;
				2465	if (p->fop)
				2466	inode->i_fop = p->fop;
				2467	ei->op = p->op;
				2468	d_set_d_op(dentry, &pid_dentry_operations);
				2469	d_add(dentry, inode);
				2470	/* Close the race of the process dying before we return the dentry */
				2471	if (pid_revalidate(dentry, NULL))
				2472	error = NULL;
				2473	out:
				2474	return error;
				2475	}
				2476
				2477	static struct dentry proc_pident_lookup(struct inode dir,
				2478	struct dentry *dentry,
				2479	const struct pid_entry *ents,
				2480	unsigned int nents)
				2481	{
				2482	struct dentry *error;
				2483	struct task_struct *task = get_proc_task(dir);
				2484	const struct pid_entry p, last;
				2485
				2486	error = ERR_PTR(-ENOENT);
				2487
				2488	if (!task)
				2489	goto out_no_task;
				2490
				2491	/*
				2492	* Yes, it does not scale. And it should not. Don't add
				2493	* new entries into /proc/<tgid>/ without very good reasons.
				2494	*/
				2495	last = &ents[nents - 1];
				2496	for (p = ents; p <= last; p++) {
				2497	if (p->len != dentry->d_name.len)
				2498	continue;
				2499	if (!memcmp(dentry->d_name.name, p->name, p->len))
				2500	break;
				2501	}
				2502	if (p > last)
				2503	goto out;
				2504
				2505	error = proc_pident_instantiate(dir, dentry, task, p);
				2506	out:
				2507	put_task_struct(task);
				2508	out_no_task:
				2509	return error;
				2510	}
				2511
				2512	static int proc_pident_fill_cache(struct file filp, void dirent,
				2513	filldir_t filldir, struct task_struct task, const struct pid_entry p)
				2514	{
				2515	return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
				2516	proc_pident_instantiate, task, p);
				2517	}
				2518
				2519	static int proc_pident_readdir(struct file *filp,
				2520	void *dirent, filldir_t filldir,
				2521	const struct pid_entry *ents, unsigned int nents)
				2522	{
				2523	int i;
				2524	struct dentry *dentry = filp->f_path.dentry;
				2525	struct inode *inode = dentry->d_inode;
				2526	struct task_struct *task = get_proc_task(inode);
				2527	const struct pid_entry p, last;
				2528	ino_t ino;
				2529	int ret;
				2530
				2531	ret = -ENOENT;
				2532	if (!task)
				2533	goto out_no_task;
				2534
				2535	ret = 0;
				2536	i = filp->f_pos;
				2537	switch (i) {
				2538	case 0:
				2539	ino = inode->i_ino;
				2540	if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
				2541	goto out;
				2542	i++;
				2543	filp->f_pos++;
				2544	/* fall through */
				2545	case 1:
				2546	ino = parent_ino(dentry);
				2547	if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
				2548	goto out;
				2549	i++;
				2550	filp->f_pos++;
				2551	/* fall through */
				2552	default:
				2553	i -= 2;
				2554	if (i >= nents) {
				2555	ret = 1;
				2556	goto out;
				2557	}
				2558	p = ents + i;
				2559	last = &ents[nents - 1];
				2560	while (p <= last) {
				2561	if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0)
				2562	goto out;
				2563	filp->f_pos++;
				2564	p++;
				2565	}
				2566	}
				2567
				2568	ret = 1;
				2569	out:
				2570	put_task_struct(task);
				2571	out_no_task:
				2572	return ret;
				2573	}
				2574
				2575	#ifdef CONFIG_SECURITY
				2576	static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
				2577	size_t count, loff_t *ppos)
				2578	{
				2579	struct inode * inode = file->f_path.dentry->d_inode;
				2580	char *p = NULL;
				2581	ssize_t length;
				2582	struct task_struct *task = get_proc_task(inode);
				2583
				2584	if (!task)
				2585	return -ESRCH;
				2586
				2587	length = security_getprocattr(task,
				2588	(char*)file->f_path.dentry->d_name.name,
				2589	&p);
				2590	put_task_struct(task);
				2591	if (length > 0)
				2592	length = simple_read_from_buffer(buf, count, ppos, p, length);
				2593	kfree(p);
				2594	return length;
				2595	}
				2596
				2597	static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
				2598	size_t count, loff_t *ppos)
				2599	{
				2600	struct inode * inode = file->f_path.dentry->d_inode;
				2601	char *page;
				2602	ssize_t length;
				2603	struct task_struct *task = get_proc_task(inode);
				2604
				2605	length = -ESRCH;
				2606	if (!task)
				2607	goto out_no_task;
				2608	if (count > PAGE_SIZE)
				2609	count = PAGE_SIZE;
				2610
				2611	/* No partial writes. */
				2612	length = -EINVAL;
				2613	if (*ppos != 0)
				2614	goto out;
				2615
				2616	length = -ENOMEM;
				2617	page = (char*)__get_free_page(GFP_TEMPORARY);
				2618	if (!page)
				2619	goto out;
				2620
				2621	length = -EFAULT;
				2622	if (copy_from_user(page, buf, count))
				2623	goto out_free;
				2624
				2625	/* Guard against adverse ptrace interaction */
				2626	length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
				2627	if (length < 0)
				2628	goto out_free;
				2629
				2630	length = security_setprocattr(task,
				2631	(char*)file->f_path.dentry->d_name.name,
				2632	(void*)page, count);
				2633	mutex_unlock(&task->signal->cred_guard_mutex);
				2634	out_free:
				2635	free_page((unsigned long) page);
				2636	out:
				2637	put_task_struct(task);
				2638	out_no_task:
				2639	return length;
				2640	}
				2641
				2642	static const struct file_operations proc_pid_attr_operations = {
				2643	.read = proc_pid_attr_read,
				2644	.write = proc_pid_attr_write,
				2645	.llseek = generic_file_llseek,
				2646	};
				2647
				2648	static const struct pid_entry attr_dir_stuff[] = {
				2649	REG("current", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2650	REG("prev", S_IRUGO, proc_pid_attr_operations),
				2651	REG("exec", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2652	REG("fscreate", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2653	REG("keycreate", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2654	REG("sockcreate", S_IRUGO\|S_IWUGO, proc_pid_attr_operations),
				2655	};
				2656
				2657	static int proc_attr_dir_readdir(struct file * filp,
				2658	void * dirent, filldir_t filldir)
				2659	{
				2660	return proc_pident_readdir(filp,dirent,filldir,
				2661	attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff));
				2662	}
				2663
				2664	static const struct file_operations proc_attr_dir_operations = {
				2665	.read = generic_read_dir,
				2666	.readdir = proc_attr_dir_readdir,
				2667	.llseek = default_llseek,
				2668	};
				2669
				2670	static struct dentry proc_attr_dir_lookup(struct inode dir,
				2671	struct dentry dentry, struct nameidata nd)
				2672	{
				2673	return proc_pident_lookup(dir, dentry,
				2674	attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
				2675	}
				2676
				2677	static const struct inode_operations proc_attr_dir_inode_operations = {
				2678	.lookup = proc_attr_dir_lookup,
				2679	.getattr = pid_getattr,
				2680	.setattr = proc_setattr,
				2681	};
				2682
				2683	#endif
				2684
				2685	#ifdef CONFIG_ELF_CORE
				2686	static ssize_t proc_coredump_filter_read(struct file file, char __user buf,
				2687	size_t count, loff_t *ppos)
				2688	{
				2689	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
				2690	struct mm_struct *mm;
				2691	char buffer[PROC_NUMBUF];
				2692	size_t len;
				2693	int ret;
				2694
				2695	if (!task)
				2696	return -ESRCH;
				2697
				2698	ret = 0;
				2699	mm = get_task_mm(task);
				2700	if (mm) {
				2701	len = snprintf(buffer, sizeof(buffer), "%08lx\n",
				2702	((mm->flags & MMF_DUMP_FILTER_MASK) >>
				2703	MMF_DUMP_FILTER_SHIFT));
				2704	mmput(mm);
				2705	ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
				2706	}
				2707
				2708	put_task_struct(task);
				2709
				2710	return ret;
				2711	}
				2712
				2713	static ssize_t proc_coredump_filter_write(struct file *file,
				2714	const char __user *buf,
				2715	size_t count,
				2716	loff_t *ppos)
				2717	{
				2718	struct task_struct *task;
				2719	struct mm_struct *mm;
				2720	char buffer[PROC_NUMBUF], *end;
				2721	unsigned int val;
				2722	int ret;
				2723	int i;
				2724	unsigned long mask;
				2725
				2726	ret = -EFAULT;
				2727	memset(buffer, 0, sizeof(buffer));
				2728	if (count > sizeof(buffer) - 1)
				2729	count = sizeof(buffer) - 1;
				2730	if (copy_from_user(buffer, buf, count))
				2731	goto out_no_task;
				2732
				2733	ret = -EINVAL;
				2734	val = (unsigned int)simple_strtoul(buffer, &end, 0);
				2735	if (*end == '\n')
				2736	end++;
				2737	if (end - buffer == 0)
				2738	goto out_no_task;
				2739
				2740	ret = -ESRCH;
				2741	task = get_proc_task(file->f_dentry->d_inode);
				2742	if (!task)
				2743	goto out_no_task;
				2744
				2745	ret = end - buffer;
				2746	mm = get_task_mm(task);
				2747	if (!mm)
				2748	goto out_no_mm;
				2749
				2750	for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
				2751	if (val & mask)
				2752	set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
				2753	else
				2754	clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
				2755	}
				2756
				2757	mmput(mm);
				2758	out_no_mm:
				2759	put_task_struct(task);
				2760	out_no_task:
				2761	return ret;
				2762	}
				2763
				2764	static const struct file_operations proc_coredump_filter_operations = {
				2765	.read = proc_coredump_filter_read,
				2766	.write = proc_coredump_filter_write,
				2767	.llseek = generic_file_llseek,
				2768	};
				2769	#endif
				2770
				2771	/*
				2772	* /proc/self:
				2773	*/
				2774	static int proc_self_readlink(struct dentry dentry, char __user buffer,
				2775	int buflen)
				2776	{
				2777	struct pid_namespace *ns = dentry->d_sb->s_fs_info;
				2778	pid_t tgid = task_tgid_nr_ns(current, ns);
				2779	char tmp[PROC_NUMBUF];
				2780	if (!tgid)
				2781	return -ENOENT;
				2782	sprintf(tmp, "%d", tgid);
				2783	return vfs_readlink(dentry,buffer,buflen,tmp);
				2784	}
				2785
				2786	static void proc_self_follow_link(struct dentry dentry, struct nameidata *nd)
				2787	{
				2788	struct pid_namespace *ns = dentry->d_sb->s_fs_info;
				2789	pid_t tgid = task_tgid_nr_ns(current, ns);
				2790	char *name = ERR_PTR(-ENOENT);
				2791	if (tgid) {
				2792	name = __getname();
				2793	if (!name)
				2794	name = ERR_PTR(-ENOMEM);
				2795	else
				2796	sprintf(name, "%d", tgid);
				2797	}
				2798	nd_set_link(nd, name);
				2799	return NULL;
				2800	}
				2801
				/*
				 * ->put_link for /proc/self: release the link body stored in @nd unless it
				 * is an ERR_PTR.  @dentry and @cookie are unused.
				 */
				static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
								void *cookie)
				{
					char *s = nd_get_link(nd);

					if (!IS_ERR(s))
						__putname(s);
				}
				2809
				2810	static const struct inode_operations proc_self_inode_operations = {
				2811	.readlink = proc_self_readlink,
				2812	.follow_link = proc_self_follow_link,
				2813	.put_link = proc_self_put_link,
				2814	};
				2815
				2816	/*
				2817	* proc base
				2818	*
				2819	* These are the directory entries in the root directory of /proc
				2820	* that properly belong to the /proc filesystem, as they describe
				2821	* describe something that is process related.
				2822	*/
				2823	static const struct pid_entry proc_base_stuff[] = {
				2824	NOD("self", S_IFLNK\|S_IRWXUGO,
				2825	&proc_self_inode_operations, NULL, {}),
				2826	};
				2827
				2828	static struct dentry proc_base_instantiate(struct inode dir,
				2829	struct dentry dentry, struct task_struct task, const void *ptr)
				2830	{
				2831	const struct pid_entry *p = ptr;
				2832	struct inode *inode;
				2833	struct proc_inode *ei;
				2834	struct dentry *error;
				2835
				2836	/* Allocate the inode */
				2837	error = ERR_PTR(-ENOMEM);
				2838	inode = new_inode(dir->i_sb);
				2839	if (!inode)
				2840	goto out;
				2841
				2842	/* Initialize the inode */
				2843	ei = PROC_I(inode);
				2844	inode->i_ino = get_next_ino();
				2845	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
				2846
				2847	/*
				2848	* grab the reference to the task.
				2849	*/
				2850	ei->pid = get_task_pid(task, PIDTYPE_PID);
				2851	if (!ei->pid)
				2852	goto out_iput;
				2853
				2854	inode->i_mode = p->mode;
				2855	if (S_ISDIR(inode->i_mode))
				2856	set_nlink(inode, 2);
				2857	if (S_ISLNK(inode->i_mode))
				2858	inode->i_size = 64;
				2859	if (p->iop)
				2860	inode->i_op = p->iop;
				2861	if (p->fop)
				2862	inode->i_fop = p->fop;
				2863	ei->op = p->op;
				2864	d_add(dentry, inode);
				2865	error = NULL;
				2866	out:
				2867	return error;
				2868	out_iput:
				2869	iput(inode);
				2870	goto out;
				2871	}
				2872
				2873	static struct dentry proc_base_lookup(struct inode dir, struct dentry *dentry)
				2874	{
				2875	struct dentry *error;
				2876	struct task_struct *task = get_proc_task(dir);
				2877	const struct pid_entry p, last;
				2878
				2879	error = ERR_PTR(-ENOENT);
				2880
				2881	if (!task)
				2882	goto out_no_task;
				2883
				2884	/* Lookup the directory entry */
				2885	last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
				2886	for (p = proc_base_stuff; p <= last; p++) {
				2887	if (p->len != dentry->d_name.len)
				2888	continue;
				2889	if (!memcmp(dentry->d_name.name, p->name, p->len))
				2890	break;
				2891	}
				2892	if (p > last)
				2893	goto out;
				2894
				2895	error = proc_base_instantiate(dir, dentry, task, p);
				2896
				2897	out:
				2898	put_task_struct(task);
				2899	out_no_task:
				2900	return error;
				2901	}
				2902
				2903	static int proc_base_fill_cache(struct file filp, void dirent,
				2904	filldir_t filldir, struct task_struct task, const struct pid_entry p)
				2905	{
				2906	return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
				2907	proc_base_instantiate, task, p);
				2908	}
				2909
				2910	#ifdef CONFIG_TASK_IO_ACCOUNTING
				2911	static int do_io_accounting(struct task_struct task, char buffer, int whole)
				2912	{
				2913	struct task_io_accounting acct = task->ioac;
				2914	unsigned long flags;
				2915	int result;
				2916
				2917	result = mutex_lock_killable(&task->signal->cred_guard_mutex);
				2918	if (result)
				2919	return result;
				2920
				2921	if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
				2922	result = -EACCES;
				2923	goto out_unlock;
				2924	}
				2925
				2926	if (whole && lock_task_sighand(task, &flags)) {
				2927	struct task_struct *t = task;
				2928
				2929	task_io_accounting_add(&acct, &task->signal->ioac);
				2930	while_each_thread(task, t)
				2931	task_io_accounting_add(&acct, &t->ioac);
				2932
				2933	unlock_task_sighand(task, &flags);
				2934	}
				2935	result = sprintf(buffer,
				2936	"rchar: %llu\n"
				2937	"wchar: %llu\n"
				2938	"syscr: %llu\n"
				2939	"syscw: %llu\n"
				2940	"read_bytes: %llu\n"
				2941	"write_bytes: %llu\n"
				2942	"cancelled_write_bytes: %llu\n",
				2943	(unsigned long long)acct.rchar,
				2944	(unsigned long long)acct.wchar,
				2945	(unsigned long long)acct.syscr,
				2946	(unsigned long long)acct.syscw,
				2947	(unsigned long long)acct.read_bytes,
				2948	(unsigned long long)acct.write_bytes,
				2949	(unsigned long long)acct.cancelled_write_bytes);
				2950	out_unlock:
				2951	mutex_unlock(&task->signal->cred_guard_mutex);
				2952	return result;
				2953	}
				2954
				/* I/O accounting for @task alone (whole = 0). */
				static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
				{
					return do_io_accounting(task, buffer, 0);
				}
				2959
				/* I/O accounting for @task's whole thread group (whole = 1). */
				static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
				{
					return do_io_accounting(task, buffer, 1);
				}
				2964	#endif /* CONFIG_TASK_IO_ACCOUNTING */
				2965
				2966	static int proc_pid_personality(struct seq_file m, struct pid_namespace ns,
				2967	struct pid pid, struct task_struct task)
				2968	{
				2969	int err = lock_trace(task);
				2970	if (!err) {
				2971	seq_printf(m, "%08x\n", task->personality);
				2972	unlock_trace(task);
				2973	}
				2974	return err;
				2975	}
				2976
				2977	/*
				2978	* Thread groups
				2979	*/
				2980	static const struct file_operations proc_task_operations;
				2981	static const struct inode_operations proc_task_inode_operations;
				2982
				2983	static const struct pid_entry tgid_base_stuff[] = {
				2984	DIR("task", S_IRUGO\|S_IXUGO, proc_task_inode_operations, proc_task_operations),
				2985	DIR("fd", S_IRUSR\|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
				2986	#ifdef CONFIG_CHECKPOINT_RESTORE
				2987	DIR("map_files", S_IRUSR\|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
				2988	#endif
				2989	DIR("fdinfo", S_IRUSR\|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
				2990	DIR("ns", S_IRUSR\|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
				2991	#ifdef CONFIG_NET
				2992	DIR("net", S_IRUGO\|S_IXUGO, proc_net_inode_operations, proc_net_operations),
				2993	#endif
				2994	REG("environ", S_IRUSR, proc_environ_operations),
				2995	INF("auxv", S_IRUSR, proc_pid_auxv),
				2996	ONE("status", S_IRUGO, proc_pid_status),
				2997	ONE("personality", S_IRUGO, proc_pid_personality),
				2998	INF("limits", S_IRUGO, proc_pid_limits),
				2999	#ifdef CONFIG_SCHED_DEBUG
				3000	REG("sched", S_IRUGO\|S_IWUSR, proc_pid_sched_operations),
				3001	#endif
				3002	#ifdef CONFIG_SCHED_AUTOGROUP
				3003	REG("autogroup", S_IRUGO\|S_IWUSR, proc_pid_sched_autogroup_operations),
				3004	#endif
				3005	REG("comm", S_IRUGO\|S_IWUSR, proc_pid_set_comm_operations),
				3006	#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
				3007	INF("syscall", S_IRUGO, proc_pid_syscall),
				3008	#endif
				3009	INF("cmdline", S_IRUGO, proc_pid_cmdline),
				3010	ONE("stat", S_IRUGO, proc_tgid_stat),
				3011	ONE("statm", S_IRUGO, proc_pid_statm),
				3012	REG("maps", S_IRUGO, proc_pid_maps_operations),
				3013	#ifdef CONFIG_NUMA
				3014	REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
				3015	#endif
				3016	REG("mem", S_IRUSR\|S_IWUSR, proc_mem_operations),
				3017	LNK("cwd", proc_cwd_link),
				3018	LNK("root", proc_root_link),
				3019	LNK("exe", proc_exe_link),
				3020	REG("mounts", S_IRUGO, proc_mounts_operations),
				3021	REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
				3022	REG("mountstats", S_IRUSR, proc_mountstats_operations),
				3023	#ifdef CONFIG_PROC_PAGE_MONITOR
				3024	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
				3025	REG("smaps", S_IRUGO, proc_pid_smaps_operations),
				3026	REG("pagemap", S_IRUGO, proc_pagemap_operations),
				3027	#endif
				3028	#ifdef CONFIG_SECURITY
				3029	DIR("attr", S_IRUGO\|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
				3030	#endif
				3031	#ifdef CONFIG_KALLSYMS
				3032	INF("wchan", S_IRUGO, proc_pid_wchan),
				3033	#endif
				3034	#ifdef CONFIG_STACKTRACE
				3035	ONE("stack", S_IRUGO, proc_pid_stack),
				3036	#endif
				3037	#ifdef CONFIG_SCHEDSTATS
				3038	INF("schedstat", S_IRUGO, proc_pid_schedstat),
				3039	#endif
				3040	#ifdef CONFIG_LATENCYTOP
				3041	REG("latency", S_IRUGO, proc_lstats_operations),
				3042	#endif
				3043	#ifdef CONFIG_PROC_PID_CPUSET
				3044	REG("cpuset", S_IRUGO, proc_cpuset_operations),
				3045	#endif
				3046	#ifdef CONFIG_CGROUPS
				3047	REG("cgroup", S_IRUGO, proc_cgroup_operations),
				3048	#endif
				3049	INF("oom_score", S_IRUGO, proc_oom_score),
				3050	ANDROID("oom_adj",S_IRUGO\|S_IWUSR, oom_adjust),
				3051	REG("oom_score_adj", S_IRUGO\|S_IWUSR, proc_oom_score_adj_operations),
				3052	#ifdef CONFIG_AUDITSYSCALL
				3053	REG("loginuid", S_IWUSR\|S_IRUGO, proc_loginuid_operations),
				3054	REG("sessionid", S_IRUGO, proc_sessionid_operations),
				3055	#endif
				3056	#ifdef CONFIG_FAULT_INJECTION
				3057	REG("make-it-fail", S_IRUGO\|S_IWUSR, proc_fault_inject_operations),
				3058	#endif
				3059	#ifdef CONFIG_ELF_CORE
				3060	REG("coredump_filter", S_IRUGO\|S_IWUSR, proc_coredump_filter_operations),
				3061	#endif
				3062	#ifdef CONFIG_TASK_IO_ACCOUNTING
				3063	INF("io", S_IRUSR, proc_tgid_io_accounting),
				3064	#endif
				3065	#ifdef CONFIG_HARDWALL
				3066	INF("hardwall", S_IRUGO, proc_pid_hardwall),
				3067	#endif
				3068	};
				3069
				3070	static int proc_tgid_base_readdir(struct file * filp,
				3071	void * dirent, filldir_t filldir)
				3072	{
				3073	return proc_pident_readdir(filp,dirent,filldir,
				3074	tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
				3075	}
				3076
				3077	static const struct file_operations proc_tgid_base_operations = {
				3078	.read = generic_read_dir,
				3079	.readdir = proc_tgid_base_readdir,
				3080	.llseek = default_llseek,
				3081	};
				3082
				3083	static struct dentry proc_tgid_base_lookup(struct inode dir, struct dentry dentry, struct nameidata nd){
				3084	return proc_pident_lookup(dir, dentry,
				3085	tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
				3086	}
				3087
				3088	static const struct inode_operations proc_tgid_base_inode_operations = {
				3089	.lookup = proc_tgid_base_lookup,
				3090	.getattr = pid_getattr,
				3091	.setattr = proc_setattr,
				3092	.permission = proc_pid_permission,
				3093	};
				3094
				3095	static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
				3096	{
				3097	struct dentry dentry, leader, *dir;
				3098	char buf[PROC_NUMBUF];
				3099	struct qstr name;
				3100
				3101	name.name = buf;
				3102	name.len = snprintf(buf, sizeof(buf), "%d", pid);
				3103	dentry = d_hash_and_lookup(mnt->mnt_root, &name);
				3104	if (dentry) {
				3105	shrink_dcache_parent(dentry);
				3106	d_drop(dentry);
				3107	dput(dentry);
				3108	}
				3109
				3110	name.name = buf;
				3111	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
				3112	leader = d_hash_and_lookup(mnt->mnt_root, &name);
				3113	if (!leader)
				3114	goto out;
				3115
				3116	name.name = "task";
				3117	name.len = strlen(name.name);
				3118	dir = d_hash_and_lookup(leader, &name);
				3119	if (!dir)
				3120	goto out_put_leader;
				3121
				3122	name.name = buf;
				3123	name.len = snprintf(buf, sizeof(buf), "%d", pid);
				3124	dentry = d_hash_and_lookup(dir, &name);
				3125	if (dentry) {
				3126	shrink_dcache_parent(dentry);
				3127	d_drop(dentry);
				3128	dput(dentry);
				3129	}
				3130
				3131	dput(dir);
				3132	out_put_leader:
				3133	dput(leader);
				3134	out:
				3135	return;
				3136	}
				3137
				3138	/**
				3139	* proc_flush_task - Remove dcache entries for @task from the /proc dcache.
				3140	* @task: task that should be flushed.
				3141	*
				3142	* When flushing dentries from proc, one needs to flush them from global
				3143	* proc (proc_mnt) and from all the namespaces' procs this task was seen
				3144	* in. This call is supposed to do all of this job.
				3145	*
				3146	* Looks in the dcache for
				3147	* /proc/@pid
				3148	* /proc/@tgid/task/@pid
				3149	* if either directory is present flushes it and all of it'ts children
				3150	* from the dcache.
				3151	*
				3152	* It is safe and reasonable to cache /proc entries for a task until
				3153	* that task exits. After that they just clog up the dcache with
				3154	* useless entries, possibly causing useful dcache entries to be
				3155	* flushed instead. This routine is proved to flush those useless
				3156	* dcache entries at process exit time.
				3157	*
				3158	* NOTE: This routine is just an optimization so it does not guarantee
				3159	* that no dcache entries will exist at process exit time it
				3160	* just makes it very unlikely that any will persist.
				3161	*/
				3162
				3163	void proc_flush_task(struct task_struct *task)
				3164	{
				3165	int i;
				3166	struct pid pid, tgid;
				3167	struct upid *upid;
				3168
				3169	pid = task_pid(task);
				3170	tgid = task_tgid(task);
				3171
				3172	for (i = 0; i <= pid->level; i++) {
				3173	upid = &pid->numbers[i];
				3174	proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
				3175	tgid->numbers[i].nr);
				3176	}
				3177
				3178	upid = &pid->numbers[pid->level];
				3179	if (upid->nr == 1)
				3180	pid_ns_release_proc(upid->ns);
				3181	}
				3182
				3183	static struct dentry proc_pid_instantiate(struct inode dir,
				3184	struct dentry * dentry,
				3185	struct task_struct task, const void ptr)
				3186	{
				3187	struct dentry *error = ERR_PTR(-ENOENT);
				3188	struct inode *inode;
				3189
				3190	inode = proc_pid_make_inode(dir->i_sb, task);
				3191	if (!inode)
				3192	goto out;
				3193
				3194	inode->i_mode = S_IFDIR\|S_IRUGO\|S_IXUGO;
				3195	inode->i_op = &proc_tgid_base_inode_operations;
				3196	inode->i_fop = &proc_tgid_base_operations;
				3197	inode->i_flags\|=S_IMMUTABLE;
				3198
				3199	set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff,
				3200	ARRAY_SIZE(tgid_base_stuff)));
				3201
				3202	d_set_d_op(dentry, &pid_dentry_operations);
				3203
				3204	d_add(dentry, inode);
				3205	/* Close the race of the process dying before we return the dentry */
				3206	if (pid_revalidate(dentry, NULL))
				3207	error = NULL;
				3208	out:
				3209	return error;
				3210	}
				3211
				3212	struct dentry proc_pid_lookup(struct inode dir, struct dentry * dentry, struct nameidata *nd)
				3213	{
				3214	struct dentry *result;
				3215	struct task_struct *task;
				3216	unsigned tgid;
				3217	struct pid_namespace *ns;
				3218
				3219	result = proc_base_lookup(dir, dentry);
				3220	if (!IS_ERR(result) \|\| PTR_ERR(result) != -ENOENT)
				3221	goto out;
				3222
				3223	tgid = name_to_int(dentry);
				3224	if (tgid == ~0U)
				3225	goto out;
				3226
				3227	ns = dentry->d_sb->s_fs_info;
				3228	rcu_read_lock();
				3229	task = find_task_by_pid_ns(tgid, ns);
				3230	if (task)
				3231	get_task_struct(task);
				3232	rcu_read_unlock();
				3233	if (!task)
				3234	goto out;
				3235
				3236	result = proc_pid_instantiate(dir, dentry, task, NULL);
				3237	put_task_struct(task);
				3238	out:
				3239	return result;
				3240	}
				3241
				/*
				 * Cursor used by next_tgid() to find the first task with
				 * tgid >= the requested tgid.
				 *
				 * @tgid: id to start searching from / id of the task that was found.
				 * @task: the task that was found, or NULL when there is none.
				 */
				struct tgid_iter {
					unsigned int tgid;
					struct task_struct *task;
				};
				3250	static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
				3251	{
				3252	struct pid *pid;
				3253
				3254	if (iter.task)
				3255	put_task_struct(iter.task);
				3256	rcu_read_lock();
				3257	retry:
				3258	iter.task = NULL;
				3259	pid = find_ge_pid(iter.tgid, ns);
				3260	if (pid) {
				3261	iter.tgid = pid_nr_ns(pid, ns);
				3262	iter.task = pid_task(pid, PIDTYPE_PID);
				3263	/* What we to know is if the pid we have find is the
				3264	* pid of a thread_group_leader. Testing for task
				3265	* being a thread_group_leader is the obvious thing
				3266	* todo but there is a window when it fails, due to
				3267	* the pid transfer logic in de_thread.
				3268	*
				3269	* So we perform the straight forward test of seeing
				3270	* if the pid we have found is the pid of a thread
				3271	* group leader, and don't worry if the task we have
				3272	* found doesn't happen to be a thread group leader.
				3273	* As we don't care in the case of readdir.
				3274	*/
				3275	if (!iter.task \|\| !has_group_leader_pid(iter.task)) {
				3276	iter.tgid += 1;
				3277	goto retry;
				3278	}
				3279	get_task_struct(iter.task);
				3280	}
				3281	rcu_read_unlock();
				3282	return iter;
				3283	}
				3284
				/*
				 * Offset between a tgid and the f_pos used for its directory entry:
				 * proc_pid_readdir() stores f_pos as tgid + TGID_OFFSET and recovers the
				 * tgid by subtracting it again.
				 */
				3285	#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff))
				3286
				3287	static int proc_pid_fill_cache(struct file filp, void dirent, filldir_t filldir,
				3288	struct tgid_iter iter)
				3289	{
				3290	char name[PROC_NUMBUF];
				3291	int len = snprintf(name, sizeof(name), "%d", iter.tgid);
				3292	return proc_fill_cache(filp, dirent, filldir, name, len,
				3293	proc_pid_instantiate, iter.task, NULL);
				3294	}
				3295
				3296	static int fake_filldir(void buf, const char name, int namelen,
				3297	loff_t offset, u64 ino, unsigned d_type)
				3298	{
				3299	return 0;
				3300	}
				3301
				3302	/* for the /proc/ directory itself, after non-process stuff has been done */
				3303	int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
				3304	{
				3305	unsigned int nr;
				3306	struct task_struct *reaper;
				3307	struct tgid_iter iter;
				3308	struct pid_namespace *ns;
				3309	filldir_t __filldir;
				3310
				3311	if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
				3312	goto out_no_task;
				3313	nr = filp->f_pos - FIRST_PROCESS_ENTRY;
				3314
				3315	reaper = get_proc_task(filp->f_path.dentry->d_inode);
				3316	if (!reaper)
				3317	goto out_no_task;
				3318
				3319	for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
				3320	const struct pid_entry *p = &proc_base_stuff[nr];
				3321	if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
				3322	goto out;
				3323	}
				3324
				3325	ns = filp->f_dentry->d_sb->s_fs_info;
				3326	iter.task = NULL;
				3327	iter.tgid = filp->f_pos - TGID_OFFSET;
				3328	for (iter = next_tgid(ns, iter);
				3329	iter.task;
				3330	iter.tgid += 1, iter = next_tgid(ns, iter)) {
				3331	if (has_pid_permissions(ns, iter.task, 2))
				3332	__filldir = filldir;
				3333	else
				3334	__filldir = fake_filldir;
				3335
				3336	filp->f_pos = iter.tgid + TGID_OFFSET;
				3337	if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) {
				3338	put_task_struct(iter.task);
				3339	goto out;
				3340	}
				3341	}
				3342	filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
				3343	out:
				3344	put_task_struct(reaper);
				3345	out_no_task:
				3346	return 0;
				3347	}
				3348
				3349	/*
				3350	* Tasks
				3351	*/
				3352	static const struct pid_entry tid_base_stuff[] = {
				3353	DIR("fd", S_IRUSR\|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
				3354	DIR("fdinfo", S_IRUSR\|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
				3355	DIR("ns", S_IRUSR\|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
				3356	REG("environ", S_IRUSR, proc_environ_operations),
				3357	INF("auxv", S_IRUSR, proc_pid_auxv),
				3358	ONE("status", S_IRUGO, proc_pid_status),
				3359	ONE("personality", S_IRUGO, proc_pid_personality),
				3360	INF("limits", S_IRUGO, proc_pid_limits),
				3361	#ifdef CONFIG_SCHED_DEBUG
				3362	REG("sched", S_IRUGO\|S_IWUSR, proc_pid_sched_operations),
				3363	#endif
				3364	REG("comm", S_IRUGO\|S_IWUSR, proc_pid_set_comm_operations),
				3365	#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
				3366	INF("syscall", S_IRUGO, proc_pid_syscall),
				3367	#endif
				3368	INF("cmdline", S_IRUGO, proc_pid_cmdline),
				3369	ONE("stat", S_IRUGO, proc_tid_stat),
				3370	ONE("statm", S_IRUGO, proc_pid_statm),
				3371	REG("maps", S_IRUGO, proc_tid_maps_operations),
				3372	#ifdef CONFIG_NUMA
				3373	REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
				3374	#endif
				3375	REG("mem", S_IRUSR\|S_IWUSR, proc_mem_operations),
				3376	LNK("cwd", proc_cwd_link),
				3377	LNK("root", proc_root_link),
				3378	LNK("exe", proc_exe_link),
				3379	REG("mounts", S_IRUGO, proc_mounts_operations),
				3380	REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
				3381	#ifdef CONFIG_PROC_PAGE_MONITOR
				3382	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
				3383	REG("smaps", S_IRUGO, proc_tid_smaps_operations),
				3384	REG("pagemap", S_IRUGO, proc_pagemap_operations),
				3385	#endif
				3386	#ifdef CONFIG_SECURITY
				3387	DIR("attr", S_IRUGO\|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
				3388	#endif
				3389	#ifdef CONFIG_KALLSYMS
				3390	INF("wchan", S_IRUGO, proc_pid_wchan),
				3391	#endif
				3392	#ifdef CONFIG_STACKTRACE
				3393	ONE("stack", S_IRUGO, proc_pid_stack),
				3394	#endif
				3395	#ifdef CONFIG_SCHEDSTATS
				3396	INF("schedstat", S_IRUGO, proc_pid_schedstat),
				3397	#endif
				3398	#ifdef CONFIG_LATENCYTOP
				3399	REG("latency", S_IRUGO, proc_lstats_operations),
				3400	#endif
				3401	#ifdef CONFIG_PROC_PID_CPUSET
				3402	REG("cpuset", S_IRUGO, proc_cpuset_operations),
				3403	#endif
				3404	#ifdef CONFIG_CGROUPS
				3405	REG("cgroup", S_IRUGO, proc_cgroup_operations),
				3406	#endif
				3407	INF("oom_score", S_IRUGO, proc_oom_score),
				3408	REG("oom_adj", S_IRUGO\|S_IWUSR, proc_oom_adjust_operations),
				3409	REG("oom_score_adj", S_IRUGO\|S_IWUSR, proc_oom_score_adj_operations),
				3410	#ifdef CONFIG_AUDITSYSCALL
				3411	REG("loginuid", S_IWUSR\|S_IRUGO, proc_loginuid_operations),
				3412	REG("sessionid", S_IRUGO, proc_sessionid_operations),
				3413	#endif
				3414	#ifdef CONFIG_FAULT_INJECTION
				3415	REG("make-it-fail", S_IRUGO\|S_IWUSR, proc_fault_inject_operations),
				3416	#endif
				3417	#ifdef CONFIG_TASK_IO_ACCOUNTING
				3418	INF("io", S_IRUSR, proc_tid_io_accounting),
				3419	#endif
				3420	#ifdef CONFIG_HARDWALL
				3421	INF("hardwall", S_IRUGO, proc_pid_hardwall),
				3422	#endif
				3423	};
				3424
				3425	static int proc_tid_base_readdir(struct file * filp,
				3426	void * dirent, filldir_t filldir)
				3427	{
				3428	return proc_pident_readdir(filp,dirent,filldir,
				3429	tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
				3430	}
				3431
				3432	static struct dentry proc_tid_base_lookup(struct inode dir, struct dentry dentry, struct nameidata nd){
				3433	return proc_pident_lookup(dir, dentry,
				3434	tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
				3435	}
				3436
				3437	static const struct file_operations proc_tid_base_operations = {
				3438	.read = generic_read_dir,
				3439	.readdir = proc_tid_base_readdir,
				3440	.llseek = default_llseek,
				3441	};
				3442
				3443	static const struct inode_operations proc_tid_base_inode_operations = {
				3444	.lookup = proc_tid_base_lookup,
				3445	.getattr = pid_getattr,
				3446	.setattr = proc_setattr,
				3447	};
				3448
				3449	static struct dentry proc_task_instantiate(struct inode dir,
				3450	struct dentry dentry, struct task_struct task, const void *ptr)
				3451	{
				3452	struct dentry *error = ERR_PTR(-ENOENT);
				3453	struct inode *inode;
				3454	inode = proc_pid_make_inode(dir->i_sb, task);
				3455
				3456	if (!inode)
				3457	goto out;
				3458	inode->i_mode = S_IFDIR\|S_IRUGO\|S_IXUGO;
				3459	inode->i_op = &proc_tid_base_inode_operations;
				3460	inode->i_fop = &proc_tid_base_operations;
				3461	inode->i_flags\|=S_IMMUTABLE;
				3462
				3463	set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff,
				3464	ARRAY_SIZE(tid_base_stuff)));
				3465
				3466	d_set_d_op(dentry, &pid_dentry_operations);
				3467
				3468	d_add(dentry, inode);
				3469	/* Close the race of the process dying before we return the dentry */
				3470	if (pid_revalidate(dentry, NULL))
				3471	error = NULL;
				3472	out:
				3473	return error;
				3474	}
				3475
				3476	static struct dentry proc_task_lookup(struct inode dir, struct dentry * dentry, struct nameidata *nd)
				3477	{
				3478	struct dentry *result = ERR_PTR(-ENOENT);
				3479	struct task_struct *task;
				3480	struct task_struct *leader = get_proc_task(dir);
				3481	unsigned tid;
				3482	struct pid_namespace *ns;
				3483
				3484	if (!leader)
				3485	goto out_no_task;
				3486
				3487	tid = name_to_int(dentry);
				3488	if (tid == ~0U)
				3489	goto out;
				3490
				3491	ns = dentry->d_sb->s_fs_info;
				3492	rcu_read_lock();
				3493	task = find_task_by_pid_ns(tid, ns);
				3494	if (task)
				3495	get_task_struct(task);
				3496	rcu_read_unlock();
				3497	if (!task)
				3498	goto out;
				3499	if (!same_thread_group(leader, task))
				3500	goto out_drop_task;
				3501
				3502	result = proc_task_instantiate(dir, dentry, task, NULL);
				3503	out_drop_task:
				3504	put_task_struct(task);
				3505	out:
				3506	put_task_struct(leader);
				3507	out_no_task:
				3508	return result;
				3509	}
				3510
				3511	/*
				3512	* Find the first tid of a thread group to return to user space.
				3513	*
				3514	* Usually this is just the thread group leader, but if the users
				3515	* buffer was too small or there was a seek into the middle of the
				3516	* directory we have more work todo.
				3517	*
				3518	* In the case of a short read we start with find_task_by_pid.
				3519	*
				3520	* In the case of a seek we start with the leader and walk nr
				3521	* threads past it.
				3522	*/
				3523	static struct task_struct first_tid(struct task_struct leader,
				3524	int tid, int nr, struct pid_namespace *ns)
				3525	{
				3526	struct task_struct *pos;
				3527
				3528	rcu_read_lock();
				3529	/* Attempt to start with the pid of a thread */
				3530	if (tid && (nr > 0)) {
				3531	pos = find_task_by_pid_ns(tid, ns);
				3532	if (pos && (pos->group_leader == leader))
				3533	goto found;
				3534	}
				3535
				3536	/* If nr exceeds the number of threads there is nothing todo */
				3537	pos = NULL;
				3538	if (nr && nr >= get_nr_threads(leader))
				3539	goto out;
				3540
				3541	/* If we haven't found our starting place yet start
				3542	* with the leader and walk nr threads forward.
				3543	*/
				3544	for (pos = leader; nr > 0; --nr) {
				3545	pos = next_thread(pos);
				3546	if (pos == leader) {
				3547	pos = NULL;
				3548	goto out;
				3549	}
				3550	}
				3551	found:
				3552	get_task_struct(pos);
				3553	out:
				3554	rcu_read_unlock();
				3555	return pos;
				3556	}
				3557
				3558	/*
				3559	* Find the next thread in the thread list.
				3560	* Return NULL if there is an error or no next thread.
				3561	*
				3562	* The reference to the input task_struct is released.
				3563	*/
				3564	static struct task_struct next_tid(struct task_struct start)
				3565	{
				3566	struct task_struct *pos = NULL;
				3567	rcu_read_lock();
				3568	if (pid_alive(start)) {
				3569	pos = next_thread(start);
				3570	if (thread_group_leader(pos))
				3571	pos = NULL;
				3572	else
				3573	get_task_struct(pos);
				3574	}
				3575	rcu_read_unlock();
				3576	put_task_struct(start);
				3577	return pos;
				3578	}
				3579
				3580	static int proc_task_fill_cache(struct file filp, void dirent, filldir_t filldir,
				3581	struct task_struct *task, int tid)
				3582	{
				3583	char name[PROC_NUMBUF];
				3584	int len = snprintf(name, sizeof(name), "%d", tid);
				3585	return proc_fill_cache(filp, dirent, filldir, name, len,
				3586	proc_task_instantiate, task, NULL);
				3587	}
				3588
				3589	/* for the /proc/TGID/task/ directories */
				3590	static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
				3591	{
				3592	struct dentry *dentry = filp->f_path.dentry;
				3593	struct inode *inode = dentry->d_inode;
				3594	struct task_struct *leader = NULL;
				3595	struct task_struct *task;
				3596	int retval = -ENOENT;
				3597	ino_t ino;
				3598	int tid;
				3599	struct pid_namespace *ns;
				3600
				3601	task = get_proc_task(inode);
				3602	if (!task)
				3603	goto out_no_task;
				3604	rcu_read_lock();
				3605	if (pid_alive(task)) {
				3606	leader = task->group_leader;
				3607	get_task_struct(leader);
				3608	}
				3609	rcu_read_unlock();
				3610	put_task_struct(task);
				3611	if (!leader)
				3612	goto out_no_task;
				3613	retval = 0;
				3614
				3615	switch ((unsigned long)filp->f_pos) {
				3616	case 0:
				3617	ino = inode->i_ino;
				3618	if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0)
				3619	goto out;
				3620	filp->f_pos++;
				3621	/* fall through */
				3622	case 1:
				3623	ino = parent_ino(dentry);
				3624	if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0)
				3625	goto out;
				3626	filp->f_pos++;
				3627	/* fall through */
				3628	}
				3629
				3630	/* f_version caches the tgid value that the last readdir call couldn't
				3631	* return. lseek aka telldir automagically resets f_version to 0.
				3632	*/
				3633	ns = filp->f_dentry->d_sb->s_fs_info;
				3634	tid = (int)filp->f_version;
				3635	filp->f_version = 0;
				3636	for (task = first_tid(leader, tid, filp->f_pos - 2, ns);
				3637	task;
				3638	task = next_tid(task), filp->f_pos++) {
				3639	tid = task_pid_nr_ns(task, ns);
				3640	if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
				3641	/* returning this tgid failed, save it as the first
				3642	* pid for the next readir call */
				3643	filp->f_version = (u64)tid;
				3644	put_task_struct(task);
				3645	break;
				3646	}
				3647	}
				3648	out:
				3649	put_task_struct(leader);
				3650	out_no_task:
				3651	return retval;
				3652	}
				3653
				3654	static int proc_task_getattr(struct vfsmount mnt, struct dentry dentry, struct kstat *stat)
				3655	{
				3656	struct inode *inode = dentry->d_inode;
				3657	struct task_struct *p = get_proc_task(inode);
				3658	generic_fillattr(inode, stat);
				3659
				3660	if (p) {
				3661	stat->nlink += get_nr_threads(p);
				3662	put_task_struct(p);
				3663	}
				3664
				3665	return 0;
				3666	}
				3667
				3668	static const struct inode_operations proc_task_inode_operations = {
				3669	.lookup = proc_task_lookup,
				3670	.getattr = proc_task_getattr,
				3671	.setattr = proc_setattr,
				3672	.permission = proc_pid_permission,
				3673	};
				3674
				3675	static const struct file_operations proc_task_operations = {
				3676	.read = generic_read_dir,
				3677	.readdir = proc_task_readdir,
				3678	.llseek = default_llseek,
				3679	};