Blame - marvell/linux/fs/exec.c - T108

blob: 5dffc67745c80943a32c34ac3af3721b060885ca [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0-only
				2	/*
				3	* linux/fs/exec.c
				4	*
				5	* Copyright (C) 1991, 1992 Linus Torvalds
				6	*/
				7
				8	/*
				9	* #!-checking implemented by tytso.
				10	*/
				11	/*
				12	* Demand-loading implemented 01.12.91 - no need to read anything but
				13	* the header into memory. The inode of the executable is put into
				14	* "current->executable", and page faults do the actual loading. Clean.
				15	*
				16	* Once more I can proudly say that linux stood up to being changed: it
				17	* was less than 2 hours work to get demand-loading completely implemented.
				18	*
				19	* Demand loading changed July 1993 by Eric Youngdale. Use mmap instead,
				20	* current->executable is only used by the procfs. This allows a dispatch
				21	* table to check for several different types of binary formats. We keep
				22	* trying until we recognize the file or we run out of supported binary
				23	* formats.
				24	*/
				25
				26	#include <linux/slab.h>
				27	#include <linux/file.h>
				28	#include <linux/fdtable.h>
				29	#include <linux/mm.h>
				30	#include <linux/vmacache.h>
				31	#include <linux/stat.h>
				32	#include <linux/fcntl.h>
				33	#include <linux/swap.h>
				34	#include <linux/string.h>
				35	#include <linux/init.h>
				36	#include <linux/sched/mm.h>
				37	#include <linux/sched/coredump.h>
				38	#include <linux/sched/signal.h>
				39	#include <linux/sched/numa_balancing.h>
				40	#include <linux/sched/task.h>
				41	#include <linux/pagemap.h>
				42	#include <linux/perf_event.h>
				43	#include <linux/highmem.h>
				44	#include <linux/spinlock.h>
				45	#include <linux/key.h>
				46	#include <linux/personality.h>
				47	#include <linux/binfmts.h>
				48	#include <linux/utsname.h>
				49	#include <linux/pid_namespace.h>
				50	#include <linux/module.h>
				51	#include <linux/namei.h>
				52	#include <linux/mount.h>
				53	#include <linux/security.h>
				54	#include <linux/syscalls.h>
				55	#include <linux/tsacct_kern.h>
				56	#include <linux/cn_proc.h>
				57	#include <linux/audit.h>
				58	#include <linux/tracehook.h>
				59	#include <linux/kmod.h>
				60	#include <linux/fsnotify.h>
				61	#include <linux/fs_struct.h>
				62	#include <linux/pipe_fs_i.h>
				63	#include <linux/oom.h>
				64	#include <linux/compat.h>
				65	#include <linux/vmalloc.h>
				66
				67	#include <linux/uaccess.h>
				68	#include <asm/mmu_context.h>
				69	#include <asm/tlb.h>
				70
				71	#include <trace/events/task.h>
				72	#include "internal.h"
				73
				74	#include <trace/events/sched.h>
				75
				76	int suid_dumpable = 0;
				77
				78	static LIST_HEAD(formats);
				79	static DEFINE_RWLOCK(binfmt_lock);
				80
				81	void __register_binfmt(struct linux_binfmt * fmt, int insert)
				82	{
				83	BUG_ON(!fmt);
				84	if (WARN_ON(!fmt->load_binary))
				85	return;
				86	write_lock(&binfmt_lock);
				87	insert ? list_add(&fmt->lh, &formats) :
				88	list_add_tail(&fmt->lh, &formats);
				89	write_unlock(&binfmt_lock);
				90	}
				91
				92	EXPORT_SYMBOL(__register_binfmt);
				93
				94	void unregister_binfmt(struct linux_binfmt * fmt)
				95	{
				96	write_lock(&binfmt_lock);
				97	list_del(&fmt->lh);
				98	write_unlock(&binfmt_lock);
				99	}
				100
				101	EXPORT_SYMBOL(unregister_binfmt);
				102
				103	static inline void put_binfmt(struct linux_binfmt * fmt)
				104	{
				105	module_put(fmt->module);
				106	}
				107
				108	bool path_noexec(const struct path *path)
				109	{
				110	return (path->mnt->mnt_flags & MNT_NOEXEC) \|\|
				111	(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
				112	}
				113
				114	#ifdef CONFIG_USELIB
				115	/*
				116	* Note that a shared library must be both readable and executable due to
				117	* security reasons.
				118	*
				119	* Also note that we take the address to load from from the file itself.
				120	*/
				121	SYSCALL_DEFINE1(uselib, const char __user *, library)
				122	{
				123	struct linux_binfmt *fmt;
				124	struct file *file;
				125	struct filename *tmp = getname(library);
				126	int error = PTR_ERR(tmp);
				127	static const struct open_flags uselib_flags = {
				128	.open_flag = O_LARGEFILE \| O_RDONLY \| __FMODE_EXEC,
				129	.acc_mode = MAY_READ \| MAY_EXEC,
				130	.intent = LOOKUP_OPEN,
				131	.lookup_flags = LOOKUP_FOLLOW,
				132	};
				133
				134	if (IS_ERR(tmp))
				135	goto out;
				136
				137	file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
				138	putname(tmp);
				139	error = PTR_ERR(file);
				140	if (IS_ERR(file))
				141	goto out;
				142
				143	error = -EINVAL;
				144	if (!S_ISREG(file_inode(file)->i_mode))
				145	goto exit;
				146
				147	error = -EACCES;
				148	if (path_noexec(&file->f_path))
				149	goto exit;
				150
				151	fsnotify_open(file);
				152
				153	error = -ENOEXEC;
				154
				155	read_lock(&binfmt_lock);
				156	list_for_each_entry(fmt, &formats, lh) {
				157	if (!fmt->load_shlib)
				158	continue;
				159	if (!try_module_get(fmt->module))
				160	continue;
				161	read_unlock(&binfmt_lock);
				162	error = fmt->load_shlib(file);
				163	read_lock(&binfmt_lock);
				164	put_binfmt(fmt);
				165	if (error != -ENOEXEC)
				166	break;
				167	}
				168	read_unlock(&binfmt_lock);
				169	exit:
				170	fput(file);
				171	out:
				172	return error;
				173	}
				174	#endif /* #ifdef CONFIG_USELIB */
				175
				176	#ifdef CONFIG_MMU
				177	/*
				178	* The nascent bprm->mm is not visible until exec_mmap() but it can
				179	* use a lot of memory, account these pages in current->mm temporary
				180	* for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
				181	* change the counter back via acct_arg_size(0).
				182	*/
				183	static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
				184	{
				185	struct mm_struct *mm = current->mm;
				186	long diff = (long)(pages - bprm->vma_pages);
				187
				188	if (!mm \|\| !diff)
				189	return;
				190
				191	bprm->vma_pages = pages;
				192	add_mm_counter(mm, MM_ANONPAGES, diff);
				193	}
				194
				195	static struct page get_arg_page(struct linux_binprm bprm, unsigned long pos,
				196	int write)
				197	{
				198	struct page *page;
				199	int ret;
				200	unsigned int gup_flags = FOLL_FORCE;
				201
				202	#ifdef CONFIG_STACK_GROWSUP
				203	if (write) {
				204	ret = expand_downwards(bprm->vma, pos);
				205	if (ret < 0)
				206	return NULL;
				207	}
				208	#endif
				209
				210	if (write)
				211	gup_flags \|= FOLL_WRITE;
				212
				213	/*
				214	* We are doing an exec(). 'current' is the process
				215	* doing the exec and bprm->mm is the new process's mm.
				216	*/
				217	ret = get_user_pages_remote(current, bprm->mm, pos, 1, gup_flags,
				218	&page, NULL, NULL);
				219	if (ret <= 0)
				220	return NULL;
				221
				222	if (write)
				223	acct_arg_size(bprm, vma_pages(bprm->vma));
				224
				225	return page;
				226	}
				227
				228	static void put_arg_page(struct page *page)
				229	{
				230	put_page(page);
				231	}
				232
				233	static void free_arg_pages(struct linux_binprm *bprm)
				234	{
				235	}
				236
				237	static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
				238	struct page *page)
				239	{
				240	flush_cache_page(bprm->vma, pos, page_to_pfn(page));
				241	}
				242
				243	static int __bprm_mm_init(struct linux_binprm *bprm)
				244	{
				245	int err;
				246	struct vm_area_struct *vma = NULL;
				247	struct mm_struct *mm = bprm->mm;
				248
				249	bprm->vma = vma = vm_area_alloc(mm);
				250	if (!vma)
				251	return -ENOMEM;
				252	vma_set_anonymous(vma);
				253
				254	if (down_write_killable(&mm->mmap_sem)) {
				255	err = -EINTR;
				256	goto err_free;
				257	}
				258
				259	/*
				260	* Place the stack at the largest stack address the architecture
				261	* supports. Later, we'll move this to an appropriate place. We don't
				262	* use STACK_TOP because that can depend on attributes which aren't
				263	* configured yet.
				264	*/
				265	BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
				266	vma->vm_end = STACK_TOP_MAX;
				267	vma->vm_start = vma->vm_end - PAGE_SIZE;
				268	vma->vm_flags = VM_SOFTDIRTY \| VM_STACK_FLAGS \| VM_STACK_INCOMPLETE_SETUP;
				269	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
				270
				271	err = insert_vm_struct(mm, vma);
				272	if (err)
				273	goto err;
				274
				275	mm->stack_vm = mm->total_vm = 1;
				276	arch_bprm_mm_init(mm, vma);
				277	up_write(&mm->mmap_sem);
				278	bprm->p = vma->vm_end - sizeof(void *);
				279	return 0;
				280	err:
				281	up_write(&mm->mmap_sem);
				282	err_free:
				283	bprm->vma = NULL;
				284	vm_area_free(vma);
				285	return err;
				286	}
				287
				288	static bool valid_arg_len(struct linux_binprm *bprm, long len)
				289	{
				290	return len <= MAX_ARG_STRLEN;
				291	}
				292
				293	#else
				294
				295	static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
				296	{
				297	}
				298
				299	static struct page get_arg_page(struct linux_binprm bprm, unsigned long pos,
				300	int write)
				301	{
				302	struct page *page;
				303
				304	page = bprm->page[pos / PAGE_SIZE];
				305	if (!page && write) {
				306	page = alloc_page(GFP_HIGHUSER\|__GFP_ZERO);
				307	if (!page)
				308	return NULL;
				309	bprm->page[pos / PAGE_SIZE] = page;
				310	}
				311
				312	return page;
				313	}
				314
				315	static void put_arg_page(struct page *page)
				316	{
				317	}
				318
				319	static void free_arg_page(struct linux_binprm *bprm, int i)
				320	{
				321	if (bprm->page[i]) {
				322	__free_page(bprm->page[i]);
				323	bprm->page[i] = NULL;
				324	}
				325	}
				326
				327	static void free_arg_pages(struct linux_binprm *bprm)
				328	{
				329	int i;
				330
				331	for (i = 0; i < MAX_ARG_PAGES; i++)
				332	free_arg_page(bprm, i);
				333	}
				334
				335	static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
				336	struct page *page)
				337	{
				338	}
				339
				340	static int __bprm_mm_init(struct linux_binprm *bprm)
				341	{
				342	bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
				343	return 0;
				344	}
				345
				346	static bool valid_arg_len(struct linux_binprm *bprm, long len)
				347	{
				348	return len <= bprm->p;
				349	}
				350
				351	#endif /* CONFIG_MMU */
				352
				353	/*
				354	* Create a new mm_struct and populate it with a temporary stack
				355	* vm_area_struct. We don't have enough context at this point to set the stack
				356	* flags, permissions, and offset, so we use temporary values. We'll update
				357	* them later in setup_arg_pages().
				358	*/
				359	static int bprm_mm_init(struct linux_binprm *bprm)
				360	{
				361	int err;
				362	struct mm_struct *mm = NULL;
				363
				364	bprm->mm = mm = mm_alloc();
				365	err = -ENOMEM;
				366	if (!mm)
				367	goto err;
				368
				369	/* Save current stack limit for all calculations made during exec. */
				370	task_lock(current->group_leader);
				371	bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
				372	task_unlock(current->group_leader);
				373
				374	err = __bprm_mm_init(bprm);
				375	if (err)
				376	goto err;
				377
				378	return 0;
				379
				380	err:
				381	if (mm) {
				382	bprm->mm = NULL;
				383	mmdrop(mm);
				384	}
				385
				386	return err;
				387	}
				388
				389	struct user_arg_ptr {
				390	#ifdef CONFIG_COMPAT
				391	bool is_compat;
				392	#endif
				393	union {
				394	const char __user const __user native;
				395	#ifdef CONFIG_COMPAT
				396	const compat_uptr_t __user *compat;
				397	#endif
				398	} ptr;
				399	};
				400
				401	static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
				402	{
				403	const char __user *native;
				404
				405	#ifdef CONFIG_COMPAT
				406	if (unlikely(argv.is_compat)) {
				407	compat_uptr_t compat;
				408
				409	if (get_user(compat, argv.ptr.compat + nr))
				410	return ERR_PTR(-EFAULT);
				411
				412	return compat_ptr(compat);
				413	}
				414	#endif
				415
				416	if (get_user(native, argv.ptr.native + nr))
				417	return ERR_PTR(-EFAULT);
				418
				419	return native;
				420	}
				421
				422	/*
				423	* count() counts the number of strings in array ARGV.
				424	*/
				425	static int count(struct user_arg_ptr argv, int max)
				426	{
				427	int i = 0;
				428
				429	if (argv.ptr.native != NULL) {
				430	for (;;) {
				431	const char __user *p = get_user_arg_ptr(argv, i);
				432
				433	if (!p)
				434	break;
				435
				436	if (IS_ERR(p))
				437	return -EFAULT;
				438
				439	if (i >= max)
				440	return -E2BIG;
				441	++i;
				442
				443	if (fatal_signal_pending(current))
				444	return -ERESTARTNOHAND;
				445	cond_resched();
				446	}
				447	}
				448	return i;
				449	}
				450
				451	static int prepare_arg_pages(struct linux_binprm *bprm,
				452	struct user_arg_ptr argv, struct user_arg_ptr envp)
				453	{
				454	unsigned long limit, ptr_size;
				455
				456	bprm->argc = count(argv, MAX_ARG_STRINGS);
				457	if (bprm->argc == 0)
				458	pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n",
				459	current->comm, bprm->filename);
				460	if (bprm->argc < 0)
				461	return bprm->argc;
				462
				463	bprm->envc = count(envp, MAX_ARG_STRINGS);
				464	if (bprm->envc < 0)
				465	return bprm->envc;
				466
				467	/*
				468	* Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
				469	* (whichever is smaller) for the argv+env strings.
				470	* This ensures that:
				471	* - the remaining binfmt code will not run out of stack space,
				472	* - the program will have a reasonable amount of stack left
				473	* to work from.
				474	*/
				475	limit = _STK_LIM / 4 * 3;
				476	limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
				477	/*
				478	* We've historically supported up to 32 pages (ARG_MAX)
				479	* of argument strings even with small stacks
				480	*/
				481	limit = max_t(unsigned long, limit, ARG_MAX);
				482	/*
				483	* We must account for the size of all the argv and envp pointers to
				484	* the argv and envp strings, since they will also take up space in
				485	* the stack. They aren't stored until much later when we can't
				486	* signal to the parent that the child has run out of stack space.
				487	* Instead, calculate it here so it's possible to fail gracefully.
				488	*
				489	* In the case of argc = 0, make sure there is space for adding a
				490	* empty string (which will bump argc to 1), to ensure confused
				491	* userspace programs don't start processing from argv[1], thinking
				492	* argc can never be 0, to keep them from walking envp by accident.
				493	* See do_execveat_common().
				494	*/
				495	ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *);
				496	if (limit <= ptr_size)
				497	return -E2BIG;
				498	limit -= ptr_size;
				499
				500	bprm->argmin = bprm->p - limit;
				501	return 0;
				502	}
				503
				504	/*
				505	* 'copy_strings()' copies argument/environment strings from the old
				506	* processes's memory to the new process's stack. The call to get_user_pages()
				507	* ensures the destination page is created and not swapped out.
				508	*/
				509	static int copy_strings(int argc, struct user_arg_ptr argv,
				510	struct linux_binprm *bprm)
				511	{
				512	struct page *kmapped_page = NULL;
				513	char *kaddr = NULL;
				514	unsigned long kpos = 0;
				515	int ret;
				516
				517	while (argc-- > 0) {
				518	const char __user *str;
				519	int len;
				520	unsigned long pos;
				521
				522	ret = -EFAULT;
				523	str = get_user_arg_ptr(argv, argc);
				524	if (IS_ERR(str))
				525	goto out;
				526
				527	len = strnlen_user(str, MAX_ARG_STRLEN);
				528	if (!len)
				529	goto out;
				530
				531	ret = -E2BIG;
				532	if (!valid_arg_len(bprm, len))
				533	goto out;
				534
				535	/* We're going to work our way backwords. */
				536	pos = bprm->p;
				537	str += len;
				538	bprm->p -= len;
				539	#ifdef CONFIG_MMU
				540	if (bprm->p < bprm->argmin)
				541	goto out;
				542	#endif
				543
				544	while (len > 0) {
				545	int offset, bytes_to_copy;
				546
				547	if (fatal_signal_pending(current)) {
				548	ret = -ERESTARTNOHAND;
				549	goto out;
				550	}
				551	cond_resched();
				552
				553	offset = pos % PAGE_SIZE;
				554	if (offset == 0)
				555	offset = PAGE_SIZE;
				556
				557	bytes_to_copy = offset;
				558	if (bytes_to_copy > len)
				559	bytes_to_copy = len;
				560
				561	offset -= bytes_to_copy;
				562	pos -= bytes_to_copy;
				563	str -= bytes_to_copy;
				564	len -= bytes_to_copy;
				565
				566	if (!kmapped_page \|\| kpos != (pos & PAGE_MASK)) {
				567	struct page *page;
				568
				569	page = get_arg_page(bprm, pos, 1);
				570	if (!page) {
				571	ret = -E2BIG;
				572	goto out;
				573	}
				574
				575	if (kmapped_page) {
				576	flush_kernel_dcache_page(kmapped_page);
				577	kunmap(kmapped_page);
				578	put_arg_page(kmapped_page);
				579	}
				580	kmapped_page = page;
				581	kaddr = kmap(kmapped_page);
				582	kpos = pos & PAGE_MASK;
				583	flush_arg_page(bprm, kpos, kmapped_page);
				584	}
				585	if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
				586	ret = -EFAULT;
				587	goto out;
				588	}
				589	}
				590	}
				591	ret = 0;
				592	out:
				593	if (kmapped_page) {
				594	flush_kernel_dcache_page(kmapped_page);
				595	kunmap(kmapped_page);
				596	put_arg_page(kmapped_page);
				597	}
				598	return ret;
				599	}
				600
				601	/*
				602	* Like copy_strings, but get argv and its values from kernel memory.
				603	*/
				604	int copy_strings_kernel(int argc, const char const __argv,
				605	struct linux_binprm *bprm)
				606	{
				607	int r;
				608	mm_segment_t oldfs = get_fs();
				609	struct user_arg_ptr argv = {
				610	.ptr.native = (const char __user const __user )__argv,
				611	};
				612
				613	set_fs(KERNEL_DS);
				614	r = copy_strings(argc, argv, bprm);
				615	set_fs(oldfs);
				616
				617	return r;
				618	}
				619	EXPORT_SYMBOL(copy_strings_kernel);
				620
				621	#ifdef CONFIG_MMU
				622
				623	/*
				624	* During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once
				625	* the binfmt code determines where the new stack should reside, we shift it to
				626	* its final location. The process proceeds as follows:
				627	*
				628	* 1) Use shift to calculate the new vma endpoints.
				629	* 2) Extend vma to cover both the old and new ranges. This ensures the
				630	* arguments passed to subsequent functions are consistent.
				631	* 3) Move vma's page tables to the new range.
				632	* 4) Free up any cleared pgd range.
				633	* 5) Shrink the vma to cover only the new range.
				634	*/
				635	static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
				636	{
				637	struct mm_struct *mm = vma->vm_mm;
				638	unsigned long old_start = vma->vm_start;
				639	unsigned long old_end = vma->vm_end;
				640	unsigned long length = old_end - old_start;
				641	unsigned long new_start = old_start - shift;
				642	unsigned long new_end = old_end - shift;
				643	struct mmu_gather tlb;
				644
				645	BUG_ON(new_start > new_end);
				646
				647	/*
				648	* ensure there are no vmas between where we want to go
				649	* and where we are
				650	*/
				651	if (vma != find_vma(mm, new_start))
				652	return -EFAULT;
				653
				654	/*
				655	* cover the whole range: [new_start, old_end)
				656	*/
				657	if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL))
				658	return -ENOMEM;
				659
				660	/*
				661	* move the page tables downwards, on failure we rely on
				662	* process cleanup to remove whatever mess we made.
				663	*/
				664	if (length != move_page_tables(vma, old_start,
				665	vma, new_start, length, false))
				666	return -ENOMEM;
				667
				668	lru_add_drain();
				669	tlb_gather_mmu(&tlb, mm, old_start, old_end);
				670	if (new_end > old_start) {
				671	/*
				672	* when the old and new regions overlap clear from new_end.
				673	*/
				674	free_pgd_range(&tlb, new_end, old_end, new_end,
				675	vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
				676	} else {
				677	/*
				678	* otherwise, clean from old_start; this is done to not touch
				679	* the address space in [new_end, old_start) some architectures
				680	* have constraints on va-space that make this illegal (IA64) -
				681	* for the others its just a little faster.
				682	*/
				683	free_pgd_range(&tlb, old_start, old_end, new_end,
				684	vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
				685	}
				686	tlb_finish_mmu(&tlb, old_start, old_end);
				687
				688	/*
				689	* Shrink the vma to just the new range. Always succeeds.
				690	*/
				691	vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);
				692
				693	return 0;
				694	}
				695
				696	/*
				697	* Finalizes the stack vm_area_struct. The flags and permissions are updated,
				698	* the stack is optionally relocated, and some extra space is added.
				699	*/
				700	int setup_arg_pages(struct linux_binprm *bprm,
				701	unsigned long stack_top,
				702	int executable_stack)
				703	{
				704	unsigned long ret;
				705	unsigned long stack_shift;
				706	struct mm_struct *mm = current->mm;
				707	struct vm_area_struct *vma = bprm->vma;
				708	struct vm_area_struct *prev = NULL;
				709	unsigned long vm_flags;
				710	unsigned long stack_base;
				711	unsigned long stack_size;
				712	unsigned long stack_expand;
				713	unsigned long rlim_stack;
				714
				715	#ifdef CONFIG_STACK_GROWSUP
				716	/* Limit stack size */
				717	stack_base = bprm->rlim_stack.rlim_max;
				718	if (stack_base > STACK_SIZE_MAX)
				719	stack_base = STACK_SIZE_MAX;
				720
				721	/* Add space for stack randomization. */
				722	if (current->flags & PF_RANDOMIZE)
				723	stack_base += (STACK_RND_MASK << PAGE_SHIFT);
				724
				725	/* Make sure we didn't let the argument array grow too large. */
				726	if (vma->vm_end - vma->vm_start > stack_base)
				727	return -ENOMEM;
				728
				729	stack_base = PAGE_ALIGN(stack_top - stack_base);
				730
				731	stack_shift = vma->vm_start - stack_base;
				732	mm->arg_start = bprm->p - stack_shift;
				733	bprm->p = vma->vm_end - stack_shift;
				734	#else
				735	stack_top = arch_align_stack(stack_top);
				736	stack_top = PAGE_ALIGN(stack_top);
				737
				738	if (unlikely(stack_top < mmap_min_addr) \|\|
				739	unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
				740	return -ENOMEM;
				741
				742	stack_shift = vma->vm_end - stack_top;
				743
				744	bprm->p -= stack_shift;
				745	mm->arg_start = bprm->p;
				746	#endif
				747
				748	if (bprm->loader)
				749	bprm->loader -= stack_shift;
				750	bprm->exec -= stack_shift;
				751
				752	if (down_write_killable(&mm->mmap_sem))
				753	return -EINTR;
				754
				755	vm_flags = VM_STACK_FLAGS;
				756
				757	/*
				758	* Adjust stack execute permissions; explicitly enable for
				759	* EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone
				760	* (arch default) otherwise.
				761	*/
				762	if (unlikely(executable_stack == EXSTACK_ENABLE_X))
				763	vm_flags \|= VM_EXEC;
				764	else if (executable_stack == EXSTACK_DISABLE_X)
				765	vm_flags &= ~VM_EXEC;
				766	vm_flags \|= mm->def_flags;
				767	vm_flags \|= VM_STACK_INCOMPLETE_SETUP;
				768
				769	ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end,
				770	vm_flags);
				771	if (ret)
				772	goto out_unlock;
				773	BUG_ON(prev != vma);
				774
				775	/* Move stack pages down in memory. */
				776	if (stack_shift) {
				777	ret = shift_arg_pages(vma, stack_shift);
				778	if (ret)
				779	goto out_unlock;
				780	}
				781
				782	/* mprotect_fixup is overkill to remove the temporary stack flags */
				783	vma->vm_flags &= ~VM_STACK_INCOMPLETE_SETUP;
				784
				785	stack_expand = 131072UL; /* randomly 324k (or 264k) pages */
				786	stack_size = vma->vm_end - vma->vm_start;
				787	/*
				788	* Align this down to a page boundary as expand_stack
				789	* will align it up.
				790	*/
				791	rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
				792	#ifdef CONFIG_STACK_GROWSUP
				793	if (stack_size + stack_expand > rlim_stack)
				794	stack_base = vma->vm_start + rlim_stack;
				795	else
				796	stack_base = vma->vm_end + stack_expand;
				797	#else
				798	if (stack_size + stack_expand > rlim_stack)
				799	stack_base = vma->vm_end - rlim_stack;
				800	else
				801	stack_base = vma->vm_start - stack_expand;
				802	#endif
				803	current->mm->start_stack = bprm->p;
				804	ret = expand_stack(vma, stack_base);
				805	if (ret)
				806	ret = -EFAULT;
				807
				808	out_unlock:
				809	up_write(&mm->mmap_sem);
				810	return ret;
				811	}
				812	EXPORT_SYMBOL(setup_arg_pages);
				813
				814	#else
				815
				816	/*
				817	* Transfer the program arguments and environment from the holding pages
				818	* onto the stack. The provided stack pointer is adjusted accordingly.
				819	*/
				820	int transfer_args_to_stack(struct linux_binprm *bprm,
				821	unsigned long *sp_location)
				822	{
				823	unsigned long index, stop, sp;
				824	int ret = 0;
				825
				826	stop = bprm->p >> PAGE_SHIFT;
				827	sp = *sp_location;
				828
				829	for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
				830	unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0;
				831	char *src = kmap(bprm->page[index]) + offset;
				832	sp -= PAGE_SIZE - offset;
				833	if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0)
				834	ret = -EFAULT;
				835	kunmap(bprm->page[index]);
				836	if (ret)
				837	goto out;
				838	}
				839
				840	bprm->exec += sp_location - MAX_ARG_PAGES PAGE_SIZE;
				841	*sp_location = sp;
				842
				843	out:
				844	return ret;
				845	}
				846	EXPORT_SYMBOL(transfer_args_to_stack);
				847
				848	#endif /* CONFIG_MMU */
				849
				850	static struct file do_open_execat(int fd, struct filename name, int flags)
				851	{
				852	struct file *file;
				853	int err;
				854	struct open_flags open_exec_flags = {
				855	.open_flag = O_LARGEFILE \| O_RDONLY \| __FMODE_EXEC,
				856	.acc_mode = MAY_EXEC,
				857	.intent = LOOKUP_OPEN,
				858	.lookup_flags = LOOKUP_FOLLOW,
				859	};
				860
				861	if ((flags & ~(AT_SYMLINK_NOFOLLOW \| AT_EMPTY_PATH)) != 0)
				862	return ERR_PTR(-EINVAL);
				863	if (flags & AT_SYMLINK_NOFOLLOW)
				864	open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
				865	if (flags & AT_EMPTY_PATH)
				866	open_exec_flags.lookup_flags \|= LOOKUP_EMPTY;
				867
				868	file = do_filp_open(fd, name, &open_exec_flags);
				869	if (IS_ERR(file))
				870	goto out;
				871
				872	err = -EACCES;
				873	if (!S_ISREG(file_inode(file)->i_mode))
				874	goto exit;
				875
				876	if (path_noexec(&file->f_path))
				877	goto exit;
				878
				879	err = deny_write_access(file);
				880	if (err)
				881	goto exit;
				882
				883	if (name->name[0] != '\0')
				884	fsnotify_open(file);
				885
				886	out:
				887	return file;
				888
				889	exit:
				890	fput(file);
				891	return ERR_PTR(err);
				892	}
				893
				894	struct file open_exec(const char name)
				895	{
				896	struct filename *filename = getname_kernel(name);
				897	struct file *f = ERR_CAST(filename);
				898
				899	if (!IS_ERR(filename)) {
				900	f = do_open_execat(AT_FDCWD, filename, 0);
				901	putname(filename);
				902	}
				903	return f;
				904	}
				905	EXPORT_SYMBOL(open_exec);
				906
				907	int kernel_read_file(struct file file, void buf, loff_t size,
				908	loff_t max_size, enum kernel_read_file_id id)
				909	{
				910	loff_t i_size, pos;
				911	ssize_t bytes = 0;
				912	int ret;
				913
				914	if (!S_ISREG(file_inode(file)->i_mode) \|\| max_size < 0)
				915	return -EINVAL;
				916
				917	ret = deny_write_access(file);
				918	if (ret)
				919	return ret;
				920
				921	ret = security_kernel_read_file(file, id);
				922	if (ret)
				923	goto out;
				924
				925	i_size = i_size_read(file_inode(file));
				926	if (i_size <= 0) {
				927	ret = -EINVAL;
				928	goto out;
				929	}
				930	if (i_size > SIZE_MAX \|\| (max_size > 0 && i_size > max_size)) {
				931	ret = -EFBIG;
				932	goto out;
				933	}
				934
				935	if (id != READING_FIRMWARE_PREALLOC_BUFFER)
				936	*buf = vmalloc(i_size);
				937	if (!*buf) {
				938	ret = -ENOMEM;
				939	goto out;
				940	}
				941
				942	pos = 0;
				943	while (pos < i_size) {
				944	bytes = kernel_read(file, *buf + pos, i_size - pos, &pos);
				945	if (bytes < 0) {
				946	ret = bytes;
				947	goto out_free;
				948	}
				949
				950	if (bytes == 0)
				951	break;
				952	}
				953
				954	if (pos != i_size) {
				955	ret = -EIO;
				956	goto out_free;
				957	}
				958
				959	ret = security_kernel_post_read_file(file, *buf, i_size, id);
				960	if (!ret)
				961	*size = pos;
				962
				963	out_free:
				964	if (ret < 0) {
				965	if (id != READING_FIRMWARE_PREALLOC_BUFFER) {
				966	vfree(*buf);
				967	*buf = NULL;
				968	}
				969	}
				970
				971	out:
				972	allow_write_access(file);
				973	return ret;
				974	}
				975	EXPORT_SYMBOL_GPL(kernel_read_file);
				976
				977	int kernel_read_file_from_path(const char path, void buf, loff_t size,
				978	loff_t max_size, enum kernel_read_file_id id)
				979	{
				980	struct file *file;
				981	int ret;
				982
				983	if (!path \|\| !*path)
				984	return -EINVAL;
				985
				986	file = filp_open(path, O_RDONLY, 0);
				987	if (IS_ERR(file))
				988	return PTR_ERR(file);
				989
				990	ret = kernel_read_file(file, buf, size, max_size, id);
				991	fput(file);
				992	return ret;
				993	}
				994	EXPORT_SYMBOL_GPL(kernel_read_file_from_path);
				995
				996	int kernel_read_file_from_fd(int fd, void *buf, loff_t size, loff_t max_size,
				997	enum kernel_read_file_id id)
				998	{
				999	struct fd f = fdget(fd);
				1000	int ret = -EBADF;
				1001
				1002	if (!f.file \|\| !(f.file->f_mode & FMODE_READ))
				1003	goto out;
				1004
				1005	ret = kernel_read_file(f.file, buf, size, max_size, id);
				1006	out:
				1007	fdput(f);
				1008	return ret;
				1009	}
				1010	EXPORT_SYMBOL_GPL(kernel_read_file_from_fd);
				1011
				1012	ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
				1013	{
				1014	ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
				1015	if (res > 0)
				1016	flush_icache_range(addr, addr + len);
				1017	return res;
				1018	}
				1019	EXPORT_SYMBOL(read_code);
				1020
				1021	/*
				1022	* Maps the mm_struct mm into the current task struct.
				1023	* On success, this function returns with exec_update_lock
				1024	* held for writing.
				1025	*/
				1026	static int exec_mmap(struct mm_struct *mm)
				1027	{
				1028	struct task_struct *tsk;
				1029	struct mm_struct old_mm, active_mm;
				1030	int ret;
				1031
				1032	/* Notify parent that we're no longer interested in the old VM */
				1033	tsk = current;
				1034	old_mm = current->mm;
				1035	exec_mm_release(tsk, old_mm);
				1036
				1037	ret = down_write_killable(&tsk->signal->exec_update_lock);
				1038	if (ret)
				1039	return ret;
				1040
				1041	if (old_mm) {
				1042	sync_mm_rss(old_mm);
				1043	/*
				1044	* Make sure that if there is a core dump in progress
				1045	* for the old mm, we get out and die instead of going
				1046	* through with the exec. We must hold mmap_sem around
				1047	* checking core_state and changing tsk->mm.
				1048	*/
				1049	down_read(&old_mm->mmap_sem);
				1050	if (unlikely(old_mm->core_state)) {
				1051	up_read(&old_mm->mmap_sem);
				1052	up_write(&tsk->signal->exec_update_lock);
				1053	return -EINTR;
				1054	}
				1055	}
				1056
				1057	task_lock(tsk);
				1058	membarrier_exec_mmap(mm);
				1059
				1060	local_irq_disable();
				1061	active_mm = tsk->active_mm;
				1062	tsk->active_mm = mm;
				1063	tsk->mm = mm;
				1064	/*
				1065	* This prevents preemption while active_mm is being loaded and
				1066	* it and mm are being updated, which could cause problems for
				1067	* lazy tlb mm refcounting when these are updated by context
				1068	* switches. Not all architectures can handle irqs off over
				1069	* activate_mm yet.
				1070	*/
				1071	if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
				1072	local_irq_enable();
				1073	activate_mm(active_mm, mm);
				1074	if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
				1075	local_irq_enable();
				1076	tsk->mm->vmacache_seqnum = 0;
				1077	vmacache_flush(tsk);
				1078	task_unlock(tsk);
				1079	if (old_mm) {
				1080	up_read(&old_mm->mmap_sem);
				1081	BUG_ON(active_mm != old_mm);
				1082	setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
				1083	mm_update_next_owner(old_mm);
				1084	mmput(old_mm);
				1085	return 0;
				1086	}
				1087	mmdrop(active_mm);
				1088	return 0;
				1089	}
				1090
				1091	/*
				1092	* This function makes sure the current process has its own signal table,
				1093	* so that flush_signal_handlers can later reset the handlers without
				1094	* disturbing other processes. (Other processes might share the signal
				1095	* table via the CLONE_SIGHAND option to clone().)
					*
					* Returns 0 on success, -EAGAIN if another group exit is already in
					* progress or a fatal signal arrives while waiting, and -ENOMEM if a
					* private sighand_struct cannot be allocated.
				1096	*/
				1097	static int de_thread(struct task_struct *tsk)
				1098	{
				1099	struct signal_struct *sig = tsk->signal;
				1100	struct sighand_struct *oldsighand = tsk->sighand;
				1101	spinlock_t *lock = &oldsighand->siglock;
				1102
					/* Nothing to kill or wait for when we are the only thread. */
				1103	if (thread_group_empty(tsk))
				1104	goto no_thread_group;
				1105
				1106	/*
				1107	* Kill all other threads in the thread group.
				1108	*/
				1109	spin_lock_irq(lock);
				1110	if (signal_group_exit(sig)) {
				1111	/*
				1112	* Another group action in progress, just
				1113	* return so that the signal is processed.
				1114	*/
				1115	spin_unlock_irq(lock);
				1116	return -EAGAIN;
				1117	}
				1118
					/*
					* Mark ourselves as the task the group is exiting for and seed
					* notify_count from zap_other_threads().  A non-leader drops the
					* count by one; NOTE(review): presumably because the leader is
					* waited for separately in the loop further down - confirm.
					*/
				1119	sig->group_exit_task = tsk;
				1120	sig->notify_count = zap_other_threads(tsk);
				1121	if (!thread_group_leader(tsk))
				1122	sig->notify_count--;
				1123
					/* Sleep killably until notify_count reaches zero. */
				1124	while (sig->notify_count) {
				1125	__set_current_state(TASK_KILLABLE);
				1126	spin_unlock_irq(lock);
				1127	schedule();
				1128	if (__fatal_signal_pending(tsk))
				1129	goto killed;
				1130	spin_lock_irq(lock);
				1131	}
				1132	spin_unlock_irq(lock);
				1133
				1134	/*
				1135	* At this point all other threads have exited, all we have to
				1136	* do is to wait for the thread group leader to become inactive,
				1137	* and to assume its PID:
				1138	*/
				1139	if (!thread_group_leader(tsk)) {
				1140	struct task_struct *leader = tsk->group_leader;
				1141
					/*
					* Loop exit (break) leaves tasklist_lock write-held and the
					* cgroup threadgroup change section open; both are released
					* after the PID hand-over below.
					*/
				1142	for (;;) {
				1143	cgroup_threadgroup_change_begin(tsk);
				1144	write_lock_irq(&tasklist_lock);
				1145	/*
				1146	* Do this under tasklist_lock to ensure that
				1147	* exit_notify() can't miss ->group_exit_task
				1148	*/
				1149	sig->notify_count = -1;
				1150	if (likely(leader->exit_state))
				1151	break;
				1152	__set_current_state(TASK_KILLABLE);
				1153	write_unlock_irq(&tasklist_lock);
				1154	cgroup_threadgroup_change_end(tsk);
				1155	schedule();
				1156	if (__fatal_signal_pending(tsk))
				1157	goto killed;
				1158	}
				1159
				1160	/*
				1161	* The only record we have of the real-time age of a
				1162	* process, regardless of execs it's done, is start_time.
				1163	* All the past CPU time is accumulated in signal_struct
				1164	* from sister threads now dead. But in this non-leader
				1165	* exec, nothing survives from the original leader thread,
				1166	* whose birth marks the true age of this process now.
				1167	* When we take on its identity by switching to its PID, we
				1168	* also take its birthdate (always earlier than our own).
				1169	*/
				1170	tsk->start_time = leader->start_time;
				1171	tsk->real_start_time = leader->real_start_time;
				1172
				1173	BUG_ON(!same_thread_group(leader, tsk));
				1174	BUG_ON(has_group_leader_pid(tsk));
				1175	/*
				1176	* An exec() starts a new thread group with the
				1177	* TGID of the previous thread group. Rehash the
				1178	* two threads with a switched PID, and release
				1179	* the former thread group leader:
				1180	*/
				1181
				1182	/* Become a process group leader with the old leader's pid.
				1183	* The old leader becomes a thread of this thread group.
				1184	* Note: The old leader also uses this pid until release_task
				1185	* is called. Odd but simple and correct.
				1186	*/
				1187	tsk->pid = leader->pid;
				1188	change_pid(tsk, PIDTYPE_PID, task_pid(leader));
				1189	transfer_pid(leader, tsk, PIDTYPE_TGID);
				1190	transfer_pid(leader, tsk, PIDTYPE_PGID);
				1191	transfer_pid(leader, tsk, PIDTYPE_SID);
				1192
					/* Take over the old leader's slots in the task and sibling lists. */
				1193	list_replace_rcu(&leader->tasks, &tsk->tasks);
				1194	list_replace_init(&leader->sibling, &tsk->sibling);
				1195
				1196	tsk->group_leader = tsk;
				1197	leader->group_leader = tsk;
				1198
				1199	tsk->exit_signal = SIGCHLD;
				1200	leader->exit_signal = -1;
				1201
				1202	BUG_ON(leader->exit_state != EXIT_ZOMBIE);
				1203	leader->exit_state = EXIT_DEAD;
				1204
				1205	/*
				1206	* We are going to release_task()->ptrace_unlink() silently,
				1207	* the tracer can sleep in do_wait(). EXIT_DEAD guarantees
				1208	* the tracer won't block again waiting for this thread.
				1209	*/
				1210	if (unlikely(leader->ptrace))
				1211	__wake_up_parent(leader, leader->parent);
				1212	write_unlock_irq(&tasklist_lock);
				1213	cgroup_threadgroup_change_end(tsk);
				1214
				1215	release_task(leader);
				1216	}
				1217
					/* Group teardown finished: clear the exec-in-progress markers. */
				1218	sig->group_exit_task = NULL;
				1219	sig->notify_count = 0;
				1220
				1221	no_thread_group:
				1222	/* we have changed execution domain */
				1223	tsk->exit_signal = SIGCHLD;
				1224
				1225	#ifdef CONFIG_POSIX_TIMERS
				1226	exit_itimers(sig);
				1227	flush_itimer_signals();
				1228	#endif
				1229
				1230	if (refcount_read(&oldsighand->count) != 1) {
				1231	struct sighand_struct *newsighand;
				1232	/*
				1233	* This ->sighand is shared with the CLONE_SIGHAND
				1234	* but not CLONE_THREAD task, switch to the new one.
				1235	*/
				1236	newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
				1237	if (!newsighand)
				1238	return -ENOMEM;
				1239
					/* Start the private copy with one reference and the old actions. */
				1240	refcount_set(&newsighand->count, 1);
				1241	memcpy(newsighand->action, oldsighand->action,
				1242	sizeof(newsighand->action));
				1243
					/* Publish the new table under tasklist_lock and the old siglock. */
				1244	write_lock_irq(&tasklist_lock);
				1245	spin_lock(&oldsighand->siglock);
				1246	rcu_assign_pointer(tsk->sighand, newsighand);
				1247	spin_unlock(&oldsighand->siglock);
				1248	write_unlock_irq(&tasklist_lock);
				1249
				1250	__cleanup_sighand(oldsighand);
				1251	}
				1252
				1253	BUG_ON(!thread_group_leader(tsk));
				1254	return 0;
				1255
				1256	killed:
				1257	/* protects against exit_notify() and __exit_signal() */
				1258	read_lock(&tasklist_lock);
				1259	sig->group_exit_task = NULL;
				1260	sig->notify_count = 0;
				1261	read_unlock(&tasklist_lock);
				1262	return -EAGAIN;
				1263	}
				1264
				1265	char __get_task_comm(char buf, size_t buf_size, struct task_struct *tsk)
				1266	{
				1267	task_lock(tsk);
				1268	strncpy(buf, tsk->comm, buf_size);
				1269	task_unlock(tsk);
				1270	return buf;
				1271	}
				1272	EXPORT_SYMBOL_GPL(__get_task_comm);
				1273
				1274	/*
				1275	* These functions flush out all traces of the currently running executable
				1276	* so that a new one can be started.
				1277	*/
				1278
				1279	void __set_task_comm(struct task_struct tsk, const char buf, bool exec)
				1280	{
				1281	task_lock(tsk);
				1282	trace_task_rename(tsk, buf);
				1283	strlcpy(tsk->comm, buf, sizeof(tsk->comm));
				1284	task_unlock(tsk);
				1285	perf_event_comm(tsk, exec);
				1286	}
				1287
				1288	/*
				1289	* Calling this is the point of no return. None of the failures will be
				1290	* seen by userspace since either the process is already taking a fatal
				1291	* signal (via de_thread() or coredump), or will have SEGV raised
				1292	* (after exec_mmap()) by search_binary_handlers (see below).
				1293	*/
				1294	int flush_old_exec(struct linux_binprm * bprm)
				1295	{
				1296	int retval;
				1297
				1298	/*
				1299	* Make sure we have a private signal table and that
				1300	* we are unassociated from the previous thread group.
				1301	*/
				1302	retval = de_thread(current);
				1303	if (retval)
				1304	goto out;
				1305
				1306	/*
				1307	* Must be called _before_ exec_mmap() as bprm->mm is
				1308	* not visibile until then. This also enables the update
				1309	* to be lockless.
				1310	*/
				1311	set_mm_exe_file(bprm->mm, bprm->file);
				1312
				1313	would_dump(bprm, bprm->file);
				1314
				1315	/*
				1316	* Release all of the old mmap stuff
				1317	*/
				1318	acct_arg_size(bprm, 0);
				1319	retval = exec_mmap(bprm->mm);
				1320	if (retval)
				1321	goto out;
				1322
				1323	/*
				1324	* After setting bprm->called_exec_mmap (to mark that current is
				1325	* using the prepared mm now), we have nothing left of the original
				1326	* process. If anything from here on returns an error, the check
				1327	* in search_binary_handler() will SEGV current.
				1328	*/
				1329	bprm->called_exec_mmap = 1;
				1330	bprm->mm = NULL;
				1331
				1332	set_fs(USER_DS);
				1333	current->flags &= ~(PF_RANDOMIZE \| PF_FORKNOEXEC \| PF_KTHREAD \|
				1334	PF_NOFREEZE \| PF_NO_SETAFFINITY);
				1335	flush_thread();
				1336	current->personality &= ~bprm->per_clear;
				1337
				1338	/*
				1339	* We have to apply CLOEXEC before we change whether the process is
				1340	* dumpable (in setup_new_exec) to avoid a race with a process in userspace
				1341	* trying to access the should-be-closed file descriptors of a process
				1342	* undergoing exec(2).
				1343	*/
				1344	do_close_on_exec(current->files);
				1345	return 0;
				1346
				1347	out:
				1348	return retval;
				1349	}
				1350	EXPORT_SYMBOL(flush_old_exec);
				1351
				1352	void would_dump(struct linux_binprm bprm, struct file file)
				1353	{
				1354	struct inode *inode = file_inode(file);
				1355	if (inode_permission(inode, MAY_READ) < 0) {
				1356	struct user_namespace old, user_ns;
				1357	bprm->interp_flags \|= BINPRM_FLAGS_ENFORCE_NONDUMP;
				1358
				1359	/* Ensure mm->user_ns contains the executable */
				1360	user_ns = old = bprm->mm->user_ns;
				1361	while ((user_ns != &init_user_ns) &&
				1362	!privileged_wrt_inode_uidgid(user_ns, inode))
				1363	user_ns = user_ns->parent;
				1364
				1365	if (old != user_ns) {
				1366	bprm->mm->user_ns = get_user_ns(user_ns);
				1367	put_user_ns(old);
				1368	}
				1369	}
				1370	}
				1371	EXPORT_SYMBOL(would_dump);
				1372
				1373	void setup_new_exec(struct linux_binprm * bprm)
				1374	{
				1375	/*
				1376	* Once here, prepare_binrpm() will not be called any more, so
				1377	* the final state of setuid/setgid/fscaps can be merged into the
				1378	* secureexec flag.
				1379	*/
				1380	bprm->secureexec \|= bprm->cap_elevated;
				1381
				1382	if (bprm->secureexec) {
				1383	/* Make sure parent cannot signal privileged process. */
				1384	current->pdeath_signal = 0;
				1385
				1386	/*
				1387	* For secureexec, reset the stack limit to sane default to
				1388	* avoid bad behavior from the prior rlimits. This has to
				1389	* happen before arch_pick_mmap_layout(), which examines
				1390	* RLIMIT_STACK, but after the point of no return to avoid
				1391	* needing to clean up the change on failure.
				1392	*/
				1393	if (bprm->rlim_stack.rlim_cur > _STK_LIM)
				1394	bprm->rlim_stack.rlim_cur = _STK_LIM;
				1395	}
				1396
				1397	arch_pick_mmap_layout(current->mm, &bprm->rlim_stack);
				1398
				1399	current->sas_ss_sp = current->sas_ss_size = 0;
				1400
				1401	/*
				1402	* Figure out dumpability. Note that this checking only of current
				1403	* is wrong, but userspace depends on it. This should be testing
				1404	* bprm->secureexec instead.
				1405	*/
				1406	if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP \|\|
				1407	!(uid_eq(current_euid(), current_uid()) &&
				1408	gid_eq(current_egid(), current_gid())))
				1409	set_dumpable(current->mm, suid_dumpable);
				1410	else
				1411	set_dumpable(current->mm, SUID_DUMP_USER);
				1412
				1413	arch_setup_new_exec();
				1414	perf_event_exec();
				1415	__set_task_comm(current, kbasename(bprm->filename), true);
				1416
				1417	/* Set the new mm task size. We have to do that late because it may
				1418	* depend on TIF_32BIT which is only updated in flush_thread() on
				1419	* some architectures like powerpc
				1420	*/
				1421	current->mm->task_size = TASK_SIZE;
				1422
				1423	/* An exec changes our domain. We are no longer part of the thread
				1424	group */
				1425	WRITE_ONCE(current->self_exec_id, current->self_exec_id + 1);
				1426	flush_signal_handlers(current, 0);
				1427	}
				1428	EXPORT_SYMBOL(setup_new_exec);
				1429
				1430	/* Runs immediately before start_thread() takes over. */
				1431	void finalize_exec(struct linux_binprm *bprm)
				1432	{
				1433	/* Store any stack rlimit changes before starting thread. */
				1434	task_lock(current->group_leader);
				1435	current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
				1436	task_unlock(current->group_leader);
				1437	}
				1438	EXPORT_SYMBOL(finalize_exec);
				1439
				1440	/*
				1441	* Prepare credentials and lock ->cred_guard_mutex.
				1442	* install_exec_creds() commits the new creds and drops the lock.
				1443	* Or, if exec fails before, free_bprm() should release ->cred and
				1444	* and unlock.
				1445	*/
				1446	static int prepare_bprm_creds(struct linux_binprm *bprm)
				1447	{
				1448	if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
				1449	return -ERESTARTNOINTR;
				1450
				1451	bprm->cred = prepare_exec_creds();
				1452	if (likely(bprm->cred))
				1453	return 0;
				1454
				1455	mutex_unlock(&current->signal->cred_guard_mutex);
				1456	return -ENOMEM;
				1457	}
				1458
				1459	static void free_bprm(struct linux_binprm *bprm)
				1460	{
				1461	free_arg_pages(bprm);
				1462	if (bprm->cred) {
				1463	if (bprm->called_exec_mmap)
				1464	up_write(&current->signal->exec_update_lock);
				1465	mutex_unlock(&current->signal->cred_guard_mutex);
				1466	abort_creds(bprm->cred);
				1467	}
				1468	if (bprm->file) {
				1469	allow_write_access(bprm->file);
				1470	fput(bprm->file);
				1471	}
				1472	/* If a binfmt changed the interp, free it. */
				1473	if (bprm->interp != bprm->filename)
				1474	kfree(bprm->interp);
				1475	kfree(bprm);
				1476	}
				1477
				1478	int bprm_change_interp(const char interp, struct linux_binprm bprm)
				1479	{
				1480	/* If a binfmt changed the interp, free it first. */
				1481	if (bprm->interp != bprm->filename)
				1482	kfree(bprm->interp);
				1483	bprm->interp = kstrdup(interp, GFP_KERNEL);
				1484	if (!bprm->interp)
				1485	return -ENOMEM;
				1486	return 0;
				1487	}
				1488	EXPORT_SYMBOL(bprm_change_interp);
				1489
				1490	/*
				1491	* install the new credentials for this executable
				1492	*/
				1493	void install_exec_creds(struct linux_binprm *bprm)
				1494	{
				1495	security_bprm_committing_creds(bprm);
				1496
				1497	commit_creds(bprm->cred);
				1498	bprm->cred = NULL;
				1499
				1500	/*
				1501	* Disable monitoring for regular users
				1502	* when executing setuid binaries. Must
				1503	* wait until new credentials are committed
				1504	* by commit_creds() above
				1505	*/
				1506	if (get_dumpable(current->mm) != SUID_DUMP_USER)
				1507	perf_event_exit_task(current);
				1508	/*
				1509	* cred_guard_mutex must be held at least to this point to prevent
				1510	* ptrace_attach() from altering our determination of the task's
				1511	* credentials; any time after this it may be unlocked.
				1512	*/
				1513	security_bprm_committed_creds(bprm);
				1514	up_write(&current->signal->exec_update_lock);
				1515	mutex_unlock(&current->signal->cred_guard_mutex);
				1516	}
				1517	EXPORT_SYMBOL(install_exec_creds);
				1518
				1519	/*
				1520	* determine how safe it is to execute the proposed program
				1521	* - the caller must hold ->cred_guard_mutex to protect against
				1522	* PTRACE_ATTACH or seccomp thread-sync
				1523	*/
				1524	static void check_unsafe_exec(struct linux_binprm *bprm)
				1525	{
				1526	struct task_struct p = current, t;
				1527	unsigned n_fs;
				1528
				1529	if (p->ptrace)
				1530	bprm->unsafe \|= LSM_UNSAFE_PTRACE;
				1531
				1532	/*
				1533	* This isn't strictly necessary, but it makes it harder for LSMs to
				1534	* mess up.
				1535	*/
				1536	if (task_no_new_privs(current))
				1537	bprm->unsafe \|= LSM_UNSAFE_NO_NEW_PRIVS;
				1538
				1539	t = p;
				1540	n_fs = 1;
				1541	spin_lock(&p->fs->lock);
				1542	rcu_read_lock();
				1543	while_each_thread(p, t) {
				1544	if (t->fs == p->fs)
				1545	n_fs++;
				1546	}
				1547	rcu_read_unlock();
				1548
				1549	if (p->fs->users > n_fs)
				1550	bprm->unsafe \|= LSM_UNSAFE_SHARE;
				1551	else
				1552	p->fs->in_exec = 1;
				1553	spin_unlock(&p->fs->lock);
				1554	}
				1555
				1556	static void bprm_fill_uid(struct linux_binprm *bprm)
				1557	{
				1558	struct inode *inode;
				1559	unsigned int mode;
				1560	kuid_t uid;
				1561	kgid_t gid;
				1562	int err;
				1563
				1564	/*
				1565	* Since this can be called multiple times (via prepare_binprm),
				1566	* we must clear any previous work done when setting set[ug]id
				1567	* bits from any earlier bprm->file uses (for example when run
				1568	* first for a setuid script then again for its interpreter).
				1569	*/
				1570	bprm->cred->euid = current_euid();
				1571	bprm->cred->egid = current_egid();
				1572
				1573	if (!mnt_may_suid(bprm->file->f_path.mnt))
				1574	return;
				1575
				1576	if (task_no_new_privs(current))
				1577	return;
				1578
				1579	inode = bprm->file->f_path.dentry->d_inode;
				1580	mode = READ_ONCE(inode->i_mode);
				1581	if (!(mode & (S_ISUID\|S_ISGID)))
				1582	return;
				1583
				1584	/* Be careful if suid/sgid is set */
				1585	inode_lock(inode);
				1586
				1587	/* Atomically reload and check mode/uid/gid now that lock held. */
				1588	mode = inode->i_mode;
				1589	uid = inode->i_uid;
				1590	gid = inode->i_gid;
				1591	err = inode_permission(inode, MAY_EXEC);
				1592	inode_unlock(inode);
				1593
				1594	/* Did the exec bit vanish out from under us? Give up. */
				1595	if (err)
				1596	return;
				1597
				1598	/* We ignore suid/sgid if there are no mappings for them in the ns */
				1599	if (!kuid_has_mapping(bprm->cred->user_ns, uid) \|\|
				1600	!kgid_has_mapping(bprm->cred->user_ns, gid))
				1601	return;
				1602
				1603	if (mode & S_ISUID) {
				1604	bprm->per_clear \|= PER_CLEAR_ON_SETID;
				1605	bprm->cred->euid = uid;
				1606	}
				1607
				1608	if ((mode & (S_ISGID \| S_IXGRP)) == (S_ISGID \| S_IXGRP)) {
				1609	bprm->per_clear \|= PER_CLEAR_ON_SETID;
				1610	bprm->cred->egid = gid;
				1611	}
				1612	}
				1613
				1614	/*
				1615	* Fill the binprm structure from the inode.
				1616	* Check permissions, then read the first BINPRM_BUF_SIZE bytes
				1617	*
				1618	* This may be called multiple times for binary chains (scripts for example).
				1619	*/
				1620	int prepare_binprm(struct linux_binprm *bprm)
				1621	{
				1622	int retval;
				1623	loff_t pos = 0;
				1624
				1625	bprm_fill_uid(bprm);
				1626
				1627	/* fill in binprm security blob */
				1628	retval = security_bprm_set_creds(bprm);
				1629	if (retval)
				1630	return retval;
				1631	bprm->called_set_creds = 1;
				1632
				1633	memset(bprm->buf, 0, BINPRM_BUF_SIZE);
				1634	return kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, &pos);
				1635	}
				1636
				1637	EXPORT_SYMBOL(prepare_binprm);
				1638
				1639	/*
				1640	* Arguments are '\0' separated strings found at the location bprm->p
				1641	* points to; chop off the first by relocating brpm->p to right after
				1642	* the first '\0' encountered.
				1643	*/
				1644	int remove_arg_zero(struct linux_binprm *bprm)
				1645	{
				1646	int ret = 0;
				1647	unsigned long offset;
				1648	char *kaddr;
				1649	struct page *page;
				1650
				1651	if (!bprm->argc)
				1652	return 0;
				1653
				1654	do {
				1655	offset = bprm->p & ~PAGE_MASK;
				1656	page = get_arg_page(bprm, bprm->p, 0);
				1657	if (!page) {
				1658	ret = -EFAULT;
				1659	goto out;
				1660	}
				1661	kaddr = kmap_atomic(page);
				1662
				1663	for (; offset < PAGE_SIZE && kaddr[offset];
				1664	offset++, bprm->p++)
				1665	;
				1666
				1667	kunmap_atomic(kaddr);
				1668	put_arg_page(page);
				1669	} while (offset == PAGE_SIZE);
				1670
				1671	bprm->p++;
				1672	bprm->argc--;
				1673	ret = 0;
				1674
				1675	out:
				1676	return ret;
				1677	}
				1678	EXPORT_SYMBOL(remove_arg_zero);
				1679
				1680	#define printable(c) (((c)=='\t') \|\| ((c)=='\n') \|\| (0x20<=(c) && (c)<=0x7e))
				1681	/*
				1682	* cycle the list of binary formats handler, until one recognizes the image
				1683	*/
				1684	int search_binary_handler(struct linux_binprm *bprm)
				1685	{
				1686	bool need_retry = IS_ENABLED(CONFIG_MODULES);
				1687	struct linux_binfmt *fmt;
				1688	int retval;
				1689
				1690	/* This allows 4 levels of binfmt rewrites before failing hard. */
				1691	if (bprm->recursion_depth > 5)
				1692	return -ELOOP;
				1693
				1694	retval = security_bprm_check(bprm);
				1695	if (retval)
				1696	return retval;
				1697
				1698	retval = -ENOENT;
				1699	retry:
				1700	read_lock(&binfmt_lock);
				1701	list_for_each_entry(fmt, &formats, lh) {
				1702	if (!try_module_get(fmt->module))
				1703	continue;
				1704	read_unlock(&binfmt_lock);
				1705
				1706	bprm->recursion_depth++;
				1707	retval = fmt->load_binary(bprm);
				1708	bprm->recursion_depth--;
				1709
				1710	read_lock(&binfmt_lock);
				1711	put_binfmt(fmt);
				1712	if (retval < 0 && bprm->called_exec_mmap) {
				1713	/* we got to flush_old_exec() and failed after it */
				1714	read_unlock(&binfmt_lock);
				1715	force_sigsegv(SIGSEGV);
				1716	return retval;
				1717	}
				1718	if (retval != -ENOEXEC \|\| !bprm->file) {
				1719	read_unlock(&binfmt_lock);
				1720	return retval;
				1721	}
				1722	}
				1723	read_unlock(&binfmt_lock);
				1724
				1725	if (need_retry) {
				1726	if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
				1727	printable(bprm->buf[2]) && printable(bprm->buf[3]))
				1728	return retval;
				1729	if (request_module("binfmt-%04x", (ushort )(bprm->buf + 2)) < 0)
				1730	return retval;
				1731	need_retry = false;
				1732	goto retry;
				1733	}
				1734
				1735	return retval;
				1736	}
				1737	EXPORT_SYMBOL(search_binary_handler);
				1738
				1739	static int exec_binprm(struct linux_binprm *bprm)
				1740	{
				1741	pid_t old_pid, old_vpid;
				1742	int ret;
				1743
				1744	/* Need to fetch pid before load_binary changes it */
				1745	old_pid = current->pid;
				1746	rcu_read_lock();
				1747	old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
				1748	rcu_read_unlock();
				1749
				1750	ret = search_binary_handler(bprm);
				1751	if (ret >= 0) {
				1752	audit_bprm(bprm);
				1753	trace_sched_process_exec(current, old_pid, bprm);
				1754	ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
				1755	proc_exec_connector(current);
				1756	}
				1757
				1758	return ret;
				1759	}
				1760
				1761	/*
				1762	* sys_execve() executes a new program.
				1763	*/
				1764	static int __do_execve_file(int fd, struct filename *filename,
				1765	struct user_arg_ptr argv,
				1766	struct user_arg_ptr envp,
				1767	int flags, struct file *file)
				1768	{
				1769	char *pathbuf = NULL;
				1770	struct linux_binprm *bprm;
				1771	struct files_struct *displaced;
				1772	int retval;
				1773
				1774	if (IS_ERR(filename))
				1775	return PTR_ERR(filename);
				1776
				1777	/*
				1778	* We move the actual failure in case of RLIMIT_NPROC excess from
				1779	* set*uid() to execve() because too many poorly written programs
				1780	* don't check setuid() return code. Here we additionally recheck
				1781	* whether NPROC limit is still exceeded.
				1782	*/
				1783	if ((current->flags & PF_NPROC_EXCEEDED) &&
				1784	atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
				1785	retval = -EAGAIN;
				1786	goto out_ret;
				1787	}
				1788
				1789	/* We're below the limit (still or again), so we don't want to make
				1790	* further execve() calls fail. */
				1791	current->flags &= ~PF_NPROC_EXCEEDED;
				1792
				1793	retval = unshare_files(&displaced);
				1794	if (retval)
				1795	goto out_ret;
				1796
				1797	retval = -ENOMEM;
				1798	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
				1799	if (!bprm)
				1800	goto out_files;
				1801
				1802	retval = prepare_bprm_creds(bprm);
				1803	if (retval)
				1804	goto out_free;
				1805
				1806	check_unsafe_exec(bprm);
				1807	current->in_execve = 1;
				1808
				1809	if (!file)
				1810	file = do_open_execat(fd, filename, flags);
				1811	retval = PTR_ERR(file);
				1812	if (IS_ERR(file))
				1813	goto out_unmark;
				1814
				1815	sched_exec();
				1816
				1817	bprm->file = file;
				1818	if (!filename) {
				1819	bprm->filename = "none";
				1820	} else if (fd == AT_FDCWD \|\| filename->name[0] == '/') {
				1821	bprm->filename = filename->name;
				1822	} else {
				1823	if (filename->name[0] == '\0')
				1824	pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
				1825	else
				1826	pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
				1827	fd, filename->name);
				1828	if (!pathbuf) {
				1829	retval = -ENOMEM;
				1830	goto out_unmark;
				1831	}
				1832	/*
				1833	* Record that a name derived from an O_CLOEXEC fd will be
				1834	* inaccessible after exec. Relies on having exclusive access to
				1835	* current->files (due to unshare_files above).
				1836	*/
				1837	if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
				1838	bprm->interp_flags \|= BINPRM_FLAGS_PATH_INACCESSIBLE;
				1839	bprm->filename = pathbuf;
				1840	}
				1841	bprm->interp = bprm->filename;
				1842
				1843	retval = bprm_mm_init(bprm);
				1844	if (retval)
				1845	goto out_unmark;
				1846
				1847	retval = prepare_arg_pages(bprm, argv, envp);
				1848	if (retval < 0)
				1849	goto out;
				1850
				1851	retval = prepare_binprm(bprm);
				1852	if (retval < 0)
				1853	goto out;
				1854
				1855	retval = copy_strings_kernel(1, &bprm->filename, bprm);
				1856	if (retval < 0)
				1857	goto out;
				1858
				1859	bprm->exec = bprm->p;
				1860	retval = copy_strings(bprm->envc, envp, bprm);
				1861	if (retval < 0)
				1862	goto out;
				1863
				1864	retval = copy_strings(bprm->argc, argv, bprm);
				1865	if (retval < 0)
				1866	goto out;
				1867
				1868	/*
				1869	* When argv is empty, add an empty string ("") as argv[0] to
				1870	* ensure confused userspace programs that start processing
				1871	* from argv[1] won't end up walking envp. See also
				1872	* bprm_stack_limits().
				1873	*/
				1874	if (bprm->argc == 0) {
				1875	const char *argv[] = { "", NULL };
				1876	retval = copy_strings_kernel(1, argv, bprm);
				1877	if (retval < 0)
				1878	goto out;
				1879	bprm->argc = 1;
				1880	}
				1881
				1882	retval = exec_binprm(bprm);
				1883	if (retval < 0)
				1884	goto out;
				1885
				1886	/* execve succeeded */
				1887	current->fs->in_exec = 0;
				1888	current->in_execve = 0;
				1889	rseq_execve(current);
				1890	acct_update_integrals(current);
				1891	task_numa_free(current, false);
				1892	free_bprm(bprm);
				1893	kfree(pathbuf);
				1894	if (filename)
				1895	putname(filename);
				1896	if (displaced)
				1897	put_files_struct(displaced);
				1898	return retval;
				1899
				1900	out:
				1901	if (bprm->mm) {
				1902	acct_arg_size(bprm, 0);
				1903	mmput(bprm->mm);
				1904	}
				1905
				1906	out_unmark:
				1907	current->fs->in_exec = 0;
				1908	current->in_execve = 0;
				1909
				1910	out_free:
				1911	free_bprm(bprm);
				1912	kfree(pathbuf);
				1913
				1914	out_files:
				1915	if (displaced)
				1916	reset_files_struct(displaced);
				1917	out_ret:
				1918	if (filename)
				1919	putname(filename);
				1920	return retval;
				1921	}
				1922
				1923	static int do_execveat_common(int fd, struct filename *filename,
				1924	struct user_arg_ptr argv,
				1925	struct user_arg_ptr envp,
				1926	int flags)
				1927	{
				1928	return __do_execve_file(fd, filename, argv, envp, flags, NULL);
				1929	}
				1930
				1931	int do_execve_file(struct file file, void __argv, void *__envp)
				1932	{
				1933	struct user_arg_ptr argv = { .ptr.native = __argv };
				1934	struct user_arg_ptr envp = { .ptr.native = __envp };
				1935
				1936	return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
				1937	}
				1938
				1939	int do_execve(struct filename *filename,
				1940	const char __user const __user __argv,
				1941	const char __user const __user __envp)
				1942	{
				1943	struct user_arg_ptr argv = { .ptr.native = __argv };
				1944	struct user_arg_ptr envp = { .ptr.native = __envp };
				1945	return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
				1946	}
				1947
				1948	int do_execveat(int fd, struct filename *filename,
				1949	const char __user const __user __argv,
				1950	const char __user const __user __envp,
				1951	int flags)
				1952	{
				1953	struct user_arg_ptr argv = { .ptr.native = __argv };
				1954	struct user_arg_ptr envp = { .ptr.native = __envp };
				1955
				1956	return do_execveat_common(fd, filename, argv, envp, flags);
				1957	}
				1958
				1959	#ifdef CONFIG_COMPAT
				1960	static int compat_do_execve(struct filename *filename,
				1961	const compat_uptr_t __user *__argv,
				1962	const compat_uptr_t __user *__envp)
				1963	{
				1964	struct user_arg_ptr argv = {
				1965	.is_compat = true,
				1966	.ptr.compat = __argv,
				1967	};
				1968	struct user_arg_ptr envp = {
				1969	.is_compat = true,
				1970	.ptr.compat = __envp,
				1971	};
				1972	return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
				1973	}
				1974
				1975	static int compat_do_execveat(int fd, struct filename *filename,
				1976	const compat_uptr_t __user *__argv,
				1977	const compat_uptr_t __user *__envp,
				1978	int flags)
				1979	{
				1980	struct user_arg_ptr argv = {
				1981	.is_compat = true,
				1982	.ptr.compat = __argv,
				1983	};
				1984	struct user_arg_ptr envp = {
				1985	.is_compat = true,
				1986	.ptr.compat = __envp,
				1987	};
				1988	return do_execveat_common(fd, filename, argv, envp, flags);
				1989	}
				1990	#endif
				1991
				1992	void set_binfmt(struct linux_binfmt *new)
				1993	{
				1994	struct mm_struct *mm = current->mm;
				1995
				1996	if (mm->binfmt)
				1997	module_put(mm->binfmt->module);
				1998
				1999	mm->binfmt = new;
				2000	if (new)
				2001	__module_get(new->module);
				2002	}
				2003	EXPORT_SYMBOL(set_binfmt);
				2004
				2005	/*
				2006	* set_dumpable stores three-value SUID_DUMP_* into mm->flags.
				2007	*/
				2008	void set_dumpable(struct mm_struct *mm, int value)
				2009	{
				2010	if (WARN_ON((unsigned)value > SUID_DUMP_ROOT))
				2011	return;
				2012
				2013	set_mask_bits(&mm->flags, MMF_DUMPABLE_MASK, value);
				2014	}
				2015
				2016	SYSCALL_DEFINE3(execve,
				2017	const char __user *, filename,
				2018	const char __user const __user , argv,
				2019	const char __user const __user , envp)
				2020	{
				2021	return do_execve(getname(filename), argv, envp);
				2022	}
				2023
				2024	SYSCALL_DEFINE5(execveat,
				2025	int, fd, const char __user *, filename,
				2026	const char __user const __user , argv,
				2027	const char __user const __user , envp,
				2028	int, flags)
				2029	{
				2030	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
				2031
				2032	return do_execveat(fd,
				2033	getname_flags(filename, lookup_flags, NULL),
				2034	argv, envp, flags);
				2035	}
				2036
				2037	#ifdef CONFIG_COMPAT
				2038	COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
				2039	const compat_uptr_t __user *, argv,
				2040	const compat_uptr_t __user *, envp)
				2041	{
				2042	return compat_do_execve(getname(filename), argv, envp);
				2043	}
				2044
				2045	COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
				2046	const char __user *, filename,
				2047	const compat_uptr_t __user *, argv,
				2048	const compat_uptr_t __user *, envp,
				2049	int, flags)
				2050	{
				2051	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
				2052
				2053	return compat_do_execveat(fd,
				2054	getname_flags(filename, lookup_flags, NULL),
				2055	argv, envp, flags);
				2056	}
				2057	#endif