Blame - ap/os/linux/linux-3.4.x/fs/open.c - T106_DC

blob: ece5fe58effc9a64af9a6bde1b845e0f36fc6d8c [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/*
				2	* linux/fs/open.c
				3	*
				4	* Copyright (C) 1991, 1992 Linus Torvalds
				5	*/
				6
				7	#include <linux/string.h>
				8	#include <linux/mm.h>
				9	#include <linux/file.h>
				10	#include <linux/fdtable.h>
				11	#include <linux/fsnotify.h>
				12	#include <linux/module.h>
				13	#include <linux/tty.h>
				14	#include <linux/namei.h>
				15	#include <linux/backing-dev.h>
				16	#include <linux/capability.h>
				17	#include <linux/securebits.h>
				18	#include <linux/security.h>
				19	#include <linux/mount.h>
				20	#include <linux/fcntl.h>
				21	#include <linux/slab.h>
				22	#include <asm/uaccess.h>
				23	#include <linux/fs.h>
				24	#include <linux/personality.h>
				25	#include <linux/pagemap.h>
				26	#include <linux/syscalls.h>
				27	#include <linux/rcupdate.h>
				28	#include <linux/audit.h>
				29	#include <linux/falloc.h>
				30	#include <linux/fs_struct.h>
				31	#include <linux/ima.h>
				32	#include <linux/dnotify.h>
				33
				34	#include "internal.h"
				35
xf.li	6c8fc1e	2023-08-12 00:11:09 -0700	[diff] [blame^]	36	#ifdef CONFIG_SYSVIPC_CROSS_SHM
				37	#include <../ipc/shm_ctrl.h>
				38	#endif
				39
lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	40	int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
				41	struct file *filp)
				42	{
				43	int ret;
				44	struct iattr newattrs;
				45
				46	/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
				47	if (length < 0)
				48	return -EINVAL;
				49
				50	newattrs.ia_size = length;
				51	newattrs.ia_valid = ATTR_SIZE \| time_attrs;
				52	if (filp) {
				53	newattrs.ia_file = filp;
				54	newattrs.ia_valid \|= ATTR_FILE;
				55	}
				56
				57	/* Remove suid/sgid on truncate too */
				58	ret = should_remove_suid(dentry);
				59	if (ret)
				60	newattrs.ia_valid \|= ret \| ATTR_FORCE;
				61
				62	mutex_lock(&dentry->d_inode->i_mutex);
				63	ret = notify_change(dentry, &newattrs);
				64	mutex_unlock(&dentry->d_inode->i_mutex);
				65	return ret;
				66	}
				67
				68	static long do_sys_truncate(const char __user *pathname, loff_t length)
				69	{
				70	struct path path;
				71	struct inode *inode;
				72	int error;
				73
				74	error = -EINVAL;
				75	if (length < 0) /* sorry, but loff_t says... */
				76	goto out;
				77
				78	error = user_path(pathname, &path);
				79	if (error)
				80	goto out;
				81	inode = path.dentry->d_inode;
				82
				83	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
				84	error = -EISDIR;
				85	if (S_ISDIR(inode->i_mode))
				86	goto dput_and_out;
				87
				88	error = -EINVAL;
				89	if (!S_ISREG(inode->i_mode))
				90	goto dput_and_out;
				91
				92	error = mnt_want_write(path.mnt);
				93	if (error)
				94	goto dput_and_out;
				95
				96	error = inode_permission(inode, MAY_WRITE);
				97	if (error)
				98	goto mnt_drop_write_and_out;
				99
				100	error = -EPERM;
				101	if (IS_APPEND(inode))
				102	goto mnt_drop_write_and_out;
				103
				104	error = get_write_access(inode);
				105	if (error)
				106	goto mnt_drop_write_and_out;
				107
				108	/*
				109	* Make sure that there are no leases. get_write_access() protects
				110	* against the truncate racing with a lease-granting setlease().
				111	*/
				112	error = break_lease(inode, O_WRONLY);
				113	if (error)
				114	goto put_write_and_out;
				115
				116	error = locks_verify_truncate(inode, NULL, length);
				117	if (!error)
				118	error = security_path_truncate(&path);
				119	if (!error)
				120	error = do_truncate(path.dentry, length, 0, NULL);
				121
				122	put_write_and_out:
				123	put_write_access(inode);
				124	mnt_drop_write_and_out:
				125	mnt_drop_write(path.mnt);
				126	dput_and_out:
				127	path_put(&path);
				128	out:
				129	return error;
				130	}
				131
				132	SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
				133	{
				134	return do_sys_truncate(path, length);
				135	}
				136
				137	static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
				138	{
				139	struct inode * inode;
				140	struct dentry *dentry;
				141	struct file * file;
				142	int error;
				143
				144	error = -EINVAL;
				145	if (length < 0)
				146	goto out;
				147	error = -EBADF;
				148	file = fget(fd);
				149	if (!file)
				150	goto out;
				151
				152	/* explicitly opened as large or we are on 64-bit box */
				153	if (file->f_flags & O_LARGEFILE)
				154	small = 0;
				155
				156	dentry = file->f_path.dentry;
				157	inode = dentry->d_inode;
				158	error = -EINVAL;
				159	if (!S_ISREG(inode->i_mode) \|\| !(file->f_mode & FMODE_WRITE))
				160	goto out_putf;
				161
				162	error = -EINVAL;
				163	/* Cannot ftruncate over 2^31 bytes without large file support */
				164	if (small && length > MAX_NON_LFS)
				165	goto out_putf;
				166
				167	error = -EPERM;
				168	if (IS_APPEND(inode))
				169	goto out_putf;
				170
				171	error = locks_verify_truncate(inode, file, length);
				172	if (!error)
				173	error = security_path_truncate(&file->f_path);
				174	if (!error)
				175	error = do_truncate(dentry, length, ATTR_MTIME\|ATTR_CTIME, file);
				176	out_putf:
				177	fput(file);
				178	out:
				179	return error;
				180	}
				181
				182	SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
				183	{
				184	long ret = do_sys_ftruncate(fd, length, 1);
				185	/* avoid REGPARM breakage on x86: */
				186	asmlinkage_protect(2, ret, fd, length);
				187	return ret;
				188	}
				189
				/* The loff_t-based (LFS) truncate entry points exist only on 32-bit builds */
				#if BITS_PER_LONG == 32
				SYSCALL_DEFINE(truncate64)(const char __user * path, loff_t length)
				{
					long rc = do_sys_truncate(path, length);

					return rc;
				}
				#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
				/* Wrapper taking register-sized arguments; casts back and forwards */
				asmlinkage long SyS_truncate64(long path, loff_t length)
				{
					const char __user *upath = (const char __user *) path;

					return SYSC_truncate64(upath, length);
				}
				SYSCALL_ALIAS(sys_truncate64, SyS_truncate64);
				#endif

				SYSCALL_DEFINE(ftruncate64)(unsigned int fd, loff_t length)
				{
					long rc;

					/* LFS entry point: the helper is called with small = 0 */
					rc = do_sys_ftruncate(fd, length, 0);
					/* avoid REGPARM breakage on x86: */
					asmlinkage_protect(2, rc, fd, length);
					return rc;
				}
				#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
				/* Wrapper taking register-sized arguments; casts back and forwards */
				asmlinkage long SyS_ftruncate64(long fd, loff_t length)
				{
					unsigned int ufd = (unsigned int) fd;

					return SYSC_ftruncate64(ufd, length);
				}
				SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
				#endif
				#endif /* BITS_PER_LONG == 32 */
				219
				220
				221	int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
				222	{
				223	struct inode *inode = file->f_path.dentry->d_inode;
				224	long ret;
				225
				226	if (offset < 0 \|\| len <= 0)
				227	return -EINVAL;
				228
				229	/* Return error if mode is not supported */
				230	if (mode & ~(FALLOC_FL_KEEP_SIZE \| FALLOC_FL_PUNCH_HOLE))
				231	return -EOPNOTSUPP;
				232
				233	/* Punch hole must have keep size set */
				234	if ((mode & FALLOC_FL_PUNCH_HOLE) &&
				235	!(mode & FALLOC_FL_KEEP_SIZE))
				236	return -EOPNOTSUPP;
				237
				238	if (!(file->f_mode & FMODE_WRITE))
				239	return -EBADF;
				240
				241	/* It's not possible punch hole on append only file */
				242	if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
				243	return -EPERM;
				244
				245	if (IS_IMMUTABLE(inode))
				246	return -EPERM;
				247
				248	/*
				249	* Revalidate the write permissions, in case security policy has
				250	* changed since the files were opened.
				251	*/
				252	ret = security_file_permission(file, MAY_WRITE);
				253	if (ret)
				254	return ret;
				255
				256	if (S_ISFIFO(inode->i_mode))
				257	return -ESPIPE;
				258
				259	/*
				260	* Let individual file system decide if it supports preallocation
				261	* for directories or not.
				262	*/
				263	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
				264	return -ENODEV;
				265
				266	/* Check for wrap through zero too */
				267	if (((offset + len) > inode->i_sb->s_maxbytes) \|\| ((offset + len) < 0))
				268	return -EFBIG;
				269
				270	if (!file->f_op->fallocate)
				271	return -EOPNOTSUPP;
				272
				273	return file->f_op->fallocate(file, mode, offset, len);
				274	}
				275
				276	SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
				277	{
				278	struct file *file;
				279	int error = -EBADF;
				280
				281	file = fget(fd);
				282	if (file) {
				283	error = do_fallocate(file, mode, offset, len);
				284	fput(file);
				285	}
				286
				287	return error;
				288	}
				289
				290	#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
				291	asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len)
				292	{
				293	return SYSC_fallocate((int)fd, (int)mode, offset, len);
				294	}
				295	SYSCALL_ALIAS(sys_fallocate, SyS_fallocate);
				296	#endif
				297
				298	/*
				299	* access() needs to use the real uid/gid, not the effective uid/gid.
				300	* We do this by temporarily clearing all FS-related capabilities and
				301	* switching the fsuid/fsgid around to the real ones.
				302	*/
				303	SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
				304	{
				305	const struct cred *old_cred;
				306	struct cred *override_cred;
				307	struct path path;
				308	struct inode *inode;
				309	int res;
				310
				311	if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
				312	return -EINVAL;
				313
				314	override_cred = prepare_creds();
				315	if (!override_cred)
				316	return -ENOMEM;
				317
				318	override_cred->fsuid = override_cred->uid;
				319	override_cred->fsgid = override_cred->gid;
				320
				321	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
				322	/* Clear the capabilities if we switch to a non-root user */
				323	if (override_cred->uid)
				324	cap_clear(override_cred->cap_effective);
				325	else
				326	override_cred->cap_effective =
				327	override_cred->cap_permitted;
				328	}
				329
				330	old_cred = override_creds(override_cred);
				331
				332	res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
				333	if (res)
				334	goto out;
				335
				336	inode = path.dentry->d_inode;
				337
				338	if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
				339	/*
				340	* MAY_EXEC on regular files is denied if the fs is mounted
				341	* with the "noexec" flag.
				342	*/
				343	res = -EACCES;
				344	if (path.mnt->mnt_flags & MNT_NOEXEC)
				345	goto out_path_release;
				346	}
				347
				348	res = inode_permission(inode, mode \| MAY_ACCESS);
				349	/* SuS v2 requires we report a read only fs too */
				350	if (res \|\| !(mode & S_IWOTH) \|\| special_file(inode->i_mode))
				351	goto out_path_release;
				352	/*
				353	* This is a rare case where using __mnt_is_readonly()
				354	* is OK without a mnt_want/drop_write() pair. Since
				355	* no actual write to the fs is performed here, we do
				356	* not need to telegraph to that to anyone.
				357	*
				358	* By doing this, we accept that this access is
				359	* inherently racy and know that the fs may change
				360	* state before we even see this result.
				361	*/
				362	if (__mnt_is_readonly(path.mnt))
				363	res = -EROFS;
				364
				365	out_path_release:
				366	path_put(&path);
				367	out:
				368	revert_creds(old_cred);
				369	put_cred(override_cred);
				370	return res;
				371	}
				372
				373	SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
				374	{
				375	return sys_faccessat(AT_FDCWD, filename, mode);
				376	}
				377
				378	SYSCALL_DEFINE1(chdir, const char __user *, filename)
				379	{
				380	struct path path;
				381	int error;
				382
				383	error = user_path_dir(filename, &path);
				384	if (error)
				385	goto out;
				386
				387	error = inode_permission(path.dentry->d_inode, MAY_EXEC \| MAY_CHDIR);
				388	if (error)
				389	goto dput_and_out;
				390
				391	set_fs_pwd(current->fs, &path);
				392
				393	dput_and_out:
				394	path_put(&path);
				395	out:
				396	return error;
				397	}
				398
				399	SYSCALL_DEFINE1(fchdir, unsigned int, fd)
				400	{
				401	struct file *file;
				402	struct inode *inode;
				403	int error, fput_needed;
				404
				405	error = -EBADF;
				406	file = fget_raw_light(fd, &fput_needed);
				407	if (!file)
				408	goto out;
				409
				410	inode = file->f_path.dentry->d_inode;
				411
				412	error = -ENOTDIR;
				413	if (!S_ISDIR(inode->i_mode))
				414	goto out_putf;
				415
				416	error = inode_permission(inode, MAY_EXEC \| MAY_CHDIR);
				417	if (!error)
				418	set_fs_pwd(current->fs, &file->f_path);
				419	out_putf:
				420	fput_light(file, fput_needed);
				421	out:
				422	return error;
				423	}
				424
				425	SYSCALL_DEFINE1(chroot, const char __user *, filename)
				426	{
				427	struct path path;
				428	int error;
				429
				430	error = user_path_dir(filename, &path);
				431	if (error)
				432	goto out;
				433
				434	error = inode_permission(path.dentry->d_inode, MAY_EXEC \| MAY_CHDIR);
				435	if (error)
				436	goto dput_and_out;
				437
				438	error = -EPERM;
				439	if (!capable(CAP_SYS_CHROOT))
				440	goto dput_and_out;
				441	error = security_path_chroot(&path);
				442	if (error)
				443	goto dput_and_out;
				444
				445	set_fs_root(current->fs, &path);
				446	error = 0;
				447	dput_and_out:
				448	path_put(&path);
				449	out:
				450	return error;
				451	}
				452
				453	static int chmod_common(struct path *path, umode_t mode)
				454	{
				455	struct inode *inode = path->dentry->d_inode;
				456	struct iattr newattrs;
				457	int error;
				458
				459	error = mnt_want_write(path->mnt);
				460	if (error)
				461	return error;
				462	mutex_lock(&inode->i_mutex);
				463	error = security_path_chmod(path, mode);
				464	if (error)
				465	goto out_unlock;
				466	newattrs.ia_mode = (mode & S_IALLUGO) \| (inode->i_mode & ~S_IALLUGO);
				467	newattrs.ia_valid = ATTR_MODE \| ATTR_CTIME;
				468	error = notify_change(path->dentry, &newattrs);
				469	out_unlock:
				470	mutex_unlock(&inode->i_mutex);
				471	mnt_drop_write(path->mnt);
				472	return error;
				473	}
				474
				475	SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
				476	{
				477	struct file * file;
				478	int err = -EBADF;
				479
				480	file = fget(fd);
				481	if (file) {
				482	audit_inode(NULL, file->f_path.dentry);
				483	err = chmod_common(&file->f_path, mode);
				484	fput(file);
				485	}
				486	return err;
				487	}
				488
				489	SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode)
				490	{
				491	struct path path;
				492	int error;
				493
				494	error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
				495	if (!error) {
				496	error = chmod_common(&path, mode);
				497	path_put(&path);
				498	}
				499	return error;
				500	}
				501
				502	SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
				503	{
				504	return sys_fchmodat(AT_FDCWD, filename, mode);
				505	}
				506
				507	static int chown_common(struct path *path, uid_t user, gid_t group)
				508	{
				509	struct inode *inode = path->dentry->d_inode;
				510	int error;
				511	struct iattr newattrs;
				512
				513	newattrs.ia_valid = ATTR_CTIME;
				514	if (user != (uid_t) -1) {
				515	newattrs.ia_valid \|= ATTR_UID;
				516	newattrs.ia_uid = user;
				517	}
				518	if (group != (gid_t) -1) {
				519	newattrs.ia_valid \|= ATTR_GID;
				520	newattrs.ia_gid = group;
				521	}
				522	if (!S_ISDIR(inode->i_mode))
				523	newattrs.ia_valid \|=
				524	ATTR_KILL_SUID \| ATTR_KILL_SGID \| ATTR_KILL_PRIV;
				525	mutex_lock(&inode->i_mutex);
				526	error = security_path_chown(path, user, group);
				527	if (!error)
				528	error = notify_change(path->dentry, &newattrs);
				529	mutex_unlock(&inode->i_mutex);
				530
				531	return error;
				532	}
				533
				534	SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
				535	{
				536	struct path path;
				537	int error;
				538
				539	error = user_path(filename, &path);
				540	if (error)
				541	goto out;
				542	error = mnt_want_write(path.mnt);
				543	if (error)
				544	goto out_release;
				545	error = chown_common(&path, user, group);
				546	mnt_drop_write(path.mnt);
				547	out_release:
				548	path_put(&path);
				549	out:
				550	return error;
				551	}
				552
				553	SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
				554	gid_t, group, int, flag)
				555	{
				556	struct path path;
				557	int error = -EINVAL;
				558	int lookup_flags;
				559
				560	if ((flag & ~(AT_SYMLINK_NOFOLLOW \| AT_EMPTY_PATH)) != 0)
				561	goto out;
				562
				563	lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
				564	if (flag & AT_EMPTY_PATH)
				565	lookup_flags \|= LOOKUP_EMPTY;
				566	error = user_path_at(dfd, filename, lookup_flags, &path);
				567	if (error)
				568	goto out;
				569	error = mnt_want_write(path.mnt);
				570	if (error)
				571	goto out_release;
				572	error = chown_common(&path, user, group);
				573	mnt_drop_write(path.mnt);
				574	out_release:
				575	path_put(&path);
				576	out:
				577	return error;
				578	}
				579
				580	SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
				581	{
				582	struct path path;
				583	int error;
				584
				585	error = user_lpath(filename, &path);
				586	if (error)
				587	goto out;
				588	error = mnt_want_write(path.mnt);
				589	if (error)
				590	goto out_release;
				591	error = chown_common(&path, user, group);
				592	mnt_drop_write(path.mnt);
				593	out_release:
				594	path_put(&path);
				595	out:
				596	return error;
				597	}
				598
				599	SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
				600	{
				601	struct file * file;
				602	int error = -EBADF;
				603	struct dentry * dentry;
				604
				605	file = fget(fd);
				606	if (!file)
				607	goto out;
				608
				609	error = mnt_want_write_file(file);
				610	if (error)
				611	goto out_fput;
				612	dentry = file->f_path.dentry;
				613	audit_inode(NULL, dentry);
				614	error = chown_common(&file->f_path, user, group);
				615	mnt_drop_write_file(file);
				616	out_fput:
				617	fput(file);
				618	out:
				619	return error;
				620	}
				621
				622	/*
				623	* You have to be very careful that these write
				624	* counts get cleaned up in error cases and
				625	* upon __fput(). This should probably never
				626	* be called outside of __dentry_open().
				627	*/
				628	static inline int __get_file_write_access(struct inode *inode,
				629	struct vfsmount *mnt)
				630	{
				631	int error;
				632	error = get_write_access(inode);
				633	if (error)
				634	return error;
				635	/*
				636	* Do not take mount writer counts on
				637	* special files since no writes to
				638	* the mount itself will occur.
				639	*/
				640	if (!special_file(inode->i_mode)) {
				641	/*
				642	* Balanced in __fput()
				643	*/
				644	error = mnt_want_write(mnt);
				645	if (error)
				646	put_write_access(inode);
				647	}
				648	return error;
				649	}
				650
				651	static struct file __dentry_open(struct dentry dentry, struct vfsmount *mnt,
				652	struct file *f,
				653	int (open)(struct inode , struct file *),
				654	const struct cred *cred)
				655	{
				656	static const struct file_operations empty_fops = {};
				657	struct inode *inode;
				658	int error;
				659
				660	f->f_mode = OPEN_FMODE(f->f_flags) \| FMODE_LSEEK \|
				661	FMODE_PREAD \| FMODE_PWRITE;
				662
				663	if (unlikely(f->f_flags & O_PATH))
				664	f->f_mode = FMODE_PATH;
				665
				666	inode = dentry->d_inode;
				667	if (f->f_mode & FMODE_WRITE) {
				668	error = __get_file_write_access(inode, mnt);
				669	if (error)
				670	goto cleanup_file;
				671	if (!special_file(inode->i_mode))
				672	file_take_write(f);
				673	}
				674
				675	f->f_mapping = inode->i_mapping;
				676	f->f_path.dentry = dentry;
				677	f->f_path.mnt = mnt;
				678	f->f_pos = 0;
				679	file_sb_list_add(f, inode->i_sb);
				680
				681	if (unlikely(f->f_mode & FMODE_PATH)) {
				682	f->f_op = &empty_fops;
				683	return f;
				684	}
				685
				686	f->f_op = fops_get(inode->i_fop);
				687
				688	error = security_dentry_open(f, cred);
				689	if (error)
				690	goto cleanup_all;
				691
				692	error = break_lease(inode, f->f_flags);
				693	if (error)
				694	goto cleanup_all;
				695
				696	if (!open && f->f_op)
				697	open = f->f_op->open;
				698	if (open) {
				699	error = open(inode, f);
				700	if (error)
				701	goto cleanup_all;
				702	}
				703	if ((f->f_mode & (FMODE_READ \| FMODE_WRITE)) == FMODE_READ)
				704	i_readcount_inc(inode);
				705
				706	f->f_flags &= ~(O_CREAT \| O_EXCL \| O_NOCTTY \| O_TRUNC);
				707
				708	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
				709
				710	/* NB: we're sure to have correct a_ops only after f_op->open */
				711	if (f->f_flags & O_DIRECT) {
				712	if (!f->f_mapping->a_ops \|\|
				713	((!f->f_mapping->a_ops->direct_IO) &&
				714	(!f->f_mapping->a_ops->get_xip_mem))) {
				715	fput(f);
				716	f = ERR_PTR(-EINVAL);
				717	}
				718	}
				719
				720	return f;
				721
				722	cleanup_all:
				723	fops_put(f->f_op);
				724	if (f->f_mode & FMODE_WRITE) {
				725	put_write_access(inode);
				726	if (!special_file(inode->i_mode)) {
				727	/*
				728	* We don't consider this a real
				729	* mnt_want/drop_write() pair
				730	* because it all happenend right
				731	* here, so just reset the state.
				732	*/
				733	file_reset_write(f);
				734	mnt_drop_write(mnt);
				735	}
				736	}
				737	file_sb_list_del(f);
				738	f->f_path.dentry = NULL;
				739	f->f_path.mnt = NULL;
				740	cleanup_file:
				741	put_filp(f);
				742	dput(dentry);
				743	mntput(mnt);
				744	return ERR_PTR(error);
				745	}
				746
				747	/**
				748	* lookup_instantiate_filp - instantiates the open intent filp
				749	* @nd: pointer to nameidata
				750	* @dentry: pointer to dentry
				751	* @open: open callback
				752	*
				753	* Helper for filesystems that want to use lookup open intents and pass back
				754	* a fully instantiated struct file to the caller.
				755	* This function is meant to be called from within a filesystem's
				756	* lookup method.
				757	* Beware of calling it for non-regular files! Those ->open methods might block
				758	* (e.g. in fifo_open), leaving you with parent locked (and in case of fifo,
				759	* leading to a deadlock, as nobody can open that fifo anymore, because
				760	* another process to open fifo will block on locked parent when doing lookup).
				761	* Note that in case of error, nd->intent.open.file is destroyed, but the
				762	* path information remains valid.
				763	* If the open callback is set to NULL, then the standard f_op->open()
				764	* filesystem callback is substituted.
				765	*/
				766	struct file lookup_instantiate_filp(struct nameidata nd, struct dentry *dentry,
				767	int (open)(struct inode , struct file *))
				768	{
				769	const struct cred *cred = current_cred();
				770
				771	if (IS_ERR(nd->intent.open.file))
				772	goto out;
				773	if (IS_ERR(dentry))
				774	goto out_err;
				775	nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
				776	nd->intent.open.file,
				777	open, cred);
				778	out:
				779	return nd->intent.open.file;
				780	out_err:
				781	release_open_intent(nd);
				782	nd->intent.open.file = ERR_CAST(dentry);
				783	goto out;
				784	}
				785	EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
				786
				787	/**
				788	* nameidata_to_filp - convert a nameidata to an open filp.
				789	* @nd: pointer to nameidata
				790	* @flags: open flags
				791	*
				792	* Note that this function destroys the original nameidata
				793	*/
				794	struct file nameidata_to_filp(struct nameidata nd)
				795	{
				796	const struct cred *cred = current_cred();
				797	struct file *filp;
				798
				799	/* Pick up the filp from the open intent */
				800	filp = nd->intent.open.file;
				801	nd->intent.open.file = NULL;
				802
				803	/* Has the filesystem initialised the file for us? */
				804	if (filp->f_path.dentry == NULL) {
				805	path_get(&nd->path);
				806	filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
				807	NULL, cred);
				808	}
				809	return filp;
				810	}
				811
				812	/*
				813	* dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
				814	* error.
				815	*/
				816	struct file dentry_open(struct dentry dentry, struct vfsmount *mnt, int flags,
				817	const struct cred *cred)
				818	{
				819	int error;
				820	struct file *f;
				821
				822	validate_creds(cred);
				823
				824	/* We must always pass in a valid mount pointer. */
				825	BUG_ON(!mnt);
				826
				827	error = -ENFILE;
				828	f = get_empty_filp();
				829	if (f == NULL) {
				830	dput(dentry);
				831	mntput(mnt);
				832	return ERR_PTR(error);
				833	}
				834
				835	f->f_flags = flags;
				836	return __dentry_open(dentry, mnt, f, NULL, cred);
				837	}
				838	EXPORT_SYMBOL(dentry_open);
				839
				840	static void __put_unused_fd(struct files_struct *files, unsigned int fd)
				841	{
				842	struct fdtable *fdt = files_fdtable(files);
				843	__clear_open_fd(fd, fdt);
				844	if (fd < files->next_fd)
				845	files->next_fd = fd;
				846	}
				847
				848	void put_unused_fd(unsigned int fd)
				849	{
				850	struct files_struct *files = current->files;
				851	spin_lock(&files->file_lock);
				852	__put_unused_fd(files, fd);
				853	spin_unlock(&files->file_lock);
				854	}
				855
				856	EXPORT_SYMBOL(put_unused_fd);
				857
				858	/*
				859	* Install a file pointer in the fd array.
				860	*
				861	* The VFS is full of places where we drop the files lock between
				862	* setting the open_fds bitmap and installing the file in the file
				863	* array. At any such point, we are vulnerable to a dup2() race
				864	* installing a file in the array before us. We need to detect this and
				865	* fput() the struct file we are about to overwrite in this case.
				866	*
				867	* It should never happen - if we allow dup2() do it, _really_ bad things
				868	* will follow.
				869	*/
				870
				871	void fd_install(unsigned int fd, struct file *file)
				872	{
				873	struct files_struct *files = current->files;
				874	struct fdtable *fdt;
				875	spin_lock(&files->file_lock);
				876	fdt = files_fdtable(files);
				877	BUG_ON(fdt->fd[fd] != NULL);
				878	rcu_assign_pointer(fdt->fd[fd], file);
				879	spin_unlock(&files->file_lock);
				880	}
				881
				882	EXPORT_SYMBOL(fd_install);
				883
				884	static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
				885	{
				886	int lookup_flags = 0;
				887	int acc_mode;
				888
				889	if (flags & O_CREAT)
				890	op->mode = (mode & S_IALLUGO) \| S_IFREG;
				891	else
				892	op->mode = 0;
				893
				894	/* Must never be set by userspace */
				895	flags &= ~FMODE_NONOTIFY;
				896
				897	/*
				898	* O_SYNC is implemented as __O_SYNC\|O_DSYNC. As many places only
				899	* check for O_DSYNC if the need any syncing at all we enforce it's
				900	* always set instead of having to deal with possibly weird behaviour
				901	* for malicious applications setting only __O_SYNC.
				902	*/
				903	if (flags & __O_SYNC)
				904	flags \|= O_DSYNC;
				905
				906	/*
				907	* If we have O_PATH in the open flag. Then we
				908	* cannot have anything other than the below set of flags
				909	*/
				910	if (flags & O_PATH) {
				911	flags &= O_DIRECTORY \| O_NOFOLLOW \| O_PATH;
				912	acc_mode = 0;
				913	} else {
				914	acc_mode = MAY_OPEN \| ACC_MODE(flags);
				915	}
				916
				917	op->open_flag = flags;
				918
				919	/* O_TRUNC implies we need access checks for write permissions */
				920	if (flags & O_TRUNC)
				921	acc_mode \|= MAY_WRITE;
				922
				923	/* Allow the LSM permission hook to distinguish append
				924	access from general write access. */
				925	if (flags & O_APPEND)
				926	acc_mode \|= MAY_APPEND;
				927
				928	op->acc_mode = acc_mode;
				929
				930	op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
				931
				932	if (flags & O_CREAT) {
				933	op->intent \|= LOOKUP_CREATE;
				934	if (flags & O_EXCL)
				935	op->intent \|= LOOKUP_EXCL;
				936	}
				937
				938	if (flags & O_DIRECTORY)
				939	lookup_flags \|= LOOKUP_DIRECTORY;
				940	if (!(flags & O_NOFOLLOW))
				941	lookup_flags \|= LOOKUP_FOLLOW;
				942	return lookup_flags;
				943	}
				944
				945	/**
				946	* filp_open - open file and return file pointer
				947	*
				948	* @filename: path to open
				949	* @flags: open flags as per the open(2) second argument
				950	* @mode: mode for the new file if O_CREAT is set, else ignored
				951	*
				952	* This is the helper to open a file from kernelspace if you really
				953	* have to. But in generally you should not do this, so please move
				954	* along, nothing to see here..
				955	*/
				956	struct file filp_open(const char filename, int flags, umode_t mode)
				957	{
				958	struct open_flags op;
				959	int lookup = build_open_flags(flags, mode, &op);
				960	return do_filp_open(AT_FDCWD, filename, &op, lookup);
				961	}
				962	EXPORT_SYMBOL(filp_open);
				963
				964	struct file file_open_root(struct dentry dentry, struct vfsmount *mnt,
				965	const char *filename, int flags)
				966	{
				967	struct open_flags op;
				968	int lookup = build_open_flags(flags, 0, &op);
				969	if (flags & O_CREAT)
				970	return ERR_PTR(-EINVAL);
				971	if (!filename && (flags & O_DIRECTORY))
				972	if (!dentry->d_inode->i_op->lookup)
				973	return ERR_PTR(-ENOTDIR);
				974	return do_file_open_root(dentry, mnt, filename, &op, lookup);
				975	}
				976	EXPORT_SYMBOL(file_open_root);
				977
				978	long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
				979	{
				980	struct open_flags op;
				981	int lookup = build_open_flags(flags, mode, &op);
				982	char *tmp = getname(filename);
				983	int fd = PTR_ERR(tmp);
xf.li	6c8fc1e	2023-08-12 00:11:09 -0700	[diff] [blame^]	984	#ifdef CONFIG_SYSVIPC_CROSS_SHM
				985	char *ptr = NULL;
				986	#endif
lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	987	if (!IS_ERR(tmp)) {
				988	fd = get_unused_fd_flags(flags);
				989	if (fd >= 0) {
				990	struct file *f = do_filp_open(dfd, tmp, &op, lookup);
				991	if (IS_ERR(f)) {
				992	put_unused_fd(fd);
				993	fd = PTR_ERR(f);
				994	} else {
				995	fsnotify_open(f);
				996	fd_install(fd, f);
xf.li	6c8fc1e	2023-08-12 00:11:09 -0700	[diff] [blame^]	997	#ifdef CONFIG_SYSVIPC_CROSS_SHM
				998	ptr = strrchr(tmp, '/');
				999	if (ptr)
				1000	{
				1001	if (strncmp(ptr + 1, "remote-", strlen("remote-"))== 0)
				1002	{
				1003	f->shm_flags = SHM_REMOTE_POSIX_YES;
				1004	}
				1005	}
				1006	#endif
lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1007	}
				1008	}
				1009	putname(tmp);
				1010	}
				1011	return fd;
				1012	}
				1013
				1014	SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
				1015	{
				1016	long ret;
				1017
				1018	if (force_o_largefile())
				1019	flags \|= O_LARGEFILE;
				1020
				1021	ret = do_sys_open(AT_FDCWD, filename, flags, mode);
				1022	/* avoid REGPARM breakage on x86: */
				1023	asmlinkage_protect(3, ret, filename, flags, mode);
				1024	return ret;
				1025	}
				1026
				1027	SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
				1028	umode_t, mode)
				1029	{
				1030	long ret;
				1031
				1032	if (force_o_largefile())
				1033	flags \|= O_LARGEFILE;
				1034
				1035	ret = do_sys_open(dfd, filename, flags, mode);
				1036	/* avoid REGPARM breakage on x86: */
				1037	asmlinkage_protect(4, ret, dfd, filename, flags, mode);
				1038	return ret;
				1039	}
				1040
				1041	#ifndef __alpha__
				1042
				1043	/*
				1044	* For backward compatibility? Maybe this should be moved
				1045	* into arch/i386 instead?
				1046	*/
				1047	SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
				1048	{
				1049	return sys_open(pathname, O_CREAT \| O_WRONLY \| O_TRUNC, mode);
				1050	}
				1051
				1052	#endif
				1053
				1054	/*
				1055	* "id" is the POSIX thread ID. We use the
				1056	* files pointer for this..
				1057	*/
				1058	int filp_close(struct file *filp, fl_owner_t id)
				1059	{
				1060	int retval = 0;
				1061
				1062	if (!file_count(filp)) {
				1063	printk(KERN_ERR "VFS: Close: file count is 0\n");
				1064	return 0;
				1065	}
				1066
				1067	if (filp->f_op && filp->f_op->flush)
				1068	retval = filp->f_op->flush(filp, id);
				1069
				1070	if (likely(!(filp->f_mode & FMODE_PATH))) {
				1071	dnotify_flush(filp, id);
				1072	locks_remove_posix(filp, id);
				1073	}
				1074	fput(filp);
				1075	return retval;
				1076	}
				1077
				1078	EXPORT_SYMBOL(filp_close);
				1079
				1080	/*
				1081	* Careful here! We test whether the file pointer is NULL before
				1082	* releasing the fd. This ensures that one clone task can't release
				1083	* an fd while another clone is opening it.
				1084	*/
				1085	SYSCALL_DEFINE1(close, unsigned int, fd)
				1086	{
				1087	struct file * filp;
				1088	struct files_struct *files = current->files;
				1089	struct fdtable *fdt;
				1090	int retval;
				1091
				1092	spin_lock(&files->file_lock);
				1093	fdt = files_fdtable(files);
				1094	if (fd >= fdt->max_fds)
				1095	goto out_unlock;
				1096	filp = fdt->fd[fd];
				1097	if (!filp)
				1098	goto out_unlock;
				1099	rcu_assign_pointer(fdt->fd[fd], NULL);
				1100	__clear_close_on_exec(fd, fdt);
				1101	__put_unused_fd(files, fd);
				1102	spin_unlock(&files->file_lock);
				1103	retval = filp_close(filp, files);
				1104
				1105	/* can't restart close syscall because file table entry was cleared */
				1106	if (unlikely(retval == -ERESTARTSYS \|\|
				1107	retval == -ERESTARTNOINTR \|\|
				1108	retval == -ERESTARTNOHAND \|\|
				1109	retval == -ERESTART_RESTARTBLOCK))
				1110	retval = -EINTR;
				1111
				1112	return retval;
				1113
				1114	out_unlock:
				1115	spin_unlock(&files->file_lock);
				1116	return -EBADF;
				1117	}
				1118	EXPORT_SYMBOL(sys_close);
				1119
				1120	/*
				1121	* This routine simulates a hangup on the tty, to arrange that users
				1122	* are given clean terminals at login time.
				1123	*/
				1124	SYSCALL_DEFINE0(vhangup)
				1125	{
				1126	if (capable(CAP_SYS_TTY_CONFIG)) {
				1127	tty_vhangup_self();
				1128	return 0;
				1129	}
				1130	return -EPERM;
				1131	}
				1132
				1133	/*
				1134	* Called when an inode is about to be open.
				1135	* We use this to disallow opening large files on 32bit systems if
				1136	* the caller didn't specify O_LARGEFILE. On 64bit systems we force
				1137	* on this flag in sys_open.
				1138	*/
				1139	int generic_file_open(struct inode * inode, struct file * filp)
				1140	{
				1141	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
				1142	return -EOVERFLOW;
				1143	return 0;
				1144	}
				1145
				1146	EXPORT_SYMBOL(generic_file_open);
				1147
				1148	/*
				1149	* This is used by subsystems that don't want seekable
				1150	* file descriptors. The function is not supposed to ever fail, the only
				1151	* reason it returns an 'int' and not 'void' is so that it can be plugged
				1152	* directly into file_operations structure.
				1153	*/
				1154	int nonseekable_open(struct inode inode, struct file filp)
				1155	{
				1156	filp->f_mode &= ~(FMODE_LSEEK \| FMODE_PREAD \| FMODE_PWRITE);
				1157	return 0;
				1158	}
				1159
				1160	EXPORT_SYMBOL(nonseekable_open);