Blame - marvell/linux/fs/open.c - T108

blob: c6646926a61aae9d1270e109afb6ae40ad42129f [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0-only
				2	/*
				3	* linux/fs/open.c
				4	*
				5	* Copyright (C) 1991, 1992 Linus Torvalds
				6	*/
				7
				8	#include <linux/string.h>
				9	#include <linux/mm.h>
				10	#include <linux/file.h>
				11	#include <linux/fdtable.h>
				12	#include <linux/fsnotify.h>
				13	#include <linux/module.h>
				14	#include <linux/tty.h>
				15	#include <linux/namei.h>
				16	#include <linux/backing-dev.h>
				17	#include <linux/capability.h>
				18	#include <linux/securebits.h>
				19	#include <linux/security.h>
				20	#include <linux/mount.h>
				21	#include <linux/fcntl.h>
				22	#include <linux/slab.h>
				23	#include <linux/uaccess.h>
				24	#include <linux/fs.h>
				25	#include <linux/personality.h>
				26	#include <linux/pagemap.h>
				27	#include <linux/syscalls.h>
				28	#include <linux/rcupdate.h>
				29	#include <linux/audit.h>
				30	#include <linux/falloc.h>
				31	#include <linux/fs_struct.h>
				32	#include <linux/ima.h>
				33	#include <linux/dnotify.h>
				34	#include <linux/compat.h>
				35
				36	#include "internal.h"
				37
				38	int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
				39	struct file *filp)
				40	{
				41	int ret;
				42	struct iattr newattrs;
				43
				44	/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
				45	if (length < 0)
				46	return -EINVAL;
				47
				48	newattrs.ia_size = length;
				49	newattrs.ia_valid = ATTR_SIZE \| time_attrs;
				50	if (filp) {
				51	newattrs.ia_file = filp;
				52	newattrs.ia_valid \|= ATTR_FILE;
				53	}
				54
				55	/* Remove suid, sgid, and file capabilities on truncate too */
				56	ret = dentry_needs_remove_privs(dentry);
				57	if (ret < 0)
				58	return ret;
				59	if (ret)
				60	newattrs.ia_valid \|= ret \| ATTR_FORCE;
				61
				62	inode_lock(dentry->d_inode);
				63	/* Note any delegations or leases have already been broken: */
				64	ret = notify_change(dentry, &newattrs, NULL);
				65	inode_unlock(dentry->d_inode);
				66	return ret;
				67	}
				68
				69	long vfs_truncate(const struct path *path, loff_t length)
				70	{
				71	struct inode *inode;
				72	long error;
				73
				74	inode = path->dentry->d_inode;
				75
				76	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
				77	if (S_ISDIR(inode->i_mode))
				78	return -EISDIR;
				79	if (!S_ISREG(inode->i_mode))
				80	return -EINVAL;
				81
				82	error = mnt_want_write(path->mnt);
				83	if (error)
				84	goto out;
				85
				86	error = inode_permission(inode, MAY_WRITE);
				87	if (error)
				88	goto mnt_drop_write_and_out;
				89
				90	error = -EPERM;
				91	if (IS_APPEND(inode))
				92	goto mnt_drop_write_and_out;
				93
				94	error = get_write_access(inode);
				95	if (error)
				96	goto mnt_drop_write_and_out;
				97
				98	/*
				99	* Make sure that there are no leases. get_write_access() protects
				100	* against the truncate racing with a lease-granting setlease().
				101	*/
				102	error = break_lease(inode, O_WRONLY);
				103	if (error)
				104	goto put_write_and_out;
				105
				106	error = locks_verify_truncate(inode, NULL, length);
				107	if (!error)
				108	error = security_path_truncate(path);
				109	if (!error)
				110	error = do_truncate(path->dentry, length, 0, NULL);
				111
				112	put_write_and_out:
				113	put_write_access(inode);
				114	mnt_drop_write_and_out:
				115	mnt_drop_write(path->mnt);
				116	out:
				117	return error;
				118	}
				119	EXPORT_SYMBOL_GPL(vfs_truncate);
				120
				121	long do_sys_truncate(const char __user *pathname, loff_t length)
				122	{
				123	unsigned int lookup_flags = LOOKUP_FOLLOW;
				124	struct path path;
				125	int error;
				126
				127	if (length < 0) /* sorry, but loff_t says... */
				128	return -EINVAL;
				129
				130	retry:
				131	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
				132	if (!error) {
				133	error = vfs_truncate(&path, length);
				134	path_put(&path);
				135	}
				136	if (retry_estale(error, lookup_flags)) {
				137	lookup_flags \|= LOOKUP_REVAL;
				138	goto retry;
				139	}
				140	return error;
				141	}
				142
				143	SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
				144	{
				145	return do_sys_truncate(path, length);
				146	}
				147
				148	#ifdef CONFIG_COMPAT
				149	COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length)
				150	{
				151	return do_sys_truncate(path, length);
				152	}
				153	#endif
				154
				155	long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
				156	{
				157	struct inode *inode;
				158	struct dentry *dentry;
				159	struct fd f;
				160	int error;
				161
				162	error = -EINVAL;
				163	if (length < 0)
				164	goto out;
				165	error = -EBADF;
				166	f = fdget(fd);
				167	if (!f.file)
				168	goto out;
				169
				170	/* explicitly opened as large or we are on 64-bit box */
				171	if (f.file->f_flags & O_LARGEFILE)
				172	small = 0;
				173
				174	dentry = f.file->f_path.dentry;
				175	inode = dentry->d_inode;
				176	error = -EINVAL;
				177	if (!S_ISREG(inode->i_mode) \|\| !(f.file->f_mode & FMODE_WRITE))
				178	goto out_putf;
				179
				180	error = -EINVAL;
				181	/* Cannot ftruncate over 2^31 bytes without large file support */
				182	if (small && length > MAX_NON_LFS)
				183	goto out_putf;
				184
				185	error = -EPERM;
				186	/* Check IS_APPEND on real upper inode */
				187	if (IS_APPEND(file_inode(f.file)))
				188	goto out_putf;
				189
				190	sb_start_write(inode->i_sb);
				191	error = locks_verify_truncate(inode, f.file, length);
				192	if (!error)
				193	error = security_path_truncate(&f.file->f_path);
				194	if (!error)
				195	error = do_truncate(dentry, length, ATTR_MTIME\|ATTR_CTIME, f.file);
				196	sb_end_write(inode->i_sb);
				197	out_putf:
				198	fdput(f);
				199	out:
				200	return error;
				201	}
				202
				203	SYSCALL_DEFINE2(ftruncate, unsigned int, fd, off_t, length)
				204	{
				205	return do_sys_ftruncate(fd, length, 1);
				206	}
				207
				208	#ifdef CONFIG_COMPAT
				209	COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_off_t, length)
				210	{
				211	return do_sys_ftruncate(fd, length, 1);
				212	}
				213	#endif
				214
				215	/* LFS versions of truncate are only needed on 32 bit machines */
				216	#if BITS_PER_LONG == 32
				217	SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length)
				218	{
				219	return do_sys_truncate(path, length);
				220	}
				221
				222	SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length)
				223	{
				224	return do_sys_ftruncate(fd, length, 0);
				225	}
				226	#endif /* BITS_PER_LONG == 32 */
				227
				228
				229	int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
				230	{
				231	struct inode *inode = file_inode(file);
				232	long ret;
				233
				234	if (offset < 0 \|\| len <= 0)
				235	return -EINVAL;
				236
				237	/* Return error if mode is not supported */
				238	if (mode & ~FALLOC_FL_SUPPORTED_MASK)
				239	return -EOPNOTSUPP;
				240
				241	/* Punch hole and zero range are mutually exclusive */
				242	if ((mode & (FALLOC_FL_PUNCH_HOLE \| FALLOC_FL_ZERO_RANGE)) ==
				243	(FALLOC_FL_PUNCH_HOLE \| FALLOC_FL_ZERO_RANGE))
				244	return -EOPNOTSUPP;
				245
				246	/* Punch hole must have keep size set */
				247	if ((mode & FALLOC_FL_PUNCH_HOLE) &&
				248	!(mode & FALLOC_FL_KEEP_SIZE))
				249	return -EOPNOTSUPP;
				250
				251	/* Collapse range should only be used exclusively. */
				252	if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
				253	(mode & ~FALLOC_FL_COLLAPSE_RANGE))
				254	return -EINVAL;
				255
				256	/* Insert range should only be used exclusively. */
				257	if ((mode & FALLOC_FL_INSERT_RANGE) &&
				258	(mode & ~FALLOC_FL_INSERT_RANGE))
				259	return -EINVAL;
				260
				261	/* Unshare range should only be used with allocate mode. */
				262	if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
				263	(mode & ~(FALLOC_FL_UNSHARE_RANGE \| FALLOC_FL_KEEP_SIZE)))
				264	return -EINVAL;
				265
				266	if (!(file->f_mode & FMODE_WRITE))
				267	return -EBADF;
				268
				269	/*
				270	* We can only allow pure fallocate on append only files
				271	*/
				272	if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
				273	return -EPERM;
				274
				275	if (IS_IMMUTABLE(inode))
				276	return -EPERM;
				277
				278	/*
				279	* We cannot allow any fallocate operation on an active swapfile
				280	*/
				281	if (IS_SWAPFILE(inode))
				282	return -ETXTBSY;
				283
				284	/*
				285	* Revalidate the write permissions, in case security policy has
				286	* changed since the files were opened.
				287	*/
				288	ret = security_file_permission(file, MAY_WRITE);
				289	if (ret)
				290	return ret;
				291
				292	if (S_ISFIFO(inode->i_mode))
				293	return -ESPIPE;
				294
				295	if (S_ISDIR(inode->i_mode))
				296	return -EISDIR;
				297
				298	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
				299	return -ENODEV;
				300
				301	/* Check for wrap through zero too */
				302	if (((offset + len) > inode->i_sb->s_maxbytes) \|\| ((offset + len) < 0))
				303	return -EFBIG;
				304
				305	if (!file->f_op->fallocate)
				306	return -EOPNOTSUPP;
				307
				308	file_start_write(file);
				309	ret = file->f_op->fallocate(file, mode, offset, len);
				310
				311	/*
				312	* Create inotify and fanotify events.
				313	*
				314	* To keep the logic simple always create events if fallocate succeeds.
				315	* This implies that events are even created if the file size remains
				316	* unchanged, e.g. when using flag FALLOC_FL_KEEP_SIZE.
				317	*/
				318	if (ret == 0)
				319	fsnotify_modify(file);
				320
				321	file_end_write(file);
				322	return ret;
				323	}
				324	EXPORT_SYMBOL_GPL(vfs_fallocate);
				325
				326	int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len)
				327	{
				328	struct fd f = fdget(fd);
				329	int error = -EBADF;
				330
				331	if (f.file) {
				332	error = vfs_fallocate(f.file, mode, offset, len);
				333	fdput(f);
				334	}
				335	return error;
				336	}
				337
				338	SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
				339	{
				340	return ksys_fallocate(fd, mode, offset, len);
				341	}
				342
				343	/*
				344	* access() needs to use the real uid/gid, not the effective uid/gid.
				345	* We do this by temporarily clearing all FS-related capabilities and
				346	* switching the fsuid/fsgid around to the real ones.
				347	*/
				348	long do_faccessat(int dfd, const char __user *filename, int mode)
				349	{
				350	const struct cred *old_cred;
				351	struct cred *override_cred;
				352	struct path path;
				353	struct inode *inode;
				354	int res;
				355	unsigned int lookup_flags = LOOKUP_FOLLOW;
				356
				357	if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
				358	return -EINVAL;
				359
				360	override_cred = prepare_creds();
				361	if (!override_cred)
				362	return -ENOMEM;
				363
				364	override_cred->fsuid = override_cred->uid;
				365	override_cred->fsgid = override_cred->gid;
				366
				367	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
				368	/* Clear the capabilities if we switch to a non-root user */
				369	kuid_t root_uid = make_kuid(override_cred->user_ns, 0);
				370	if (!uid_eq(override_cred->uid, root_uid))
				371	cap_clear(override_cred->cap_effective);
				372	else
				373	override_cred->cap_effective =
				374	override_cred->cap_permitted;
				375	}
				376
				377	/*
				378	* The new set of credentials can only be used in
				379	* task-synchronous circumstances, and does not need
				380	* RCU freeing, unless somebody then takes a separate
				381	* reference to it.
				382	*
				383	* NOTE! This is _only_ true because this credential
				384	* is used purely for override_creds() that installs
				385	* it as the subjective cred. Other threads will be
				386	* accessing ->real_cred, not the subjective cred.
				387	*
				388	* If somebody _does_ make a copy of this (using the
				389	* 'get_current_cred()' function), that will clear the
				390	* non_rcu field, because now that other user may be
				391	* expecting RCU freeing. But normal thread-synchronous
				392	* cred accesses will keep things non-RCY.
				393	*/
				394	override_cred->non_rcu = 1;
				395
				396	old_cred = override_creds(override_cred);
				397	retry:
				398	res = user_path_at(dfd, filename, lookup_flags, &path);
				399	if (res)
				400	goto out;
				401
				402	inode = d_backing_inode(path.dentry);
				403
				404	if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
				405	/*
				406	* MAY_EXEC on regular files is denied if the fs is mounted
				407	* with the "noexec" flag.
				408	*/
				409	res = -EACCES;
				410	if (path_noexec(&path))
				411	goto out_path_release;
				412	}
				413
				414	res = inode_permission(inode, mode \| MAY_ACCESS);
				415	/* SuS v2 requires we report a read only fs too */
				416	if (res \|\| !(mode & S_IWOTH) \|\| special_file(inode->i_mode))
				417	goto out_path_release;
				418	/*
				419	* This is a rare case where using __mnt_is_readonly()
				420	* is OK without a mnt_want/drop_write() pair. Since
				421	* no actual write to the fs is performed here, we do
				422	* not need to telegraph to that to anyone.
				423	*
				424	* By doing this, we accept that this access is
				425	* inherently racy and know that the fs may change
				426	* state before we even see this result.
				427	*/
				428	if (__mnt_is_readonly(path.mnt))
				429	res = -EROFS;
				430
				431	out_path_release:
				432	path_put(&path);
				433	if (retry_estale(res, lookup_flags)) {
				434	lookup_flags \|= LOOKUP_REVAL;
				435	goto retry;
				436	}
				437	out:
				438	revert_creds(old_cred);
				439	put_cred(override_cred);
				440	return res;
				441	}
				442
				443	SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
				444	{
				445	return do_faccessat(dfd, filename, mode);
				446	}
				447
				448	SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
				449	{
				450	return do_faccessat(AT_FDCWD, filename, mode);
				451	}
				452
				453	int ksys_chdir(const char __user *filename)
				454	{
				455	struct path path;
				456	int error;
				457	unsigned int lookup_flags = LOOKUP_FOLLOW \| LOOKUP_DIRECTORY;
				458	retry:
				459	error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
				460	if (error)
				461	goto out;
				462
				463	error = inode_permission(path.dentry->d_inode, MAY_EXEC \| MAY_CHDIR);
				464	if (error)
				465	goto dput_and_out;
				466
				467	set_fs_pwd(current->fs, &path);
				468
				469	dput_and_out:
				470	path_put(&path);
				471	if (retry_estale(error, lookup_flags)) {
				472	lookup_flags \|= LOOKUP_REVAL;
				473	goto retry;
				474	}
				475	out:
				476	return error;
				477	}
				478
				479	SYSCALL_DEFINE1(chdir, const char __user *, filename)
				480	{
				481	return ksys_chdir(filename);
				482	}
				483
				484	SYSCALL_DEFINE1(fchdir, unsigned int, fd)
				485	{
				486	struct fd f = fdget_raw(fd);
				487	int error;
				488
				489	error = -EBADF;
				490	if (!f.file)
				491	goto out;
				492
				493	error = -ENOTDIR;
				494	if (!d_can_lookup(f.file->f_path.dentry))
				495	goto out_putf;
				496
				497	error = inode_permission(file_inode(f.file), MAY_EXEC \| MAY_CHDIR);
				498	if (!error)
				499	set_fs_pwd(current->fs, &f.file->f_path);
				500	out_putf:
				501	fdput(f);
				502	out:
				503	return error;
				504	}
				505
				506	int ksys_chroot(const char __user *filename)
				507	{
				508	struct path path;
				509	int error;
				510	unsigned int lookup_flags = LOOKUP_FOLLOW \| LOOKUP_DIRECTORY;
				511	retry:
				512	error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
				513	if (error)
				514	goto out;
				515
				516	error = inode_permission(path.dentry->d_inode, MAY_EXEC \| MAY_CHDIR);
				517	if (error)
				518	goto dput_and_out;
				519
				520	error = -EPERM;
				521	if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT))
				522	goto dput_and_out;
				523	error = security_path_chroot(&path);
				524	if (error)
				525	goto dput_and_out;
				526
				527	set_fs_root(current->fs, &path);
				528	error = 0;
				529	dput_and_out:
				530	path_put(&path);
				531	if (retry_estale(error, lookup_flags)) {
				532	lookup_flags \|= LOOKUP_REVAL;
				533	goto retry;
				534	}
				535	out:
				536	return error;
				537	}
				538
				539	SYSCALL_DEFINE1(chroot, const char __user *, filename)
				540	{
				541	return ksys_chroot(filename);
				542	}
				543
				544	static int chmod_common(const struct path *path, umode_t mode)
				545	{
				546	struct inode *inode = path->dentry->d_inode;
				547	struct inode *delegated_inode = NULL;
				548	struct iattr newattrs;
				549	int error;
				550
				551	error = mnt_want_write(path->mnt);
				552	if (error)
				553	return error;
				554	retry_deleg:
				555	inode_lock(inode);
				556	error = security_path_chmod(path, mode);
				557	if (error)
				558	goto out_unlock;
				559	newattrs.ia_mode = (mode & S_IALLUGO) \| (inode->i_mode & ~S_IALLUGO);
				560	newattrs.ia_valid = ATTR_MODE \| ATTR_CTIME;
				561	error = notify_change(path->dentry, &newattrs, &delegated_inode);
				562	out_unlock:
				563	inode_unlock(inode);
				564	if (delegated_inode) {
				565	error = break_deleg_wait(&delegated_inode);
				566	if (!error)
				567	goto retry_deleg;
				568	}
				569	mnt_drop_write(path->mnt);
				570	return error;
				571	}
				572
				573	int vfs_fchmod(struct file *file, umode_t mode)
				574	{
				575	audit_file(file);
				576	return chmod_common(&file->f_path, mode);
				577	}
				578
				579	int ksys_fchmod(unsigned int fd, umode_t mode)
				580	{
				581	struct fd f = fdget(fd);
				582	int err = -EBADF;
				583
				584	if (f.file) {
				585	err = vfs_fchmod(f.file, mode);
				586	fdput(f);
				587	}
				588	return err;
				589	}
				590
				591	SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
				592	{
				593	return ksys_fchmod(fd, mode);
				594	}
				595
				596	int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
				597	{
				598	struct path path;
				599	int error;
				600	unsigned int lookup_flags = LOOKUP_FOLLOW;
				601	retry:
				602	error = user_path_at(dfd, filename, lookup_flags, &path);
				603	if (!error) {
				604	error = chmod_common(&path, mode);
				605	path_put(&path);
				606	if (retry_estale(error, lookup_flags)) {
				607	lookup_flags \|= LOOKUP_REVAL;
				608	goto retry;
				609	}
				610	}
				611	return error;
				612	}
				613
				614	SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename,
				615	umode_t, mode)
				616	{
				617	return do_fchmodat(dfd, filename, mode);
				618	}
				619
				620	SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
				621	{
				622	return do_fchmodat(AT_FDCWD, filename, mode);
				623	}
				624
				625	static int chown_common(const struct path *path, uid_t user, gid_t group)
				626	{
				627	struct inode *inode = path->dentry->d_inode;
				628	struct inode *delegated_inode = NULL;
				629	int error;
				630	struct iattr newattrs;
				631	kuid_t uid;
				632	kgid_t gid;
				633
				634	uid = make_kuid(current_user_ns(), user);
				635	gid = make_kgid(current_user_ns(), group);
				636
				637	retry_deleg:
				638	newattrs.ia_valid = ATTR_CTIME;
				639	if (user != (uid_t) -1) {
				640	if (!uid_valid(uid))
				641	return -EINVAL;
				642	newattrs.ia_valid \|= ATTR_UID;
				643	newattrs.ia_uid = uid;
				644	}
				645	if (group != (gid_t) -1) {
				646	if (!gid_valid(gid))
				647	return -EINVAL;
				648	newattrs.ia_valid \|= ATTR_GID;
				649	newattrs.ia_gid = gid;
				650	}
				651	if (!S_ISDIR(inode->i_mode))
				652	newattrs.ia_valid \|=
				653	ATTR_KILL_SUID \| ATTR_KILL_SGID \| ATTR_KILL_PRIV;
				654	inode_lock(inode);
				655	error = security_path_chown(path, uid, gid);
				656	if (!error)
				657	error = notify_change(path->dentry, &newattrs, &delegated_inode);
				658	inode_unlock(inode);
				659	if (delegated_inode) {
				660	error = break_deleg_wait(&delegated_inode);
				661	if (!error)
				662	goto retry_deleg;
				663	}
				664	return error;
				665	}
				666
				667	int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
				668	int flag)
				669	{
				670	struct path path;
				671	int error = -EINVAL;
				672	int lookup_flags;
				673
				674	if ((flag & ~(AT_SYMLINK_NOFOLLOW \| AT_EMPTY_PATH)) != 0)
				675	goto out;
				676
				677	lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
				678	if (flag & AT_EMPTY_PATH)
				679	lookup_flags \|= LOOKUP_EMPTY;
				680	retry:
				681	error = user_path_at(dfd, filename, lookup_flags, &path);
				682	if (error)
				683	goto out;
				684	error = mnt_want_write(path.mnt);
				685	if (error)
				686	goto out_release;
				687	error = chown_common(&path, user, group);
				688	mnt_drop_write(path.mnt);
				689	out_release:
				690	path_put(&path);
				691	if (retry_estale(error, lookup_flags)) {
				692	lookup_flags \|= LOOKUP_REVAL;
				693	goto retry;
				694	}
				695	out:
				696	return error;
				697	}
				698
				699	SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
				700	gid_t, group, int, flag)
				701	{
				702	return do_fchownat(dfd, filename, user, group, flag);
				703	}
				704
				705	SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
				706	{
				707	return do_fchownat(AT_FDCWD, filename, user, group, 0);
				708	}
				709
				710	SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
				711	{
				712	return do_fchownat(AT_FDCWD, filename, user, group,
				713	AT_SYMLINK_NOFOLLOW);
				714	}
				715
				716	int vfs_fchown(struct file *file, uid_t user, gid_t group)
				717	{
				718	int error;
				719
				720	error = mnt_want_write_file(file);
				721	if (error)
				722	return error;
				723	audit_file(file);
				724	error = chown_common(&file->f_path, user, group);
				725	mnt_drop_write_file(file);
				726	return error;
				727	}
				728
				729	int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
				730	{
				731	struct fd f = fdget(fd);
				732	int error = -EBADF;
				733
				734	if (f.file) {
				735	error = vfs_fchown(f.file, user, group);
				736	fdput(f);
				737	}
				738	return error;
				739	}
				740
				741	SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
				742	{
				743	return ksys_fchown(fd, user, group);
				744	}
				745
				746	static int do_dentry_open(struct file *f,
				747	struct inode *inode,
				748	int (open)(struct inode , struct file *))
				749	{
				750	static const struct file_operations empty_fops = {};
				751	int error;
				752
				753	path_get(&f->f_path);
				754	f->f_inode = inode;
				755	f->f_mapping = inode->i_mapping;
				756	f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
				757	f->f_sb_err = file_sample_sb_err(f);
				758
				759	if (unlikely(f->f_flags & O_PATH)) {
				760	f->f_mode = FMODE_PATH \| FMODE_OPENED;
				761	f->f_op = &empty_fops;
				762	return 0;
				763	}
				764
				765	/* Any file opened for execve()/uselib() has to be a regular file. */
				766	if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) {
				767	error = -EACCES;
				768	goto cleanup_file;
				769	}
				770
				771	if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
				772	error = get_write_access(inode);
				773	if (unlikely(error))
				774	goto cleanup_file;
				775	error = __mnt_want_write(f->f_path.mnt);
				776	if (unlikely(error)) {
				777	put_write_access(inode);
				778	goto cleanup_file;
				779	}
				780	f->f_mode \|= FMODE_WRITER;
				781	}
				782
				783	/* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
				784	if (S_ISREG(inode->i_mode) \|\| S_ISDIR(inode->i_mode))
				785	f->f_mode \|= FMODE_ATOMIC_POS;
				786
				787	f->f_op = fops_get(inode->i_fop);
				788	if (WARN_ON(!f->f_op)) {
				789	error = -ENODEV;
				790	goto cleanup_all;
				791	}
				792
				793	error = security_file_open(f);
				794	if (error)
				795	goto cleanup_all;
				796
				797	error = break_lease(locks_inode(f), f->f_flags);
				798	if (error)
				799	goto cleanup_all;
				800
				801	/* normally all 3 are set; ->open() can clear them if needed */
				802	f->f_mode \|= FMODE_LSEEK \| FMODE_PREAD \| FMODE_PWRITE;
				803	if (!open)
				804	open = f->f_op->open;
				805	if (open) {
				806	error = open(inode, f);
				807	if (error)
				808	goto cleanup_all;
				809	}
				810	f->f_mode \|= FMODE_OPENED;
				811	if ((f->f_mode & (FMODE_READ \| FMODE_WRITE)) == FMODE_READ)
				812	i_readcount_inc(inode);
				813	if ((f->f_mode & FMODE_READ) &&
				814	likely(f->f_op->read \|\| f->f_op->read_iter))
				815	f->f_mode \|= FMODE_CAN_READ;
				816	if ((f->f_mode & FMODE_WRITE) &&
				817	likely(f->f_op->write \|\| f->f_op->write_iter))
				818	f->f_mode \|= FMODE_CAN_WRITE;
				819
				820	f->f_write_hint = WRITE_LIFE_NOT_SET;
				821	f->f_flags &= ~(O_CREAT \| O_EXCL \| O_NOCTTY \| O_TRUNC);
				822
				823	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
				824
				825	/* NB: we're sure to have correct a_ops only after f_op->open */
				826	if (f->f_flags & O_DIRECT) {
				827	if (!f->f_mapping->a_ops \|\| !f->f_mapping->a_ops->direct_IO)
				828	return -EINVAL;
				829	}
				830
				831	/*
				832	* XXX: Huge page cache doesn't support writing yet. Drop all page
				833	* cache for this file before processing writes.
				834	*/
				835	if ((f->f_mode & FMODE_WRITE) && filemap_nr_thps(inode->i_mapping))
				836	truncate_pagecache(inode, 0);
				837
				838	return 0;
				839
				840	cleanup_all:
				841	if (WARN_ON_ONCE(error > 0))
				842	error = -EINVAL;
				843	fops_put(f->f_op);
				844	if (f->f_mode & FMODE_WRITER) {
				845	put_write_access(inode);
				846	__mnt_drop_write(f->f_path.mnt);
				847	}
				848	cleanup_file:
				849	path_put(&f->f_path);
				850	f->f_path.mnt = NULL;
				851	f->f_path.dentry = NULL;
				852	f->f_inode = NULL;
				853	return error;
				854	}
				855
				856	/**
				857	* finish_open - finish opening a file
				858	* @file: file pointer
				859	* @dentry: pointer to dentry
				860	* @open: open callback
				861	* @opened: state of open
				862	*
				863	* This can be used to finish opening a file passed to i_op->atomic_open().
				864	*
				865	* If the open callback is set to NULL, then the standard f_op->open()
				866	* filesystem callback is substituted.
				867	*
				868	* NB: the dentry reference is _not_ consumed. If, for example, the dentry is
				869	* the return value of d_splice_alias(), then the caller needs to perform dput()
				870	* on it after finish_open().
				871	*
				872	* Returns zero on success or -errno if the open failed.
				873	*/
				874	int finish_open(struct file file, struct dentry dentry,
				875	int (open)(struct inode , struct file *))
				876	{
				877	BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
				878
				879	file->f_path.dentry = dentry;
				880	return do_dentry_open(file, d_backing_inode(dentry), open);
				881	}
				882	EXPORT_SYMBOL(finish_open);
				883
				884	/**
				885	* finish_no_open - finish ->atomic_open() without opening the file
				886	*
				887	* @file: file pointer
				888	* @dentry: dentry or NULL (as returned from ->lookup())
				889	*
				890	* This can be used to set the result of a successful lookup in ->atomic_open().
				891	*
				892	* NB: unlike finish_open() this function does consume the dentry reference and
				893	* the caller need not dput() it.
				894	*
				895	* Returns "0" which must be the return value of ->atomic_open() after having
				896	* called this function.
				897	*/
				898	int finish_no_open(struct file file, struct dentry dentry)
				899	{
				900	file->f_path.dentry = dentry;
				901	return 0;
				902	}
				903	EXPORT_SYMBOL(finish_no_open);
				904
				905	char file_path(struct file filp, char *buf, int buflen)
				906	{
				907	return d_path(&filp->f_path, buf, buflen);
				908	}
				909	EXPORT_SYMBOL(file_path);
				910
				911	/**
				912	* vfs_open - open the file at the given path
				913	* @path: path to open
				914	* @file: newly allocated file with f_flag initialized
				915	* @cred: credentials to use
				916	*/
				917	int vfs_open(const struct path path, struct file file)
				918	{
				919	file->f_path = *path;
				920	return do_dentry_open(file, d_backing_inode(path->dentry), NULL);
				921	}
				922
				923	struct file dentry_open(const struct path path, int flags,
				924	const struct cred *cred)
				925	{
				926	int error;
				927	struct file *f;
				928
				929	validate_creds(cred);
				930
				931	/* We must always pass in a valid mount pointer. */
				932	BUG_ON(!path->mnt);
				933
				934	f = alloc_empty_file(flags, cred);
				935	if (!IS_ERR(f)) {
				936	error = vfs_open(path, f);
				937	if (error) {
				938	fput(f);
				939	f = ERR_PTR(error);
				940	}
				941	}
				942	return f;
				943	}
				944	EXPORT_SYMBOL(dentry_open);
				945
				946	struct file open_with_fake_path(const struct path path, int flags,
				947	struct inode inode, const struct cred cred)
				948	{
				949	struct file *f = alloc_empty_file_noaccount(flags, cred);
				950	if (!IS_ERR(f)) {
				951	int error;
				952
				953	f->f_path = *path;
				954	error = do_dentry_open(f, inode, NULL);
				955	if (error) {
				956	fput(f);
				957	f = ERR_PTR(error);
				958	}
				959	}
				960	return f;
				961	}
				962	EXPORT_SYMBOL(open_with_fake_path);
				963
				964	static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
				965	{
				966	int lookup_flags = 0;
				967	int acc_mode = ACC_MODE(flags);
				968
				969	/*
				970	* Clear out all open flags we don't know about so that we don't report
				971	* them in fcntl(F_GETFD) or similar interfaces.
				972	*/
				973	flags &= VALID_OPEN_FLAGS;
				974
				975	if (flags & (O_CREAT \| __O_TMPFILE))
				976	op->mode = (mode & S_IALLUGO) \| S_IFREG;
				977	else
				978	op->mode = 0;
				979
				980	/* Must never be set by userspace */
				981	flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC;
				982
				983	/*
				984	* O_SYNC is implemented as __O_SYNC\|O_DSYNC. As many places only
				985	* check for O_DSYNC if the need any syncing at all we enforce it's
				986	* always set instead of having to deal with possibly weird behaviour
				987	* for malicious applications setting only __O_SYNC.
				988	*/
				989	if (flags & __O_SYNC)
				990	flags \|= O_DSYNC;
				991
				992	if (flags & __O_TMPFILE) {
				993	if ((flags & O_TMPFILE_MASK) != O_TMPFILE)
				994	return -EINVAL;
				995	if (!(acc_mode & MAY_WRITE))
				996	return -EINVAL;
				997	} else if (flags & O_PATH) {
				998	/*
				999	* If we have O_PATH in the open flag. Then we
				1000	* cannot have anything other than the below set of flags
				1001	*/
				1002	flags &= O_DIRECTORY \| O_NOFOLLOW \| O_PATH;
				1003	acc_mode = 0;
				1004	}
				1005
				1006	op->open_flag = flags;
				1007
				1008	/* O_TRUNC implies we need access checks for write permissions */
				1009	if (flags & O_TRUNC)
				1010	acc_mode \|= MAY_WRITE;
				1011
				1012	/* Allow the LSM permission hook to distinguish append
				1013	access from general write access. */
				1014	if (flags & O_APPEND)
				1015	acc_mode \|= MAY_APPEND;
				1016
				1017	op->acc_mode = acc_mode;
				1018
				1019	op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
				1020
				1021	if (flags & O_CREAT) {
				1022	op->intent \|= LOOKUP_CREATE;
				1023	if (flags & O_EXCL)
				1024	op->intent \|= LOOKUP_EXCL;
				1025	}
				1026
				1027	if (flags & O_DIRECTORY)
				1028	lookup_flags \|= LOOKUP_DIRECTORY;
				1029	if (!(flags & O_NOFOLLOW))
				1030	lookup_flags \|= LOOKUP_FOLLOW;
				1031	op->lookup_flags = lookup_flags;
				1032	return 0;
				1033	}
				1034
				1035	/**
				1036	* file_open_name - open file and return file pointer
				1037	*
				1038	* @name: struct filename containing path to open
				1039	* @flags: open flags as per the open(2) second argument
				1040	* @mode: mode for the new file if O_CREAT is set, else ignored
				1041	*
				1042	* This is the helper to open a file from kernelspace if you really
				1043	* have to. But in generally you should not do this, so please move
				1044	* along, nothing to see here..
				1045	*/
				1046	struct file file_open_name(struct filename name, int flags, umode_t mode)
				1047	{
				1048	struct open_flags op;
				1049	int err = build_open_flags(flags, mode, &op);
				1050	return err ? ERR_PTR(err) : do_filp_open(AT_FDCWD, name, &op);
				1051	}
				1052
				1053	/**
				1054	* filp_open - open file and return file pointer
				1055	*
				1056	* @filename: path to open
				1057	* @flags: open flags as per the open(2) second argument
				1058	* @mode: mode for the new file if O_CREAT is set, else ignored
				1059	*
				1060	* This is the helper to open a file from kernelspace if you really
				1061	* have to. But in generally you should not do this, so please move
				1062	* along, nothing to see here..
				1063	*/
				1064	struct file filp_open(const char filename, int flags, umode_t mode)
				1065	{
				1066	struct filename *name = getname_kernel(filename);
				1067	struct file *file = ERR_CAST(name);
				1068
				1069	if (!IS_ERR(name)) {
				1070	file = file_open_name(name, flags, mode);
				1071	putname(name);
				1072	}
				1073	return file;
				1074	}
				1075	EXPORT_SYMBOL(filp_open);
				1076
				1077	/* ANDROID: Allow drivers to open only block files from kernel mode */
				1078	struct file filp_open_block(const char filename, int flags, umode_t mode)
				1079	{
				1080	struct file *file;
				1081
				1082	file = filp_open(filename, flags, mode);
				1083	if (IS_ERR(file))
				1084	goto err_out;
				1085
				1086	/* Drivers should only be allowed to open block devices */
				1087	if (!S_ISBLK(file->f_mapping->host->i_mode)) {
				1088	filp_close(file, NULL);
				1089	file = ERR_PTR(-ENOTBLK);
				1090	}
				1091
				1092	err_out:
				1093	return file;
				1094	}
				1095	EXPORT_SYMBOL_GPL(filp_open_block);
				1096
				1097	struct file file_open_root(struct dentry dentry, struct vfsmount *mnt,
				1098	const char *filename, int flags, umode_t mode)
				1099	{
				1100	struct open_flags op;
				1101	int err = build_open_flags(flags, mode, &op);
				1102	if (err)
				1103	return ERR_PTR(err);
				1104	return do_file_open_root(dentry, mnt, filename, &op);
				1105	}
				1106	EXPORT_SYMBOL(file_open_root);
				1107
				1108	long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
				1109	{
				1110	struct open_flags op;
				1111	int fd = build_open_flags(flags, mode, &op);
				1112	struct filename *tmp;
				1113
				1114	if (fd)
				1115	return fd;
				1116
				1117	tmp = getname(filename);
				1118	if (IS_ERR(tmp))
				1119	return PTR_ERR(tmp);
				1120
				1121	fd = get_unused_fd_flags(flags);
				1122	if (fd >= 0) {
				1123	struct file *f = do_filp_open(dfd, tmp, &op);
				1124	if (IS_ERR(f)) {
				1125	put_unused_fd(fd);
				1126	fd = PTR_ERR(f);
				1127	} else {
				1128	fsnotify_open(f);
				1129	fd_install(fd, f);
				1130	}
				1131	}
				1132	putname(tmp);
				1133	return fd;
				1134	}
				1135
				1136	SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
				1137	{
				1138	if (force_o_largefile())
				1139	flags \|= O_LARGEFILE;
				1140
				1141	return do_sys_open(AT_FDCWD, filename, flags, mode);
				1142	}
				1143
				1144	SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
				1145	umode_t, mode)
				1146	{
				1147	if (force_o_largefile())
				1148	flags \|= O_LARGEFILE;
				1149
				1150	return do_sys_open(dfd, filename, flags, mode);
				1151	}
				1152
				1153	#ifdef CONFIG_COMPAT
				1154	/*
				1155	* Exactly like sys_open(), except that it doesn't set the
				1156	* O_LARGEFILE flag.
				1157	*/
				1158	COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
				1159	{
				1160	return do_sys_open(AT_FDCWD, filename, flags, mode);
				1161	}
				1162
				1163	/*
				1164	* Exactly like sys_openat(), except that it doesn't set the
				1165	* O_LARGEFILE flag.
				1166	*/
				1167	COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
				1168	{
				1169	return do_sys_open(dfd, filename, flags, mode);
				1170	}
				1171	#endif
				1172
				1173	#ifndef __alpha__
				1174
				1175	/*
				1176	* For backward compatibility? Maybe this should be moved
				1177	* into arch/i386 instead?
				1178	*/
				1179	SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
				1180	{
				1181	return ksys_open(pathname, O_CREAT \| O_WRONLY \| O_TRUNC, mode);
				1182	}
				1183
				1184	#endif
				1185
				1186	/*
				1187	* "id" is the POSIX thread ID. We use the
				1188	* files pointer for this..
				1189	*/
				1190	int filp_close(struct file *filp, fl_owner_t id)
				1191	{
				1192	int retval = 0;
				1193
				1194	if (!file_count(filp)) {
				1195	printk(KERN_ERR "VFS: Close: file count is 0\n");
				1196	return 0;
				1197	}
				1198
				1199	if (filp->f_op->flush)
				1200	retval = filp->f_op->flush(filp, id);
				1201
				1202	if (likely(!(filp->f_mode & FMODE_PATH))) {
				1203	dnotify_flush(filp, id);
				1204	locks_remove_posix(filp, id);
				1205	}
				1206	fput(filp);
				1207	return retval;
				1208	}
				1209
				1210	EXPORT_SYMBOL(filp_close);
				1211
				1212	/*
				1213	* Careful here! We test whether the file pointer is NULL before
				1214	* releasing the fd. This ensures that one clone task can't release
				1215	* an fd while another clone is opening it.
				1216	*/
				1217	SYSCALL_DEFINE1(close, unsigned int, fd)
				1218	{
				1219	int retval = __close_fd(current->files, fd);
				1220
				1221	/* can't restart close syscall because file table entry was cleared */
				1222	if (unlikely(retval == -ERESTARTSYS \|\|
				1223	retval == -ERESTARTNOINTR \|\|
				1224	retval == -ERESTARTNOHAND \|\|
				1225	retval == -ERESTART_RESTARTBLOCK))
				1226	retval = -EINTR;
				1227
				1228	return retval;
				1229	}
				1230
				1231	/*
				1232	* This routine simulates a hangup on the tty, to arrange that users
				1233	* are given clean terminals at login time.
				1234	*/
				1235	SYSCALL_DEFINE0(vhangup)
				1236	{
				1237	if (capable(CAP_SYS_TTY_CONFIG)) {
				1238	tty_vhangup_self();
				1239	return 0;
				1240	}
				1241	return -EPERM;
				1242	}
				1243
				1244	/*
				1245	* Called when an inode is about to be open.
				1246	* We use this to disallow opening large files on 32bit systems if
				1247	* the caller didn't specify O_LARGEFILE. On 64bit systems we force
				1248	* on this flag in sys_open.
				1249	*/
				1250	int generic_file_open(struct inode * inode, struct file * filp)
				1251	{
				1252	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
				1253	return -EOVERFLOW;
				1254	return 0;
				1255	}
				1256
				1257	EXPORT_SYMBOL(generic_file_open);
				1258
				1259	/*
				1260	* This is used by subsystems that don't want seekable
				1261	* file descriptors. The function is not supposed to ever fail, the only
				1262	* reason it returns an 'int' and not 'void' is so that it can be plugged
				1263	* directly into file_operations structure.
				1264	*/
				1265	int nonseekable_open(struct inode inode, struct file filp)
				1266	{
				1267	filp->f_mode &= ~(FMODE_LSEEK \| FMODE_PREAD \| FMODE_PWRITE);
				1268	return 0;
				1269	}
				1270
				1271	EXPORT_SYMBOL(nonseekable_open);
				1272
				1273	/*
				1274	* stream_open is used by subsystems that want stream-like file descriptors.
				1275	* Such file descriptors are not seekable and don't have notion of position
				1276	* (file.f_pos is always 0 and ppos passed to .read()/.write() is always NULL).
				1277	* Contrary to file descriptors of other regular files, .read() and .write()
				1278	* can run simultaneously.
				1279	*
				1280	* stream_open never fails and is marked to return int so that it could be
				1281	* directly used as file_operations.open .
				1282	*/
				1283	int stream_open(struct inode inode, struct file filp)
				1284	{
				1285	filp->f_mode &= ~(FMODE_LSEEK \| FMODE_PREAD \| FMODE_PWRITE \| FMODE_ATOMIC_POS);
				1286	filp->f_mode \|= FMODE_STREAM;
				1287	return 0;
				1288	}
				1289
				1290	EXPORT_SYMBOL(stream_open);