Blame - ap/os/linux/linux-3.4.x/security/commoncap.c - T106_DC

blob: 0051ac2d0583773560e0186f56d3a9a8a5c7edd9 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/* Common capabilities, needed by capability.o.
				2	*
				3	* This program is free software; you can redistribute it and/or modify
				4	* it under the terms of the GNU General Public License as published by
				5	* the Free Software Foundation; either version 2 of the License, or
				6	* (at your option) any later version.
				7	*
				8	*/
				9
				10	#include <linux/capability.h>
				11	#include <linux/audit.h>
				12	#include <linux/module.h>
				13	#include <linux/init.h>
				14	#include <linux/kernel.h>
				15	#include <linux/security.h>
				16	#include <linux/file.h>
				17	#include <linux/mm.h>
				18	#include <linux/mman.h>
				19	#include <linux/pagemap.h>
				20	#include <linux/swap.h>
				21	#include <linux/skbuff.h>
				22	#include <linux/netlink.h>
				23	#include <linux/ptrace.h>
				24	#include <linux/xattr.h>
				25	#include <linux/hugetlb.h>
				26	#include <linux/mount.h>
				27	#include <linux/sched.h>
				28	#include <linux/prctl.h>
				29	#include <linux/securebits.h>
				30	#include <linux/user_namespace.h>
				31	#include <linux/binfmts.h>
				32	#include <linux/personality.h>
				33
				34	#ifdef CONFIG_ANDROID_PARANOID_NETWORK
				35	#include <linux/android_aid.h>
				36	#endif
				37
				38	/*
				39	* If a non-root user executes a setuid-root binary in
				40	* !secure(SECURE_NOROOT) mode, then we raise capabilities.
				41	* However if fE is also set, then the intent is for only
				42	* the file capabilities to be applied, and the setuid-root
				43	* bit is left on either to change the uid (plausible) or
				44	* to get full privilege on a kernel without file capabilities
				45	* support. So in that case we do not raise capabilities.
				46	*
				47	* Warn if that happens, once per boot.
				48	*/
				49	static void warn_setuid_and_fcaps_mixed(const char *fname)
				50	{
				51	static int warned;
				52	if (!warned) {
				53	printk(KERN_INFO "warning: `%s' has both setuid-root and"
				54	" effective capabilities. Therefore not raising all"
				55	" capabilities.\n", fname);
				56	warned = 1;
				57	}
				58	}
				59
				60	int cap_netlink_send(struct sock sk, struct sk_buff skb)
				61	{
				62	return 0;
				63	}
				64
				65	/**
				66	* cap_capable - Determine whether a task has a particular effective capability
				67	* @cred: The credentials to use
				68	* @ns: The user namespace in which we need the capability
				69	* @cap: The capability to check for
				70	* @audit: Whether to write an audit message or not
				71	*
				72	* Determine whether the nominated task has the specified capability amongst
				73	* its effective set, returning 0 if it does, -ve if it does not.
				74	*
				75	* NOTE WELL: cap_has_capability() cannot be used like the kernel's capable()
				76	* and has_capability() functions. That is, it has the reverse semantics:
				77	* cap_has_capability() returns 0 when a task has a capability, but the
				78	* kernel's capable() and has_capability() returns 1 for this case.
				79	*/
				80	int cap_capable(const struct cred cred, struct user_namespace targ_ns,
				81	int cap, int audit)
				82	{
				83	#ifdef CONFIG_ANDROID_PARANOID_NETWORK
				84	if (cap == CAP_NET_RAW && in_egroup_p(AID_NET_RAW))
				85	return 0;
				86	if (cap == CAP_NET_ADMIN && in_egroup_p(AID_NET_ADMIN))
				87	return 0;
				88	#endif
				89
				90	for (;;) {
				91	/* The creator of the user namespace has all caps. */
				92	if (targ_ns != &init_user_ns && targ_ns->creator == cred->user)
				93	return 0;
				94
				95	/* Do we have the necessary capabilities? */
				96	if (targ_ns == cred->user->user_ns)
				97	return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
				98
				99	/* Have we tried all of the parent namespaces? */
				100	if (targ_ns == &init_user_ns)
				101	return -EPERM;
				102
				103	/*
				104	*If you have a capability in a parent user ns, then you have
				105	* it over all children user namespaces as well.
				106	*/
				107	targ_ns = targ_ns->creator->user_ns;
				108	}
				109
				110	/* We never get here */
				111	}
				112
				113	/**
				114	* cap_settime - Determine whether the current process may set the system clock
				115	* @ts: The time to set
				116	* @tz: The timezone to set
				117	*
				118	* Determine whether the current process may set the system clock and timezone
				119	* information, returning 0 if permission granted, -ve if denied.
				120	*/
				121	int cap_settime(const struct timespec ts, const struct timezone tz)
				122	{
				123	if (!capable(CAP_SYS_TIME))
				124	return -EPERM;
				125	return 0;
				126	}
				127
				128	/**
				129	* cap_ptrace_access_check - Determine whether the current process may access
				130	* another
				131	* @child: The process to be accessed
				132	* @mode: The mode of attachment.
				133	*
				134	* If we are in the same or an ancestor user_ns and have all the target
				135	* task's capabilities, then ptrace access is allowed.
				136	* If we have the ptrace capability to the target user_ns, then ptrace
				137	* access is allowed.
				138	* Else denied.
				139	*
				140	* Determine whether a process may access another, returning 0 if permission
				141	* granted, -ve if denied.
				142	*/
				143	int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
				144	{
				145	int ret = 0;
				146	const struct cred cred, child_cred;
				147
				148	rcu_read_lock();
				149	cred = current_cred();
				150	child_cred = __task_cred(child);
				151	if (cred->user->user_ns == child_cred->user->user_ns &&
				152	cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
				153	goto out;
				154	if (ns_capable(child_cred->user->user_ns, CAP_SYS_PTRACE))
				155	goto out;
				156	ret = -EPERM;
				157	out:
				158	rcu_read_unlock();
				159	return ret;
				160	}
				161
				162	/**
				163	* cap_ptrace_traceme - Determine whether another process may trace the current
				164	* @parent: The task proposed to be the tracer
				165	*
				166	* If parent is in the same or an ancestor user_ns and has all current's
				167	* capabilities, then ptrace access is allowed.
				168	* If parent has the ptrace capability to current's user_ns, then ptrace
				169	* access is allowed.
				170	* Else denied.
				171	*
				172	* Determine whether the nominated task is permitted to trace the current
				173	* process, returning 0 if permission is granted, -ve if denied.
				174	*/
				175	int cap_ptrace_traceme(struct task_struct *parent)
				176	{
				177	int ret = 0;
				178	const struct cred cred, child_cred;
				179
				180	rcu_read_lock();
				181	cred = __task_cred(parent);
				182	child_cred = current_cred();
				183	if (cred->user->user_ns == child_cred->user->user_ns &&
				184	cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
				185	goto out;
				186	if (has_ns_capability(parent, child_cred->user->user_ns, CAP_SYS_PTRACE))
				187	goto out;
				188	ret = -EPERM;
				189	out:
				190	rcu_read_unlock();
				191	return ret;
				192	}
				193
				194	/**
				195	* cap_capget - Retrieve a task's capability sets
				196	* @target: The task from which to retrieve the capability sets
				197	* @effective: The place to record the effective set
				198	* @inheritable: The place to record the inheritable set
				199	* @permitted: The place to record the permitted set
				200	*
				201	* This function retrieves the capabilities of the nominated task and returns
				202	* them to the caller.
				203	*/
				204	int cap_capget(struct task_struct target, kernel_cap_t effective,
				205	kernel_cap_t inheritable, kernel_cap_t permitted)
				206	{
				207	const struct cred *cred;
				208
				209	/* Derived from kernel/capability.c:sys_capget. */
				210	rcu_read_lock();
				211	cred = __task_cred(target);
				212	*effective = cred->cap_effective;
				213	*inheritable = cred->cap_inheritable;
				214	*permitted = cred->cap_permitted;
				215	rcu_read_unlock();
				216	return 0;
				217	}
				218
				219	/*
				220	* Determine whether the inheritable capabilities are limited to the old
				221	* permitted set. Returns 1 if they are limited, 0 if they are not.
				222	*/
				223	static inline int cap_inh_is_capped(void)
				224	{
				225
				226	/* they are so limited unless the current task has the CAP_SETPCAP
				227	* capability
				228	*/
				229	if (cap_capable(current_cred(), current_cred()->user->user_ns,
				230	CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0)
				231	return 0;
				232	return 1;
				233	}
				234
				235	/**
				236	* cap_capset - Validate and apply proposed changes to current's capabilities
				237	* @new: The proposed new credentials; alterations should be made here
				238	* @old: The current task's current credentials
				239	* @effective: A pointer to the proposed new effective capabilities set
				240	* @inheritable: A pointer to the proposed new inheritable capabilities set
				241	* @permitted: A pointer to the proposed new permitted capabilities set
				242	*
				243	* This function validates and applies a proposed mass change to the current
				244	* process's capability sets. The changes are made to the proposed new
				245	* credentials, and assuming no error, will be committed by the caller of LSM.
				246	*/
				247	int cap_capset(struct cred *new,
				248	const struct cred *old,
				249	const kernel_cap_t *effective,
				250	const kernel_cap_t *inheritable,
				251	const kernel_cap_t *permitted)
				252	{
				253	if (cap_inh_is_capped() &&
				254	!cap_issubset(*inheritable,
				255	cap_combine(old->cap_inheritable,
				256	old->cap_permitted)))
				257	/* incapable of using this inheritable set */
				258	return -EPERM;
				259
				260	if (!cap_issubset(*inheritable,
				261	cap_combine(old->cap_inheritable,
				262	old->cap_bset)))
				263	/* no new pI capabilities outside bounding set */
				264	return -EPERM;
				265
				266	/* verify restrictions on target's new Permitted set */
				267	if (!cap_issubset(*permitted, old->cap_permitted))
				268	return -EPERM;
				269
				270	/* verify the _new_Effective_ is a subset of the _new_Permitted_ */
				271	if (!cap_issubset(effective, permitted))
				272	return -EPERM;
				273
				274	new->cap_effective = *effective;
				275	new->cap_inheritable = *inheritable;
				276	new->cap_permitted = *permitted;
				277	return 0;
				278	}
				279
				280	/*
				281	* Clear proposed capability sets for execve().
				282	*/
				283	static inline void bprm_clear_caps(struct linux_binprm *bprm)
				284	{
				285	cap_clear(bprm->cred->cap_permitted);
				286	bprm->cap_effective = false;
				287	}
				288
				289	/**
				290	* cap_inode_need_killpriv - Determine if inode change affects privileges
				291	* @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV
				292	*
				293	* Determine if an inode having a change applied that's marked ATTR_KILL_PRIV
				294	* affects the security markings on that inode, and if it is, should
				295	* inode_killpriv() be invoked or the change rejected?
				296	*
				297	* Returns 0 if granted; +ve if granted, but inode_killpriv() is required; and
				298	* -ve to deny the change.
				299	*/
				300	int cap_inode_need_killpriv(struct dentry *dentry)
				301	{
				302	struct inode *inode = dentry->d_inode;
				303	int error;
				304
				305	if (!inode->i_op->getxattr)
				306	return 0;
				307
				308	error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0);
				309	if (error <= 0)
				310	return 0;
				311	return 1;
				312	}
				313
				314	/**
				315	* cap_inode_killpriv - Erase the security markings on an inode
				316	* @dentry: The inode/dentry to alter
				317	*
				318	* Erase the privilege-enhancing security markings on an inode.
				319	*
				320	* Returns 0 if successful, -ve on error.
				321	*/
				322	int cap_inode_killpriv(struct dentry *dentry)
				323	{
				324	struct inode *inode = dentry->d_inode;
				325
				326	if (!inode->i_op->removexattr)
				327	return 0;
				328
				329	return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS);
				330	}
				331
				332	/*
				333	* Calculate the new process capability sets from the capability sets attached
				334	* to a file.
				335	*/
				336	static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
				337	struct linux_binprm *bprm,
				338	bool *effective,
				339	bool *has_cap)
				340	{
				341	struct cred *new = bprm->cred;
				342	unsigned i;
				343	int ret = 0;
				344
				345	if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
				346	*effective = true;
				347
				348	if (caps->magic_etc & VFS_CAP_REVISION_MASK)
				349	*has_cap = true;
				350
				351	CAP_FOR_EACH_U32(i) {
				352	__u32 permitted = caps->permitted.cap[i];
				353	__u32 inheritable = caps->inheritable.cap[i];
				354
				355	/*
				356	* pP' = (X & fP) \| (pI & fI)
				357	*/
				358	new->cap_permitted.cap[i] =
				359	(new->cap_bset.cap[i] & permitted) \|
				360	(new->cap_inheritable.cap[i] & inheritable);
				361
				362	if (permitted & ~new->cap_permitted.cap[i])
				363	/* insufficient to execute correctly */
				364	ret = -EPERM;
				365	}
				366
				367	/*
				368	* For legacy apps, with no internal support for recognizing they
				369	* do not have enough capabilities, we return an error if they are
				370	* missing some "forced" (aka file-permitted) capabilities.
				371	*/
				372	return *effective ? ret : 0;
				373	}
				374
				375	/*
				376	* Extract the on-exec-apply capability sets for an executable file.
				377	*/
				378	int get_vfs_caps_from_disk(const struct dentry dentry, struct cpu_vfs_cap_data cpu_caps)
				379	{
				380	struct inode *inode = dentry->d_inode;
				381	__u32 magic_etc;
				382	unsigned tocopy, i;
				383	int size;
				384	struct vfs_cap_data caps;
				385
				386	memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
				387
				388	if (!inode \|\| !inode->i_op->getxattr)
				389	return -ENODATA;
				390
				391	size = inode->i_op->getxattr((struct dentry *)dentry, XATTR_NAME_CAPS, &caps,
				392	XATTR_CAPS_SZ);
				393	if (size == -ENODATA \|\| size == -EOPNOTSUPP)
				394	/* no data, that's ok */
				395	return -ENODATA;
				396	if (size < 0)
				397	return size;
				398
				399	if (size < sizeof(magic_etc))
				400	return -EINVAL;
				401
				402	cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps.magic_etc);
				403
				404	switch (magic_etc & VFS_CAP_REVISION_MASK) {
				405	case VFS_CAP_REVISION_1:
				406	if (size != XATTR_CAPS_SZ_1)
				407	return -EINVAL;
				408	tocopy = VFS_CAP_U32_1;
				409	break;
				410	case VFS_CAP_REVISION_2:
				411	if (size != XATTR_CAPS_SZ_2)
				412	return -EINVAL;
				413	tocopy = VFS_CAP_U32_2;
				414	break;
				415	default:
				416	return -EINVAL;
				417	}
				418
				419	CAP_FOR_EACH_U32(i) {
				420	if (i >= tocopy)
				421	break;
				422	cpu_caps->permitted.cap[i] = le32_to_cpu(caps.data[i].permitted);
				423	cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable);
				424	}
				425
				426	return 0;
				427	}
				428
				429	/*
				430	* Attempt to get the on-exec apply capability sets for an executable file from
				431	* its xattrs and, if present, apply them to the proposed credentials being
				432	* constructed by execve().
				433	*/
				434	static int get_file_caps(struct linux_binprm bprm, bool effective, bool *has_cap)
				435	{
				436	struct dentry *dentry;
				437	int rc = 0;
				438	struct cpu_vfs_cap_data vcaps;
				439
				440	bprm_clear_caps(bprm);
				441
				442	if (!file_caps_enabled)
				443	return 0;
				444
				445	if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)
				446	return 0;
				447
				448	dentry = dget(bprm->file->f_dentry);
				449
				450	rc = get_vfs_caps_from_disk(dentry, &vcaps);
				451	if (rc < 0) {
				452	if (rc == -EINVAL)
				453	printk(KERN_NOTICE "%s: get_vfs_caps_from_disk returned %d for %s\n",
				454	__func__, rc, bprm->filename);
				455	else if (rc == -ENODATA)
				456	rc = 0;
				457	goto out;
				458	}
				459
				460	rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_cap);
				461	if (rc == -EINVAL)
				462	printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n",
				463	__func__, rc, bprm->filename);
				464
				465	out:
				466	dput(dentry);
				467	if (rc)
				468	bprm_clear_caps(bprm);
				469
				470	return rc;
				471	}
				472
				473	/**
				474	* cap_bprm_set_creds - Set up the proposed credentials for execve().
				475	* @bprm: The execution parameters, including the proposed creds
				476	*
				477	* Set up the proposed credentials for a new execution context being
				478	* constructed by execve(). The proposed creds in @bprm->cred is altered,
				479	* which won't take effect immediately. Returns 0 if successful, -ve on error.
				480	*/
				481	int cap_bprm_set_creds(struct linux_binprm *bprm)
				482	{
				483	const struct cred *old = current_cred();
				484	struct cred *new = bprm->cred;
				485	bool effective, has_cap = false;
				486	int ret;
				487
				488	effective = false;
				489	ret = get_file_caps(bprm, &effective, &has_cap);
				490	if (ret < 0)
				491	return ret;
				492
				493	if (!issecure(SECURE_NOROOT)) {
				494	/*
				495	* If the legacy file capability is set, then don't set privs
				496	* for a setuid root binary run by a non-root user. Do set it
				497	* for a root user just to cause least surprise to an admin.
				498	*/
				499	if (has_cap && new->uid != 0 && new->euid == 0) {
				500	warn_setuid_and_fcaps_mixed(bprm->filename);
				501	goto skip;
				502	}
				503	/*
				504	* To support inheritance of root-permissions and suid-root
				505	* executables under compatibility mode, we override the
				506	* capability sets for the file.
				507	*
				508	* If only the real uid is 0, we do not set the effective bit.
				509	*/
				510	if (new->euid == 0 \|\| new->uid == 0) {
				511	/* pP' = (cap_bset & ~0) \| (pI & ~0) */
				512	new->cap_permitted = cap_combine(old->cap_bset,
				513	old->cap_inheritable);
				514	}
				515	if (new->euid == 0)
				516	effective = true;
				517	}
				518	skip:
				519
				520	/* if we have fs caps, clear dangerous personality flags */
				521	if (!cap_issubset(new->cap_permitted, old->cap_permitted))
				522	bprm->per_clear \|= PER_CLEAR_ON_SETID;
				523
				524
				525	/* Don't let someone trace a set[ug]id/setpcap binary with the revised
				526	* credentials unless they have the appropriate permit
				527	*/
				528	if ((new->euid != old->uid \|\|
				529	new->egid != old->gid \|\|
				530	!cap_issubset(new->cap_permitted, old->cap_permitted)) &&
				531	bprm->unsafe & ~LSM_UNSAFE_PTRACE_CAP) {
				532	/* downgrade; they get no more than they had, and maybe less */
				533	if (!capable(CAP_SETUID)) {
				534	new->euid = new->uid;
				535	new->egid = new->gid;
				536	}
				537	new->cap_permitted = cap_intersect(new->cap_permitted,
				538	old->cap_permitted);
				539	}
				540
				541	new->suid = new->fsuid = new->euid;
				542	new->sgid = new->fsgid = new->egid;
				543
				544	if (effective)
				545	new->cap_effective = new->cap_permitted;
				546	else
				547	cap_clear(new->cap_effective);
				548	bprm->cap_effective = effective;
				549
				550	/*
				551	* Audit candidate if current->cap_effective is set
				552	*
				553	* We do not bother to audit if 3 things are true:
				554	* 1) cap_effective has all caps
				555	* 2) we are root
				556	* 3) root is supposed to have all caps (SECURE_NOROOT)
				557	* Since this is just a normal root execing a process.
				558	*
				559	* Number 1 above might fail if you don't have a full bset, but I think
				560	* that is interesting information to audit.
				561	*/
				562	if (!cap_isclear(new->cap_effective)) {
				563	if (!cap_issubset(CAP_FULL_SET, new->cap_effective) \|\|
				564	new->euid != 0 \|\| new->uid != 0 \|\|
				565	issecure(SECURE_NOROOT)) {
				566	ret = audit_log_bprm_fcaps(bprm, new, old);
				567	if (ret < 0)
				568	return ret;
				569	}
				570	}
				571
				572	new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
				573	return 0;
				574	}
				575
				576	/**
				577	* cap_bprm_secureexec - Determine whether a secure execution is required
				578	* @bprm: The execution parameters
				579	*
				580	* Determine whether a secure execution is required, return 1 if it is, and 0
				581	* if it is not.
				582	*
				583	* The credentials have been committed by this point, and so are no longer
				584	* available through @bprm->cred.
				585	*/
				586	int cap_bprm_secureexec(struct linux_binprm *bprm)
				587	{
				588	const struct cred *cred = current_cred();
				589
				590	if (cred->uid != 0) {
				591	if (bprm->cap_effective)
				592	return 1;
				593	if (!cap_isclear(cred->cap_permitted))
				594	return 1;
				595	}
				596
				597	return (cred->euid != cred->uid \|\|
				598	cred->egid != cred->gid);
				599	}
				600
				601	/**
				602	* cap_inode_setxattr - Determine whether an xattr may be altered
				603	* @dentry: The inode/dentry being altered
				604	* @name: The name of the xattr to be changed
				605	* @value: The value that the xattr will be changed to
				606	* @size: The size of value
				607	* @flags: The replacement flag
				608	*
				609	* Determine whether an xattr may be altered or set on an inode, returning 0 if
				610	* permission is granted, -ve if denied.
				611	*
				612	* This is used to make sure security xattrs don't get updated or set by those
				613	* who aren't privileged to do so.
				614	*/
				615	int cap_inode_setxattr(struct dentry dentry, const char name,
				616	const void *value, size_t size, int flags)
				617	{
				618	if (!strcmp(name, XATTR_NAME_CAPS)) {
				619	if (!capable(CAP_SETFCAP))
				620	return -EPERM;
				621	return 0;
				622	}
				623
				624	if (!strncmp(name, XATTR_SECURITY_PREFIX,
				625	sizeof(XATTR_SECURITY_PREFIX) - 1) &&
				626	!capable(CAP_SYS_ADMIN))
				627	return -EPERM;
				628	return 0;
				629	}
				630
				631	/**
				632	* cap_inode_removexattr - Determine whether an xattr may be removed
				633	* @dentry: The inode/dentry being altered
				634	* @name: The name of the xattr to be changed
				635	*
				636	* Determine whether an xattr may be removed from an inode, returning 0 if
				637	* permission is granted, -ve if denied.
				638	*
				639	* This is used to make sure security xattrs don't get removed by those who
				640	* aren't privileged to remove them.
				641	*/
				642	int cap_inode_removexattr(struct dentry dentry, const char name)
				643	{
				644	if (!strcmp(name, XATTR_NAME_CAPS)) {
				645	if (!capable(CAP_SETFCAP))
				646	return -EPERM;
				647	return 0;
				648	}
				649
				650	if (!strncmp(name, XATTR_SECURITY_PREFIX,
				651	sizeof(XATTR_SECURITY_PREFIX) - 1) &&
				652	!capable(CAP_SYS_ADMIN))
				653	return -EPERM;
				654	return 0;
				655	}
				656
				657	/*
				658	* cap_emulate_setxuid() fixes the effective / permitted capabilities of
				659	* a process after a call to setuid, setreuid, or setresuid.
				660	*
				661	* 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
				662	* {r,e,s}uid != 0, the permitted and effective capabilities are
				663	* cleared.
				664	*
				665	* 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
				666	* capabilities of the process are cleared.
				667	*
				668	* 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
				669	* capabilities are set to the permitted capabilities.
				670	*
				671	* fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should
				672	* never happen.
				673	*
				674	* -astor
				675	*
				676	* cevans - New behaviour, Oct '99
				677	* A process may, via prctl(), elect to keep its capabilities when it
				678	* calls setuid() and switches away from uid==0. Both permitted and
				679	* effective sets will be retained.
				680	* Without this change, it was impossible for a daemon to drop only some
				681	* of its privilege. The call to setuid(!=0) would drop all privileges!
				682	* Keeping uid 0 is not an option because uid 0 owns too many vital
				683	* files..
				684	* Thanks to Olaf Kirch and Peter Benie for spotting this.
				685	*/
				686	static inline void cap_emulate_setxuid(struct cred new, const struct cred old)
				687	{
				688	if ((old->uid == 0 \|\| old->euid == 0 \|\| old->suid == 0) &&
				689	(new->uid != 0 && new->euid != 0 && new->suid != 0) &&
				690	!issecure(SECURE_KEEP_CAPS)) {
				691	cap_clear(new->cap_permitted);
				692	cap_clear(new->cap_effective);
				693	}
				694	if (old->euid == 0 && new->euid != 0)
				695	cap_clear(new->cap_effective);
				696	if (old->euid != 0 && new->euid == 0)
				697	new->cap_effective = new->cap_permitted;
				698	}
				699
				700	/**
				701	* cap_task_fix_setuid - Fix up the results of setuid() call
				702	* @new: The proposed credentials
				703	* @old: The current task's current credentials
				704	* @flags: Indications of what has changed
				705	*
				706	* Fix up the results of setuid() call before the credential changes are
				707	* actually applied, returning 0 to grant the changes, -ve to deny them.
				708	*/
				709	int cap_task_fix_setuid(struct cred new, const struct cred old, int flags)
				710	{
				711	switch (flags) {
				712	case LSM_SETID_RE:
				713	case LSM_SETID_ID:
				714	case LSM_SETID_RES:
				715	/* juggle the capabilities to follow [RES]UID changes unless
				716	* otherwise suppressed */
				717	if (!issecure(SECURE_NO_SETUID_FIXUP))
				718	cap_emulate_setxuid(new, old);
				719	break;
				720
				721	case LSM_SETID_FS:
				722	/* juggle the capabilties to follow FSUID changes, unless
				723	* otherwise suppressed
				724	*
				725	* FIXME - is fsuser used for all CAP_FS_MASK capabilities?
				726	* if not, we might be a bit too harsh here.
				727	*/
				728	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
				729	if (old->fsuid == 0 && new->fsuid != 0)
				730	new->cap_effective =
				731	cap_drop_fs_set(new->cap_effective);
				732
				733	if (old->fsuid != 0 && new->fsuid == 0)
				734	new->cap_effective =
				735	cap_raise_fs_set(new->cap_effective,
				736	new->cap_permitted);
				737	}
				738	break;
				739
				740	default:
				741	return -EINVAL;
				742	}
				743
				744	return 0;
				745	}
				746
				747	/*
				748	* Rationale: code calling task_setscheduler, task_setioprio, and
				749	* task_setnice, assumes that
				750	* . if capable(cap_sys_nice), then those actions should be allowed
				751	* . if not capable(cap_sys_nice), but acting on your own processes,
				752	* then those actions should be allowed
				753	* This is insufficient now since you can call code without suid, but
				754	* yet with increased caps.
				755	* So we check for increased caps on the target process.
				756	*/
				757	static int cap_safe_nice(struct task_struct *p)
				758	{
				759	int is_subset;
				760
				761	rcu_read_lock();
				762	is_subset = cap_issubset(__task_cred(p)->cap_permitted,
				763	current_cred()->cap_permitted);
				764	rcu_read_unlock();
				765
				766	if (!is_subset && !capable(CAP_SYS_NICE))
				767	return -EPERM;
				768	return 0;
				769	}
				770
				771	/**
				772	* cap_task_setscheduler - Detemine if scheduler policy change is permitted
				773	* @p: The task to affect
				774	*
				775	* Detemine if the requested scheduler policy change is permitted for the
				776	* specified task, returning 0 if permission is granted, -ve if denied.
				777	*/
				778	int cap_task_setscheduler(struct task_struct *p)
				779	{
				780	return cap_safe_nice(p);
				781	}
				782
				783	/**
				784	* cap_task_ioprio - Detemine if I/O priority change is permitted
				785	* @p: The task to affect
				786	* @ioprio: The I/O priority to set
				787	*
				788	* Detemine if the requested I/O priority change is permitted for the specified
				789	* task, returning 0 if permission is granted, -ve if denied.
				790	*/
				791	int cap_task_setioprio(struct task_struct *p, int ioprio)
				792	{
				793	return cap_safe_nice(p);
				794	}
				795
				796	/**
				797	* cap_task_ioprio - Detemine if task priority change is permitted
				798	* @p: The task to affect
				799	* @nice: The nice value to set
				800	*
				801	* Detemine if the requested task priority change is permitted for the
				802	* specified task, returning 0 if permission is granted, -ve if denied.
				803	*/
				804	int cap_task_setnice(struct task_struct *p, int nice)
				805	{
				806	return cap_safe_nice(p);
				807	}
				808
				809	/*
				810	* Implement PR_CAPBSET_DROP. Attempt to remove the specified capability from
				811	* the current task's bounding set. Returns 0 on success, -ve on error.
				812	*/
				813	static long cap_prctl_drop(struct cred *new, unsigned long cap)
				814	{
				815	if (!capable(CAP_SETPCAP))
				816	return -EPERM;
				817	if (!cap_valid(cap))
				818	return -EINVAL;
				819
				820	cap_lower(new->cap_bset, cap);
				821	return 0;
				822	}
				823
				824	/**
				825	* cap_task_prctl - Implement process control functions for this security module
				826	* @option: The process control function requested
				827	* @arg2, @arg3, @arg4, @arg5: The argument data for this function
				828	*
				829	* Allow process control functions (sys_prctl()) to alter capabilities; may
				830	* also deny access to other functions not otherwise implemented here.
				831	*
				832	* Returns 0 or +ve on success, -ENOSYS if this function is not implemented
				833	* here, other -ve on error. If -ENOSYS is returned, sys_prctl() and other LSM
				834	* modules will consider performing the function.
				835	*/
				836	int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
				837	unsigned long arg4, unsigned long arg5)
				838	{
				839	struct cred *new;
				840	long error = 0;
				841
				842	new = prepare_creds();
				843	if (!new)
				844	return -ENOMEM;
				845
				846	switch (option) {
				847	case PR_CAPBSET_READ:
				848	error = -EINVAL;
				849	if (!cap_valid(arg2))
				850	goto error;
				851	error = !!cap_raised(new->cap_bset, arg2);
				852	goto no_change;
				853
				854	case PR_CAPBSET_DROP:
				855	error = cap_prctl_drop(new, arg2);
				856	if (error < 0)
				857	goto error;
				858	goto changed;
				859
				860	/*
				861	* The next four prctl's remain to assist with transitioning a
				862	* system from legacy UID=0 based privilege (when filesystem
				863	* capabilities are not in use) to a system using filesystem
				864	* capabilities only - as the POSIX.1e draft intended.
				865	*
				866	* Note:
				867	*
				868	* PR_SET_SECUREBITS =
				869	* issecure_mask(SECURE_KEEP_CAPS_LOCKED)
				870	* \| issecure_mask(SECURE_NOROOT)
				871	* \| issecure_mask(SECURE_NOROOT_LOCKED)
				872	* \| issecure_mask(SECURE_NO_SETUID_FIXUP)
				873	* \| issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED)
				874	*
				875	* will ensure that the current process and all of its
				876	* children will be locked into a pure
				877	* capability-based-privilege environment.
				878	*/
				879	case PR_SET_SECUREBITS:
				880	error = -EPERM;
				881	if ((((new->securebits & SECURE_ALL_LOCKS) >> 1)
				882	& (new->securebits ^ arg2)) /[1]/
				883	\|\| ((new->securebits & SECURE_ALL_LOCKS & ~arg2)) /[2]/
				884	\|\| (arg2 & ~(SECURE_ALL_LOCKS \| SECURE_ALL_BITS)) /[3]/
				885	\|\| (cap_capable(current_cred(),
				886	current_cred()->user->user_ns, CAP_SETPCAP,
				887	SECURITY_CAP_AUDIT) != 0) /[4]/
				888	/*
				889	* [1] no changing of bits that are locked
				890	* [2] no unlocking of locks
				891	* [3] no setting of unsupported bits
				892	* [4] doing anything requires privilege (go read about
				893	* the "sendmail capabilities bug")
				894	*/
				895	)
				896	/* cannot change a locked bit */
				897	goto error;
				898	new->securebits = arg2;
				899	goto changed;
				900
				901	case PR_GET_SECUREBITS:
				902	error = new->securebits;
				903	goto no_change;
				904
				905	case PR_GET_KEEPCAPS:
				906	if (issecure(SECURE_KEEP_CAPS))
				907	error = 1;
				908	goto no_change;
				909
				910	case PR_SET_KEEPCAPS:
				911	error = -EINVAL;
				912	if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */
				913	goto error;
				914	error = -EPERM;
				915	if (issecure(SECURE_KEEP_CAPS_LOCKED))
				916	goto error;
				917	if (arg2)
				918	new->securebits \|= issecure_mask(SECURE_KEEP_CAPS);
				919	else
				920	new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
				921	goto changed;
				922
				923	default:
				924	/* No functionality available - continue with default */
				925	error = -ENOSYS;
				926	goto error;
				927	}
				928
				929	/* Functionality provided */
				930	changed:
				931	return commit_creds(new);
				932
				933	no_change:
				934	error:
				935	abort_creds(new);
				936	return error;
				937	}
				938
				939	/**
				940	* cap_vm_enough_memory - Determine whether a new virtual mapping is permitted
				941	* @mm: The VM space in which the new mapping is to be made
				942	* @pages: The size of the mapping
				943	*
				944	* Determine whether the allocation of a new virtual mapping by the current
				945	* task is permitted, returning 0 if permission is granted, -ve if not.
				946	*/
				947	int cap_vm_enough_memory(struct mm_struct *mm, long pages)
				948	{
				949	int cap_sys_admin = 0;
				950
				951	if (cap_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN,
				952	SECURITY_CAP_NOAUDIT) == 0)
				953	cap_sys_admin = 1;
				954	return __vm_enough_memory(mm, pages, cap_sys_admin);
				955	}
				956
				957	/*
				958	* cap_file_mmap - check if able to map given addr
				959	* @file: unused
				960	* @reqprot: unused
				961	* @prot: unused
				962	* @flags: unused
				963	* @addr: address attempting to be mapped
				964	* @addr_only: unused
				965	*
				966	* If the process is attempting to map memory below dac_mmap_min_addr they need
				967	* CAP_SYS_RAWIO. The other parameters to this function are unused by the
				968	* capability security module. Returns 0 if this mapping should be allowed
				969	* -EPERM if not.
				970	*/
				971	int cap_file_mmap(struct file *file, unsigned long reqprot,
				972	unsigned long prot, unsigned long flags,
				973	unsigned long addr, unsigned long addr_only)
				974	{
				975	int ret = 0;
				976
				977	if (addr < dac_mmap_min_addr) {
				978	ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
				979	SECURITY_CAP_AUDIT);
				980	/* set PF_SUPERPRIV if it turns out we allow the low mmap */
				981	if (ret == 0)
				982	current->flags \|= PF_SUPERPRIV;
				983	}
				984	return ret;
				985	}