Blame - marvell/linux/kernel/bpf/syscall.c - T108

blob: de788761b708eaffdbddf8545f352ff813f96958 [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0-only
				2	/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
				3	*/
				4	#include <linux/bpf.h>
				5	#include <linux/bpf_trace.h>
				6	#include <linux/bpf_lirc.h>
				7	#include <linux/btf.h>
				8	#include <linux/syscalls.h>
				9	#include <linux/slab.h>
				10	#include <linux/sched/signal.h>
				11	#include <linux/vmalloc.h>
				12	#include <linux/mmzone.h>
				13	#include <linux/anon_inodes.h>
				14	#include <linux/fdtable.h>
				15	#include <linux/file.h>
				16	#include <linux/fs.h>
				17	#include <linux/license.h>
				18	#include <linux/filter.h>
				19	#include <linux/version.h>
				20	#include <linux/kernel.h>
				21	#include <linux/idr.h>
				22	#include <linux/cred.h>
				23	#include <linux/timekeeping.h>
				24	#include <linux/ctype.h>
				25	#include <linux/nospec.h>
				26
				/*
				 * Predicates grouping the "fd" map types by map->map_type.
				 * map_lookup_elem() in this file sizes the user-visible value of
				 * every IS_FD_MAP() map as a u32.
				 */
				#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
							  (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
							  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
							  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
				#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
				#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))

				/* Both access-mode flag bits (read-only and write-only). */
				#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)
				35
				36	DEFINE_PER_CPU(int, bpf_prog_active);
				37	static DEFINE_IDR(prog_idr);
				38	static DEFINE_SPINLOCK(prog_idr_lock);
				39	static DEFINE_IDR(map_idr);
				40	static DEFINE_SPINLOCK(map_idr_lock);
				41
				42	int sysctl_unprivileged_bpf_disabled __read_mostly =
				43	IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
				44
				45	static const struct bpf_map_ops * const bpf_map_types[] = {
				46	#define BPF_PROG_TYPE(_id, _ops)
				47	#define BPF_MAP_TYPE(_id, _ops) \
				48	[_id] = &_ops,
				49	#include <linux/bpf_types.h>
				50	#undef BPF_PROG_TYPE
				51	#undef BPF_MAP_TYPE
				52	};
				53
				54	/*
				55	* If we're handed a bigger struct than we know of, ensure all the unknown bits
				56	* are 0 - i.e. new user-space does not rely on any kernel feature extensions
				57	* we don't know about yet.
				58	*
				59	* There is a ToCToU between this function call and the following
				60	* copy_from_user() call. However, this is not a concern since this function is
				61	* meant to be a future-proofing of bits.
				62	*/
				63	int bpf_check_uarg_tail_zero(void __user *uaddr,
				64	size_t expected_size,
				65	size_t actual_size)
				66	{
				67	unsigned char __user *addr;
				68	unsigned char __user *end;
				69	unsigned char val;
				70	int err;
				71
				72	if (unlikely(actual_size > PAGE_SIZE)) /* silly large */
				73	return -E2BIG;
				74
				75	if (unlikely(!access_ok(uaddr, actual_size)))
				76	return -EFAULT;
				77
				78	if (actual_size <= expected_size)
				79	return 0;
				80
				81	addr = uaddr + expected_size;
				82	end = uaddr + actual_size;
				83
				84	for (; addr < end; addr++) {
				85	err = get_user(val, addr);
				86	if (err)
				87	return err;
				88	if (val)
				89	return -E2BIG;
				90	}
				91
				92	return 0;
				93	}
				94
				95	const struct bpf_map_ops bpf_map_offload_ops = {
				96	.map_alloc = bpf_map_offload_map_alloc,
				97	.map_free = bpf_map_offload_map_free,
				98	.map_check_btf = map_check_no_btf,
				99	};
				100
				101	static struct bpf_map find_and_alloc_map(union bpf_attr attr)
				102	{
				103	const struct bpf_map_ops *ops;
				104	u32 type = attr->map_type;
				105	struct bpf_map *map;
				106	int err;
				107
				108	if (type >= ARRAY_SIZE(bpf_map_types))
				109	return ERR_PTR(-EINVAL);
				110	type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
				111	ops = bpf_map_types[type];
				112	if (!ops)
				113	return ERR_PTR(-EINVAL);
				114
				115	if (ops->map_alloc_check) {
				116	err = ops->map_alloc_check(attr);
				117	if (err)
				118	return ERR_PTR(err);
				119	}
				120	if (attr->map_ifindex)
				121	ops = &bpf_map_offload_ops;
				122	map = ops->map_alloc(attr);
				123	if (IS_ERR(map))
				124	return map;
				125	map->ops = ops;
				126	map->map_type = type;
				127	return map;
				128	}
				129
				130	void *bpf_map_area_alloc(u64 size, int numa_node)
				131	{
				132	/* We really just want to fail instead of triggering OOM killer
				133	* under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
				134	* which is used for lower order allocation requests.
				135	*
				136	* It has been observed that higher order allocation requests done by
				137	* vmalloc with __GFP_NORETRY being set might fail due to not trying
				138	* to reclaim memory from the page cache, thus we set
				139	* __GFP_RETRY_MAYFAIL to avoid such situations.
				140	*/
				141
				142	const gfp_t flags = __GFP_NOWARN \| __GFP_ZERO;
				143	void *area;
				144
				145	if (size >= SIZE_MAX)
				146	return NULL;
				147
				148	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
				149	area = kmalloc_node(size, GFP_USER \| __GFP_NORETRY \| flags,
				150	numa_node);
				151	if (area != NULL)
				152	return area;
				153	}
				154
				155	return __vmalloc_node_flags_caller(size, numa_node,
				156	GFP_KERNEL \| __GFP_RETRY_MAYFAIL \|
				157	flags, __builtin_return_address(0));
				158	}
				159
				/*
				 * Release an area obtained from bpf_map_area_alloc(). kvfree() is used
				 * because the area may have come from either kmalloc or vmalloc.
				 */
				void bpf_map_area_free(void *area)
				{
					kvfree(area);
				}
				164
				165	static u32 bpf_map_flags_retain_permanent(u32 flags)
				166	{
				167	/* Some map creation flags are not tied to the map object but
				168	* rather to the map fd instead, so they have no meaning upon
				169	* map object inspection since multiple file descriptors with
				170	* different (access) properties can exist here. Thus, given
				171	* this has zero meaning for the map itself, lets clear these
				172	* from here.
				173	*/
				174	return flags & ~(BPF_F_RDONLY \| BPF_F_WRONLY);
				175	}
				176
				177	void bpf_map_init_from_attr(struct bpf_map map, union bpf_attr attr)
				178	{
				179	map->map_type = attr->map_type;
				180	map->key_size = attr->key_size;
				181	map->value_size = attr->value_size;
				182	map->max_entries = attr->max_entries;
				183	map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
				184	map->numa_node = bpf_map_attr_numa_node(attr);
				185	}
				186
				187	static int bpf_charge_memlock(struct user_struct *user, u32 pages)
				188	{
				189	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
				190
				191	if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) {
				192	atomic_long_sub(pages, &user->locked_vm);
				193	return -EPERM;
				194	}
				195	return 0;
				196	}
				197
				198	static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
				199	{
				200	if (user)
				201	atomic_long_sub(pages, &user->locked_vm);
				202	}
				203
				204	int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size)
				205	{
				206	u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
				207	struct user_struct *user;
				208	int ret;
				209
				210	if (size >= U32_MAX - PAGE_SIZE)
				211	return -E2BIG;
				212
				213	user = get_current_user();
				214	ret = bpf_charge_memlock(user, pages);
				215	if (ret) {
				216	free_uid(user);
				217	return ret;
				218	}
				219
				220	mem->pages = pages;
				221	mem->user = user;
				222
				223	return 0;
				224	}
				225
				226	void bpf_map_charge_finish(struct bpf_map_memory *mem)
				227	{
				228	bpf_uncharge_memlock(mem->user, mem->pages);
				229	free_uid(mem->user);
				230	}
				231
				232	void bpf_map_charge_move(struct bpf_map_memory *dst,
				233	struct bpf_map_memory *src)
				234	{
				235	dst = src;
				236
				237	/* Make sure src will not be used for the redundant uncharging. */
				238	memset(src, 0, sizeof(struct bpf_map_memory));
				239	}
				240
				241	int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
				242	{
				243	int ret;
				244
				245	ret = bpf_charge_memlock(map->memory.user, pages);
				246	if (ret)
				247	return ret;
				248	map->memory.pages += pages;
				249	return ret;
				250	}
				251
				252	void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
				253	{
				254	bpf_uncharge_memlock(map->memory.user, pages);
				255	map->memory.pages -= pages;
				256	}
				257
				258	static int bpf_map_alloc_id(struct bpf_map *map)
				259	{
				260	int id;
				261
				262	idr_preload(GFP_KERNEL);
				263	spin_lock_bh(&map_idr_lock);
				264	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
				265	if (id > 0)
				266	map->id = id;
				267	spin_unlock_bh(&map_idr_lock);
				268	idr_preload_end();
				269
				270	if (WARN_ON_ONCE(!id))
				271	return -ENOSPC;
				272
				273	return id > 0 ? 0 : id;
				274	}
				275
				276	void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
				277	{
				278	unsigned long flags;
				279
				280	/* Offloaded maps are removed from the IDR store when their device
				281	* disappears - even if someone holds an fd to them they are unusable,
				282	* the memory is gone, all ops will fail; they are simply waiting for
				283	* refcnt to drop to be freed.
				284	*/
				285	if (!map->id)
				286	return;
				287
				288	if (do_idr_lock)
				289	spin_lock_irqsave(&map_idr_lock, flags);
				290	else
				291	__acquire(&map_idr_lock);
				292
				293	idr_remove(&map_idr, map->id);
				294	map->id = 0;
				295
				296	if (do_idr_lock)
				297	spin_unlock_irqrestore(&map_idr_lock, flags);
				298	else
				299	__release(&map_idr_lock);
				300	}
				301
				302	/* called from workqueue */
				303	static void bpf_map_free_deferred(struct work_struct *work)
				304	{
				305	struct bpf_map *map = container_of(work, struct bpf_map, work);
				306	struct bpf_map_memory mem;
				307
				308	bpf_map_charge_move(&mem, &map->memory);
				309	security_bpf_map_free(map);
				310	/* implementation dependent freeing */
				311	map->ops->map_free(map);
				312	bpf_map_charge_finish(&mem);
				313	}
				314
				315	static void bpf_map_put_uref(struct bpf_map *map)
				316	{
				317	if (atomic_dec_and_test(&map->usercnt)) {
				318	if (map->ops->map_release_uref)
				319	map->ops->map_release_uref(map);
				320	}
				321	}
				322
				323	/* decrement map refcnt and schedule it for freeing via workqueue
				324	* (unrelying map implementation ops->map_free() might sleep)
				325	*/
				326	static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
				327	{
				328	if (atomic_dec_and_test(&map->refcnt)) {
				329	/* bpf_map_free_id() must be called first */
				330	bpf_map_free_id(map, do_idr_lock);
				331	btf_put(map->btf);
				332	INIT_WORK(&map->work, bpf_map_free_deferred);
				333	schedule_work(&map->work);
				334	}
				335	}
				336
				337	void bpf_map_put(struct bpf_map *map)
				338	{
				339	__bpf_map_put(map, true);
				340	}
				341	EXPORT_SYMBOL_GPL(bpf_map_put);
				342
				/* Drop a user reference and then a regular reference on @map, in that order. */
				void bpf_map_put_with_uref(struct bpf_map *map)
				{
					bpf_map_put_uref(map);
					bpf_map_put(map);
				}
				348
				349	static int bpf_map_release(struct inode inode, struct file filp)
				350	{
				351	struct bpf_map *map = filp->private_data;
				352
				353	if (map->ops->map_release)
				354	map->ops->map_release(map, filp);
				355
				356	bpf_map_put_with_uref(map);
				357	return 0;
				358	}
				359
				360	static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
				361	{
				362	fmode_t mode = f.file->f_mode;
				363
				364	/* Our file permissions may have been overridden by global
				365	* map permissions facing syscall side.
				366	*/
				367	if (READ_ONCE(map->frozen))
				368	mode &= ~FMODE_CAN_WRITE;
				369	return mode;
				370	}
				371
				#ifdef CONFIG_PROC_FS
				/*
				 * ->show_fdinfo handler: print the map's basic attributes to @m.
				 *
				 * For BPF_MAP_TYPE_PROG_ARRAY maps the owner program type and jited
				 * flag are read from the enclosing struct bpf_array; they are printed
				 * only when owner_prog_type is non-zero.
				 */
				static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
				{
					const struct bpf_map *map = filp->private_data;
					const struct bpf_array *array;
					u32 owner_prog_type = 0;
					u32 owner_jited = 0;

					if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
						array = container_of(map, struct bpf_array, map);
						owner_prog_type = array->owner_prog_type;
						owner_jited = array->owner_jited;
					}

					seq_printf(m,
						   "map_type:\t%u\n"
						   "key_size:\t%u\n"
						   "value_size:\t%u\n"
						   "max_entries:\t%u\n"
						   "map_flags:\t%#x\n"
						   "memlock:\t%llu\n"
						   "map_id:\t%u\n"
						   "frozen:\t%u\n",
						   map->map_type,
						   map->key_size,
						   map->value_size,
						   map->max_entries,
						   map->map_flags,
						   /* pages -> bytes, widened to 64 bit before the shift */
						   map->memory.pages * 1ULL << PAGE_SHIFT,
						   map->id,
						   READ_ONCE(map->frozen));

					if (owner_prog_type) {
						seq_printf(m, "owner_prog_type:\t%u\n",
							   owner_prog_type);
						seq_printf(m, "owner_jited:\t%u\n",
							   owner_jited);
					}
				}
				#endif
				412
				413	static ssize_t bpf_dummy_read(struct file filp, char __user buf, size_t siz,
				414	loff_t *ppos)
				415	{
				416	/* We need this handler such that alloc_file() enables
				417	* f_mode with FMODE_CAN_READ.
				418	*/
				419	return -EINVAL;
				420	}
				421
				422	static ssize_t bpf_dummy_write(struct file filp, const char __user buf,
				423	size_t siz, loff_t *ppos)
				424	{
				425	/* We need this handler such that alloc_file() enables
				426	* f_mode with FMODE_CAN_WRITE.
				427	*/
				428	return -EINVAL;
				429	}
				430
				431	const struct file_operations bpf_map_fops = {
				432	#ifdef CONFIG_PROC_FS
				433	.show_fdinfo = bpf_map_show_fdinfo,
				434	#endif
				435	.release = bpf_map_release,
				436	.read = bpf_dummy_read,
				437	.write = bpf_dummy_write,
				438	};
				439
				440	int bpf_map_new_fd(struct bpf_map *map, int flags)
				441	{
				442	int ret;
				443
				444	ret = security_bpf_map(map, OPEN_FMODE(flags));
				445	if (ret < 0)
				446	return ret;
				447
				448	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				449	flags \| O_CLOEXEC);
				450	}
				451
				452	int bpf_get_file_flag(int flags)
				453	{
				454	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
				455	return -EINVAL;
				456	if (flags & BPF_F_RDONLY)
				457	return O_RDONLY;
				458	if (flags & BPF_F_WRONLY)
				459	return O_WRONLY;
				460	return O_RDWR;
				461	}
				462
				/*
				 * Helper macro to check that the unused fields of 'union bpf_attr' are
				 * zero.
				 *
				 * Requires a local 'attr' pointer in scope and a CMD##_LAST_FIELD
				 * macro naming the last field the command uses. Evaluates to true when
				 * any byte after that field, up to the end of the union, is non-zero.
				 * The expansion is fully parenthesized so it composes safely with
				 * surrounding operators such as '!'.
				 */
				#define CHECK_ATTR(CMD) \
					(memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
						    sizeof(attr->CMD##_LAST_FIELD), 0, \
						    sizeof(*attr) - \
						    offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
						    sizeof(attr->CMD##_LAST_FIELD)) != NULL)
				470
				471	/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes.
				472	* Return 0 on success and < 0 on error.
				473	*/
				474	static int bpf_obj_name_cpy(char dst, const char src)
				475	{
				476	const char *end = src + BPF_OBJ_NAME_LEN;
				477
				478	memset(dst, 0, BPF_OBJ_NAME_LEN);
				479	/* Copy all isalnum(), '_' and '.' chars. */
				480	while (src < end && *src) {
				481	if (!isalnum(*src) &&
				482	src != '_' && src != '.')
				483	return -EINVAL;
				484	dst++ = src++;
				485	}
				486
				487	/* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */
				488	if (src == end)
				489	return -EINVAL;
				490
				491	return 0;
				492	}
				493
				494	int map_check_no_btf(const struct bpf_map *map,
				495	const struct btf *btf,
				496	const struct btf_type *key_type,
				497	const struct btf_type *value_type)
				498	{
				499	return -ENOTSUPP;
				500	}
				501
				502	static int map_check_btf(struct bpf_map map, const struct btf btf,
				503	u32 btf_key_id, u32 btf_value_id)
				504	{
				505	const struct btf_type key_type, value_type;
				506	u32 key_size, value_size;
				507	int ret = 0;
				508
				509	/* Some maps allow key to be unspecified. */
				510	if (btf_key_id) {
				511	key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
				512	if (!key_type \|\| key_size != map->key_size)
				513	return -EINVAL;
				514	} else {
				515	key_type = btf_type_by_id(btf, 0);
				516	if (!map->ops->map_check_btf)
				517	return -EINVAL;
				518	}
				519
				520	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
				521	if (!value_type \|\| value_size != map->value_size)
				522	return -EINVAL;
				523
				524	map->spin_lock_off = btf_find_spin_lock(btf, value_type);
				525
				526	if (map_value_has_spin_lock(map)) {
				527	if (map->map_flags & BPF_F_RDONLY_PROG)
				528	return -EACCES;
				529	if (map->map_type != BPF_MAP_TYPE_HASH &&
				530	map->map_type != BPF_MAP_TYPE_ARRAY &&
				531	map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
				532	map->map_type != BPF_MAP_TYPE_SK_STORAGE)
				533	return -ENOTSUPP;
				534	if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
				535	map->value_size) {
				536	WARN_ONCE(1,
				537	"verifier bug spin_lock_off %d value_size %d\n",
				538	map->spin_lock_off, map->value_size);
				539	return -EFAULT;
				540	}
				541	}
				542
				543	if (map->ops->map_check_btf)
				544	ret = map->ops->map_check_btf(map, btf, key_type, value_type);
				545
				546	return ret;
				547	}
				548
				549	#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
				550	/* called via syscall */
				551	static int map_create(union bpf_attr *attr)
				552	{
				553	int numa_node = bpf_map_attr_numa_node(attr);
				554	struct bpf_map_memory mem;
				555	struct bpf_map *map;
				556	int f_flags;
				557	int err;
				558
				559	err = CHECK_ATTR(BPF_MAP_CREATE);
				560	if (err)
				561	return -EINVAL;
				562
				563	f_flags = bpf_get_file_flag(attr->map_flags);
				564	if (f_flags < 0)
				565	return f_flags;
				566
				567	if (numa_node != NUMA_NO_NODE &&
				568	((unsigned int)numa_node >= nr_node_ids \|\|
				569	!node_online(numa_node)))
				570	return -EINVAL;
				571
				572	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
				573	map = find_and_alloc_map(attr);
				574	if (IS_ERR(map))
				575	return PTR_ERR(map);
				576
				577	err = bpf_obj_name_cpy(map->name, attr->map_name);
				578	if (err)
				579	goto free_map;
				580
				581	atomic_set(&map->refcnt, 1);
				582	atomic_set(&map->usercnt, 1);
				583
				584	if (attr->btf_key_type_id \|\| attr->btf_value_type_id) {
				585	struct btf *btf;
				586
				587	if (!attr->btf_value_type_id) {
				588	err = -EINVAL;
				589	goto free_map;
				590	}
				591
				592	btf = btf_get_by_fd(attr->btf_fd);
				593	if (IS_ERR(btf)) {
				594	err = PTR_ERR(btf);
				595	goto free_map;
				596	}
				597
				598	err = map_check_btf(map, btf, attr->btf_key_type_id,
				599	attr->btf_value_type_id);
				600	if (err) {
				601	btf_put(btf);
				602	goto free_map;
				603	}
				604
				605	map->btf = btf;
				606	map->btf_key_type_id = attr->btf_key_type_id;
				607	map->btf_value_type_id = attr->btf_value_type_id;
				608	} else {
				609	map->spin_lock_off = -EINVAL;
				610	}
				611
				612	err = security_bpf_map_alloc(map);
				613	if (err)
				614	goto free_map;
				615
				616	err = bpf_map_alloc_id(map);
				617	if (err)
				618	goto free_map_sec;
				619
				620	err = bpf_map_new_fd(map, f_flags);
				621	if (err < 0) {
				622	/* failed to allocate fd.
				623	* bpf_map_put_with_uref() is needed because the above
				624	* bpf_map_alloc_id() has published the map
				625	* to the userspace and the userspace may
				626	* have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
				627	*/
				628	bpf_map_put_with_uref(map);
				629	return err;
				630	}
				631
				632	return err;
				633
				634	free_map_sec:
				635	security_bpf_map_free(map);
				636	free_map:
				637	btf_put(map->btf);
				638	bpf_map_charge_move(&mem, &map->memory);
				639	map->ops->map_free(map);
				640	bpf_map_charge_finish(&mem);
				641	return err;
				642	}
				643
				644	/* if error is returned, fd is released.
				645	* On success caller should complete fd access with matching fdput()
				646	*/
				647	struct bpf_map *__bpf_map_get(struct fd f)
				648	{
				649	if (!f.file)
				650	return ERR_PTR(-EBADF);
				651	if (f.file->f_op != &bpf_map_fops) {
				652	fdput(f);
				653	return ERR_PTR(-EINVAL);
				654	}
				655
				656	return f.file->private_data;
				657	}
				658
				659	/* prog's and map's refcnt limit */
				660	#define BPF_MAX_REFCNT 32768
				661
				662	struct bpf_map bpf_map_inc(struct bpf_map map, bool uref)
				663	{
				664	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
				665	atomic_dec(&map->refcnt);
				666	return ERR_PTR(-EBUSY);
				667	}
				668	if (uref)
				669	atomic_inc(&map->usercnt);
				670	return map;
				671	}
				672	EXPORT_SYMBOL_GPL(bpf_map_inc);
				673
				674	struct bpf_map *bpf_map_get_with_uref(u32 ufd)
				675	{
				676	struct fd f = fdget(ufd);
				677	struct bpf_map *map;
				678
				679	map = __bpf_map_get(f);
				680	if (IS_ERR(map))
				681	return map;
				682
				683	map = bpf_map_inc(map, true);
				684	fdput(f);
				685
				686	return map;
				687	}
				688
				689	/* map_idr_lock should have been held */
				690	static struct bpf_map __bpf_map_inc_not_zero(struct bpf_map map,
				691	bool uref)
				692	{
				693	int refold;
				694
				695	refold = atomic_fetch_add_unless(&map->refcnt, 1, 0);
				696
				697	if (refold >= BPF_MAX_REFCNT) {
				698	__bpf_map_put(map, false);
				699	return ERR_PTR(-EBUSY);
				700	}
				701
				702	if (!refold)
				703	return ERR_PTR(-ENOENT);
				704
				705	if (uref)
				706	atomic_inc(&map->usercnt);
				707
				708	return map;
				709	}
				710
				711	struct bpf_map bpf_map_inc_not_zero(struct bpf_map map, bool uref)
				712	{
				713	spin_lock_bh(&map_idr_lock);
				714	map = __bpf_map_inc_not_zero(map, uref);
				715	spin_unlock_bh(&map_idr_lock);
				716
				717	return map;
				718	}
				719	EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
				720
				721	int __weak bpf_stackmap_copy(struct bpf_map map, void key, void *value)
				722	{
				723	return -ENOTSUPP;
				724	}
				725
				726	static void __bpf_copy_key(void __user ukey, u64 key_size)
				727	{
				728	if (key_size)
				729	return memdup_user(ukey, key_size);
				730
				731	if (ukey)
				732	return ERR_PTR(-EINVAL);
				733
				734	return NULL;
				735	}
				736
				737	/* last field in 'union bpf_attr' used by this command */
				738	#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
				739
				740	static int map_lookup_elem(union bpf_attr *attr)
				741	{
				742	void __user *ukey = u64_to_user_ptr(attr->key);
				743	void __user *uvalue = u64_to_user_ptr(attr->value);
				744	int ufd = attr->map_fd;
				745	struct bpf_map *map;
				746	void key, value, *ptr;
				747	u32 value_size;
				748	struct fd f;
				749	int err;
				750
				751	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
				752	return -EINVAL;
				753
				754	if (attr->flags & ~BPF_F_LOCK)
				755	return -EINVAL;
				756
				757	f = fdget(ufd);
				758	map = __bpf_map_get(f);
				759	if (IS_ERR(map))
				760	return PTR_ERR(map);
				761	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
				762	err = -EPERM;
				763	goto err_put;
				764	}
				765
				766	if ((attr->flags & BPF_F_LOCK) &&
				767	!map_value_has_spin_lock(map)) {
				768	err = -EINVAL;
				769	goto err_put;
				770	}
				771
				772	key = __bpf_copy_key(ukey, map->key_size);
				773	if (IS_ERR(key)) {
				774	err = PTR_ERR(key);
				775	goto err_put;
				776	}
				777
				778	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH \|\|
				779	map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH \|\|
				780	map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY \|\|
				781	map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
				782	value_size = round_up(map->value_size, 8) * num_possible_cpus();
				783	else if (IS_FD_MAP(map))
				784	value_size = sizeof(u32);
				785	else
				786	value_size = map->value_size;
				787
				788	err = -ENOMEM;
				789	value = kmalloc(value_size, GFP_USER \| __GFP_NOWARN);
				790	if (!value)
				791	goto free_key;
				792
				793	if (bpf_map_is_dev_bound(map)) {
				794	err = bpf_map_offload_lookup_elem(map, key, value);
				795	goto done;
				796	}
				797
				798	preempt_disable();
				799	this_cpu_inc(bpf_prog_active);
				800	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH \|\|
				801	map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
				802	err = bpf_percpu_hash_copy(map, key, value);
				803	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
				804	err = bpf_percpu_array_copy(map, key, value);
				805	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
				806	err = bpf_percpu_cgroup_storage_copy(map, key, value);
				807	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
				808	err = bpf_stackmap_copy(map, key, value);
				809	} else if (IS_FD_ARRAY(map)) {
				810	err = bpf_fd_array_map_lookup_elem(map, key, value);
				811	} else if (IS_FD_HASH(map)) {
				812	err = bpf_fd_htab_map_lookup_elem(map, key, value);
				813	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
				814	err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
				815	} else if (map->map_type == BPF_MAP_TYPE_QUEUE \|\|
				816	map->map_type == BPF_MAP_TYPE_STACK) {
				817	err = map->ops->map_peek_elem(map, value);
				818	} else {
				819	rcu_read_lock();
				820	if (map->ops->map_lookup_elem_sys_only)
				821	ptr = map->ops->map_lookup_elem_sys_only(map, key);
				822	else
				823	ptr = map->ops->map_lookup_elem(map, key);
				824	if (IS_ERR(ptr)) {
				825	err = PTR_ERR(ptr);
				826	} else if (!ptr) {
				827	err = -ENOENT;
				828	} else {
				829	err = 0;
				830	if (attr->flags & BPF_F_LOCK)
				831	/* lock 'ptr' and copy everything but lock */
				832	copy_map_value_locked(map, value, ptr, true);
				833	else
				834	copy_map_value(map, value, ptr);
				835	/* mask lock, since value wasn't zero inited */
				836	check_and_init_map_lock(map, value);
				837	}
				838	rcu_read_unlock();
				839	}
				840	this_cpu_dec(bpf_prog_active);
				841	preempt_enable();
				842
				843	done:
				844	if (err)
				845	goto free_value;
				846
				847	err = -EFAULT;
				848	if (copy_to_user(uvalue, value, value_size) != 0)
				849	goto free_value;
				850
				851	err = 0;
				852
				853	free_value:
				854	kfree(value);
				855	free_key:
				856	kfree(key);
				857	err_put:
				858	fdput(f);
				859	return err;
				860	}
				861
				862	static void maybe_wait_bpf_programs(struct bpf_map *map)
				863	{
				864	/* Wait for any running BPF programs to complete so that
				865	* userspace, when we return to it, knows that all programs
				866	* that could be running use the new map value.
				867	*/
				868	if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS \|\|
				869	map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
				870	synchronize_rcu();
				871	}
				872
				873	#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
				874
				875	static int map_update_elem(union bpf_attr *attr)
				876	{
				877	void __user *ukey = u64_to_user_ptr(attr->key);
				878	void __user *uvalue = u64_to_user_ptr(attr->value);
				879	int ufd = attr->map_fd;
				880	struct bpf_map *map;
				881	void key, value;
				882	u32 value_size;
				883	struct fd f;
				884	int err;
				885
				886	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
				887	return -EINVAL;
				888
				889	f = fdget(ufd);
				890	map = __bpf_map_get(f);
				891	if (IS_ERR(map))
				892	return PTR_ERR(map);
				893	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
				894	err = -EPERM;
				895	goto err_put;
				896	}
				897
				898	if ((attr->flags & BPF_F_LOCK) &&
				899	!map_value_has_spin_lock(map)) {
				900	err = -EINVAL;
				901	goto err_put;
				902	}
				903
				904	key = __bpf_copy_key(ukey, map->key_size);
				905	if (IS_ERR(key)) {
				906	err = PTR_ERR(key);
				907	goto err_put;
				908	}
				909
				910	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH \|\|
				911	map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH \|\|
				912	map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY \|\|
				913	map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
				914	value_size = round_up(map->value_size, 8) * num_possible_cpus();
				915	else
				916	value_size = map->value_size;
				917
				918	err = -ENOMEM;
				919	value = kmalloc(value_size, GFP_USER \| __GFP_NOWARN);
				920	if (!value)
				921	goto free_key;
				922
				923	err = -EFAULT;
				924	if (copy_from_user(value, uvalue, value_size) != 0)
				925	goto free_value;
				926
				927	/* Need to create a kthread, thus must support schedule */
				928	if (bpf_map_is_dev_bound(map)) {
				929	err = bpf_map_offload_update_elem(map, key, value, attr->flags);
				930	goto out;
				931	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP \|\|
				932	map->map_type == BPF_MAP_TYPE_SOCKHASH \|\|
				933	map->map_type == BPF_MAP_TYPE_SOCKMAP) {
				934	err = map->ops->map_update_elem(map, key, value, attr->flags);
				935	goto out;
				936	}
				937
				938	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
				939	* inside bpf map update or delete otherwise deadlocks are possible
				940	*/
				941	preempt_disable();
				942	__this_cpu_inc(bpf_prog_active);
				943	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH \|\|
				944	map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
				945	err = bpf_percpu_hash_update(map, key, value, attr->flags);
				946	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
				947	err = bpf_percpu_array_update(map, key, value, attr->flags);
				948	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
				949	err = bpf_percpu_cgroup_storage_update(map, key, value,
				950	attr->flags);
				951	} else if (IS_FD_ARRAY(map)) {
				952	rcu_read_lock();
				953	err = bpf_fd_array_map_update_elem(map, f.file, key, value,
				954	attr->flags);
				955	rcu_read_unlock();
				956	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
				957	rcu_read_lock();
				958	err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
				959	attr->flags);
				960	rcu_read_unlock();
				961	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
				962	/* rcu_read_lock() is not needed */
				963	err = bpf_fd_reuseport_array_update_elem(map, key, value,
				964	attr->flags);
				965	} else if (map->map_type == BPF_MAP_TYPE_QUEUE \|\|
				966	map->map_type == BPF_MAP_TYPE_STACK) {
				967	err = map->ops->map_push_elem(map, value, attr->flags);
				968	} else {
				969	rcu_read_lock();
				970	err = map->ops->map_update_elem(map, key, value, attr->flags);
				971	rcu_read_unlock();
				972	}
				973	__this_cpu_dec(bpf_prog_active);
				974	preempt_enable();
				975	maybe_wait_bpf_programs(map);
				976	out:
				977	free_value:
				978	kfree(value);
				979	free_key:
				980	kfree(key);
				981	err_put:
				982	fdput(f);
				983	return err;
				984	}
				985
				986	#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
				987
				988	static int map_delete_elem(union bpf_attr *attr)
				989	{
				990	void __user *ukey = u64_to_user_ptr(attr->key);
				991	int ufd = attr->map_fd;
				992	struct bpf_map *map;
				993	struct fd f;
				994	void *key;
				995	int err;
				996
				997	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
				998	return -EINVAL;
				999
				1000	f = fdget(ufd);
				1001	map = __bpf_map_get(f);
				1002	if (IS_ERR(map))
				1003	return PTR_ERR(map);
				1004	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
				1005	err = -EPERM;
				1006	goto err_put;
				1007	}
				1008
				1009	key = __bpf_copy_key(ukey, map->key_size);
				1010	if (IS_ERR(key)) {
				1011	err = PTR_ERR(key);
				1012	goto err_put;
				1013	}
				1014
				1015	if (bpf_map_is_dev_bound(map)) {
				1016	err = bpf_map_offload_delete_elem(map, key);
				1017	goto out;
				1018	}
				1019
				1020	preempt_disable();
				1021	__this_cpu_inc(bpf_prog_active);
				1022	rcu_read_lock();
				1023	err = map->ops->map_delete_elem(map, key);
				1024	rcu_read_unlock();
				1025	__this_cpu_dec(bpf_prog_active);
				1026	preempt_enable();
				1027	maybe_wait_bpf_programs(map);
				1028	out:
				1029	kfree(key);
				1030	err_put:
				1031	fdput(f);
				1032	return err;
				1033	}
				1034
				1035	/* last field in 'union bpf_attr' used by this command */
				1036	#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
				1037
				1038	static int map_get_next_key(union bpf_attr *attr)
				1039	{
				1040	void __user *ukey = u64_to_user_ptr(attr->key);
				1041	void __user *unext_key = u64_to_user_ptr(attr->next_key);
				1042	int ufd = attr->map_fd;
				1043	struct bpf_map *map;
				1044	void key, next_key;
				1045	struct fd f;
				1046	int err;
				1047
				1048	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
				1049	return -EINVAL;
				1050
				1051	f = fdget(ufd);
				1052	map = __bpf_map_get(f);
				1053	if (IS_ERR(map))
				1054	return PTR_ERR(map);
				1055	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
				1056	err = -EPERM;
				1057	goto err_put;
				1058	}
				1059
				1060	if (ukey) {
				1061	key = __bpf_copy_key(ukey, map->key_size);
				1062	if (IS_ERR(key)) {
				1063	err = PTR_ERR(key);
				1064	goto err_put;
				1065	}
				1066	} else {
				1067	key = NULL;
				1068	}
				1069
				1070	err = -ENOMEM;
				1071	next_key = kmalloc(map->key_size, GFP_USER);
				1072	if (!next_key)
				1073	goto free_key;
				1074
				1075	if (bpf_map_is_dev_bound(map)) {
				1076	err = bpf_map_offload_get_next_key(map, key, next_key);
				1077	goto out;
				1078	}
				1079
				1080	rcu_read_lock();
				1081	err = map->ops->map_get_next_key(map, key, next_key);
				1082	rcu_read_unlock();
				1083	out:
				1084	if (err)
				1085	goto free_next_key;
				1086
				1087	err = -EFAULT;
				1088	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
				1089	goto free_next_key;
				1090
				1091	err = 0;
				1092
				1093	free_next_key:
				1094	kfree(next_key);
				1095	free_key:
				1096	kfree(key);
				1097	err_put:
				1098	fdput(f);
				1099	return err;
				1100	}
				1101
				1102	#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
				1103
				1104	static int map_lookup_and_delete_elem(union bpf_attr *attr)
				1105	{
				1106	void __user *ukey = u64_to_user_ptr(attr->key);
				1107	void __user *uvalue = u64_to_user_ptr(attr->value);
				1108	int ufd = attr->map_fd;
				1109	struct bpf_map *map;
				1110	void key, value;
				1111	u32 value_size;
				1112	struct fd f;
				1113	int err;
				1114
				1115	if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
				1116	return -EINVAL;
				1117
				1118	f = fdget(ufd);
				1119	map = __bpf_map_get(f);
				1120	if (IS_ERR(map))
				1121	return PTR_ERR(map);
				1122	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) \|\|
				1123	!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
				1124	err = -EPERM;
				1125	goto err_put;
				1126	}
				1127
				1128	key = __bpf_copy_key(ukey, map->key_size);
				1129	if (IS_ERR(key)) {
				1130	err = PTR_ERR(key);
				1131	goto err_put;
				1132	}
				1133
				1134	value_size = map->value_size;
				1135
				1136	err = -ENOMEM;
				1137	value = kmalloc(value_size, GFP_USER \| __GFP_NOWARN);
				1138	if (!value)
				1139	goto free_key;
				1140
				1141	if (map->map_type == BPF_MAP_TYPE_QUEUE \|\|
				1142	map->map_type == BPF_MAP_TYPE_STACK) {
				1143	err = map->ops->map_pop_elem(map, value);
				1144	} else {
				1145	err = -ENOTSUPP;
				1146	}
				1147
				1148	if (err)
				1149	goto free_value;
				1150
				1151	if (copy_to_user(uvalue, value, value_size) != 0) {
				1152	err = -EFAULT;
				1153	goto free_value;
				1154	}
				1155
				1156	err = 0;
				1157
				1158	free_value:
				1159	kfree(value);
				1160	free_key:
				1161	kfree(key);
				1162	err_put:
				1163	fdput(f);
				1164	return err;
				1165	}
				1166
				1167	#define BPF_MAP_FREEZE_LAST_FIELD map_fd
				1168
				1169	static int map_freeze(const union bpf_attr *attr)
				1170	{
				1171	int err = 0, ufd = attr->map_fd;
				1172	struct bpf_map *map;
				1173	struct fd f;
				1174
				1175	if (CHECK_ATTR(BPF_MAP_FREEZE))
				1176	return -EINVAL;
				1177
				1178	f = fdget(ufd);
				1179	map = __bpf_map_get(f);
				1180	if (IS_ERR(map))
				1181	return PTR_ERR(map);
				1182	if (READ_ONCE(map->frozen)) {
				1183	err = -EBUSY;
				1184	goto err_put;
				1185	}
				1186	if (!capable(CAP_SYS_ADMIN)) {
				1187	err = -EPERM;
				1188	goto err_put;
				1189	}
				1190
				1191	WRITE_ONCE(map->frozen, true);
				1192	err_put:
				1193	fdput(f);
				1194	return err;
				1195	}
				1196
				1197	static const struct bpf_prog_ops * const bpf_prog_types[] = {
				1198	#define BPF_PROG_TYPE(_id, _name) \
				1199	[_id] = & _name ## _prog_ops,
				1200	#define BPF_MAP_TYPE(_id, _ops)
				1201	#include <linux/bpf_types.h>
				1202	#undef BPF_PROG_TYPE
				1203	#undef BPF_MAP_TYPE
				1204	};
				1205
				1206	static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
				1207	{
				1208	const struct bpf_prog_ops *ops;
				1209
				1210	if (type >= ARRAY_SIZE(bpf_prog_types))
				1211	return -EINVAL;
				1212	type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types));
				1213	ops = bpf_prog_types[type];
				1214	if (!ops)
				1215	return -EINVAL;
				1216
				1217	if (!bpf_prog_is_dev_bound(prog->aux))
				1218	prog->aux->ops = ops;
				1219	else
				1220	prog->aux->ops = &bpf_offload_prog_ops;
				1221	prog->type = type;
				1222	return 0;
				1223	}
				1224
				1225	/* drop refcnt on maps used by eBPF program and free auxilary data */
				1226	static void free_used_maps(struct bpf_prog_aux *aux)
				1227	{
				1228	enum bpf_cgroup_storage_type stype;
				1229	int i;
				1230
				1231	for_each_cgroup_storage_type(stype) {
				1232	if (!aux->cgroup_storage[stype])
				1233	continue;
				1234	bpf_cgroup_storage_release(aux->prog,
				1235	aux->cgroup_storage[stype]);
				1236	}
				1237
				1238	for (i = 0; i < aux->used_map_cnt; i++)
				1239	bpf_map_put(aux->used_maps[i]);
				1240
				1241	kfree(aux->used_maps);
				1242	}
				1243
				1244	int __bpf_prog_charge(struct user_struct *user, u32 pages)
				1245	{
				1246	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
				1247	unsigned long user_bufs;
				1248
				1249	if (user) {
				1250	user_bufs = atomic_long_add_return(pages, &user->locked_vm);
				1251	if (user_bufs > memlock_limit) {
				1252	atomic_long_sub(pages, &user->locked_vm);
				1253	return -EPERM;
				1254	}
				1255	}
				1256
				1257	return 0;
				1258	}
				1259
				1260	void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
				1261	{
				1262	if (user)
				1263	atomic_long_sub(pages, &user->locked_vm);
				1264	}
				1265
				1266	static int bpf_prog_charge_memlock(struct bpf_prog *prog)
				1267	{
				1268	struct user_struct *user = get_current_user();
				1269	int ret;
				1270
				1271	ret = __bpf_prog_charge(user, prog->pages);
				1272	if (ret) {
				1273	free_uid(user);
				1274	return ret;
				1275	}
				1276
				1277	prog->aux->user = user;
				1278	return 0;
				1279	}
				1280
				1281	static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
				1282	{
				1283	struct user_struct *user = prog->aux->user;
				1284
				1285	__bpf_prog_uncharge(user, prog->pages);
				1286	free_uid(user);
				1287	}
				1288
				1289	static int bpf_prog_alloc_id(struct bpf_prog *prog)
				1290	{
				1291	int id;
				1292
				1293	idr_preload(GFP_KERNEL);
				1294	spin_lock_bh(&prog_idr_lock);
				1295	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
				1296	if (id > 0)
				1297	prog->aux->id = id;
				1298	spin_unlock_bh(&prog_idr_lock);
				1299	idr_preload_end();
				1300
				1301	/* id is in [1, INT_MAX) */
				1302	if (WARN_ON_ONCE(!id))
				1303	return -ENOSPC;
				1304
				1305	return id > 0 ? 0 : id;
				1306	}
				1307
				1308	void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
				1309	{
				1310	/* cBPF to eBPF migrations are currently not in the idr store.
				1311	* Offloaded programs are removed from the store when their device
				1312	* disappears - even if someone grabs an fd to them they are unusable,
				1313	* simply waiting for refcnt to drop to be freed.
				1314	*/
				1315	if (!prog->aux->id)
				1316	return;
				1317
				1318	if (do_idr_lock)
				1319	spin_lock_bh(&prog_idr_lock);
				1320	else
				1321	__acquire(&prog_idr_lock);
				1322
				1323	idr_remove(&prog_idr, prog->aux->id);
				1324	prog->aux->id = 0;
				1325
				1326	if (do_idr_lock)
				1327	spin_unlock_bh(&prog_idr_lock);
				1328	else
				1329	__release(&prog_idr_lock);
				1330	}
				1331
				1332	static void __bpf_prog_put_rcu(struct rcu_head *rcu)
				1333	{
				1334	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
				1335
				1336	kvfree(aux->func_info);
				1337	free_used_maps(aux);
				1338	bpf_prog_uncharge_memlock(aux->prog);
				1339	security_bpf_prog_free(aux);
				1340	bpf_prog_free(aux->prog);
				1341	}
				1342
				1343	static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
				1344	{
				1345	bpf_prog_kallsyms_del_all(prog);
				1346	btf_put(prog->aux->btf);
				1347	bpf_prog_free_linfo(prog);
				1348
				1349	if (deferred)
				1350	call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
				1351	else
				1352	__bpf_prog_put_rcu(&prog->aux->rcu);
				1353	}
				1354
				1355	static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
				1356	{
				1357	if (atomic_dec_and_test(&prog->aux->refcnt)) {
				1358	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
				1359	/* bpf_prog_free_id() must be called first */
				1360	bpf_prog_free_id(prog, do_idr_lock);
				1361	__bpf_prog_put_noref(prog, true);
				1362	}
				1363	}
				1364
				1365	void bpf_prog_put(struct bpf_prog *prog)
				1366	{
				1367	__bpf_prog_put(prog, true);
				1368	}
				1369	EXPORT_SYMBOL_GPL(bpf_prog_put);
				1370
				1371	static int bpf_prog_release(struct inode inode, struct file filp)
				1372	{
				1373	struct bpf_prog *prog = filp->private_data;
				1374
				1375	bpf_prog_put(prog);
				1376	return 0;
				1377	}
				1378
				1379	static void bpf_prog_get_stats(const struct bpf_prog *prog,
				1380	struct bpf_prog_stats *stats)
				1381	{
				1382	u64 nsecs = 0, cnt = 0;
				1383	int cpu;
				1384
				1385	for_each_possible_cpu(cpu) {
				1386	const struct bpf_prog_stats *st;
				1387	unsigned int start;
				1388	u64 tnsecs, tcnt;
				1389
				1390	st = per_cpu_ptr(prog->aux->stats, cpu);
				1391	do {
				1392	start = u64_stats_fetch_begin_irq(&st->syncp);
				1393	tnsecs = st->nsecs;
				1394	tcnt = st->cnt;
				1395	} while (u64_stats_fetch_retry_irq(&st->syncp, start));
				1396	nsecs += tnsecs;
				1397	cnt += tcnt;
				1398	}
				1399	stats->nsecs = nsecs;
				1400	stats->cnt = cnt;
				1401	}
				1402
				#ifdef CONFIG_PROC_FS
				/* ->show_fdinfo handler for program fds: print type, JIT state, tag (as
				 * hex), memlock size in bytes, id and the accumulated run time / run
				 * count of the program stored in filp->private_data.
				 */
				static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
				{
					const struct bpf_prog *prog = filp->private_data;
					/* two hex digits per tag byte plus the terminating NUL */
					char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
					struct bpf_prog_stats stats;

					bpf_prog_get_stats(prog, &stats);
					bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
					seq_printf(m,
						   "prog_type:\t%u\n"
						   "prog_jited:\t%u\n"
						   "prog_tag:\t%s\n"
						   "memlock:\t%llu\n"
						   "prog_id:\t%u\n"
						   "run_time_ns:\t%llu\n"
						   "run_cnt:\t%llu\n",
						   prog->type,
						   prog->jited,
						   prog_tag,
						   prog->pages * 1ULL << PAGE_SHIFT,
						   prog->aux->id,
						   stats.nsecs,
						   stats.cnt);
				}
				#endif
				1429
				1430	const struct file_operations bpf_prog_fops = {
				1431	#ifdef CONFIG_PROC_FS
				1432	.show_fdinfo = bpf_prog_show_fdinfo,
				1433	#endif
				1434	.release = bpf_prog_release,
				1435	.read = bpf_dummy_read,
				1436	.write = bpf_dummy_write,
				1437	};
				1438
				1439	int bpf_prog_new_fd(struct bpf_prog *prog)
				1440	{
				1441	int ret;
				1442
				1443	ret = security_bpf_prog(prog);
				1444	if (ret < 0)
				1445	return ret;
				1446
				1447	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				1448	O_RDWR \| O_CLOEXEC);
				1449	}
				1450
				1451	static struct bpf_prog *____bpf_prog_get(struct fd f)
				1452	{
				1453	if (!f.file)
				1454	return ERR_PTR(-EBADF);
				1455	if (f.file->f_op != &bpf_prog_fops) {
				1456	fdput(f);
				1457	return ERR_PTR(-EINVAL);
				1458	}
				1459
				1460	return f.file->private_data;
				1461	}
				1462
				1463	struct bpf_prog bpf_prog_add(struct bpf_prog prog, int i)
				1464	{
				1465	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
				1466	atomic_sub(i, &prog->aux->refcnt);
				1467	return ERR_PTR(-EBUSY);
				1468	}
				1469	return prog;
				1470	}
				1471	EXPORT_SYMBOL_GPL(bpf_prog_add);
				1472
				1473	void bpf_prog_sub(struct bpf_prog *prog, int i)
				1474	{
				1475	/* Only to be used for undoing previous bpf_prog_add() in some
				1476	* error path. We still know that another entity in our call
				1477	* path holds a reference to the program, thus atomic_sub() can
				1478	* be safely used in such cases!
				1479	*/
				1480	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
				1481	}
				1482	EXPORT_SYMBOL_GPL(bpf_prog_sub);
				1483
				/* Take one reference on @prog; see bpf_prog_add() for the return value. */
				struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
				{
					return bpf_prog_add(prog, 1);
				}
				EXPORT_SYMBOL_GPL(bpf_prog_inc);
				1489
				1490	/* prog_idr_lock should have been held */
				1491	struct bpf_prog bpf_prog_inc_not_zero(struct bpf_prog prog)
				1492	{
				1493	int refold;
				1494
				1495	refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0);
				1496
				1497	if (refold >= BPF_MAX_REFCNT) {
				1498	__bpf_prog_put(prog, false);
				1499	return ERR_PTR(-EBUSY);
				1500	}
				1501
				1502	if (!refold)
				1503	return ERR_PTR(-ENOENT);
				1504
				1505	return prog;
				1506	}
				1507	EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
				1508
				1509	bool bpf_prog_get_ok(struct bpf_prog *prog,
				1510	enum bpf_prog_type *attach_type, bool attach_drv)
				1511	{
				1512	/* not an attachment, just a refcount inc, always allow */
				1513	if (!attach_type)
				1514	return true;
				1515
				1516	if (prog->type != *attach_type)
				1517	return false;
				1518	if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv)
				1519	return false;
				1520
				1521	return true;
				1522	}
				1523
				1524	static struct bpf_prog __bpf_prog_get(u32 ufd, enum bpf_prog_type attach_type,
				1525	bool attach_drv)
				1526	{
				1527	struct fd f = fdget(ufd);
				1528	struct bpf_prog *prog;
				1529
				1530	prog = ____bpf_prog_get(f);
				1531	if (IS_ERR(prog))
				1532	return prog;
				1533	if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
				1534	prog = ERR_PTR(-EINVAL);
				1535	goto out;
				1536	}
				1537
				1538	prog = bpf_prog_inc(prog);
				1539	out:
				1540	fdput(f);
				1541	return prog;
				1542	}
				1543
				1544	struct bpf_prog *bpf_prog_get(u32 ufd)
				1545	{
				1546	return __bpf_prog_get(ufd, NULL, false);
				1547	}
				1548
				1549	struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
				1550	bool attach_drv)
				1551	{
				1552	return __bpf_prog_get(ufd, &type, attach_drv);
				1553	}
				1554	EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
				1555
				1556	/* Initially all BPF programs could be loaded w/o specifying
				1557	* expected_attach_type. Later for some of them specifying expected_attach_type
				1558	* at load time became required so that program could be validated properly.
				1559	* Programs of types that are allowed to be loaded both w/ and w/o (for
				1560	* backward compatibility) expected_attach_type, should have the default attach
				1561	* type assigned to expected_attach_type for the latter case, so that it can be
				1562	* validated later at attach time.
				1563	*
				1564	* bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
				1565	* prog type requires it but has some attach types that have to be backward
				1566	* compatible.
				1567	*/
				1568	static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
				1569	{
				1570	switch (attr->prog_type) {
				1571	case BPF_PROG_TYPE_CGROUP_SOCK:
				1572	/* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
				1573	* exist so checking for non-zero is the way to go here.
				1574	*/
				1575	if (!attr->expected_attach_type)
				1576	attr->expected_attach_type =
				1577	BPF_CGROUP_INET_SOCK_CREATE;
				1578	break;
				1579	}
				1580	}
				1581
				1582	static int
				1583	bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
				1584	enum bpf_attach_type expected_attach_type)
				1585	{
				1586	switch (prog_type) {
				1587	case BPF_PROG_TYPE_CGROUP_SOCK:
				1588	switch (expected_attach_type) {
				1589	case BPF_CGROUP_INET_SOCK_CREATE:
				1590	case BPF_CGROUP_INET4_POST_BIND:
				1591	case BPF_CGROUP_INET6_POST_BIND:
				1592	return 0;
				1593	default:
				1594	return -EINVAL;
				1595	}
				1596	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
				1597	switch (expected_attach_type) {
				1598	case BPF_CGROUP_INET4_BIND:
				1599	case BPF_CGROUP_INET6_BIND:
				1600	case BPF_CGROUP_INET4_CONNECT:
				1601	case BPF_CGROUP_INET6_CONNECT:
				1602	case BPF_CGROUP_UDP4_SENDMSG:
				1603	case BPF_CGROUP_UDP6_SENDMSG:
				1604	case BPF_CGROUP_UDP4_RECVMSG:
				1605	case BPF_CGROUP_UDP6_RECVMSG:
				1606	return 0;
				1607	default:
				1608	return -EINVAL;
				1609	}
				1610	case BPF_PROG_TYPE_CGROUP_SKB:
				1611	switch (expected_attach_type) {
				1612	case BPF_CGROUP_INET_INGRESS:
				1613	case BPF_CGROUP_INET_EGRESS:
				1614	return 0;
				1615	default:
				1616	return -EINVAL;
				1617	}
				1618	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
				1619	switch (expected_attach_type) {
				1620	case BPF_CGROUP_SETSOCKOPT:
				1621	case BPF_CGROUP_GETSOCKOPT:
				1622	return 0;
				1623	default:
				1624	return -EINVAL;
				1625	}
				1626	default:
				1627	return 0;
				1628	}
				1629	}
				1630
				1631	/* last field in 'union bpf_attr' used by this command */
				1632	#define BPF_PROG_LOAD_LAST_FIELD line_info_cnt
				1633
				1634	static int bpf_prog_load(union bpf_attr attr, union bpf_attr __user uattr)
				1635	{
				1636	enum bpf_prog_type type = attr->prog_type;
				1637	struct bpf_prog *prog;
				1638	int err;
				1639	char license[128];
				1640	bool is_gpl;
				1641
				1642	if (CHECK_ATTR(BPF_PROG_LOAD))
				1643	return -EINVAL;
				1644
				1645	if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT \|
				1646	BPF_F_ANY_ALIGNMENT \|
				1647	BPF_F_TEST_STATE_FREQ \|
				1648	BPF_F_TEST_RND_HI32))
				1649	return -EINVAL;
				1650
				1651	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
				1652	(attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
				1653	!capable(CAP_SYS_ADMIN))
				1654	return -EPERM;
				1655
				1656	/* copy eBPF program license from user space */
				1657	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
				1658	sizeof(license) - 1) < 0)
				1659	return -EFAULT;
				1660	license[sizeof(license) - 1] = 0;
				1661
				1662	/* eBPF programs must be GPL compatible to use GPL-ed functions */
				1663	is_gpl = license_is_gpl_compatible(license);
				1664
				1665	if (attr->insn_cnt == 0 \|\|
				1666	attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
				1667	return -E2BIG;
				1668	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
				1669	type != BPF_PROG_TYPE_CGROUP_SKB &&
				1670	!capable(CAP_SYS_ADMIN))
				1671	return -EPERM;
				1672
				1673	bpf_prog_load_fixup_attach_type(attr);
				1674	if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
				1675	return -EINVAL;
				1676
				1677	/* plain bpf_prog allocation */
				1678	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
				1679	if (!prog)
				1680	return -ENOMEM;
				1681
				1682	prog->expected_attach_type = attr->expected_attach_type;
				1683
				1684	prog->aux->offload_requested = !!attr->prog_ifindex;
				1685
				1686	err = security_bpf_prog_alloc(prog->aux);
				1687	if (err)
				1688	goto free_prog_nouncharge;
				1689
				1690	err = bpf_prog_charge_memlock(prog);
				1691	if (err)
				1692	goto free_prog_sec;
				1693
				1694	prog->len = attr->insn_cnt;
				1695
				1696	err = -EFAULT;
				1697	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
				1698	bpf_prog_insn_size(prog)) != 0)
				1699	goto free_prog;
				1700
				1701	prog->orig_prog = NULL;
				1702	prog->jited = 0;
				1703
				1704	atomic_set(&prog->aux->refcnt, 1);
				1705	prog->gpl_compatible = is_gpl ? 1 : 0;
				1706
				1707	if (bpf_prog_is_dev_bound(prog->aux)) {
				1708	err = bpf_prog_offload_init(prog, attr);
				1709	if (err)
				1710	goto free_prog;
				1711	}
				1712
				1713	/* find program type: socket_filter vs tracing_filter */
				1714	err = find_prog_type(type, prog);
				1715	if (err < 0)
				1716	goto free_prog;
				1717
				1718	prog->aux->load_time = ktime_get_boottime_ns();
				1719	err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
				1720	if (err)
				1721	goto free_prog;
				1722
				1723	/* run eBPF verifier */
				1724	err = bpf_check(&prog, attr, uattr);
				1725	if (err < 0)
				1726	goto free_used_maps;
				1727
				1728	prog = bpf_prog_select_runtime(prog, &err);
				1729	if (err < 0)
				1730	goto free_used_maps;
				1731
				1732	err = bpf_prog_alloc_id(prog);
				1733	if (err)
				1734	goto free_used_maps;
				1735
				1736	/* Upon success of bpf_prog_alloc_id(), the BPF prog is
				1737	* effectively publicly exposed. However, retrieving via
				1738	* bpf_prog_get_fd_by_id() will take another reference,
				1739	* therefore it cannot be gone underneath us.
				1740	*
				1741	* Only for the time /after/ successful bpf_prog_new_fd()
				1742	* and before returning to userspace, we might just hold
				1743	* one reference and any parallel close on that fd could
				1744	* rip everything out. Hence, below notifications must
				1745	* happen before bpf_prog_new_fd().
				1746	*
				1747	* Also, any failure handling from this point onwards must
				1748	* be using bpf_prog_put() given the program is exposed.
				1749	*/
				1750	bpf_prog_kallsyms_add(prog);
				1751	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
				1752
				1753	err = bpf_prog_new_fd(prog);
				1754	if (err < 0)
				1755	bpf_prog_put(prog);
				1756	return err;
				1757
				1758	free_used_maps:
				1759	/* In case we have subprogs, we need to wait for a grace
				1760	* period before we can tear down JIT memory since symbols
				1761	* are already exposed under kallsyms.
				1762	*/
				1763	__bpf_prog_put_noref(prog, prog->aux->func_cnt);
				1764	return err;
				1765	free_prog:
				1766	bpf_prog_uncharge_memlock(prog);
				1767	free_prog_sec:
				1768	security_bpf_prog_free(prog->aux);
				1769	free_prog_nouncharge:
				1770	bpf_prog_free(prog);
				1771	return err;
				1772	}
				1773
				1774	#define BPF_OBJ_LAST_FIELD file_flags
				1775
				1776	static int bpf_obj_pin(const union bpf_attr *attr)
				1777	{
				1778	if (CHECK_ATTR(BPF_OBJ) \|\| attr->file_flags != 0)
				1779	return -EINVAL;
				1780
				1781	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
				1782	}
				1783
				1784	static int bpf_obj_get(const union bpf_attr *attr)
				1785	{
				1786	if (CHECK_ATTR(BPF_OBJ) \|\| attr->bpf_fd != 0 \|\|
				1787	attr->file_flags & ~BPF_OBJ_FLAG_MASK)
				1788	return -EINVAL;
				1789
				1790	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
				1791	attr->file_flags);
				1792	}
				1793
				/* Private data of a raw tracepoint fd: the tracepoint that was opened and
				 * the program registered on it.
				 */
				struct bpf_raw_tracepoint {
					struct bpf_raw_event_map *btp;	/* from bpf_get_raw_tracepoint() */
					struct bpf_prog *prog;		/* NULL until a program is registered */
				};
				1798
				1799	static int bpf_raw_tracepoint_release(struct inode inode, struct file filp)
				1800	{
				1801	struct bpf_raw_tracepoint *raw_tp = filp->private_data;
				1802
				1803	if (raw_tp->prog) {
				1804	bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
				1805	bpf_prog_put(raw_tp->prog);
				1806	}
				1807	bpf_put_raw_tracepoint(raw_tp->btp);
				1808	kfree(raw_tp);
				1809	return 0;
				1810	}
				1811
				1812	static const struct file_operations bpf_raw_tp_fops = {
				1813	.release = bpf_raw_tracepoint_release,
				1814	.read = bpf_dummy_read,
				1815	.write = bpf_dummy_write,
				1816	};
				1817
				1818	#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
				1819
				1820	static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
				1821	{
				1822	struct bpf_raw_tracepoint *raw_tp;
				1823	struct bpf_raw_event_map *btp;
				1824	struct bpf_prog *prog;
				1825	char tp_name[128];
				1826	int tp_fd, err;
				1827
				1828	if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name),
				1829	sizeof(tp_name) - 1) < 0)
				1830	return -EFAULT;
				1831	tp_name[sizeof(tp_name) - 1] = 0;
				1832
				1833	btp = bpf_get_raw_tracepoint(tp_name);
				1834	if (!btp)
				1835	return -ENOENT;
				1836
				1837	raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
				1838	if (!raw_tp) {
				1839	err = -ENOMEM;
				1840	goto out_put_btp;
				1841	}
				1842	raw_tp->btp = btp;
				1843
				1844	prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
				1845	if (IS_ERR(prog)) {
				1846	err = PTR_ERR(prog);
				1847	goto out_free_tp;
				1848	}
				1849	if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
				1850	prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
				1851	err = -EINVAL;
				1852	goto out_put_prog;
				1853	}
				1854
				1855	err = bpf_probe_register(raw_tp->btp, prog);
				1856	if (err)
				1857	goto out_put_prog;
				1858
				1859	raw_tp->prog = prog;
				1860	tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
				1861	O_CLOEXEC);
				1862	if (tp_fd < 0) {
				1863	bpf_probe_unregister(raw_tp->btp, prog);
				1864	err = tp_fd;
				1865	goto out_put_prog;
				1866	}
				1867	return tp_fd;
				1868
				1869	out_put_prog:
				1870	bpf_prog_put(prog);
				1871	out_free_tp:
				1872	kfree(raw_tp);
				1873	out_put_btp:
				1874	bpf_put_raw_tracepoint(btp);
				1875	return err;
				1876	}
				1877
				1878	static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
				1879	enum bpf_attach_type attach_type)
				1880	{
				1881	switch (prog->type) {
				1882	case BPF_PROG_TYPE_CGROUP_SOCK:
				1883	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
				1884	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
				1885	return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
				1886	case BPF_PROG_TYPE_CGROUP_SKB:
				1887	return prog->enforce_expected_attach_type &&
				1888	prog->expected_attach_type != attach_type ?
				1889	-EINVAL : 0;
				1890	default:
				1891	return 0;
				1892	}
				1893	}
				1894
				1895	#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
				1896
				1897	#define BPF_F_ATTACH_MASK \
				1898	(BPF_F_ALLOW_OVERRIDE \| BPF_F_ALLOW_MULTI)
				1899
				1900	static int bpf_prog_attach(const union bpf_attr *attr)
				1901	{
				1902	enum bpf_prog_type ptype;
				1903	struct bpf_prog *prog;
				1904	int ret;
				1905
				1906	if (!capable(CAP_NET_ADMIN))
				1907	return -EPERM;
				1908
				1909	if (CHECK_ATTR(BPF_PROG_ATTACH))
				1910	return -EINVAL;
				1911
				1912	if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
				1913	return -EINVAL;
				1914
				1915	switch (attr->attach_type) {
				1916	case BPF_CGROUP_INET_INGRESS:
				1917	case BPF_CGROUP_INET_EGRESS:
				1918	ptype = BPF_PROG_TYPE_CGROUP_SKB;
				1919	break;
				1920	case BPF_CGROUP_INET_SOCK_CREATE:
				1921	case BPF_CGROUP_INET4_POST_BIND:
				1922	case BPF_CGROUP_INET6_POST_BIND:
				1923	ptype = BPF_PROG_TYPE_CGROUP_SOCK;
				1924	break;
				1925	case BPF_CGROUP_INET4_BIND:
				1926	case BPF_CGROUP_INET6_BIND:
				1927	case BPF_CGROUP_INET4_CONNECT:
				1928	case BPF_CGROUP_INET6_CONNECT:
				1929	case BPF_CGROUP_UDP4_SENDMSG:
				1930	case BPF_CGROUP_UDP6_SENDMSG:
				1931	case BPF_CGROUP_UDP4_RECVMSG:
				1932	case BPF_CGROUP_UDP6_RECVMSG:
				1933	ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
				1934	break;
				1935	case BPF_CGROUP_SOCK_OPS:
				1936	ptype = BPF_PROG_TYPE_SOCK_OPS;
				1937	break;
				1938	case BPF_CGROUP_DEVICE:
				1939	ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
				1940	break;
				1941	case BPF_SK_MSG_VERDICT:
				1942	ptype = BPF_PROG_TYPE_SK_MSG;
				1943	break;
				1944	case BPF_SK_SKB_STREAM_PARSER:
				1945	case BPF_SK_SKB_STREAM_VERDICT:
				1946	ptype = BPF_PROG_TYPE_SK_SKB;
				1947	break;
				1948	case BPF_LIRC_MODE2:
				1949	ptype = BPF_PROG_TYPE_LIRC_MODE2;
				1950	break;
				1951	case BPF_FLOW_DISSECTOR:
				1952	ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
				1953	break;
				1954	case BPF_CGROUP_SYSCTL:
				1955	ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
				1956	break;
				1957	case BPF_CGROUP_GETSOCKOPT:
				1958	case BPF_CGROUP_SETSOCKOPT:
				1959	ptype = BPF_PROG_TYPE_CGROUP_SOCKOPT;
				1960	break;
				1961	default:
				1962	return -EINVAL;
				1963	}
				1964
				1965	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
				1966	if (IS_ERR(prog))
				1967	return PTR_ERR(prog);
				1968
				1969	if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
				1970	bpf_prog_put(prog);
				1971	return -EINVAL;
				1972	}
				1973
				1974	switch (ptype) {
				1975	case BPF_PROG_TYPE_SK_SKB:
				1976	case BPF_PROG_TYPE_SK_MSG:
				1977	ret = sock_map_get_from_fd(attr, prog);
				1978	break;
				1979	case BPF_PROG_TYPE_LIRC_MODE2:
				1980	ret = lirc_prog_attach(attr, prog);
				1981	break;
				1982	case BPF_PROG_TYPE_FLOW_DISSECTOR:
				1983	ret = skb_flow_dissector_bpf_prog_attach(attr, prog);
				1984	break;
				1985	default:
				1986	ret = cgroup_bpf_prog_attach(attr, ptype, prog);
				1987	}
				1988
				1989	if (ret)
				1990	bpf_prog_put(prog);
				1991	return ret;
				1992	}
				1993
				1994	#define BPF_PROG_DETACH_LAST_FIELD attach_type
				1995
				1996	static int bpf_prog_detach(const union bpf_attr *attr)
				1997	{
				1998	enum bpf_prog_type ptype;
				1999
				2000	if (!capable(CAP_NET_ADMIN))
				2001	return -EPERM;
				2002
				2003	if (CHECK_ATTR(BPF_PROG_DETACH))
				2004	return -EINVAL;
				2005
				2006	switch (attr->attach_type) {
				2007	case BPF_CGROUP_INET_INGRESS:
				2008	case BPF_CGROUP_INET_EGRESS:
				2009	ptype = BPF_PROG_TYPE_CGROUP_SKB;
				2010	break;
				2011	case BPF_CGROUP_INET_SOCK_CREATE:
				2012	case BPF_CGROUP_INET4_POST_BIND:
				2013	case BPF_CGROUP_INET6_POST_BIND:
				2014	ptype = BPF_PROG_TYPE_CGROUP_SOCK;
				2015	break;
				2016	case BPF_CGROUP_INET4_BIND:
				2017	case BPF_CGROUP_INET6_BIND:
				2018	case BPF_CGROUP_INET4_CONNECT:
				2019	case BPF_CGROUP_INET6_CONNECT:
				2020	case BPF_CGROUP_UDP4_SENDMSG:
				2021	case BPF_CGROUP_UDP6_SENDMSG:
				2022	case BPF_CGROUP_UDP4_RECVMSG:
				2023	case BPF_CGROUP_UDP6_RECVMSG:
				2024	ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
				2025	break;
				2026	case BPF_CGROUP_SOCK_OPS:
				2027	ptype = BPF_PROG_TYPE_SOCK_OPS;
				2028	break;
				2029	case BPF_CGROUP_DEVICE:
				2030	ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
				2031	break;
				2032	case BPF_SK_MSG_VERDICT:
				2033	return sock_map_prog_detach(attr, BPF_PROG_TYPE_SK_MSG);
				2034	case BPF_SK_SKB_STREAM_PARSER:
				2035	case BPF_SK_SKB_STREAM_VERDICT:
				2036	return sock_map_prog_detach(attr, BPF_PROG_TYPE_SK_SKB);
				2037	case BPF_LIRC_MODE2:
				2038	return lirc_prog_detach(attr);
				2039	case BPF_FLOW_DISSECTOR:
				2040	return skb_flow_dissector_bpf_prog_detach(attr);
				2041	case BPF_CGROUP_SYSCTL:
				2042	ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
				2043	break;
				2044	case BPF_CGROUP_GETSOCKOPT:
				2045	case BPF_CGROUP_SETSOCKOPT:
				2046	ptype = BPF_PROG_TYPE_CGROUP_SOCKOPT;
				2047	break;
				2048	default:
				2049	return -EINVAL;
				2050	}
				2051
				2052	return cgroup_bpf_prog_detach(attr, ptype);
				2053	}
				2054
				2055	#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
				2056
				2057	static int bpf_prog_query(const union bpf_attr *attr,
				2058	union bpf_attr __user *uattr)
				2059	{
				2060	if (!capable(CAP_NET_ADMIN))
				2061	return -EPERM;
				2062	if (CHECK_ATTR(BPF_PROG_QUERY))
				2063	return -EINVAL;
				2064	if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
				2065	return -EINVAL;
				2066
				2067	switch (attr->query.attach_type) {
				2068	case BPF_CGROUP_INET_INGRESS:
				2069	case BPF_CGROUP_INET_EGRESS:
				2070	case BPF_CGROUP_INET_SOCK_CREATE:
				2071	case BPF_CGROUP_INET4_BIND:
				2072	case BPF_CGROUP_INET6_BIND:
				2073	case BPF_CGROUP_INET4_POST_BIND:
				2074	case BPF_CGROUP_INET6_POST_BIND:
				2075	case BPF_CGROUP_INET4_CONNECT:
				2076	case BPF_CGROUP_INET6_CONNECT:
				2077	case BPF_CGROUP_UDP4_SENDMSG:
				2078	case BPF_CGROUP_UDP6_SENDMSG:
				2079	case BPF_CGROUP_UDP4_RECVMSG:
				2080	case BPF_CGROUP_UDP6_RECVMSG:
				2081	case BPF_CGROUP_SOCK_OPS:
				2082	case BPF_CGROUP_DEVICE:
				2083	case BPF_CGROUP_SYSCTL:
				2084	case BPF_CGROUP_GETSOCKOPT:
				2085	case BPF_CGROUP_SETSOCKOPT:
				2086	break;
				2087	case BPF_LIRC_MODE2:
				2088	return lirc_prog_query(attr, uattr);
				2089	case BPF_FLOW_DISSECTOR:
				2090	return skb_flow_dissector_prog_query(attr, uattr);
				2091	default:
				2092	return -EINVAL;
				2093	}
				2094
				2095	return cgroup_bpf_prog_query(attr, uattr);
				2096	}
				2097
				2098	#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
				2099
				2100	static int bpf_prog_test_run(const union bpf_attr *attr,
				2101	union bpf_attr __user *uattr)
				2102	{
				2103	struct bpf_prog *prog;
				2104	int ret = -ENOTSUPP;
				2105
				2106	if (!capable(CAP_SYS_ADMIN))
				2107	return -EPERM;
				2108	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
				2109	return -EINVAL;
				2110
				2111	if ((attr->test.ctx_size_in && !attr->test.ctx_in) \|\|
				2112	(!attr->test.ctx_size_in && attr->test.ctx_in))
				2113	return -EINVAL;
				2114
				2115	if ((attr->test.ctx_size_out && !attr->test.ctx_out) \|\|
				2116	(!attr->test.ctx_size_out && attr->test.ctx_out))
				2117	return -EINVAL;
				2118
				2119	prog = bpf_prog_get(attr->test.prog_fd);
				2120	if (IS_ERR(prog))
				2121	return PTR_ERR(prog);
				2122
				2123	if (prog->aux->ops->test_run)
				2124	ret = prog->aux->ops->test_run(prog, attr, uattr);
				2125
				2126	bpf_prog_put(prog);
				2127	return ret;
				2128	}
				2129
				2130	#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id
				2131
				2132	static int bpf_obj_get_next_id(const union bpf_attr *attr,
				2133	union bpf_attr __user *uattr,
				2134	struct idr *idr,
				2135	spinlock_t *lock)
				2136	{
				2137	u32 next_id = attr->start_id;
				2138	int err = 0;
				2139
				2140	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) \|\| next_id >= INT_MAX)
				2141	return -EINVAL;
				2142
				2143	if (!capable(CAP_SYS_ADMIN))
				2144	return -EPERM;
				2145
				2146	next_id++;
				2147	spin_lock_bh(lock);
				2148	if (!idr_get_next(idr, &next_id))
				2149	err = -ENOENT;
				2150	spin_unlock_bh(lock);
				2151
				2152	if (!err)
				2153	err = put_user(next_id, &uattr->next_id);
				2154
				2155	return err;
				2156	}
				2157
				2158	#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
				2159
				2160	static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
				2161	{
				2162	struct bpf_prog *prog;
				2163	u32 id = attr->prog_id;
				2164	int fd;
				2165
				2166	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
				2167	return -EINVAL;
				2168
				2169	if (!capable(CAP_SYS_ADMIN))
				2170	return -EPERM;
				2171
				2172	spin_lock_bh(&prog_idr_lock);
				2173	prog = idr_find(&prog_idr, id);
				2174	if (prog)
				2175	prog = bpf_prog_inc_not_zero(prog);
				2176	else
				2177	prog = ERR_PTR(-ENOENT);
				2178	spin_unlock_bh(&prog_idr_lock);
				2179
				2180	if (IS_ERR(prog))
				2181	return PTR_ERR(prog);
				2182
				2183	fd = bpf_prog_new_fd(prog);
				2184	if (fd < 0)
				2185	bpf_prog_put(prog);
				2186
				2187	return fd;
				2188	}
				2189
				2190	#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags
				2191
				2192	static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
				2193	{
				2194	struct bpf_map *map;
				2195	u32 id = attr->map_id;
				2196	int f_flags;
				2197	int fd;
				2198
				2199	if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) \|\|
				2200	attr->open_flags & ~BPF_OBJ_FLAG_MASK)
				2201	return -EINVAL;
				2202
				2203	if (!capable(CAP_SYS_ADMIN))
				2204	return -EPERM;
				2205
				2206	f_flags = bpf_get_file_flag(attr->open_flags);
				2207	if (f_flags < 0)
				2208	return f_flags;
				2209
				2210	spin_lock_bh(&map_idr_lock);
				2211	map = idr_find(&map_idr, id);
				2212	if (map)
				2213	map = __bpf_map_inc_not_zero(map, true);
				2214	else
				2215	map = ERR_PTR(-ENOENT);
				2216	spin_unlock_bh(&map_idr_lock);
				2217
				2218	if (IS_ERR(map))
				2219	return PTR_ERR(map);
				2220
				2221	fd = bpf_map_new_fd(map, f_flags);
				2222	if (fd < 0)
				2223	bpf_map_put_with_uref(map);
				2224
				2225	return fd;
				2226	}
				2227
				2228	static const struct bpf_map bpf_map_from_imm(const struct bpf_prog prog,
				2229	unsigned long addr, u32 *off,
				2230	u32 *type)
				2231	{
				2232	const struct bpf_map *map;
				2233	int i;
				2234
				2235	for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
				2236	map = prog->aux->used_maps[i];
				2237	if (map == (void *)addr) {
				2238	*type = BPF_PSEUDO_MAP_FD;
				2239	return map;
				2240	}
				2241	if (!map->ops->map_direct_value_meta)
				2242	continue;
				2243	if (!map->ops->map_direct_value_meta(map, addr, off)) {
				2244	*type = BPF_PSEUDO_MAP_VALUE;
				2245	return map;
				2246	}
				2247	}
				2248
				2249	return NULL;
				2250	}
				2251
				2252	static struct bpf_insn bpf_insn_prepare_dump(const struct bpf_prog prog,
				2253	const struct cred *f_cred)
				2254	{
				2255	const struct bpf_map *map;
				2256	struct bpf_insn *insns;
				2257	u32 off, type;
				2258	u64 imm;
				2259	int i;
				2260
				2261	insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
				2262	GFP_USER);
				2263	if (!insns)
				2264	return insns;
				2265
				2266	for (i = 0; i < prog->len; i++) {
				2267	if (insns[i].code == (BPF_JMP \| BPF_TAIL_CALL)) {
				2268	insns[i].code = BPF_JMP \| BPF_CALL;
				2269	insns[i].imm = BPF_FUNC_tail_call;
				2270	/* fall-through */
				2271	}
				2272	if (insns[i].code == (BPF_JMP \| BPF_CALL) \|\|
				2273	insns[i].code == (BPF_JMP \| BPF_CALL_ARGS)) {
				2274	if (insns[i].code == (BPF_JMP \| BPF_CALL_ARGS))
				2275	insns[i].code = BPF_JMP \| BPF_CALL;
				2276	if (!bpf_dump_raw_ok(f_cred))
				2277	insns[i].imm = 0;
				2278	continue;
				2279	}
				2280
				2281	if (insns[i].code != (BPF_LD \| BPF_IMM \| BPF_DW))
				2282	continue;
				2283
				2284	imm = ((u64)insns[i + 1].imm << 32) \| (u32)insns[i].imm;
				2285	map = bpf_map_from_imm(prog, imm, &off, &type);
				2286	if (map) {
				2287	insns[i].src_reg = type;
				2288	insns[i].imm = map->id;
				2289	insns[i + 1].imm = off;
				2290	continue;
				2291	}
				2292	}
				2293
				2294	return insns;
				2295	}
				2296
				2297	static int set_info_rec_size(struct bpf_prog_info *info)
				2298	{
				2299	/*
				2300	* Ensure info.*_rec_size is the same as kernel expected size
				2301	*
				2302	* or
				2303	*
				2304	* Only allow zero *_rec_size if both _rec_size and _cnt are
				2305	* zero. In this case, the kernel will set the expected
				2306	* _rec_size back to the info.
				2307	*/
				2308
				2309	if ((info->nr_func_info \|\| info->func_info_rec_size) &&
				2310	info->func_info_rec_size != sizeof(struct bpf_func_info))
				2311	return -EINVAL;
				2312
				2313	if ((info->nr_line_info \|\| info->line_info_rec_size) &&
				2314	info->line_info_rec_size != sizeof(struct bpf_line_info))
				2315	return -EINVAL;
				2316
				2317	if ((info->nr_jited_line_info \|\| info->jited_line_info_rec_size) &&
				2318	info->jited_line_info_rec_size != sizeof(__u64))
				2319	return -EINVAL;
				2320
				2321	info->func_info_rec_size = sizeof(struct bpf_func_info);
				2322	info->line_info_rec_size = sizeof(struct bpf_line_info);
				2323	info->jited_line_info_rec_size = sizeof(__u64);
				2324
				2325	return 0;
				2326	}
				2327
				2328	static int bpf_prog_get_info_by_fd(struct file *file,
				2329	struct bpf_prog *prog,
				2330	const union bpf_attr *attr,
				2331	union bpf_attr __user *uattr)
				2332	{
				2333	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
				2334	struct bpf_prog_info info;
				2335	u32 info_len = attr->info.info_len;
				2336	struct bpf_prog_stats stats;
				2337	char __user *uinsns;
				2338	u32 ulen;
				2339	int err;
				2340
				2341	err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
				2342	if (err)
				2343	return err;
				2344	info_len = min_t(u32, sizeof(info), info_len);
				2345
				2346	memset(&info, 0, sizeof(info));
				2347	if (copy_from_user(&info, uinfo, info_len))
				2348	return -EFAULT;
				2349
				2350	info.type = prog->type;
				2351	info.id = prog->aux->id;
				2352	info.load_time = prog->aux->load_time;
				2353	info.created_by_uid = from_kuid_munged(current_user_ns(),
				2354	prog->aux->user->uid);
				2355	info.gpl_compatible = prog->gpl_compatible;
				2356
				2357	memcpy(info.tag, prog->tag, sizeof(prog->tag));
				2358	memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
				2359
				2360	ulen = info.nr_map_ids;
				2361	info.nr_map_ids = prog->aux->used_map_cnt;
				2362	ulen = min_t(u32, info.nr_map_ids, ulen);
				2363	if (ulen) {
				2364	u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
				2365	u32 i;
				2366
				2367	for (i = 0; i < ulen; i++)
				2368	if (put_user(prog->aux->used_maps[i]->id,
				2369	&user_map_ids[i]))
				2370	return -EFAULT;
				2371	}
				2372
				2373	err = set_info_rec_size(&info);
				2374	if (err)
				2375	return err;
				2376
				2377	bpf_prog_get_stats(prog, &stats);
				2378	info.run_time_ns = stats.nsecs;
				2379	info.run_cnt = stats.cnt;
				2380
				2381	if (!capable(CAP_SYS_ADMIN)) {
				2382	info.jited_prog_len = 0;
				2383	info.xlated_prog_len = 0;
				2384	info.nr_jited_ksyms = 0;
				2385	info.nr_jited_func_lens = 0;
				2386	info.nr_func_info = 0;
				2387	info.nr_line_info = 0;
				2388	info.nr_jited_line_info = 0;
				2389	goto done;
				2390	}
				2391
				2392	ulen = info.xlated_prog_len;
				2393	info.xlated_prog_len = bpf_prog_insn_size(prog);
				2394	if (info.xlated_prog_len && ulen) {
				2395	struct bpf_insn *insns_sanitized;
				2396	bool fault;
				2397
				2398	if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) {
				2399	info.xlated_prog_insns = 0;
				2400	goto done;
				2401	}
				2402	insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred);
				2403	if (!insns_sanitized)
				2404	return -ENOMEM;
				2405	uinsns = u64_to_user_ptr(info.xlated_prog_insns);
				2406	ulen = min_t(u32, info.xlated_prog_len, ulen);
				2407	fault = copy_to_user(uinsns, insns_sanitized, ulen);
				2408	kfree(insns_sanitized);
				2409	if (fault)
				2410	return -EFAULT;
				2411	}
				2412
				2413	if (bpf_prog_is_dev_bound(prog->aux)) {
				2414	err = bpf_prog_offload_info_fill(&info, prog);
				2415	if (err)
				2416	return err;
				2417	goto done;
				2418	}
				2419
				2420	/* NOTE: the following code is supposed to be skipped for offload.
				2421	* bpf_prog_offload_info_fill() is the place to fill similar fields
				2422	* for offload.
				2423	*/
				2424	ulen = info.jited_prog_len;
				2425	if (prog->aux->func_cnt) {
				2426	u32 i;
				2427
				2428	info.jited_prog_len = 0;
				2429	for (i = 0; i < prog->aux->func_cnt; i++)
				2430	info.jited_prog_len += prog->aux->func[i]->jited_len;
				2431	} else {
				2432	info.jited_prog_len = prog->jited_len;
				2433	}
				2434
				2435	if (info.jited_prog_len && ulen) {
				2436	if (bpf_dump_raw_ok(file->f_cred)) {
				2437	uinsns = u64_to_user_ptr(info.jited_prog_insns);
				2438	ulen = min_t(u32, info.jited_prog_len, ulen);
				2439
				2440	/* for multi-function programs, copy the JITed
				2441	* instructions for all the functions
				2442	*/
				2443	if (prog->aux->func_cnt) {
				2444	u32 len, free, i;
				2445	u8 *img;
				2446
				2447	free = ulen;
				2448	for (i = 0; i < prog->aux->func_cnt; i++) {
				2449	len = prog->aux->func[i]->jited_len;
				2450	len = min_t(u32, len, free);
				2451	img = (u8 *) prog->aux->func[i]->bpf_func;
				2452	if (copy_to_user(uinsns, img, len))
				2453	return -EFAULT;
				2454	uinsns += len;
				2455	free -= len;
				2456	if (!free)
				2457	break;
				2458	}
				2459	} else {
				2460	if (copy_to_user(uinsns, prog->bpf_func, ulen))
				2461	return -EFAULT;
				2462	}
				2463	} else {
				2464	info.jited_prog_insns = 0;
				2465	}
				2466	}
				2467
				2468	ulen = info.nr_jited_ksyms;
				2469	info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
				2470	if (ulen) {
				2471	if (bpf_dump_raw_ok(file->f_cred)) {
				2472	unsigned long ksym_addr;
				2473	u64 __user *user_ksyms;
				2474	u32 i;
				2475
				2476	/* copy the address of the kernel symbol
				2477	* corresponding to each function
				2478	*/
				2479	ulen = min_t(u32, info.nr_jited_ksyms, ulen);
				2480	user_ksyms = u64_to_user_ptr(info.jited_ksyms);
				2481	if (prog->aux->func_cnt) {
				2482	for (i = 0; i < ulen; i++) {
				2483	ksym_addr = (unsigned long)
				2484	prog->aux->func[i]->bpf_func;
				2485	if (put_user((u64) ksym_addr,
				2486	&user_ksyms[i]))
				2487	return -EFAULT;
				2488	}
				2489	} else {
				2490	ksym_addr = (unsigned long) prog->bpf_func;
				2491	if (put_user((u64) ksym_addr, &user_ksyms[0]))
				2492	return -EFAULT;
				2493	}
				2494	} else {
				2495	info.jited_ksyms = 0;
				2496	}
				2497	}
				2498
				2499	ulen = info.nr_jited_func_lens;
				2500	info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
				2501	if (ulen) {
				2502	if (bpf_dump_raw_ok(file->f_cred)) {
				2503	u32 __user *user_lens;
				2504	u32 func_len, i;
				2505
				2506	/* copy the JITed image lengths for each function */
				2507	ulen = min_t(u32, info.nr_jited_func_lens, ulen);
				2508	user_lens = u64_to_user_ptr(info.jited_func_lens);
				2509	if (prog->aux->func_cnt) {
				2510	for (i = 0; i < ulen; i++) {
				2511	func_len =
				2512	prog->aux->func[i]->jited_len;
				2513	if (put_user(func_len, &user_lens[i]))
				2514	return -EFAULT;
				2515	}
				2516	} else {
				2517	func_len = prog->jited_len;
				2518	if (put_user(func_len, &user_lens[0]))
				2519	return -EFAULT;
				2520	}
				2521	} else {
				2522	info.jited_func_lens = 0;
				2523	}
				2524	}
				2525
				2526	if (prog->aux->btf)
				2527	info.btf_id = btf_id(prog->aux->btf);
				2528
				2529	ulen = info.nr_func_info;
				2530	info.nr_func_info = prog->aux->func_info_cnt;
				2531	if (info.nr_func_info && ulen) {
				2532	char __user *user_finfo;
				2533
				2534	user_finfo = u64_to_user_ptr(info.func_info);
				2535	ulen = min_t(u32, info.nr_func_info, ulen);
				2536	if (copy_to_user(user_finfo, prog->aux->func_info,
				2537	info.func_info_rec_size * ulen))
				2538	return -EFAULT;
				2539	}
				2540
				2541	ulen = info.nr_line_info;
				2542	info.nr_line_info = prog->aux->nr_linfo;
				2543	if (info.nr_line_info && ulen) {
				2544	__u8 __user *user_linfo;
				2545
				2546	user_linfo = u64_to_user_ptr(info.line_info);
				2547	ulen = min_t(u32, info.nr_line_info, ulen);
				2548	if (copy_to_user(user_linfo, prog->aux->linfo,
				2549	info.line_info_rec_size * ulen))
				2550	return -EFAULT;
				2551	}
				2552
				2553	ulen = info.nr_jited_line_info;
				2554	if (prog->aux->jited_linfo)
				2555	info.nr_jited_line_info = prog->aux->nr_linfo;
				2556	else
				2557	info.nr_jited_line_info = 0;
				2558	if (info.nr_jited_line_info && ulen) {
				2559	if (bpf_dump_raw_ok(file->f_cred)) {
				2560	__u64 __user *user_linfo;
				2561	u32 i;
				2562
				2563	user_linfo = u64_to_user_ptr(info.jited_line_info);
				2564	ulen = min_t(u32, info.nr_jited_line_info, ulen);
				2565	for (i = 0; i < ulen; i++) {
				2566	if (put_user((__u64)(long)prog->aux->jited_linfo[i],
				2567	&user_linfo[i]))
				2568	return -EFAULT;
				2569	}
				2570	} else {
				2571	info.jited_line_info = 0;
				2572	}
				2573	}
				2574
				2575	ulen = info.nr_prog_tags;
				2576	info.nr_prog_tags = prog->aux->func_cnt ? : 1;
				2577	if (ulen) {
				2578	__u8 __user (*user_prog_tags)[BPF_TAG_SIZE];
				2579	u32 i;
				2580
				2581	user_prog_tags = u64_to_user_ptr(info.prog_tags);
				2582	ulen = min_t(u32, info.nr_prog_tags, ulen);
				2583	if (prog->aux->func_cnt) {
				2584	for (i = 0; i < ulen; i++) {
				2585	if (copy_to_user(user_prog_tags[i],
				2586	prog->aux->func[i]->tag,
				2587	BPF_TAG_SIZE))
				2588	return -EFAULT;
				2589	}
				2590	} else {
				2591	if (copy_to_user(user_prog_tags[0],
				2592	prog->tag, BPF_TAG_SIZE))
				2593	return -EFAULT;
				2594	}
				2595	}
				2596
				2597	done:
				2598	if (copy_to_user(uinfo, &info, info_len) \|\|
				2599	put_user(info_len, &uattr->info.info_len))
				2600	return -EFAULT;
				2601
				2602	return 0;
				2603	}
				2604
				2605	static int bpf_map_get_info_by_fd(struct file *file,
				2606	struct bpf_map *map,
				2607	const union bpf_attr *attr,
				2608	union bpf_attr __user *uattr)
				2609	{
				2610	struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
				2611	struct bpf_map_info info;
				2612	u32 info_len = attr->info.info_len;
				2613	int err;
				2614
				2615	err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
				2616	if (err)
				2617	return err;
				2618	info_len = min_t(u32, sizeof(info), info_len);
				2619
				2620	memset(&info, 0, sizeof(info));
				2621	info.type = map->map_type;
				2622	info.id = map->id;
				2623	info.key_size = map->key_size;
				2624	info.value_size = map->value_size;
				2625	info.max_entries = map->max_entries;
				2626	info.map_flags = map->map_flags;
				2627	memcpy(info.name, map->name, sizeof(map->name));
				2628
				2629	if (map->btf) {
				2630	info.btf_id = btf_id(map->btf);
				2631	info.btf_key_type_id = map->btf_key_type_id;
				2632	info.btf_value_type_id = map->btf_value_type_id;
				2633	}
				2634
				2635	if (bpf_map_is_dev_bound(map)) {
				2636	err = bpf_map_offload_info_fill(&info, map);
				2637	if (err)
				2638	return err;
				2639	}
				2640
				2641	if (copy_to_user(uinfo, &info, info_len) \|\|
				2642	put_user(info_len, &uattr->info.info_len))
				2643	return -EFAULT;
				2644
				2645	return 0;
				2646	}
				2647
				2648	static int bpf_btf_get_info_by_fd(struct file *file,
				2649	struct btf *btf,
				2650	const union bpf_attr *attr,
				2651	union bpf_attr __user *uattr)
				2652	{
				2653	struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info);
				2654	u32 info_len = attr->info.info_len;
				2655	int err;
				2656
				2657	err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
				2658	if (err)
				2659	return err;
				2660
				2661	return btf_get_info_by_fd(btf, attr, uattr);
				2662	}
				2663
				2664	#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
				2665
				2666	static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
				2667	union bpf_attr __user *uattr)
				2668	{
				2669	int ufd = attr->info.bpf_fd;
				2670	struct fd f;
				2671	int err;
				2672
				2673	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
				2674	return -EINVAL;
				2675
				2676	f = fdget(ufd);
				2677	if (!f.file)
				2678	return -EBADFD;
				2679
				2680	if (f.file->f_op == &bpf_prog_fops)
				2681	err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr,
				2682	uattr);
				2683	else if (f.file->f_op == &bpf_map_fops)
				2684	err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr,
				2685	uattr);
				2686	else if (f.file->f_op == &btf_fops)
				2687	err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
				2688	else
				2689	err = -EINVAL;
				2690
				2691	fdput(f);
				2692	return err;
				2693	}
				2694
				2695	#define BPF_BTF_LOAD_LAST_FIELD btf_log_level
				2696
				2697	static int bpf_btf_load(const union bpf_attr *attr)
				2698	{
				2699	if (CHECK_ATTR(BPF_BTF_LOAD))
				2700	return -EINVAL;
				2701
				2702	if (!capable(CAP_SYS_ADMIN))
				2703	return -EPERM;
				2704
				2705	return btf_new_fd(attr);
				2706	}
				2707
				2708	#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
				2709
				2710	static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
				2711	{
				2712	if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
				2713	return -EINVAL;
				2714
				2715	if (!capable(CAP_SYS_ADMIN))
				2716	return -EPERM;
				2717
				2718	return btf_get_fd_by_id(attr->btf_id);
				2719	}
				2720
				2721	static int bpf_task_fd_query_copy(const union bpf_attr *attr,
				2722	union bpf_attr __user *uattr,
				2723	u32 prog_id, u32 fd_type,
				2724	const char *buf, u64 probe_offset,
				2725	u64 probe_addr)
				2726	{
				2727	char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
				2728	u32 len = buf ? strlen(buf) : 0, input_len;
				2729	int err = 0;
				2730
				2731	if (put_user(len, &uattr->task_fd_query.buf_len))
				2732	return -EFAULT;
				2733	input_len = attr->task_fd_query.buf_len;
				2734	if (input_len && ubuf) {
				2735	if (!len) {
				2736	/* nothing to copy, just make ubuf NULL terminated */
				2737	char zero = '\0';
				2738
				2739	if (put_user(zero, ubuf))
				2740	return -EFAULT;
				2741	} else if (input_len >= len + 1) {
				2742	/* ubuf can hold the string with NULL terminator */
				2743	if (copy_to_user(ubuf, buf, len + 1))
				2744	return -EFAULT;
				2745	} else {
				2746	/* ubuf cannot hold the string with NULL terminator,
				2747	* do a partial copy with NULL terminator.
				2748	*/
				2749	char zero = '\0';
				2750
				2751	err = -ENOSPC;
				2752	if (copy_to_user(ubuf, buf, input_len - 1))
				2753	return -EFAULT;
				2754	if (put_user(zero, ubuf + input_len - 1))
				2755	return -EFAULT;
				2756	}
				2757	}
				2758
				2759	if (put_user(prog_id, &uattr->task_fd_query.prog_id) \|\|
				2760	put_user(fd_type, &uattr->task_fd_query.fd_type) \|\|
				2761	put_user(probe_offset, &uattr->task_fd_query.probe_offset) \|\|
				2762	put_user(probe_addr, &uattr->task_fd_query.probe_addr))
				2763	return -EFAULT;
				2764
				2765	return err;
				2766	}
				2767
				2768	#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
				2769
				2770	static int bpf_task_fd_query(const union bpf_attr *attr,
				2771	union bpf_attr __user *uattr)
				2772	{
				2773	pid_t pid = attr->task_fd_query.pid;
				2774	u32 fd = attr->task_fd_query.fd;
				2775	const struct perf_event *event;
				2776	struct files_struct *files;
				2777	struct task_struct *task;
				2778	struct file *file;
				2779	int err;
				2780
				2781	if (CHECK_ATTR(BPF_TASK_FD_QUERY))
				2782	return -EINVAL;
				2783
				2784	if (!capable(CAP_SYS_ADMIN))
				2785	return -EPERM;
				2786
				2787	if (attr->task_fd_query.flags != 0)
				2788	return -EINVAL;
				2789
				2790	rcu_read_lock();
				2791	task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
				2792	rcu_read_unlock();
				2793	if (!task)
				2794	return -ENOENT;
				2795
				2796	files = get_files_struct(task);
				2797	put_task_struct(task);
				2798	if (!files)
				2799	return -ENOENT;
				2800
				2801	err = 0;
				2802	spin_lock(&files->file_lock);
				2803	file = fcheck_files(files, fd);
				2804	if (!file)
				2805	err = -EBADF;
				2806	else
				2807	get_file(file);
				2808	spin_unlock(&files->file_lock);
				2809	put_files_struct(files);
				2810
				2811	if (err)
				2812	goto out;
				2813
				2814	if (file->f_op == &bpf_raw_tp_fops) {
				2815	struct bpf_raw_tracepoint *raw_tp = file->private_data;
				2816	struct bpf_raw_event_map *btp = raw_tp->btp;
				2817
				2818	err = bpf_task_fd_query_copy(attr, uattr,
				2819	raw_tp->prog->aux->id,
				2820	BPF_FD_TYPE_RAW_TRACEPOINT,
				2821	btp->tp->name, 0, 0);
				2822	goto put_file;
				2823	}
				2824
				2825	event = perf_get_event(file);
				2826	if (!IS_ERR(event)) {
				2827	u64 probe_offset, probe_addr;
				2828	u32 prog_id, fd_type;
				2829	const char *buf;
				2830
				2831	err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
				2832	&buf, &probe_offset,
				2833	&probe_addr);
				2834	if (!err)
				2835	err = bpf_task_fd_query_copy(attr, uattr, prog_id,
				2836	fd_type, buf,
				2837	probe_offset,
				2838	probe_addr);
				2839	goto put_file;
				2840	}
				2841
				2842	err = -ENOTSUPP;
				2843	put_file:
				2844	fput(file);
				2845	out:
				2846	return err;
				2847	}
				2848
				2849	SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
				2850	{
				2851	union bpf_attr attr;
				2852	int err;
				2853
				2854	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
				2855	return -EPERM;
				2856
				2857	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
				2858	if (err)
				2859	return err;
				2860	size = min_t(u32, size, sizeof(attr));
				2861
				2862	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
				2863	memset(&attr, 0, sizeof(attr));
				2864	if (copy_from_user(&attr, uattr, size) != 0)
				2865	return -EFAULT;
				2866
				2867	err = security_bpf(cmd, &attr, size);
				2868	if (err < 0)
				2869	return err;
				2870
				2871	switch (cmd) {
				2872	case BPF_MAP_CREATE:
				2873	err = map_create(&attr);
				2874	break;
				2875	case BPF_MAP_LOOKUP_ELEM:
				2876	err = map_lookup_elem(&attr);
				2877	break;
				2878	case BPF_MAP_UPDATE_ELEM:
				2879	err = map_update_elem(&attr);
				2880	break;
				2881	case BPF_MAP_DELETE_ELEM:
				2882	err = map_delete_elem(&attr);
				2883	break;
				2884	case BPF_MAP_GET_NEXT_KEY:
				2885	err = map_get_next_key(&attr);
				2886	break;
				2887	case BPF_MAP_FREEZE:
				2888	err = map_freeze(&attr);
				2889	break;
				2890	case BPF_PROG_LOAD:
				2891	err = bpf_prog_load(&attr, uattr);
				2892	break;
				2893	case BPF_OBJ_PIN:
				2894	err = bpf_obj_pin(&attr);
				2895	break;
				2896	case BPF_OBJ_GET:
				2897	err = bpf_obj_get(&attr);
				2898	break;
				2899	case BPF_PROG_ATTACH:
				2900	err = bpf_prog_attach(&attr);
				2901	break;
				2902	case BPF_PROG_DETACH:
				2903	err = bpf_prog_detach(&attr);
				2904	break;
				2905	case BPF_PROG_QUERY:
				2906	err = bpf_prog_query(&attr, uattr);
				2907	break;
				2908	case BPF_PROG_TEST_RUN:
				2909	err = bpf_prog_test_run(&attr, uattr);
				2910	break;
				2911	case BPF_PROG_GET_NEXT_ID:
				2912	err = bpf_obj_get_next_id(&attr, uattr,
				2913	&prog_idr, &prog_idr_lock);
				2914	break;
				2915	case BPF_MAP_GET_NEXT_ID:
				2916	err = bpf_obj_get_next_id(&attr, uattr,
				2917	&map_idr, &map_idr_lock);
				2918	break;
				2919	case BPF_BTF_GET_NEXT_ID:
				2920	err = bpf_obj_get_next_id(&attr, uattr,
				2921	&btf_idr, &btf_idr_lock);
				2922	break;
				2923	case BPF_PROG_GET_FD_BY_ID:
				2924	err = bpf_prog_get_fd_by_id(&attr);
				2925	break;
				2926	case BPF_MAP_GET_FD_BY_ID:
				2927	err = bpf_map_get_fd_by_id(&attr);
				2928	break;
				2929	case BPF_OBJ_GET_INFO_BY_FD:
				2930	err = bpf_obj_get_info_by_fd(&attr, uattr);
				2931	break;
				2932	case BPF_RAW_TRACEPOINT_OPEN:
				2933	err = bpf_raw_tracepoint_open(&attr);
				2934	break;
				2935	case BPF_BTF_LOAD:
				2936	err = bpf_btf_load(&attr);
				2937	break;
				2938	case BPF_BTF_GET_FD_BY_ID:
				2939	err = bpf_btf_get_fd_by_id(&attr);
				2940	break;
				2941	case BPF_TASK_FD_QUERY:
				2942	err = bpf_task_fd_query(&attr, uattr);
				2943	break;
				2944	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
				2945	err = map_lookup_and_delete_elem(&attr);
				2946	break;
				2947	default:
				2948	err = -EINVAL;
				2949	break;
				2950	}
				2951
				2952	return err;
				2953	}