/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Operations on the network namespace
 */
5#ifndef __NET_NET_NAMESPACE_H
6#define __NET_NET_NAMESPACE_H
7
8#include <linux/atomic.h>
9#include <linux/refcount.h>
10#include <linux/workqueue.h>
11#include <linux/list.h>
12#include <linux/sysctl.h>
13#include <linux/uidgid.h>
14
15#include <net/flow.h>
16#include <net/netns/core.h>
17#include <net/netns/mib.h>
18#include <net/netns/unix.h>
19#include <net/netns/packet.h>
20#include <net/netns/ipv4.h>
21#include <net/netns/ipv6.h>
22#include <net/netns/ieee802154_6lowpan.h>
23#include <net/netns/sctp.h>
24#include <net/netns/dccp.h>
25#include <net/netns/netfilter.h>
26#include <net/netns/x_tables.h>
27#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
28#include <net/netns/conntrack.h>
29#endif
30#include <net/netns/nftables.h>
31#include <net/netns/xfrm.h>
32#include <net/netns/mpls.h>
33#include <net/netns/can.h>
34#include <linux/ns_common.h>
35#include <linux/idr.h>
36#include <linux/skbuff.h>
37
/* Forward declarations: this header only uses pointers to these types. */
struct user_namespace;
struct proc_dir_entry;
struct net_device;
struct sock;
struct ctl_table_header;
struct net_generic;
struct uevent_sock;
struct netns_ipvs;
46
47
/*
 * NETDEV_HASHENTRIES is 2^NETDEV_HASHBITS.
 * NOTE(review): presumably the bucket count of the dev_name_head and
 * dev_index_head tables in struct net - confirm against net/core/dev.c.
 */
#define NETDEV_HASHBITS    8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
50
51struct net {
52 refcount_t passive; /* To decided when the network
53 * namespace should be freed.
54 */
55 refcount_t count; /* To decided when the network
56 * namespace should be shut down.
57 */
58 spinlock_t rules_mod_lock;
59
60 u32 hash_mix;
61 atomic64_t cookie_gen;
62
63 struct list_head list; /* list of network namespaces */
64 struct list_head exit_list; /* To linked to call pernet exit
65 * methods on dead net (
66 * pernet_ops_rwsem read locked),
67 * or to unregister pernet ops
68 * (pernet_ops_rwsem write locked).
69 */
70 struct llist_node cleanup_list; /* namespaces on death row */
71
72 struct user_namespace *user_ns; /* Owning user namespace */
73 struct ucounts *ucounts;
74 spinlock_t nsid_lock;
75 struct idr netns_ids;
76
77 struct ns_common ns;
78
79 struct proc_dir_entry *proc_net;
80 struct proc_dir_entry *proc_net_stat;
81
82#ifdef CONFIG_SYSCTL
83 struct ctl_table_set sysctls;
84#endif
85
86 struct sock *rtnl; /* rtnetlink socket */
87 struct sock *genl_sock;
88
89 struct uevent_sock *uevent_sock; /* uevent socket */
90
91 struct list_head dev_base_head;
92 struct hlist_head *dev_name_head;
93 struct hlist_head *dev_index_head;
94 unsigned int dev_base_seq; /* protected by rtnl_mutex */
95 int ifindex;
96 unsigned int dev_unreg_count;
97
98 /* core fib_rules */
99 struct list_head rules_ops;
100
101 struct list_head fib_notifier_ops; /* Populated by
102 * register_pernet_subsys()
103 */
104 struct net_device *loopback_dev; /* The loopback */
105 struct netns_core core;
106 struct netns_mib mib;
107 struct netns_packet packet;
108 struct netns_unix unx;
109 struct netns_ipv4 ipv4;
110#if IS_ENABLED(CONFIG_IPV6)
111 struct netns_ipv6 ipv6;
112#endif
113#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
114 struct netns_ieee802154_lowpan ieee802154_lowpan;
115#endif
116#if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE)
117 struct netns_sctp sctp;
118#endif
119#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
120 struct netns_dccp dccp;
121#endif
122#ifdef CONFIG_NETFILTER
123 struct netns_nf nf;
124 struct netns_xt xt;
125#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
126 struct netns_ct ct;
127#endif
128#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE)
129 struct netns_nftables nft;
130#endif
131#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
132 struct netns_nf_frag nf_frag;
133 struct ctl_table_header *nf_frag_frags_hdr;
134#endif
135 struct sock *nfnl;
136 struct sock *nfnl_stash;
137#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT)
138 struct list_head nfnl_acct_list;
139#endif
140#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
141 struct list_head nfct_timeout_list;
142#endif
143#endif
144#ifdef CONFIG_WEXT_CORE
145 struct sk_buff_head wext_nlevents;
146#endif
147 struct net_generic __rcu *gen;
148
149 /* Note : following structs are cache line aligned */
150#ifdef CONFIG_XFRM
151 struct netns_xfrm xfrm;
152#endif
153#if IS_ENABLED(CONFIG_IP_VS)
154 struct netns_ipvs *ipvs;
155#endif
156#if IS_ENABLED(CONFIG_MPLS)
157 struct netns_mpls mpls;
158#endif
159#if IS_ENABLED(CONFIG_CAN)
160 struct netns_can can;
161#endif
162 struct sock *diag_nlsk;
163 atomic_t fnhe_genid;
164} __randomize_layout;
165
166#include <linux/seq_file_net.h>
167
/* Init's network namespace; defined elsewhere, only declared here. */
extern struct net init_net;
170
171#ifdef CONFIG_NET_NS
172struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
173 struct net *old_net);
174
175void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
176
177void net_ns_barrier(void);
178#else /* CONFIG_NET_NS */
179#include <linux/sched.h>
180#include <linux/nsproxy.h>
181static inline struct net *copy_net_ns(unsigned long flags,
182 struct user_namespace *user_ns, struct net *old_net)
183{
184 if (flags & CLONE_NEWNET)
185 return ERR_PTR(-EINVAL);
186 return old_net;
187}
188
189static inline void net_ns_get_ownership(const struct net *net,
190 kuid_t *uid, kgid_t *gid)
191{
192 *uid = GLOBAL_ROOT_UID;
193 *gid = GLOBAL_ROOT_GID;
194}
195
/* net_ns_barrier - !CONFIG_NET_NS stand-in: does nothing. */
static inline void net_ns_barrier(void) {}
197#endif /* CONFIG_NET_NS */
198
199
/* Head of the list walked by for_each_net() via struct net's ->list. */
extern struct list_head net_namespace_list;

/* Namespace lookups by pid / file descriptor; implemented out of line. */
struct net *get_net_ns_by_pid(pid_t pid);
struct net *get_net_ns_by_fd(int fd);
204
/*
 * IPX sysctl registration hooks.  With CONFIG_SYSCTL they are implemented
 * out of line; without it they are empty inline stubs (rather than empty
 * function-like macros) so that calls are still type-checked, matching the
 * net_sysctl_init() stubs further down in this header.
 */
#ifdef CONFIG_SYSCTL
void ipx_register_sysctl(void);
void ipx_unregister_sysctl(void);
#else
static inline void ipx_register_sysctl(void) {}
static inline void ipx_unregister_sysctl(void) {}
#endif
212
213#ifdef CONFIG_NET_NS
/* Called by put_net() once the @count reference count has reached zero. */
void __put_net(struct net *net);
215
216static inline struct net *get_net(struct net *net)
217{
218 refcount_inc(&net->count);
219 return net;
220}
221
222static inline struct net *maybe_get_net(struct net *net)
223{
224 /* Used when we know struct net exists but we
225 * aren't guaranteed a previous reference count
226 * exists. If the reference count is zero this
227 * function fails and returns NULL.
228 */
229 if (!refcount_inc_not_zero(&net->count))
230 net = NULL;
231 return net;
232}
233
234static inline void put_net(struct net *net)
235{
236 if (refcount_dec_and_test(&net->count))
237 __put_net(net);
238}
239
/* net_eq - non-zero iff @net1 and @net2 are the same namespace object. */
static inline
int net_eq(const struct net *net1, const struct net *net2)
{
	return net1 == net2;
}
245
246static inline int check_net(const struct net *net)
247{
248 return refcount_read(&net->count) != 0;
249}
250
/*
 * Implemented out of line.  Takes an untyped pointer;
 * NOTE(review): presumably a struct net * - confirm at the definition.
 */
void net_drop_ns(void *p);
252
253#else
254
/* get_net - !CONFIG_NET_NS stand-in: no counting, returns @net as is. */
static inline struct net *get_net(struct net *net)
{
	return net;
}
259
/* put_net - !CONFIG_NET_NS stand-in: does nothing. */
static inline void put_net(struct net *net)
{
}
263
/*
 * maybe_get_net - !CONFIG_NET_NS stand-in: never fails, returns @net as is.
 */
static inline struct net *maybe_get_net(struct net *net)
{
	return net;
}
268
/*
 * net_eq - !CONFIG_NET_NS stand-in: always reports equality (returns 1),
 * without looking at its arguments.
 */
static inline
int net_eq(const struct net *net1, const struct net *net2)
{
	return 1;
}
274
/* check_net - !CONFIG_NET_NS stand-in: always returns 1. */
static inline int check_net(const struct net *net)
{
	return 1;
}
279
/* !CONFIG_NET_NS: there is no drop function; the name evaluates to NULL. */
#define net_drop_ns NULL
281#endif
282
283
/*
 * possible_net_t - a namespace reference that only takes up space when
 * CONFIG_NET_NS is enabled.  Without it the struct has no members; use
 * write_pnet()/read_pnet() instead of touching ->net directly.
 */
typedef struct {
#ifdef CONFIG_NET_NS
	struct net *net;
#endif
} possible_net_t;
289
290static inline void write_pnet(possible_net_t *pnet, struct net *net)
291{
292#ifdef CONFIG_NET_NS
293 pnet->net = net;
294#endif
295}
296
297static inline struct net *read_pnet(const possible_net_t *pnet)
298{
299#ifdef CONFIG_NET_NS
300 return pnet->net;
301#else
302 return &init_net;
303#endif
304}
305
/*
 * Iterate over every struct net linked on net_namespace_list through its
 * ->list member; VAR is the struct net * cursor.
 *
 * Protected by net_rwsem
 */
#define for_each_net(VAR)				\
	list_for_each_entry(VAR, &net_namespace_list, list)

/* Same walk using the RCU list iterator. */
#define for_each_net_rcu(VAR)				\
	list_for_each_entry_rcu(VAR, &net_namespace_list, list)
312
/*
 * Section annotations for per-namespace init/exit code and data.
 *
 * With CONFIG_NET_NS they expand to nothing; without it they map onto the
 * generic __init / __ref / __initdata / __initconst annotations.
 * NOTE(review): presumably because with namespaces this code runs again
 * for every new namespace and so must not be discarded after boot -
 * confirm.
 */
#ifdef CONFIG_NET_NS
#define __net_init
#define __net_exit
#define __net_initdata
#define __net_initconst
#else
#define __net_init	__init
#define __net_exit	__ref
#define __net_initdata	__initdata
#define __net_initconst	__initconst
#endif
324
325int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp);
326int peernet2id(struct net *net, struct net *peer);
327bool peernet_has_id(struct net *net, struct net *peer);
328struct net *get_net_ns_by_id(struct net *net, int id);
329
330struct pernet_operations {
331 struct list_head list;
332 /*
333 * Below methods are called without any exclusive locks.
334 * More than one net may be constructed and destructed
335 * in parallel on several cpus. Every pernet_operations
336 * have to keep in mind all other pernet_operations and
337 * to introduce a locking, if they share common resources.
338 *
339 * The only time they are called with exclusive lock is
340 * from register_pernet_subsys(), unregister_pernet_subsys()
341 * register_pernet_device() and unregister_pernet_device().
342 *
343 * Exit methods using blocking RCU primitives, such as
344 * synchronize_rcu(), should be implemented via exit_batch.
345 * Then, destruction of a group of net requires single
346 * synchronize_rcu() related to these pernet_operations,
347 * instead of separate synchronize_rcu() for every net.
348 * Please, avoid synchronize_rcu() at all, where it's possible.
349 */
350 int (*init)(struct net *net);
351 void (*exit)(struct net *net);
352 void (*exit_batch)(struct list_head *net_exit_list);
353 unsigned int *id;
354 size_t size;
355};
356
/*
 * Use these carefully.  If you implement a network device and it
 * needs per network namespace operations use device pernet operations,
 * otherwise use pernet subsys operations.
 *
 * Network interfaces need to be removed from a dying netns _before_
 * subsys notifiers can be called, as most of the network code cleanup
 * (which is done from subsys notifiers) runs with the assumption that
 * dev_remove_pack has been called so no new packets will arrive during
 * and after the cleanup functions have been called.  dev_remove_pack
 * is not per namespace so instead the guarantee of no more packets
 * arriving in a network namespace is provided by ensuring that all
 * network devices and all sockets have left the network namespace
 * before the cleanup methods are called.
 *
 * For the longest time the ipv4 icmp code was registered as a pernet
 * device, which caused kernel oopses and panics during network
 * namespace cleanup.  So please don't get this wrong.
 */
int register_pernet_subsys(struct pernet_operations *ops);
void unregister_pernet_subsys(struct pernet_operations *ops);
int register_pernet_device(struct pernet_operations *ops);
void unregister_pernet_device(struct pernet_operations *ops);
380
/* Forward declarations for the sysctl prototypes that follow. */
struct ctl_table;
struct ctl_table_header;
383
/*
 * Per-namespace sysctl registration.
 *
 * With CONFIG_SYSCTL these are implemented out of line.  Without it the
 * inline stubs below are used: net_sysctl_init() returns 0,
 * register_net_sysctl() returns NULL and unregister_net_sysctl_table()
 * does nothing.
 */
#ifdef CONFIG_SYSCTL
int net_sysctl_init(void);
struct ctl_table_header *register_net_sysctl(struct net *net, const char *path,
					     struct ctl_table *table);
void unregister_net_sysctl_table(struct ctl_table_header *header);
#else
static inline int net_sysctl_init(void) { return 0; }
static inline struct ctl_table_header *register_net_sysctl(struct net *net,
	const char *path, struct ctl_table *table)
{
	return NULL;
}
static inline void unregister_net_sysctl_table(struct ctl_table_header *header)
{
}
#endif
400
401static inline int rt_genid_ipv4(struct net *net)
402{
403 return atomic_read(&net->ipv4.rt_genid);
404}
405
406static inline void rt_genid_bump_ipv4(struct net *net)
407{
408 atomic_inc(&net->ipv4.rt_genid);
409}
410
/* Optional hook, defined elsewhere; NULL when no flush function is set. */
extern void (*__fib6_flush_trees)(struct net *net);

/*
 * rt_genid_bump_ipv6 - IPv6 counterpart of rt_genid_bump_ipv4().
 *
 * Calls the __fib6_flush_trees hook for @net if one is set; otherwise
 * does nothing.
 */
static inline void rt_genid_bump_ipv6(struct net *net)
{
	if (__fib6_flush_trees)
		__fib6_flush_trees(net);
}
417
418#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
419static inline struct netns_ieee802154_lowpan *
420net_ieee802154_lowpan(struct net *net)
421{
422 return &net->ieee802154_lowpan;
423}
424#endif
425
/*
 * rt_genid_bump_all - for callers who don't really care about whether
 * it's IPv4 or IPv6: bump both, IPv4 first.
 */
static inline void rt_genid_bump_all(struct net *net)
{
	rt_genid_bump_ipv4(net);
	rt_genid_bump_ipv6(net);
}
432
433static inline int fnhe_genid(struct net *net)
434{
435 return atomic_read(&net->fnhe_genid);
436}
437
438static inline void fnhe_genid_bump(struct net *net)
439{
440 atomic_inc(&net->fnhe_genid);
441}
442
443#endif /* __NET_NET_NAMESPACE_H */