Blame - ap/os/linux/linux-3.4.x/net/ipv4/icmp.c - T106_DC

blob: 9a37732e8f32ea4ff8500fcb1bdac535507775d9 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/*
				2	* NET3: Implementation of the ICMP protocol layer.
				3	*
				4	* Alan Cox, <alan@lxorguk.ukuu.org.uk>
				5	*
				6	* This program is free software; you can redistribute it and/or
				7	* modify it under the terms of the GNU General Public License
				8	* as published by the Free Software Foundation; either version
				9	* 2 of the License, or (at your option) any later version.
				10	*
				11	* Some of the function names and the icmp unreach table for this
				12	* module were derived from [icmp.c 1.0.11 06/02/93] by
				13	* Ross Biro, Fred N. van Kempen, Mark Evans, Alan Cox, Gerhard Koerting.
				14	* Other than that this module is a complete rewrite.
				15	*
				16	* Fixes:
				17	* Clemens Fruhwirth : introduce global icmp rate limiting
				18	* with icmp type masking ability instead
				19	* of broken per type icmp timeouts.
				20	* Mike Shaver : RFC1122 checks.
				21	* Alan Cox : Multicast ping reply as self.
				22	* Alan Cox : Fix atomicity lockup in ip_build_xmit
				23	* call.
				24	* Alan Cox : Added 216,128 byte paths to the MTU
				25	* code.
				26	* Martin Mares : RFC1812 checks.
				27	* Martin Mares : Can be configured to follow redirects
				28	* if acting as a router _without_ a
				29	* routing protocol (RFC 1812).
				30	* Martin Mares : Echo requests may be configured to
				31	* be ignored (RFC 1812).
				32	* Martin Mares : Limitation of ICMP error message
				33	* transmit rate (RFC 1812).
				34	* Martin Mares : TOS and Precedence set correctly
				35	* (RFC 1812).
				36	* Martin Mares : Now copying as much data from the
				37	* original packet as we can without
				38	* exceeding 576 bytes (RFC 1812).
				39	* Willy Konynenberg : Transparent proxying support.
				40	* Keith Owens : RFC1191 correction for 4.2BSD based
				41	* path MTU bug.
				42	* Thomas Quinot : ICMP Dest Unreach codes up to 15 are
				43	* valid (RFC 1812).
				44	* Andi Kleen : Check all packet lengths properly
				45	* and moved all kfree_skb() up to
				46	* icmp_rcv.
				47	* Andi Kleen : Move the rate limit bookkeeping
				48	* into the dest entry and use a token
				49	* bucket filter (thanks to ANK). Make
				50	* the rates sysctl configurable.
				51	* Yu Tianli : Fixed two ugly bugs in icmp_send
				52	* - IP option length was accounted wrongly
				53	* - ICMP header length was not accounted
				54	* at all.
				55	* Tristan Greaves : Added sysctl option to ignore bogus
				56	* broadcast responses from broken routers.
				57	*
				58	* To Fix:
				59	*
				60	* - Should use skb_pull() instead of all the manual checking.
				61	* This would also greatly simplify some upper layer error handlers. --AK
				62	*
				63	*/
				64
				65	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
				66
				67	#include <linux/module.h>
				68	#include <linux/types.h>
				69	#include <linux/jiffies.h>
				70	#include <linux/kernel.h>
				71	#include <linux/fcntl.h>
				72	#include <linux/sysrq.h>
				73	#include <linux/socket.h>
				74	#include <linux/in.h>
				75	#include <linux/inet.h>
				76	#include <linux/inetdevice.h>
				77	#include <linux/netdevice.h>
				78	#include <linux/string.h>
				79	#include <linux/netfilter_ipv4.h>
				80	#include <linux/slab.h>
				81	#include <net/snmp.h>
				82	#include <net/ip.h>
				83	#include <net/route.h>
				84	#include <net/protocol.h>
				85	#include <net/icmp.h>
				86	#include <net/tcp.h>
				87	#include <net/udp.h>
				88	#include <net/raw.h>
				89	#include <net/ping.h>
				90	#include <linux/skbuff.h>
				91	#include <net/sock.h>
				92	#include <linux/errno.h>
				93	#include <linux/timer.h>
				94	#include <linux/init.h>
				95	#include <asm/uaccess.h>
				96	#include <net/checksum.h>
				97	#include <net/xfrm.h>
				98	#include <net/inet_common.h>
				99
				100	/*
				101	* Build xmit assembly blocks
				102	*/
				103
				104	struct icmp_bxm {
				105	struct sk_buff *skb;
				106	int offset;
				107	int data_len;
				108
				109	struct {
				110	struct icmphdr icmph;
				111	__be32 times[3];
				112	} data;
				113	int head_len;
				114	struct ip_options_data replyopts;
				115	};
				116
				117	/* An array of errno for error messages from dest unreach. */
				118	/* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */
				119
				/*
				 * Maps each ICMP_DEST_UNREACH code (used as the array index) to the errno
				 * reported for it and to whether that error is treated as fatal.
				 */
				const struct icmp_err icmp_err_convert[] = {
					{
						.errno = ENETUNREACH,	/* ICMP_NET_UNREACH */
						.fatal = 0,
					},
					{
						.errno = EHOSTUNREACH,	/* ICMP_HOST_UNREACH */
						.fatal = 0,
					},
					{
						.errno = ENOPROTOOPT,	/* ICMP_PROT_UNREACH */
						.fatal = 1,
					},
					{
						.errno = ECONNREFUSED,	/* ICMP_PORT_UNREACH */
						.fatal = 1,
					},
					{
						.errno = EMSGSIZE,	/* ICMP_FRAG_NEEDED */
						.fatal = 0,
					},
					{
						.errno = EOPNOTSUPP,	/* ICMP_SR_FAILED */
						.fatal = 0,
					},
					{
						.errno = ENETUNREACH,	/* ICMP_NET_UNKNOWN */
						.fatal = 1,
					},
					{
						.errno = EHOSTDOWN,	/* ICMP_HOST_UNKNOWN */
						.fatal = 1,
					},
					{
						.errno = ENONET,	/* ICMP_HOST_ISOLATED */
						.fatal = 1,
					},
					{
						.errno = ENETUNREACH,	/* ICMP_NET_ANO */
						.fatal = 1,
					},
					{
						.errno = EHOSTUNREACH,	/* ICMP_HOST_ANO */
						.fatal = 1,
					},
					{
						.errno = ENETUNREACH,	/* ICMP_NET_UNR_TOS */
						.fatal = 0,
					},
					{
						.errno = EHOSTUNREACH,	/* ICMP_HOST_UNR_TOS */
						.fatal = 0,
					},
					{
						.errno = EHOSTUNREACH,	/* ICMP_PKT_FILTERED */
						.fatal = 1,
					},
					{
						.errno = EHOSTUNREACH,	/* ICMP_PREC_VIOLATION */
						.fatal = 1,
					},
					{
						.errno = EHOSTUNREACH,	/* ICMP_PREC_CUTOFF */
						.fatal = 1,
					},
				};
				EXPORT_SYMBOL(icmp_err_convert);
				187
				188	/*
				189	* ICMP control array. This specifies what to do with each ICMP.
				190	*/
				191
				192	struct icmp_control {
				193	void (handler)(struct sk_buff skb);
				194	short error; /* This ICMP is classed as an error message */
				195	};
				196
				197	static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
				198
				199	/*
				200	* The ICMP socket(s). This is the most convenient way to flow control
				201	* our ICMP output as well as maintain a clean interface throughout
				202	* all layers. All Socketless IP sends will soon be gone.
				203	*
				204	* On SMP we have one ICMP socket per-cpu.
				205	*/
				206	static struct sock icmp_sk(struct net net)
				207	{
				208	return net->ipv4.icmp_sk[smp_processor_id()];
				209	}
				210
				211	static inline struct sock icmp_xmit_lock(struct net net)
				212	{
				213	struct sock *sk;
				214
				215	local_bh_disable();
				216
				217	sk = icmp_sk(net);
				218
				219	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
				220	/* This can happen if the output path signals a
				221	* dst_link_failure() for an outgoing ICMP packet.
				222	*/
				223	local_bh_enable();
				224	return NULL;
				225	}
				226	return sk;
				227	}
				228
				229	static inline void icmp_xmit_unlock(struct sock *sk)
				230	{
				231	spin_unlock_bh(&sk->sk_lock.slock);
				232	}
				233
				234	/*
				235	* Send an ICMP frame.
				236	*/
				237
				238	static inline bool icmpv4_xrlim_allow(struct net net, struct rtable rt,
				239	struct flowi4 *fl4, int type, int code)
				240	{
				241	struct dst_entry *dst = &rt->dst;
				242	bool rc = true;
				243
				244	if (type > NR_ICMP_TYPES)
				245	goto out;
				246
				247	/* Don't limit PMTU discovery. */
				248	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
				249	goto out;
				250
				251	/* No rate limit on loopback */
				252	if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
				253	goto out;
				254
				255	/* Limit if icmp type is enabled in ratemask. */
				256	if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
				257	if (!rt->peer)
				258	rt_bind_peer(rt, fl4->daddr, 1);
				259	rc = inet_peer_xrlim_allow(rt->peer,
				260	net->ipv4.sysctl_icmp_ratelimit);
				261	}
				262	out:
				263	return rc;
				264	}
				265
				266	/*
				267	* Maintain the counters used in the SNMP statistics for outgoing ICMP
				268	*/
				269	void icmp_out_count(struct net *net, unsigned char type)
				270	{
				271	ICMPMSGOUT_INC_STATS(net, type);
				272	ICMP_INC_STATS(net, ICMP_MIB_OUTMSGS);
				273	}
				274
				275	/*
				276	* Checksum each fragment, and on the first include the headers and final
				277	* checksum.
				278	*/
				279	static int icmp_glue_bits(void from, char to, int offset, int len, int odd,
				280	struct sk_buff *skb)
				281	{
				282	struct icmp_bxm icmp_param = (struct icmp_bxm )from;
				283	__wsum csum;
				284
				285	csum = skb_copy_and_csum_bits(icmp_param->skb,
				286	icmp_param->offset + offset,
				287	to, len, 0);
				288
				289	skb->csum = csum_block_add(skb->csum, csum, odd);
				290	if (icmp_pointers[icmp_param->data.icmph.type].error)
				291	nf_ct_attach(skb, icmp_param->skb);
				292	return 0;
				293	}
				294
				295	static void icmp_push_reply(struct icmp_bxm *icmp_param,
				296	struct flowi4 *fl4,
				297	struct ipcm_cookie ipc, struct rtable *rt)
				298	{
				299	struct sock *sk;
				300	struct sk_buff *skb;
				301
				302	sk = icmp_sk(dev_net((*rt)->dst.dev));
				303	if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
				304	icmp_param->data_len+icmp_param->head_len,
				305	icmp_param->head_len,
				306	ipc, rt, MSG_DONTWAIT) < 0) {
				307	ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS);
				308	ip_flush_pending_frames(sk);
				309	} else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
				310	struct icmphdr *icmph = icmp_hdr(skb);
				311	__wsum csum = 0;
				312	struct sk_buff *skb1;
				313
				314	skb_queue_walk(&sk->sk_write_queue, skb1) {
				315	csum = csum_add(csum, skb1->csum);
				316	}
				317	csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
				318	(char *)icmph,
				319	icmp_param->head_len, csum);
				320	icmph->checksum = csum_fold(csum);
				321	skb->ip_summed = CHECKSUM_NONE;
				322	ip_push_pending_frames(sk, fl4);
				323	}
				324	}
				325
				326	/*
				327	* Driving logic for building and sending ICMP messages.
				328	*/
				329
				330	static void icmp_reply(struct icmp_bxm icmp_param, struct sk_buff skb)
				331	{
				332	struct ipcm_cookie ipc;
				333	struct rtable *rt = skb_rtable(skb);
				334	struct net *net = dev_net(rt->dst.dev);
				335	struct flowi4 fl4;
				336	struct sock *sk;
				337	struct inet_sock *inet;
				338	__be32 daddr;
				339
				340	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
				341	return;
				342
				343	sk = icmp_xmit_lock(net);
				344	if (sk == NULL)
				345	return;
				346	inet = inet_sk(sk);
				347
				348	icmp_param->data.icmph.checksum = 0;
				349
				350	inet->tos = ip_hdr(skb)->tos;
				351	daddr = ipc.addr = ip_hdr(skb)->saddr;
				352	ipc.opt = NULL;
				353	ipc.tx_flags = 0;
				354	if (icmp_param->replyopts.opt.opt.optlen) {
				355	ipc.opt = &icmp_param->replyopts.opt;
				356	if (ipc.opt->opt.srr)
				357	daddr = icmp_param->replyopts.opt.opt.faddr;
				358	}
				359	memset(&fl4, 0, sizeof(fl4));
				360	fl4.daddr = daddr;
				361	fl4.saddr = rt->rt_spec_dst;
				362	fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
				363	fl4.flowi4_proto = IPPROTO_ICMP;
				364	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
				365	rt = ip_route_output_key(net, &fl4);
				366	if (IS_ERR(rt))
				367	goto out_unlock;
				368	if (icmpv4_xrlim_allow(net, rt, &fl4, icmp_param->data.icmph.type,
				369	icmp_param->data.icmph.code))
				370	icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
				371	ip_rt_put(rt);
				372	out_unlock:
				373	icmp_xmit_unlock(sk);
				374	}
				375
				376	static struct rtable icmp_route_lookup(struct net net,
				377	struct flowi4 *fl4,
				378	struct sk_buff *skb_in,
				379	const struct iphdr *iph,
				380	__be32 saddr, u8 tos,
				381	int type, int code,
				382	struct icmp_bxm *param)
				383	{
				384	struct rtable rt, rt2;
				385	struct flowi4 fl4_dec;
				386	int err;
				387
				388	memset(fl4, 0, sizeof(*fl4));
				389	fl4->daddr = (param->replyopts.opt.opt.srr ?
				390	param->replyopts.opt.opt.faddr : iph->saddr);
				391	fl4->saddr = saddr;
				392	fl4->flowi4_tos = RT_TOS(tos);
				393	fl4->flowi4_proto = IPPROTO_ICMP;
				394	fl4->fl4_icmp_type = type;
				395	fl4->fl4_icmp_code = code;
				396	security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
				397	rt = __ip_route_output_key(net, fl4);
				398	if (IS_ERR(rt))
				399	return rt;
				400
				401	/* No need to clone since we're just using its address. */
				402	rt2 = rt;
				403
				404	rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
				405	flowi4_to_flowi(fl4), NULL, 0);
				406	if (!IS_ERR(rt)) {
				407	if (rt != rt2)
				408	return rt;
				409	} else if (PTR_ERR(rt) == -EPERM) {
				410	rt = NULL;
				411	} else
				412	return rt;
				413
				414	err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
				415	if (err)
				416	goto relookup_failed;
				417
				418	if (inet_addr_type(net, fl4_dec.saddr) == RTN_LOCAL) {
				419	rt2 = __ip_route_output_key(net, &fl4_dec);
				420	if (IS_ERR(rt2))
				421	err = PTR_ERR(rt2);
				422	} else {
				423	struct flowi4 fl4_2 = {};
				424	unsigned long orefdst;
				425
				426	fl4_2.daddr = fl4_dec.saddr;
				427	rt2 = ip_route_output_key(net, &fl4_2);
				428	if (IS_ERR(rt2)) {
				429	err = PTR_ERR(rt2);
				430	goto relookup_failed;
				431	}
				432	/* Ugh! */
				433	orefdst = skb_in->_skb_refdst; /* save old refdst */
				434	err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
				435	RT_TOS(tos), rt2->dst.dev);
				436
				437	dst_release(&rt2->dst);
				438	rt2 = skb_rtable(skb_in);
				439	skb_in->_skb_refdst = orefdst; /* restore old refdst */
				440	}
				441
				442	if (err)
				443	goto relookup_failed;
				444
				445	rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
				446	flowi4_to_flowi(&fl4_dec), NULL,
				447	XFRM_LOOKUP_ICMP);
				448	if (!IS_ERR(rt2)) {
				449	dst_release(&rt->dst);
				450	memcpy(fl4, &fl4_dec, sizeof(*fl4));
				451	rt = rt2;
				452	} else if (PTR_ERR(rt2) == -EPERM) {
				453	if (rt)
				454	dst_release(&rt->dst);
				455	return rt2;
				456	} else {
				457	err = PTR_ERR(rt2);
				458	goto relookup_failed;
				459	}
				460	return rt;
				461
				462	relookup_failed:
				463	if (rt)
				464	return rt;
				465	return ERR_PTR(err);
				466	}
				467
				468	/*
				469	* Send an ICMP message in response to a situation
				470	*
				471	* RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header.
				472	* MAY send more (we do).
				473	* MUST NOT change this header information.
				474	* MUST NOT reply to a multicast/broadcast IP address.
				475	* MUST NOT reply to a multicast/broadcast MAC address.
				476	* MUST reply to only the first fragment.
				477	*/
				478
				479	void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
				480	{
				481	struct iphdr *iph;
				482	int room;
				483	struct icmp_bxm icmp_param;
				484	struct rtable *rt = skb_rtable(skb_in);
				485	struct ipcm_cookie ipc;
				486	struct flowi4 fl4;
				487	__be32 saddr;
				488	u8 tos;
				489	struct net *net;
				490	struct sock *sk;
				491
				492	if (!rt)
				493	goto out;
				494	net = dev_net(rt->dst.dev);
				495
				496	/*
				497	* Find the original header. It is expected to be valid, of course.
				498	* Check this, icmp_send is called from the most obscure devices
				499	* sometimes.
				500	*/
				501	iph = ip_hdr(skb_in);
				502
				503	if ((u8 *)iph < skb_in->head \|\|
				504	(skb_in->network_header + sizeof(*iph)) > skb_in->tail)
				505	goto out;
				506
				507	/*
				508	* No replies to physical multicast/broadcast
				509	*/
				510	if (skb_in->pkt_type != PACKET_HOST)
				511	goto out;
				512
				513	/*
				514	* Now check at the protocol level
				515	*/
				516	if (rt->rt_flags & (RTCF_BROADCAST \| RTCF_MULTICAST))
				517	goto out;
				518
				519	/*
				520	* Only reply to fragment 0. We byte re-order the constant
				521	* mask for efficiency.
				522	*/
				523	if (iph->frag_off & htons(IP_OFFSET))
				524	goto out;
				525
				526	/*
				527	* If we send an ICMP error to an ICMP error a mess would result..
				528	*/
				529	if (icmp_pointers[type].error) {
				530	/*
				531	* We are an error, check if we are replying to an
				532	* ICMP error
				533	*/
				534	if (iph->protocol == IPPROTO_ICMP) {
				535	u8 _inner_type, *itp;
				536
				537	itp = skb_header_pointer(skb_in,
				538	skb_network_header(skb_in) +
				539	(iph->ihl << 2) +
				540	offsetof(struct icmphdr,
				541	type) -
				542	skb_in->data,
				543	sizeof(_inner_type),
				544	&_inner_type);
				545	if (itp == NULL)
				546	goto out;
				547
				548	/*
				549	* Assume any unknown ICMP type is an error. This
				550	* isn't specified by the RFC, but think about it..
				551	*/
				552	if (*itp > NR_ICMP_TYPES \|\|
				553	icmp_pointers[*itp].error)
				554	goto out;
				555	}
				556	}
				557
				558	sk = icmp_xmit_lock(net);
				559	if (sk == NULL)
				560	return;
				561
				562	/*
				563	* Construct source address and options.
				564	*/
				565
				566	saddr = iph->daddr;
				567	if (!(rt->rt_flags & RTCF_LOCAL)) {
				568	struct net_device *dev = NULL;
				569
				570	rcu_read_lock();
				571	if (rt_is_input_route(rt) &&
				572	net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
				573	dev = dev_get_by_index_rcu(net, rt->rt_iif);
				574
				575	if (dev)
				576	saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
				577	else
				578	saddr = 0;
				579	rcu_read_unlock();
				580	}
				581
				582	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) \|
				583	IPTOS_PREC_INTERNETCONTROL) :
				584	iph->tos;
				585
				586	if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
				587	goto out_unlock;
				588
				589
				590	/*
				591	* Prepare data for ICMP header.
				592	*/
				593
				594	icmp_param.data.icmph.type = type;
				595	icmp_param.data.icmph.code = code;
				596	icmp_param.data.icmph.un.gateway = info;
				597	icmp_param.data.icmph.checksum = 0;
				598	icmp_param.skb = skb_in;
				599	icmp_param.offset = skb_network_offset(skb_in);
				600	inet_sk(sk)->tos = tos;
				601	ipc.addr = iph->saddr;
				602	ipc.opt = &icmp_param.replyopts.opt;
				603	ipc.tx_flags = 0;
				604
				605	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
				606	type, code, &icmp_param);
				607	if (IS_ERR(rt))
				608	goto out_unlock;
				609
				610	if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
				611	goto ende;
				612
				613	/* RFC says return as much as we can without exceeding 576 bytes. */
				614
				615	room = dst_mtu(&rt->dst);
				616	if (room > 576)
				617	room = 576;
				618	room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
				619	room -= sizeof(struct icmphdr);
				620
				621	icmp_param.data_len = skb_in->len - icmp_param.offset;
				622	if (icmp_param.data_len > room)
				623	icmp_param.data_len = room;
				624	icmp_param.head_len = sizeof(struct icmphdr);
				625
				626	icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
				627	ende:
				628	ip_rt_put(rt);
				629	out_unlock:
				630	icmp_xmit_unlock(sk);
				631	out:;
				632	}
				633	EXPORT_SYMBOL(icmp_send);
				634
				635
				636	/*
				637	* Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH.
				638	*/
				639
				640	static void icmp_unreach(struct sk_buff *skb)
				641	{
				642	const struct iphdr *iph;
				643	struct icmphdr *icmph;
				644	int hash, protocol;
				645	const struct net_protocol *ipprot;
				646	u32 info = 0;
				647	struct net *net;
				648
				649	net = dev_net(skb_dst(skb)->dev);
				650
				651	/*
				652	* Incomplete header ?
				653	* Only checks for the IP header, there should be an
				654	* additional check for longer headers in upper levels.
				655	*/
				656
				657	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
				658	goto out_err;
				659
				660	icmph = icmp_hdr(skb);
				661	iph = (const struct iphdr *)skb->data;
				662
				663	if (iph->ihl < 5) /* Mangled header, drop. */
				664	goto out_err;
				665
				666	if (icmph->type == ICMP_DEST_UNREACH) {
				667	switch (icmph->code & 15) {
				668	case ICMP_NET_UNREACH:
				669	case ICMP_HOST_UNREACH:
				670	case ICMP_PROT_UNREACH:
				671	case ICMP_PORT_UNREACH:
				672	break;
				673	case ICMP_FRAG_NEEDED:
				674	if (ipv4_config.no_pmtu_disc) {
				675	LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"),
				676	&iph->daddr);
				677	} else {
				678	info = ip_rt_frag_needed(net, iph,
				679	ntohs(icmph->un.frag.mtu),
				680	skb->dev);
				681	if (!info)
				682	goto out;
				683	}
				684	break;
				685	case ICMP_SR_FAILED:
				686	LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: Source Route Failed\n"),
				687	&iph->daddr);
				688	break;
				689	default:
				690	break;
				691	}
				692	if (icmph->code > NR_ICMP_UNREACH)
				693	goto out;
				694	} else if (icmph->type == ICMP_PARAMETERPROB)
				695	info = ntohl(icmph->un.gateway) >> 24;
				696
				697	/*
				698	* Throw it at our lower layers
				699	*
				700	* RFC 1122: 3.2.2 MUST extract the protocol ID from the passed
				701	* header.
				702	* RFC 1122: 3.2.2.1 MUST pass ICMP unreach messages to the
				703	* transport layer.
				704	* RFC 1122: 3.2.2.2 MUST pass ICMP time expired messages to
				705	* transport layer.
				706	*/
				707
				708	/*
				709	* Check the other end isn't violating RFC 1122. Some routers send
				710	* bogus responses to broadcast frames. If you see this message
				711	* first check your netmask matches at both ends, if it does then
				712	* get the other vendor to fix their kit.
				713	*/
				714
				715	if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
				716	inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
				717	if (net_ratelimit())
				718	pr_warn("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
				719	&ip_hdr(skb)->saddr,
				720	icmph->type, icmph->code,
				721	&iph->daddr, skb->dev->name);
				722	goto out;
				723	}
				724
				725	/* Checkin full IP header plus 8 bytes of protocol to
				726	* avoid additional coding at protocol handlers.
				727	*/
				728	if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
				729	goto out;
				730
				731	iph = (const struct iphdr *)skb->data;
				732	protocol = iph->protocol;
				733
				734	/*
				735	* Deliver ICMP message to raw sockets. Pretty useless feature?
				736	*/
				737	raw_icmp_error(skb, protocol, info);
				738
				739	hash = protocol & (MAX_INET_PROTOS - 1);
				740	rcu_read_lock();
				741	ipprot = rcu_dereference(inet_protos[hash]);
				742	if (ipprot && ipprot->err_handler)
				743	ipprot->err_handler(skb, info);
				744	rcu_read_unlock();
				745
				746	out:
				747	return;
				748	out_err:
				749	ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
				750	goto out;
				751	}
				752
				753
				754	/*
				755	* Handle ICMP_REDIRECT.
				756	*/
				757
				758	static void icmp_redirect(struct sk_buff *skb)
				759	{
				760	const struct iphdr *iph;
				761
				762	if (skb->len < sizeof(struct iphdr))
				763	goto out_err;
				764
				765	/*
				766	* Get the copied header of the packet that caused the redirect
				767	*/
				768	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
				769	goto out;
				770
				771	iph = (const struct iphdr *)skb->data;
				772
				773	switch (icmp_hdr(skb)->code & 7) {
				774	case ICMP_REDIR_NET:
				775	case ICMP_REDIR_NETTOS:
				776	/*
				777	* As per RFC recommendations now handle it as a host redirect.
				778	*/
				779	case ICMP_REDIR_HOST:
				780	case ICMP_REDIR_HOSTTOS:
				781	ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr,
				782	icmp_hdr(skb)->un.gateway,
				783	iph->saddr, skb->dev);
				784	break;
				785	}
				786
				787	/* Ping wants to see redirects.
				788	* Let's pretend they are errors of sorts... */
				789	if (iph->protocol == IPPROTO_ICMP &&
				790	iph->ihl >= 5 &&
				791	pskb_may_pull(skb, (iph->ihl<<2)+8)) {
				792	ping_err(skb, icmp_hdr(skb)->un.gateway);
				793	}
				794
				795	out:
				796	return;
				797	out_err:
				798	ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
				799	goto out;
				800	}
				801
				802	/*
				803	* 32bit and 64bit have different timestamp length, so we check for
				804	* the cookie at offset 20 and verify it is repeated at offset 50
				805	*/
				806	#define CO_POS0 20
				807	#define CO_POS1 50
				808	#define CO_SIZE sizeof(int)
				809	#define ICMP_SYSRQ_SIZE 57
				810
				811	/*
				812	* We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
				813	* pattern and if it matches send the next byte as a trigger to sysrq.
				814	*/
				815	static void icmp_check_sysrq(struct net net, struct sk_buff skb)
				816	{
				817	int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
				818	char *p = skb->data;
				819
				820	if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
				821	!memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
				822	p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
				823	handle_sysrq(p[CO_POS0 + CO_SIZE]);
				824	}
				825
				826	/*
				827	* Handle ICMP_ECHO ("ping") requests.
				828	*
				829	* RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
				830	* requests.
				831	* RFC 1122: 3.2.2.6 Data received in the ICMP_ECHO request MUST be
				832	* included in the reply.
				833	* RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring
				834	* echo requests, MUST have default=NOT.
				835	* See also WRT handling of options once they are done and working.
				836	*/
				837
				838	static void icmp_echo(struct sk_buff *skb)
				839	{
				840	struct net *net;
				841
				842	net = dev_net(skb_dst(skb)->dev);
				843	if (!net->ipv4.sysctl_icmp_echo_ignore_all) {
				844	struct icmp_bxm icmp_param;
				845
				846	icmp_param.data.icmph = *icmp_hdr(skb);
				847	icmp_param.data.icmph.type = ICMP_ECHOREPLY;
				848	icmp_param.skb = skb;
				849	icmp_param.offset = 0;
				850	icmp_param.data_len = skb->len;
				851	icmp_param.head_len = sizeof(struct icmphdr);
				852	icmp_reply(&icmp_param, skb);
				853
				854	if (skb->len == ICMP_SYSRQ_SIZE &&
				855	net->ipv4.sysctl_icmp_echo_sysrq) {
				856	icmp_check_sysrq(net, skb);
				857	}
				858	}
				859	}
				860
				861	/*
				862	* Handle ICMP Timestamp requests.
				863	* RFC 1122: 3.2.2.8 MAY implement ICMP timestamp requests.
				864	* SHOULD be in the kernel for minimum random latency.
				865	* MUST be accurate to a few minutes.
				866	* MUST be updated at least at 15Hz.
				867	*/
				868	static void icmp_timestamp(struct sk_buff *skb)
				869	{
				870	struct timespec tv;
				871	struct icmp_bxm icmp_param;
				872	/*
				873	* Too short.
				874	*/
				875	if (skb->len < 4)
				876	goto out_err;
				877
				878	/*
				879	* Fill in the current time as ms since midnight UT:
				880	*/
				881	getnstimeofday(&tv);
				882	icmp_param.data.times[1] = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC +
				883	tv.tv_nsec / NSEC_PER_MSEC);
				884	icmp_param.data.times[2] = icmp_param.data.times[1];
				885	if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
				886	BUG();
				887	icmp_param.data.icmph = *icmp_hdr(skb);
				888	icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
				889	icmp_param.data.icmph.code = 0;
				890	icmp_param.skb = skb;
				891	icmp_param.offset = 0;
				892	icmp_param.data_len = 0;
				893	icmp_param.head_len = sizeof(struct icmphdr) + 12;
				894	icmp_reply(&icmp_param, skb);
				895	out:
				896	return;
				897	out_err:
				898	ICMP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
				899	goto out;
				900	}
				901
				902
				903	/*
				904	* Handle ICMP_ADDRESS_MASK requests. (RFC950)
				905	*
				906	* RFC1122 (3.2.2.9). A host MUST only send replies to
				907	* ADDRESS_MASK requests if it's been configured as an address mask
				908	* agent. Receiving a request doesn't constitute implicit permission to
				909	* act as one. Of course, implementing this correctly requires (SHOULD)
				910	* a way to turn the functionality on and off. Another one for sysctl(),
				911	* I guess. -- MS
				912	*
				913	* RFC1812 (4.3.3.9). A router MUST implement it.
				914	* A router SHOULD have switch turning it on/off.
				915	* This switch MUST be ON by default.
				916	*
				917	* Gratuitous replies, zero-source replies are not implemented,
				918	* that complies with RFC. DO NOT implement them!!! All the idea
				919	* of broadcast addrmask replies as specified in RFC950 is broken.
				920	* The problem is that it is not uncommon to have several prefixes
				921	* on one physical interface. Moreover, addrmask agent can even be
				922	* not aware of existing another prefixes.
				923	* If source is zero, addrmask agent cannot choose correct prefix.
				924	* Gratuitous mask announcements suffer from the same problem.
				925	* RFC1812 explains it, but still allows to use ADDRMASK,
				926	* that is pretty silly. --ANK
				927	*
				928	* All these rules are so bizarre, that I removed kernel addrmask
				929	* support at all. It is wrong, it is obsolete, nobody uses it in
				930	* any case. --ANK
				931	*
				932	* Furthermore you can do it with a usermode address agent program
				933	* anyway...
				934	*/
				935
				/*
				 * Receive handler for address mask requests.  Does nothing: the only
				 * statement is a debug message that is compiled out with #if 0.
				 */
				static void icmp_address(struct sk_buff *skb)
				{
				#if 0
					if (net_ratelimit())
						printk(KERN_DEBUG "a guy asks for address mask. Who is it?\n");
				#endif
				}
				943
				944	/*
				945	* RFC1812 (4.3.3.9). A router SHOULD listen all replies, and complain
				946	* loudly if an inconsistency is found.
				947	* called with rcu_read_lock()
				948	*/
				949
				950	static void icmp_address_reply(struct sk_buff *skb)
				951	{
				952	struct rtable *rt = skb_rtable(skb);
				953	struct net_device *dev = skb->dev;
				954	struct in_device *in_dev;
				955	struct in_ifaddr *ifa;
				956
				957	if (skb->len < 4 \|\| !(rt->rt_flags&RTCF_DIRECTSRC))
				958	return;
				959
				960	in_dev = __in_dev_get_rcu(dev);
				961	if (!in_dev)
				962	return;
				963
				964	if (in_dev->ifa_list &&
				965	IN_DEV_LOG_MARTIANS(in_dev) &&
				966	IN_DEV_FORWARD(in_dev)) {
				967	__be32 _mask, *mp;
				968
				969	mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask);
				970	BUG_ON(mp == NULL);
				971	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
				972	if (*mp == ifa->ifa_mask &&
				973	inet_ifa_match(ip_hdr(skb)->saddr, ifa))
				974	break;
				975	}
				976	if (!ifa && net_ratelimit()) {
				977	pr_info("Wrong address mask %pI4 from %s/%pI4\n",
				978	mp, dev->name, &ip_hdr(skb)->saddr);
				979	}
				980	}
				981	}
				982
				/* Receive handler that deliberately does nothing with the packet. */
				static void icmp_discard(struct sk_buff *skb)
				{
				}
				986
				987	/*
				988	* Deal with incoming ICMP packets.
				989	*/
				990	int icmp_rcv(struct sk_buff *skb)
				991	{
				992	struct icmphdr *icmph;
				993	struct rtable *rt = skb_rtable(skb);
				994	struct net *net = dev_net(rt->dst.dev);
				995
				996	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
				997	struct sec_path *sp = skb_sec_path(skb);
				998	int nh;
				999
				1000	if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				1001	XFRM_STATE_ICMP))
				1002	goto drop;
				1003
				1004	if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr)))
				1005	goto drop;
				1006
				1007	nh = skb_network_offset(skb);
				1008	skb_set_network_header(skb, sizeof(*icmph));
				1009
				1010	if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
				1011	goto drop;
				1012
				1013	skb_set_network_header(skb, nh);
				1014	}
				1015
				1016	ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS);
				1017
				1018	switch (skb->ip_summed) {
				1019	case CHECKSUM_COMPLETE:
				1020	if (!csum_fold(skb->csum))
				1021	break;
				1022	/* fall through */
				1023	case CHECKSUM_NONE:
				1024	skb->csum = 0;
				1025	if (__skb_checksum_complete(skb))
				1026	goto error;
				1027	}
				1028
				1029	if (!pskb_pull(skb, sizeof(*icmph)))
				1030	goto error;
				1031
				1032	icmph = icmp_hdr(skb);
				1033
				1034	ICMPMSGIN_INC_STATS_BH(net, icmph->type);
				1035	/*
				1036	* 18 is the highest 'known' ICMP type. Anything else is a mystery
				1037	*
				1038	* RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
				1039	* discarded.
				1040	*/
				1041	if (icmph->type > NR_ICMP_TYPES)
				1042	goto error;
				1043
				1044
				1045	/*
				1046	* Parse the ICMP message
				1047	*/
				1048
				1049	if (rt->rt_flags & (RTCF_BROADCAST \| RTCF_MULTICAST)) {
				1050	/*
				1051	* RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
				1052	* silently ignored (we let user decide with a sysctl).
				1053	* RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently
				1054	* discarded if to broadcast/multicast.
				1055	*/
				1056	if ((icmph->type == ICMP_ECHO \|\|
				1057	icmph->type == ICMP_TIMESTAMP) &&
				1058	net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
				1059	goto error;
				1060	}
				1061	if (icmph->type != ICMP_ECHO &&
				1062	icmph->type != ICMP_TIMESTAMP &&
				1063	icmph->type != ICMP_ADDRESS &&
				1064	icmph->type != ICMP_ADDRESSREPLY) {
				1065	goto error;
				1066	}
				1067	}
				1068
				1069	icmp_pointers[icmph->type].handler(skb);
				1070
				1071	drop:
				1072	kfree_skb(skb);
				1073	return 0;
				1074	error:
				1075	ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
				1076	goto drop;
				1077	}
				1078
				1079	/*
				1080	* This table is the definition of how we handle ICMP.
				1081	*/
				1082	static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
				1083	[ICMP_ECHOREPLY] = {
				1084	.handler = ping_rcv,
				1085	},
				1086	[1] = {
				1087	.handler = icmp_discard,
				1088	.error = 1,
				1089	},
				1090	[2] = {
				1091	.handler = icmp_discard,
				1092	.error = 1,
				1093	},
				1094	[ICMP_DEST_UNREACH] = {
				1095	.handler = icmp_unreach,
				1096	.error = 1,
				1097	},
				1098	[ICMP_SOURCE_QUENCH] = {
				1099	.handler = icmp_unreach,
				1100	.error = 1,
				1101	},
				1102	[ICMP_REDIRECT] = {
				1103	.handler = icmp_redirect,
				1104	.error = 1,
				1105	},
				1106	[6] = {
				1107	.handler = icmp_discard,
				1108	.error = 1,
				1109	},
				1110	[7] = {
				1111	.handler = icmp_discard,
				1112	.error = 1,
				1113	},
				1114	[ICMP_ECHO] = {
				1115	.handler = icmp_echo,
				1116	},
				1117	[9] = {
				1118	.handler = icmp_discard,
				1119	.error = 1,
				1120	},
				1121	[10] = {
				1122	.handler = icmp_discard,
				1123	.error = 1,
				1124	},
				1125	[ICMP_TIME_EXCEEDED] = {
				1126	.handler = icmp_unreach,
				1127	.error = 1,
				1128	},
				1129	[ICMP_PARAMETERPROB] = {
				1130	.handler = icmp_unreach,
				1131	.error = 1,
				1132	},
				1133	[ICMP_TIMESTAMP] = {
				1134	.handler = icmp_timestamp,
				1135	},
				1136	[ICMP_TIMESTAMPREPLY] = {
				1137	.handler = icmp_discard,
				1138	},
				1139	[ICMP_INFO_REQUEST] = {
				1140	.handler = icmp_discard,
				1141	},
				1142	[ICMP_INFO_REPLY] = {
				1143	.handler = icmp_discard,
				1144	},
				1145	[ICMP_ADDRESS] = {
				1146	.handler = icmp_address,
				1147	},
				1148	[ICMP_ADDRESSREPLY] = {
				1149	.handler = icmp_address_reply,
				1150	},
				1151	};
				1152
				1153	static void __net_exit icmp_sk_exit(struct net *net)
				1154	{
				1155	int i;
				1156
				1157	for_each_possible_cpu(i)
				1158	inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]);
				1159	kfree(net->ipv4.icmp_sk);
				1160	net->ipv4.icmp_sk = NULL;
				1161	}
				1162
				1163	static int __net_init icmp_sk_init(struct net *net)
				1164	{
				1165	int i, err;
				1166
				1167	net->ipv4.icmp_sk =
				1168	kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
				1169	if (net->ipv4.icmp_sk == NULL)
				1170	return -ENOMEM;
				1171
				1172	for_each_possible_cpu(i) {
				1173	struct sock *sk;
				1174
				1175	err = inet_ctl_sock_create(&sk, PF_INET,
				1176	SOCK_RAW, IPPROTO_ICMP, net);
				1177	if (err < 0)
				1178	goto fail;
				1179
				1180	net->ipv4.icmp_sk[i] = sk;
				1181
				1182	/* Enough space for 2 64K ICMP packets, including
				1183	* sk_buff/skb_shared_info struct overhead.
				1184	*/
				1185	sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
				1186
				1187	/*
				1188	* Speedup sock_wfree()
				1189	*/
				1190	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
				1191	inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
				1192	}
				1193
				1194	/* Control parameters for ECHO replies. */
				1195	net->ipv4.sysctl_icmp_echo_ignore_all = 0;
				1196	net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1;
				1197
				1198	/* Control parameter - ignore bogus broadcast responses? */
				1199	net->ipv4.sysctl_icmp_ignore_bogus_error_responses = 1;
				1200
				1201	/*
				1202	* Configurable global rate limit.
				1203	*
				1204	* ratelimit defines tokens/packet consumed for dst->rate_token
				1205	* bucket ratemask defines which icmp types are ratelimited by
				1206	* setting it's bit position.
				1207	*
				1208	* default:
				1209	* dest unreachable (3), source quench (4),
				1210	* time exceeded (11), parameter problem (12)
				1211	*/
				1212
				1213	net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
				1214	net->ipv4.sysctl_icmp_ratemask = 0x1818;
				1215	net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
				1216
				1217	return 0;
				1218
				1219	fail:
				1220	for_each_possible_cpu(i)
				1221	inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]);
				1222	kfree(net->ipv4.icmp_sk);
				1223	return err;
				1224	}
				1225
				1226	static struct pernet_operations __net_initdata icmp_sk_ops = {
				1227	.init = icmp_sk_init,
				1228	.exit = icmp_sk_exit,
				1229	};
				1230
				1231	int __init icmp_init(void)
				1232	{
				1233	return register_pernet_subsys(&icmp_sk_ops);
				1234	}