Blame - ap/app/iproute2/iproute2-3.4.0/doc/ip-cref.tex - T106_DC

blob: d8fed66e23af19246bfb970a589894853338ad97 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame^]	1	\documentstyle[12pt,twoside]{article}
				2	\def\TITLE{IP Command Reference}
				3	\input preamble
				4	\begin{center}
				5	\Large\bf IP Command Reference.
				6	\end{center}
				7
				8
				9	\begin{center}
				10	{ \large Alexey~N.~Kuznetsov } \\
				11	\em Institute for Nuclear Research, Moscow \\
				12	\verb\|kuznet@ms2.inr.ac.ru\| \\
				13	\rm April 14, 1999
				14	\end{center}
				15
				16	\vspace{5mm}
				17
				18	\tableofcontents
				19
				20	\newpage
				21
				22	\section{About this document}
				23
				24	This document presents a comprehensive description of the \verb\|ip\| utility
				25	from the \verb\|iproute2\| package. It is not a tutorial or user's guide.
				26	It is a {\em dictionary\/}, not explaining terms,
				27	but translating them into other terms, which may also be unknown to the reader.
				28	However, the document is self-contained and the reader, provided they have a
				29	basic networking background, will find enough information
				30	and examples to understand and configure Linux-2.2 IP and IPv6
				31	networking.
				32
				33	This document is split into sections explaining \verb\|ip\| commands
				34	and options, deciphering \verb\|ip\| output and containing a few examples.
				35	More voluminous examples and some topics, which require more elaborate
				36	discussion, are in the appendix.
				37
				38	The paragraphs beginning with NB contain side notes, warnings about
				39	bugs and design drawbacks. They may be skipped at the first reading.
				40
				41	\section{{\tt ip} --- command syntax}
				42
				43	The generic form of an \verb\|ip\| command is:
				44	\begin{verbatim}
				45	ip [ OPTIONS ] OBJECT [ COMMAND [ ARGUMENTS ]]
				46	\end{verbatim}
				47	where \verb\|OPTIONS\| is a set of optional modifiers affecting the
				48	general behaviour of the \verb\|ip\| utility or changing its output. All options
				49	begin with the character \verb\|'-'\| and may be used in either long or abbreviated
				50	forms. Currently, the following options are available:
				51
				52	\begin{itemize}
				53	\item \verb\|-V\|, \verb\|-Version\|
				54
				55	--- print the version of the \verb\|ip\| utility and exit.
				56
				57
				58	\item \verb\|-s\|, \verb\|-stats\|, \verb\|-statistics\|
				59
				60	--- output more information. If the option
				61	appears twice or more, the amount of information increases.
				62	As a rule, the information is statistics or some time values.
				63
				64
				65	\item \verb\|-f\|, \verb\|-family\| followed by a protocol family
				66	identifier: \verb\|inet\|, \verb\|inet6\| or \verb\|link\|.
				67
				68	--- enforce the protocol family to use. If the option is not present,
				69	the protocol family is guessed from other arguments. If the rest of the command
				70	line does not give enough information to guess the family, \verb\|ip\| falls back to the default
				71	one, usually \verb\|inet\| or \verb\|any\|. \verb\|link\| is a special family
				72	identifier meaning that no networking protocol is involved.
				73
				74	\item \verb\|-4\|
				75
				76	--- shortcut for \verb\|-family inet\|.
				77
				78	\item \verb\|-6\|
				79
				80	--- shortcut for \verb\|-family inet6\|.
				81
				82	\item \verb\|-0\|
				83
				84	--- shortcut for \verb\|-family link\|.
				85
				86
				87	\item \verb\|-o\|, \verb\|-oneline\|
				88
				89	--- output each record on a single line, replacing line feeds
				90	with the \verb\|'\'\| character. This is convenient when you want to
				91	count records with \verb\|wc\| or to \verb\|grep\| the output. The trivial
				92	script \verb\|rtpr\| converts the output back into readable form.
				93
				94	\item \verb\|-r\|, \verb\|-resolve\|
				95
				96	--- use the system's name resolver to print DNS names instead of
				97	host addresses.
				98
				99	\begin{NB}
				100	Do not use this option when reporting bugs or asking for advice.
				101	\end{NB}
				102	\begin{NB}
				103	\verb\|ip\| never uses DNS to resolve names to addresses.
				104	\end{NB}
				105
				106	\end{itemize}
				107
				108	\verb\|OBJECT\| is the object to manage or to get information about.
				109	The object types currently understood by \verb\|ip\| are:
				110
				111	\begin{itemize}
				112	\item \verb\|link\| --- network device
				113	\item \verb\|address\| --- protocol (IP or IPv6) address on a device
				114	\item \verb\|neighbour\| --- ARP or NDISC cache entry
				115	\item \verb\|route\| --- routing table entry
				116	\item \verb\|rule\| --- rule in routing policy database
				117	\item \verb\|maddress\| --- multicast address
				118	\item \verb\|mroute\| --- multicast routing cache entry
				119	\item \verb\|tunnel\| --- tunnel over IP
				120	\end{itemize}
				121
				122	Again, the names of all objects may be written in full or
				123	abbreviated form, f.e.\ \verb\|address\| is abbreviated as \verb\|addr\|
				124	or just \verb\|a\|.
				125
				126	\verb\|COMMAND\| specifies the action to perform on the object.
				127	The set of possible actions depends on the object type.
				128	As a rule, it is possible to \verb\|add\|, \verb\|delete\| and
				129	\verb\|show\| (or \verb\|list\|) objects, but some objects
				130	do not allow all of these operations or have some additional commands.
				131	The \verb\|help\| command is available for all objects. It prints
				132	out a list of available commands and argument syntax conventions.
				133
				134	If no command is given, some default command is assumed.
				135	Usually it is \verb\|list\| or, if the objects of this class
				136	cannot be listed, \verb\|help\|.
				137
				138	\verb\|ARGUMENTS\| is a list of arguments to the command.
				139	The arguments depend on the command and object. There are two types of arguments:
				140	{\em flags\/}, consisting of a single keyword, and {\em parameters\/},
				141	consisting of a keyword followed by a value. For convenience,
				142	each command has some {\em default parameter\/}
				143	which may be omitted. F.e.\ parameter \verb\|dev\| is the default
				144	for the {\tt ip link} command, so {\tt ip link ls eth0} is equivalent
				145	to {\tt ip link ls dev eth0}.
				146	In the command descriptions below such parameters
				147	are distinguished with the marker: ``(default)''.
				148
				149	Almost all keywords may be abbreviated with several first (or even single)
				150	letters. The shortcuts are convenient when \verb\|ip\| is used interactively,
				151	but they are not recommended in scripts or when reporting bugs
				152	or asking for advice. ``Officially'' allowed abbreviations are listed
				153	in the document body.
				154
				155
				156
				157	\section{{\tt ip} --- error messages}
				158
				159	\verb\|ip\| may fail for one of the following reasons:
				160
				161	\begin{itemize}
				162	\item
				163	A syntax error on the command line: an unknown keyword, incorrectly formatted
				164	IP address {\em et al\/}. In this case \verb\|ip\| prints an error message
				165	and exits. As a rule, the error message will contain information
				166	about the reason for the failure. Sometimes it also prints a help page.
				167
				168	\item
				169	The arguments did not pass verification for self-consistency.
				170
				171	\item
				172	\verb\|ip\| failed to compile a kernel request from the arguments
				173	because the user didn't give enough information.
				174
				175	\item
				176	The kernel returned an error to some syscall. In this case \verb\|ip\|
				177	prints the error message, as it is output with \verb\|perror(3)\|,
				178	prefixed with a comment and a syscall identifier.
				179
				180	\item
				181	The kernel returned an error to some RTNETLINK request.
				182	In this case \verb\|ip\| prints the error message, as it is output
				183	with \verb\|perror(3)\| prefixed with ``RTNETLINK answers:''.
				184
				185	\end{itemize}
				186
				187	All the operations are atomic, i.e.\
				188	if the \verb\|ip\| utility fails, it does not change anything
				189	in the system. One harmful exception is the \verb\|ip link\| command
				190	(Sec.~\ref{IP-LINK}, p.~\pageref{IP-LINK}),
				191	which may change only some of the device parameters given
				192	on the command line.
				193
				194	It is difficult to list all the error messages (especially
				195	syntax errors). However, as a rule, their meaning is clear
				196	from the context of the command.
				197
				198	The most common mistakes are:
				199
				200	\begin{enumerate}
				201	\item Netlink is not configured in the kernel. The message is:
				202	\begin{verbatim}
				203	Cannot open netlink socket: Invalid value
				204	\end{verbatim}
				205
				206	\item RTNETLINK is not configured in the kernel. In this case
				207	one of the following messages may be printed, depending on the command:
				208	\begin{verbatim}
				209	Cannot talk to rtnetlink: Connection refused
				210	Cannot send dump request: Connection refused
				211	\end{verbatim}
				212
				213	\item The \verb\|CONFIG_IP_MULTIPLE_TABLES\| option was not selected
				214	when configuring the kernel. In this case any attempt to use the
				215	\verb\|ip\| \verb\|rule\| command will fail, f.e.
				216	\begin{verbatim}
				217	kuznet@kaiser $ ip rule list
				218	RTNETLINK error: Invalid argument
				219	dump terminated
				220	\end{verbatim}
				221
				222	\end{enumerate}
				223
				224
				225	\section{{\tt ip link} --- network device configuration}
				226	\label{IP-LINK}
				227
				228	\paragraph{Object:} A \verb\|link\| is a network device and the corresponding
				229	commands display and change the state of devices.
				230
				231	\paragraph{Commands:} \verb\|set\| and \verb\|show\| (or \verb\|list\|).
				232
				233	\subsection{{\tt ip link set} --- change device attributes}
				234
				235	\paragraph{Abbreviations:} \verb\|set\|, \verb\|s\|.
				236
				237	\paragraph{Arguments:}
				238
				239	\begin{itemize}
				240	\item \verb\|dev NAME\| (default)
				241
				242	--- \verb\|NAME\| specifies the network device on which to operate.
				243
				244	\item \verb\|up\| and \verb\|down\|
				245
				246	--- change the state of the device to \verb\|UP\| or \verb\|DOWN\|.
				247
				248	\item \verb\|arp on\| or \verb\|arp off\|
				249
				250	--- change the \verb\|NOARP\| flag on the device.
				251
				252	\begin{NB}
				253	This operation is {\em not allowed\/} if the device is in state \verb\|UP\|.
				254	However, neither the \verb\|ip\| utility nor the kernel checks for this condition.
				255	You can get unpredictable results changing this flag while the
				256	device is running.
				257	\end{NB}
				258
				259	\item \verb\|multicast on\| or \verb\|multicast off\|
				260
				261	--- change the \verb\|MULTICAST\| flag on the device.
				262
				263	\item \verb\|dynamic on\| or \verb\|dynamic off\|
				264
				265	--- change the \verb\|DYNAMIC\| flag on the device.
				266
				267	\item \verb\|name NAME\|
				268
				269	--- change the name of the device. This operation is not
				270	recommended if the device is running or has some addresses
				271	already configured.
				272
				273	\item \verb\|txqueuelen NUMBER\| or \verb\|txqlen NUMBER\|
				274
				275	--- change the transmit queue length of the device.
				276
				277	\item \verb\|mtu NUMBER\|
				278
				279	--- change the MTU of the device.
				280
				281	\item \verb\|address LLADDRESS\|
				282
				283	--- change the station address of the interface.
				284
				285	\item \verb\|broadcast LLADDRESS\|, \verb\|brd LLADDRESS\| or \verb\|peer LLADDRESS\|
				286
				287	--- change the link layer broadcast address or the peer address when
				288	the interface is \verb\|POINTOPOINT\|.
				289
				290	\vskip 1mm
				291	\begin{NB}
				292	For most devices (f.e.\ for Ethernet) changing the link layer
				293	broadcast address will break networking.
				294	Do not use it, if you do not understand what this operation really does.
				295	\end{NB}
				296
				297	\item \verb\|netns PID\|
				298
				299	--- move the device to the network namespace associated with the process PID.
				300
				301	\end{itemize}
				302
				303	\vskip 1mm
				304	\begin{NB}
				305	The \verb\|PROMISC\| and \verb\|ALLMULTI\| flags are considered
				306	obsolete and should not be changed administratively, though
				307	the {\tt ip} utility will allow that.
				308	\end{NB}
				309
				310	\paragraph{Warning:} If multiple parameter changes are requested,
				311	\verb\|ip\| aborts immediately after any of the changes has failed.
				312	This is the only case when \verb\|ip\| can move the system to
				313	an unpredictable state. The solution is to avoid changing
				314	several parameters with one {\tt ip link set} call.
				315
				316	\paragraph{Examples:}
				317	\begin{itemize}
				318	\item \verb\|ip link set dummy address 00:00:00:00:00:01\|
				319
				320	--- change the station address of the interface \verb\|dummy\|.
				321
				322	\item \verb\|ip link set dummy up\|
				323
				324	--- start the interface \verb\|dummy\|.
				325
				326	\end{itemize}
				327
				328
				329	\subsection{{\tt ip link show} --- display device attributes}
				330	\label{IP-LINK-SHOW}
				331
				332	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|lst\|, \verb\|sh\|, \verb\|ls\|,
				333	\verb\|l\|.
				334
				335	\paragraph{Arguments:}
				336	\begin{itemize}
				337	\item \verb\|dev NAME\| (default)
				338
				339	--- \verb\|NAME\| specifies the network device to show.
				340	If this argument is omitted all devices are listed.
				341
				342	\item \verb\|up\|
				343
				344	--- only display running interfaces.
				345
				346	\end{itemize}
				347
				348
				349	\paragraph{Output format:}
				350
				351	\begin{verbatim}
				352	kuznet@alisa:~ $ ip link ls eth0
				353	3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
				354	link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
				355	kuznet@alisa:~ $ ip link ls sit0
				356	5: sit0@NONE: <NOARP,UP> mtu 1480 qdisc noqueue
				357	link/sit 0.0.0.0 brd 0.0.0.0
				358	kuznet@alisa:~ $ ip link ls dummy
				359	2: dummy: <BROADCAST,NOARP> mtu 1500 qdisc noop
				360	link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
				361	kuznet@alisa:~ $
				362	\end{verbatim}
				363
				364
				365	The number before each colon is an {\em interface index\/} or {\em ifindex\/}.
				366	This number uniquely identifies the interface. This is followed by the {\em interface name\/}
				367	(\verb\|eth0\|, \verb\|sit0\| etc.). The interface name is also
				368	unique at every given moment. However, the interface may disappear from the
				369	list (f.e.\ when the corresponding driver module is unloaded) and another
				370	one with the same name may be created later. Besides that,
				371	the administrator may change the name of any device with
				372	\verb\|ip\| \verb\|link\| \verb\|set\| \verb\|name\|
				373	to make it more intelligible.
				374
				375	The interface name may have another name or \verb\|NONE\| appended
				376	after the \verb\|@\| sign. This means that this device is bound to some other
				377	device,
				378	i.e.\ packets sent through it are encapsulated and sent via the ``master''
				379	device. If the name is \verb\|NONE\|, the master is unknown.
				380
				381	Then we see the interface {\em mtu\/} (``maximum transmission unit''). This determines
				382	the maximal size of data which can be sent as a single packet over this interface.
				383
				384	{\em qdisc\/} (``queuing discipline'') shows the queuing algorithm used
				385	on the interface. Particularly, \verb\|noqueue\| means that this interface
				386	does not queue anything and \verb\|noop\| means that the interface is in blackhole
				387	mode i.e.\ all packets sent to it are immediately discarded.
				388	{\em qlen\/} is the default transmit queue length of the device measured
				389	in packets.
				390
				391	The interface flags are summarized in the angle brackets.
				392
				393	\begin{itemize}
				394	\item \verb\|UP\| --- the device is turned on. It is ready to accept
				395	packets for transmission and it may inject into the kernel packets received
				396	from other nodes on the network.
				397
				398	\item \verb\|LOOPBACK\| --- the interface does not communicate with other
				399	hosts. All packets sent through it will be returned
				400	and nothing but bounced packets can be received.
				401
				402	\item \verb\|BROADCAST\| --- the device has the facility to send packets
				403	to all hosts sharing the same link. A typical example is an Ethernet link.
				404
				405	\item \verb\|POINTOPOINT\| --- the link has only two ends with one node
				406	attached to each end. All packets sent to this link will reach the peer
				407	and all packets received by us come from this single peer.
				408
				409	If neither \verb\|LOOPBACK\| nor \verb\|BROADCAST\| nor \verb\|POINTOPOINT\|
				410	are set, the interface is assumed to be NBMA (Non-Broadcast Multi-Access).
				411	This is the most generic type of device and the most complicated one, because
				412	the host attached to an NBMA link has no means to send to anyone
				413	without additionally configured information.
				414
				415	\item \verb\|MULTICAST\| --- is an advisory flag indicating that the interface
				416	is aware of multicasting i.e.\ sending packets to some subset of neighbouring
				417	nodes. Broadcasting is a particular case of multicasting, where the multicast
				418	group consists of all nodes on the link. It is important to emphasize
				419	that software {\em must not\/} interpret the absence of this flag as the inability
				420	to use multicasting on this interface. Any \verb\|POINTOPOINT\| and
				421	\verb\|BROADCAST\| link is multicasting by definition, because we have
				422	direct access to all the neighbours and, hence, to any part of them.
				423	Certainly, the use of high bandwidth multicast transfers is not recommended
				424	on broadcast-only links because of high expense, but it is not strictly
				425	prohibited.
				426
				427	\item \verb\|PROMISC\| --- the device listens to and feeds to the kernel all
				428	traffic on the link even if it is not destined for us, not broadcasted
				429	and not destined for a multicast group of which we are a member. Usually
				430	this mode exists only on broadcast links and is used by bridges and for network
				431	monitoring.
				432
				433	\item \verb\|ALLMULTI\| --- the device receives all multicast packets
				434	wandering on the link. This mode is used by multicast routers.
				435
				436	\item \verb\|NOARP\| --- this flag is different from the other ones. It has
				437	no invariant value and its interpretation depends on the network protocols
				438	involved. As a rule, it indicates that the device needs no address
				439	resolution and that the software or hardware knows how to deliver packets
				440	without any help from the protocol stacks.
				441
				442	\item \verb\|DYNAMIC\| --- is an advisory flag indicating that the interface is
				443	dynamically created and destroyed.
				444
				445	\item \verb\|SLAVE\| --- this interface is bonded to some other interfaces
				446	to share link capacities.
				447
				448	\end{itemize}
				449
				450	\vskip 1mm
				451	\begin{NB}
				452	There are other flags but they are either obsolete (\verb\|NOTRAILERS\|)
				453	or not implemented (\verb\|DEBUG\|) or specific to some devices
				454	(\verb\|MASTER\|, \verb\|AUTOMEDIA\| and \verb\|PORTSEL\|). We do not discuss
				455	them here.
				456	\end{NB}
				457
				458
				459	The second line contains information on the link layer addresses
				460	associated with the device. The first word (\verb\|ether\|, \verb\|sit\|)
				461	defines the interface hardware type. This type determines the format and semantics
				462	of the addresses and is logically part of the address.
				463	The default format of the station address and the broadcast address
				464	(or the peer address for pointopoint links) is a
				465	sequence of hexadecimal bytes separated by colons, but some link
				466	types may have their natural address format, f.e.\ addresses
				467	of tunnels over IP are printed as dotted-quad IP addresses.
				468
				469	\vskip 1mm
				470	\begin{NB}
				471	NBMA links have no well-defined broadcast or peer address,
				472	however this field may contain useful information, f.e.\
				473	about the address of a broadcast relay or about the address of the ARP server.
				474	\end{NB}
				475	\begin{NB}
				476	Multicast addresses are not shown by this command, see
				477	\verb\|ip maddr ls\| in~Sec.\ref{IP-MADDR} (p.\pageref{IP-MADDR} of this
				478	document).
				479	\end{NB}
				480
				481
				482	\paragraph{Statistics:} With the \verb\|-statistics\| option, \verb\|ip\| also
				483	prints interface statistics:
				484
				485	\begin{verbatim}
				486	kuznet@alisa:~ $ ip -s link ls eth0
				487	3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
				488	link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
				489	RX: bytes packets errors dropped overrun mcast
				490	2449949362 2786187 0 0 0 0
				491	TX: bytes packets errors dropped carrier collsns
				492	178558497 1783945 332 0 332 35172
				493	kuznet@alisa:~ $
				494	\end{verbatim}
				495	\verb\|RX:\| and \verb\|TX:\| lines summarize receiver and transmitter
				496	statistics. They contain:
				497	\begin{itemize}
				498	\item \verb\|bytes\| --- the total number of bytes received or transmitted
				499	on the interface. This number wraps when the maximal length of the data type
				500	natural for the architecture is exceeded, so continuous monitoring requires
				501	a user level daemon snapping it periodically.
				502	\item \verb\|packets\| --- the total number of packets received or transmitted
				503	on the interface.
				504	\item \verb\|errors\| --- the total number of receiver or transmitter errors.
				505	\item \verb\|dropped\| --- the total number of packets dropped due to lack
				506	of resources.
				507	\item \verb\|overrun\| --- the total number of receiver overruns resulting
				508	in dropped packets. As a rule, if the interface is overrun, it means
				509	serious problems in the kernel or that your machine is too slow
				510	for this interface.
				511	\item \verb\|mcast\| --- the total number of received multicast packets. This option
				512	is only supported by a few devices.
				513	\item \verb\|carrier\| --- the total number of link media failures, f.e.\ because
				514	of lost carrier.
				515	\item \verb\|collsns\| --- the total number of collision events
				516	on Ethernet-like media. This number may have a different sense on other
				517	link types.
				518	\item \verb\|compressed\| --- the total number of compressed packets. This is
				519	available only for links using VJ header compression.
				520	\end{itemize}
				521
				522
				523	If the \verb\|-s\| option is entered twice or more,
				524	\verb\|ip\| prints more detailed statistics on receiver
				525	and transmitter errors.
				526
				527	\begin{verbatim}
				528	kuznet@alisa:~ $ ip -s -s link ls eth0
				529	3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
				530	link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
				531	RX: bytes packets errors dropped overrun mcast
				532	2449949362 2786187 0 0 0 0
				533	RX errors: length crc frame fifo missed
				534	0 0 0 0 0
				535	TX: bytes packets errors dropped carrier collsns
				536	178558497 1783945 332 0 332 35172
				537	TX errors: aborted fifo window heartbeat
				538	0 0 0 332
				539	kuznet@alisa:~ $
				540	\end{verbatim}
				541	These error names are pure Ethernetisms. Other devices
				542	may have non-zero values in these fields but they may be
				543	interpreted differently.
				544
				545
				546	\section{{\tt ip address} --- protocol address management}
				547
				548	\paragraph{Abbreviations:} \verb\|address\|, \verb\|addr\|, \verb\|a\|.
				549
				550	\paragraph{Object:} The \verb\|address\| is a protocol (IP or IPv6) address attached
				551	to a network device. Each device must have at least one address
				552	to use the corresponding protocol. It is possible to have several
				553	different addresses attached to one device. These addresses are not
				554	discriminated, so that the term {\em alias\/} is not quite appropriate
				555	for them and we do not use it in this document.
				556
				557	The \verb\|ip addr\| command displays addresses and their properties,
				558	adds new addresses and deletes old ones.
				559
				560	\paragraph{Commands:} \verb\|add\|, \verb\|delete\|, \verb\|flush\| and \verb\|show\|
				561	(or \verb\|list\|).
				562
				563
				564	\subsection{{\tt ip address add} --- add a new protocol address}
				565	\label{IP-ADDR-ADD}
				566
				567	\paragraph{Abbreviations:} \verb\|add\|, \verb\|a\|.
				568
				569	\paragraph{Arguments:}
				570
				571	\begin{itemize}
				572	\item \verb\|dev NAME\|
				573
				574	\noindent--- the name of the device to add the address to.
				575
				576	\item \verb\|local ADDRESS\| (default)
				577
				578	--- the address of the interface. The format of the address depends
				579	on the protocol. It is a dotted quad for IP and a sequence of hexadecimal halfwords
				580	separated by colons for IPv6. The \verb\|ADDRESS\| may be followed by
				581	a slash and a decimal number which encodes the network prefix length.
				582
				583
				584	\item \verb\|peer ADDRESS\|
				585
				586	--- the address of the remote endpoint for pointopoint interfaces.
				587	Again, the \verb\|ADDRESS\| may be followed by a slash and a decimal number,
				588	encoding the network prefix length. If a peer address is specified,
				589	the local address {\em cannot\/} have a prefix length. The network prefix is associated
				590	with the peer rather than with the local address.
				591
				592
				593	\item \verb\|broadcast ADDRESS\|
				594
				595	--- the broadcast address on the interface.
				596
				597	It is possible to use the special symbols \verb\|'+'\| and \verb\|'-'\|
				598	instead of the broadcast address. In this case, the broadcast address
				599	is derived by setting/resetting the host bits of the interface prefix.
				600
				601	\vskip 1mm
				602	\begin{NB}
				603	Unlike \verb\|ifconfig\|, the \verb\|ip\| utility {\em does not\/} set any broadcast
				604	address unless explicitly requested.
				605	\end{NB}
				606
				607
				608	\item \verb\|label NAME\|
				609
				610	--- Each address may be tagged with a label string.
				611	In order to preserve compatibility with Linux-2.0 net aliases,
				612	this string must coincide with the name of the device or must be prefixed
				613	with the device name followed by a colon.
				614
				615
				616	\item \verb\|scope SCOPE_VALUE\|
				617
				618	--- the scope of the area where this address is valid.
				619	The available scopes are listed in file \verb\|/etc/iproute2/rt_scopes\|.
				620	Predefined scope values are:
				621
				622	\begin{itemize}
				623	\item \verb\|global\| --- the address is globally valid.
				624	\item \verb\|site\| --- (IPv6 only) the address is site local,
				625	i.e.\ it is valid inside this site.
				626	\item \verb\|link\| --- the address is link local, i.e.\
				627	it is valid only on this device.
				628	\item \verb\|host\| --- the address is valid only inside this host.
				629	\end{itemize}
				630
				631	Appendix~\ref{ADDR-SEL} (p.\pageref{ADDR-SEL} of this document)
				632	contains more details on address scopes.
				633
				634	\end{itemize}
				635
				636	\paragraph{Examples:}
				637	\begin{itemize}
				638	\item \verb\|ip addr add 127.0.0.1/8 dev lo brd + scope host\|
				639
				640	--- add the usual loopback address to the loopback device.
				641
				642	\item \verb\|ip addr add 10.0.0.1/24 brd + dev eth0 label eth0:Alias\|
				643
				644	--- add the address 10.0.0.1 with prefix length 24 (i.e.\ netmask
				645	\verb\|255.255.255.0\|), standard broadcast and label \verb\|eth0:Alias\|
				646	to the interface \verb\|eth0\|.
				647	\end{itemize}
				648
				649
				650	\subsection{{\tt ip address delete} --- delete a protocol address}
				651
				652	\paragraph{Abbreviations:} \verb\|delete\|, \verb\|del\|, \verb\|d\|.
				653
				654	\paragraph{Arguments:} coincide with the arguments of \verb\|ip addr add\|.
				655	The device name is a required argument. The rest are optional.
				656	If no arguments are given, the first address is deleted.
				657
				658	\paragraph{Examples:}
				659	\begin{itemize}
				660	\item \verb\|ip addr del 127.0.0.1/8 dev lo\|
				661
				662	--- deletes the loopback address from the loopback device.
				663	It would be best not to repeat this experiment.
				664
				665	\item Disable IP on the interface \verb\|eth0\|:
				666	\begin{verbatim}
				667	while ip -f inet addr del dev eth0; do
				668	: nothing
				669	done
				670	\end{verbatim}
				671	Another method to disable IP on an interface using {\tt ip addr flush}
				672	may be found in Sec.~\ref{IP-ADDR-FLUSH}, p.~\pageref{IP-ADDR-FLUSH}.
				673
				674	\end{itemize}
				675
				676
				677	\subsection{{\tt ip address show} --- display protocol addresses}
				678
				679	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|lst\|, \verb\|sh\|, \verb\|ls\|,
				680	\verb\|l\|.
				681
				682	\paragraph{Arguments:}
				683
				684	\begin{itemize}
				685	\item \verb\|dev NAME\| (default)
				686
				687	--- the name of the device.
				688
				689	\item \verb\|scope SCOPE_VAL\|
				690
				691	--- only list addresses with this scope.
				692
				693	\item \verb\|to PREFIX\|
				694
				695	--- only list addresses matching this prefix.
				696
				697	\item \verb\|label PATTERN\|
				698
				699	--- only list addresses with labels matching the \verb\|PATTERN\|.
				700	\verb\|PATTERN\| is a usual shell style pattern.
				701
				702
				703	\item \verb\|dynamic\| and \verb\|permanent\|
				704
				705	--- (IPv6 only) only list addresses installed due to stateless
				706	address configuration or only list permanent (not dynamic) addresses.
				707
				708	\item \verb\|tentative\|
				709
				710	--- (IPv6 only) only list addresses which did not pass duplicate
				711	address detection.
				712
				713	\item \verb\|deprecated\|
				714
				715	--- (IPv6 only) only list deprecated addresses.
				716
				717
				718	\item \verb\|primary\| and \verb\|secondary\|
				719
				720	--- only list primary (or secondary) addresses.
				721
				722	\end{itemize}
				723
				724
				725	\paragraph{Output format:}
				726
				727	\begin{verbatim}
				728	kuznet@alisa:~ $ ip addr ls eth0
				729	3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
				730	link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
				731	inet 193.233.7.90/24 brd 193.233.7.255 scope global eth0
				732	inet6 3ffe:2400:0:1:2a0:ccff:fe66:1878/64 scope global dynamic
				733	valid_lft forever preferred_lft 604746sec
				734	inet6 fe80::2a0:ccff:fe66:1878/10 scope link
				735	kuznet@alisa:~ $
				736	\end{verbatim}
				737
				738	The first two lines coincide with the output of \verb\|ip link ls\|.
				739	It is natural to interpret link layer addresses
				740	as addresses of the protocol family \verb\|AF_PACKET\|.
				741
				742	Then the list of IP and IPv6 addresses follows, accompanied by
				743	additional address attributes: scope value (see Sec.\ref{IP-ADDR-ADD},
				744	p.\pageref{IP-ADDR-ADD} above), flags and the address label.
				745
				746	Address flags are set by the kernel and cannot be changed
				747	administratively. Currently, the following flags are defined:
				748
				749	\begin{enumerate}
				750	\item \verb\|secondary\|
				751
				752	--- the address is not used when selecting the default source address
				753	of outgoing packets (Cf.\ Appendix~\ref{ADDR-SEL}, p.\pageref{ADDR-SEL}).
				754	An IP address becomes secondary if another address with the same
				755	prefix bits already exists. The first address is primary.
				756	It is the leader of the group of all secondary addresses. When the leader
				757	is deleted, all secondaries are purged too.
				758	There is a tweak in \verb\|/proc/sys/net/ipv4/conf/<dev>/promote_secondaries\|
				759	which activates promotion of secondaries when a primary is deleted.
				760	To permanently enable this feature on all devices add
				761	\verb\|net.ipv4.conf.all.promote_secondaries=1\| to \verb\|/etc/sysctl.conf\|.
				762	This tweak is available in Linux 2.6.15 and later.
				763
				764
				765	\item \verb\|dynamic\|
				766
				767	--- the address was created due to stateless autoconfiguration~\cite{RFC-ADDRCONF}.
				768	In this case the output also contains information on times, when
				769	the address is still valid. After \verb\|preferred_lft\| expires the address is
				770	moved to the deprecated state. After \verb\|valid_lft\| expires the address
				771	is finally invalidated.
				772
				773	\item \verb\|deprecated\|
				774
				775	--- the address is deprecated, i.e.\ it is still valid, but cannot
				776	be used by newly created connections.
				777
				778	\item \verb\|tentative\|
				779
				780	--- the address is not used because duplicate address detection~\cite{RFC-ADDRCONF}
				781	is still not complete or failed.
				782
				783	\end{enumerate}
				784
				785
				786	\subsection{{\tt ip address flush} --- flush protocol addresses}
				787	\label{IP-ADDR-FLUSH}
				788
				789	\paragraph{Abbreviations:} \verb\|flush\|, \verb\|f\|.
				790
				791	\paragraph{Description:}This command flushes the protocol addresses
				792	selected by some criteria.
				793
				794	\paragraph{Arguments:} This command has the same arguments as \verb\|show\|.
				795	The difference is that it does not run when no arguments are given.
				796
				797	\paragraph{Warning:} This command (and other \verb\|flush\| commands
				798	described below) is pretty dangerous. If you make a mistake, it will
				799	not forgive it, but will cruelly purge all the addresses.
				800
				801	\paragraph{Statistics:} With the \verb\|-statistics\| option, the command
				802	becomes verbose. It prints out the number of deleted addresses and the number
				803	of rounds made to flush the address list. If this option is given
				804	twice, \verb\|ip addr flush\| also dumps all the deleted addresses
				805	in the format described in the previous subsection.
				806
				807	\paragraph{Example:} Delete all the addresses from the private network
				808	10.0.0.0/8:
				809	\begin{verbatim}
				810	netadm@amber:~ # ip -s -s a f to 10/8
				811	2: dummy inet 10.7.7.7/16 brd 10.7.255.255 scope global dummy
				812	3: eth0 inet 10.10.7.7/16 brd 10.10.255.255 scope global eth0
				813	4: eth1 inet 10.8.7.7/16 brd 10.8.255.255 scope global eth1
				814
				815	* Round 1, deleting 3 addresses *
				816	* Flush is complete after 1 round *
				817	netadm@amber:~ #
				818	\end{verbatim}
				819	Another instructive example is disabling IP on all the Ethernets:
				820	\begin{verbatim}
				821	netadm@amber:~ # ip -4 addr flush label "eth*"
				822	\end{verbatim}
				823	And the last example shows how to flush all the IPv6 addresses
				824	acquired by the host from stateless address autoconfiguration
				825	after you enabled forwarding or disabled autoconfiguration.
				826	\begin{verbatim}
				827	netadm@amber:~ # ip -6 addr flush dynamic
				828	\end{verbatim}
				829
				830
				831
				832	\section{{\tt ip neighbour} --- neighbour/arp tables management}
				833
				834	\paragraph{Abbreviations:} \verb\|neighbour\|, \verb\|neighbor\|, \verb\|neigh\|,
				835	\verb\|n\|.
				836
				837	\paragraph{Object:} \verb\|neighbour\| objects establish bindings between protocol
				838	addresses and link layer addresses for hosts sharing the same link.
				839	Neighbour entries are organized into tables. The IPv4 neighbour table
				840	is known by another name --- the ARP table.
				841
				842	The corresponding commands display neighbour bindings
				843	and their properties, add new neighbour entries and delete old ones.
				844
				845	\paragraph{Commands:} \verb\|add\|, \verb\|change\|, \verb\|replace\|,
				846	\verb\|delete\|, \verb\|flush\| and \verb\|show\| (or \verb\|list\|).
				847
				848	\paragraph{See also:} Appendix~\ref{PROXY-NEIGH}, p.\pageref{PROXY-NEIGH}
				849	describes how to manage proxy ARP/NDISC with the \verb\|ip\| utility.
				850
				851
				852	\subsection{{\tt ip neighbour add} --- add a new neighbour entry\\
				853	{\tt ip neighbour change} --- change an existing entry\\
				854	{\tt ip neighbour replace} --- add a new entry or change an existing one}
				855
				856	\paragraph{Abbreviations:} \verb\|add\|, \verb\|a\|; \verb\|change\|, \verb\|chg\|;
				857	\verb\|replace\|, \verb\|repl\|.
				858
				859	\paragraph{Description:} These commands create new neighbour records
				860	or update existing ones.
				861
				862	\paragraph{Arguments:}
				863
				864	\begin{itemize}
				865	\item \verb\|to ADDRESS\| (default)
				866
				867	--- the protocol address of the neighbour. It is either an IPv4 or IPv6 address.
				868
				869	\item \verb\|dev NAME\|
				870
				871	--- the interface to which this neighbour is attached.
				872
				873
				874	\item \verb\|lladdr LLADDRESS\|
				875
				876	--- the link layer address of the neighbour. \verb\|LLADDRESS\| can also be
				877	\verb\|null\|.
				878
				879	\item \verb\|nud NUD_STATE\|
				880
				881	--- the state of the neighbour entry. \verb\|nud\| is an abbreviation for ``Neighbour
				882	Unreachability Detection''. The state can take one of the following values:
				883
				884	\begin{enumerate}
				885	\item \verb\|permanent\| --- the neighbour entry is valid forever and can only be removed
				886	administratively.
				887	\item \verb\|noarp\| --- the neighbour entry is valid. No attempts to validate
				888	this entry will be made but it can be removed when its lifetime expires.
				889	\item \verb\|reachable\| --- the neighbour entry is valid until the reachability
				890	timeout expires.
				891	\item \verb\|stale\| --- the neighbour entry is valid but suspicious.
				892	This option to \verb\|ip neigh\| does not change the neighbour state if
				893	it was valid and the address is not changed by this command.
				894	\end{enumerate}
				895
				896	\end{itemize}
				897
				898	\paragraph{Examples:}
				899	\begin{itemize}
				900	\item \verb\|ip neigh add 10.0.0.3 lladdr 0:0:0:0:0:1 dev eth0 nud perm\|
				901
				902	--- add a permanent ARP entry for the neighbour 10.0.0.3 on the device \verb\|eth0\|.
				903
				904	\item \verb\|ip neigh chg 10.0.0.3 dev eth0 nud reachable\|
				905
				906	--- change its state to \verb\|reachable\|.
				907	\end{itemize}
				908
				909
				910	\subsection{{\tt ip neighbour delete} --- delete a neighbour entry}
				911
				912	\paragraph{Abbreviations:} \verb\|delete\|, \verb\|del\|, \verb\|d\|.
				913
				914	\paragraph{Description:} This command invalidates a neighbour entry.
				915
				916	\paragraph{Arguments:} The arguments are the same as with \verb\|ip neigh add\|,
				917	except that \verb\|lladdr\| and \verb\|nud\| are ignored.
				918
				919
				920	\paragraph{Example:}
				921	\begin{itemize}
				922	\item \verb\|ip neigh del 10.0.0.3 dev eth0\|
				923
				924	--- invalidate an ARP entry for the neighbour 10.0.0.3 on the device \verb\|eth0\|.
				925
				926	\end{itemize}
				927
				928	\begin{NB}
				929	The deleted neighbour entry will not disappear from the tables
				930	immediately. If it is in use it cannot be deleted until the last
				931	client releases it. Otherwise it will be destroyed during
				932	the next garbage collection.
				933	\end{NB}
				934
				935
				936	\paragraph{Warning:} Attempts to delete or manually change
				937	a \verb\|noarp\| entry created by the kernel may result in unpredictable behaviour.
				938	Particularly, the kernel may try to resolve this address even
				939	on a \verb\|NOARP\| interface or if the address is multicast or broadcast.
				940
				941
				942	\subsection{{\tt ip neighbour show} --- list neighbour entries}
				943
				944	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|sh\|, \verb\|ls\|.
				945
				946	\paragraph{Description:}This command displays neighbour tables.
				947
				948	\paragraph{Arguments:}
				949
				950	\begin{itemize}
				951
				952	\item \verb\|to ADDRESS\| (default)
				953
				954	--- the prefix selecting the neighbours to list.
				955
				956	\item \verb\|dev NAME\|
				957
				958	--- only list the neighbours attached to this device.
				959
				960	\item \verb\|unused\|
				961
				962	--- only list neighbours which are not currently in use.
				963
				964	\item \verb\|nud NUD_STATE\|
				965
				966	--- only list neighbour entries in this state. \verb\|NUD_STATE\| takes
				967	values listed below or the special value \verb\|all\| which means all states.
				968	This option may occur more than once. If this option is absent, \verb\|ip\|
				969	lists all entries except for \verb\|none\| and \verb\|noarp\|.
				970
				971	\end{itemize}
				972
				973
				974	\paragraph{Output format:}
				975
				976	\begin{verbatim}
				977	kuznet@alisa:~ $ ip neigh ls
				978	:: dev lo lladdr 00:00:00:00:00:00 nud noarp
				979	fe80::200:cff:fe76:3f85 dev eth0 lladdr 00:00:0c:76:3f:85 router \
				980	nud stale
				981	0.0.0.0 dev lo lladdr 00:00:00:00:00:00 nud noarp
				982	193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 nud reachable
				983	193.233.7.85 dev eth0 lladdr 00:e0:1e:63:39:00 nud stale
				984	kuznet@alisa:~ $
				985	\end{verbatim}
				986
				987	The first word of each line is the protocol address of the neighbour.
				988	Then the device name follows. The rest of the line describes the contents of
				989	the neighbour entry identified by the pair (device, address).
				990
				991	\verb\|lladdr\| is the link layer address of the neighbour.
				992
				993	\verb\|nud\| is the state of the ``neighbour unreachability detection'' machine
				994	for this entry. The detailed description of the neighbour
				995	state machine can be found in~\cite{RFC-NDISC}. Here is the full list
				996	of the states with short descriptions:
				997
				998	\begin{enumerate}
				999	\item\verb\|none\| --- the state of the neighbour is void.
				1000	\item\verb\|incomplete\| --- the neighbour is in the process of resolution.
				1001	\item\verb\|reachable\| --- the neighbour is valid and apparently reachable.
				1002	\item\verb\|stale\| --- the neighbour is valid, but is probably already
				1003	unreachable, so the kernel will try to check it at the first transmission.
				1004	\item\verb\|delay\| --- a packet has been sent to the stale neighbour and the kernel is waiting
				1005	for confirmation.
				1006	\item\verb\|probe\| --- the delay timer expired but no confirmation was received.
				1007	The kernel has started to probe the neighbour with ARP/NDISC messages.
				1008	\item\verb\|failed\| --- resolution has failed.
				1009	\item\verb\|noarp\| --- the neighbour is valid. No attempts to check the entry
				1010	will be made.
				1011	\item\verb\|permanent\| --- it is a \verb\|noarp\| entry, but only the administrator
				1012	may remove the entry from the neighbour table.
				1013	\end{enumerate}
				1014
				1015	The link layer address is valid in all states except for \verb\|none\|,
				1016	\verb\|failed\| and \verb\|incomplete\|.
				1017
				1018	IPv6 neighbours can be marked with the additional flag \verb\|router\|
				1019	which means that the neighbour introduced itself as an IPv6 router~\cite{RFC-NDISC}.
				1020
				1021	\paragraph{Statistics:} The \verb\|-statistics\| option displays some usage
				1022	statistics, f.e.\
				1023
				1024	\begin{verbatim}
				1025	kuznet@alisa:~ $ ip -s n ls 193.233.7.254
				1026	193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 ref 5 used 12/13/20 \
				1027	nud reachable
				1028	kuznet@alisa:~ $
				1029	\end{verbatim}
				1030
				1031	Here \verb\|ref\| is the number of users of this entry
				1032	and \verb\|used\| is a triplet of time intervals in seconds
				1033	separated by slashes. In this case they show that:
				1034
				1035	\begin{enumerate}
				1036	\item the entry was used 12 seconds ago.
				1037	\item the entry was confirmed 13 seconds ago.
				1038	\item the entry was updated 20 seconds ago.
				1039	\end{enumerate}
				1040
				1041	\subsection{{\tt ip neighbour flush} --- flush neighbour entries}
				1042
				1043	\paragraph{Abbreviations:} \verb\|flush\|, \verb\|f\|.
				1044
				1045	\paragraph{Description:}This command flushes neighbour tables, selecting
				1046	entries to flush by some criteria.
				1047
				1048	\paragraph{Arguments:} This command has the same arguments as \verb\|show\|.
				1049	The differences are that it does not run when no arguments are given,
				1050	and that the default neighbour states to be flushed do not include
				1051	\verb\|permanent\| and \verb\|noarp\|.
				1052
				1053
				1054	\paragraph{Statistics:} With the \verb\|-statistics\| option, the command
				1055	becomes verbose. It prints out the number of deleted neighbours and the number
				1056	of rounds made to flush the neighbour table. If the option is given
				1057	twice, \verb\|ip neigh flush\| also dumps all the deleted neighbours
				1058	in the format described in the previous subsection.
				1059
				1060	\paragraph{Example:}
				1061	\begin{verbatim}
				1062	netadm@alisa:~ # ip -s -s n f 193.233.7.254
				1063	193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 ref 5 used 12/13/20 \
				1064	nud reachable
				1065
				1066	* Round 1, deleting 1 entries *
				1067	* Flush is complete after 1 round *
				1068	netadm@alisa:~ #
				1069	\end{verbatim}
				1070
				1071
				1072	\section{{\tt ip route} --- routing table management}
				1073	\label{IP-ROUTE}
				1074
				1075	\paragraph{Abbreviations:} \verb\|route\|, \verb\|ro\|, \verb\|r\|.
				1076
				1077	\paragraph{Object:} \verb\|route\| entries in the kernel routing tables keep
				1078	information about paths to other networked nodes.
				1079
				1080	Each route entry has a {\em key\/} consisting of a {\em prefix\/}
				1081	(i.e.\ a pair containing a network address and the length of its mask) and,
				1082	optionally, the TOS value. An IP packet matches the route if the highest
				1083	bits of its destination address are equal to the route prefix at least
				1084	up to the prefix length and if the TOS of the route is zero or equal to
				1085	the TOS of the packet.
				1086
				1087	If several routes match the packet, the following pruning rules
				1088	are used to select the best one (see~\cite{RFC1812}):
				1089	\begin{enumerate}
				1090	\item The longest matching prefix is selected. All shorter ones
				1091	are dropped.
				1092
				1093	\item If the TOS of some route with the longest prefix is equal to the TOS
				1094	of the packet, the routes with different TOS are dropped.
				1095
				1096	If no exact TOS match was found and routes with TOS=0 exist,
				1097	the rest of the routes are pruned.
				1098
				1099	Otherwise, the route lookup fails.
				1100
				1101	\item If several routes remain after the previous steps, then
				1102	the routes with the best preference values are selected.
				1103
				1104	\item If we still have several routes, then the {\em first\/} of them
				1105	is selected.
				1106
				1107	\begin{NB}
				1108	Note the ambiguity of the last step. Unfortunately, Linux
				1109	historically allows such a bizarre situation. The sense of the
				1110	word ``first'' depends on the order of route additions and it is practically
				1111	impossible to maintain a bundle of such routes in this order.
				1112	\end{NB}
				1113
				1114	For simplicity we will limit ourselves to the case where such a situation
				1115	is impossible and routes are uniquely identified by the triplet
				1116	\{prefix, tos, preference\}. Actually, it is impossible to create
				1117	non-unique routes with \verb\|ip\| commands described in this section.
				1118
				1119	One useful exception to this rule is the default route on non-forwarding
				1120	hosts. It is ``officially'' allowed to have several fallback routes
				1121	when several routers are present on directly connected networks.
				1122	In this case, Linux-2.2 makes ``dead gateway detection''~\cite{RFC1122}
				1123	controlled by neighbour unreachability detection and by advice
				1124	from transport protocols to select a working router, so the order
				1125	of the routes is not essential. However, in this case,
				1126	fiddling with default routes manually is not recommended. Use the Router Discovery
				1127	protocol (see Appendix~\ref{EXAMPLE-SETUP}, p.\pageref{EXAMPLE-SETUP})
				1128	instead. Actually, Linux-2.2 IPv6 does not give user level applications
				1129	any access to default routes.
				1130	\end{enumerate}
				1131
				1132	Certainly, the steps above are not performed exactly
				1133	in this sequence. Instead, the routing table in the kernel is kept
				1134	in some data structure to achieve the final result
				1135	with minimal cost. However, not depending on a particular
				1136	routing algorithm implemented in the kernel, we can summarize
				1137	the statements above as: a route is identified by the triplet
				1138	\{prefix, tos, preference\}. This {\em key\/} lets us locate
				1139	the route in the routing table.
				1140
				1141	\paragraph{Route attributes:} Each route key refers to a routing
				1142	information record containing
				1143	the data required to deliver IP packets (f.e.\ output device and
				1144	next hop router) and some optional attributes (f.e.\ the path MTU or
				1145	the preferred source address when communicating with this destination).
				1146	These attributes are described in the following subsection.
				1147
				1148	\paragraph{Route types:} \label{IP-ROUTE-TYPES}
				1149	It is important that the set
				1150	of required and optional attributes depends on the route {\em type\/}.
				1151	The most important route type
				1152	is \verb\|unicast\|. It describes real paths to other hosts.
				1153	As a rule, common routing tables contain only such routes. However,
				1154	there are other types of routes with different semantics. The
				1155	full list of types understood by Linux-2.2 is:
				1156	\begin{itemize}
				1157	\item \verb\|unicast\| --- the route entry describes real paths to the
				1158	destinations covered by the route prefix.
				1159	\item \verb\|unreachable\| --- these destinations are unreachable. Packets
				1160	are discarded and the ICMP message {\em host unreachable\/} is generated.
				1161	The local senders get an \verb\|EHOSTUNREACH\| error.
				1162	\item \verb\|blackhole\| --- these destinations are unreachable. Packets
				1163	are discarded silently. The local senders get an \verb\|EINVAL\| error.
				1164	\item \verb\|prohibit\| --- these destinations are unreachable. Packets
				1165	are discarded and the ICMP message {\em communication administratively
				1166	prohibited\/} is generated. The local senders get an \verb\|EACCES\| error.
				1167	\item \verb\|local\| --- the destinations are assigned to this
				1168	host. The packets are looped back and delivered locally.
				1169	\item \verb\|broadcast\| --- the destinations are broadcast addresses.
				1170	The packets are sent as link broadcasts.
				1171	\item \verb\|throw\| --- a special control route used together with policy
				1172	rules (see sec.\ref{IP-RULE}, p.\pageref{IP-RULE}). If such a route is selected, lookup
				1173	in this table is terminated pretending that no route was found.
				1174	Without policy routing it is equivalent to the absence of the route in the routing
				1175	table. The packets are dropped and the ICMP message {\em net unreachable\/}
				1176	is generated. The local senders get an \verb\|ENETUNREACH\| error.
				1177	\item \verb\|nat\| --- a special NAT route. Destinations covered by the prefix
				1178	are considered to be dummy (or external) addresses which require translation
				1179	to real (or internal) ones before forwarding. The addresses to translate to
				1180	are selected with the attribute \verb\|via\|. More about NAT is
				1181	in Appendix~\ref{ROUTE-NAT}, p.\pageref{ROUTE-NAT}.
				1182	\item \verb\|anycast\| --- ({\em not implemented\/}) the destinations are
				1183	{\em anycast\/} addresses assigned to this host. They are mainly equivalent
				1184	to \verb\|local\| with one difference: such addresses are invalid when used
				1185	as the source address of any packet.
				1186	\item \verb\|multicast\| --- a special type used for multicast routing.
				1187	It is not present in normal routing tables.
				1188	\end{itemize}
				1189
				1190	\paragraph{Route tables:} Linux-2.2 can pack routes into several routing
				1191	tables identified by a number in the range from 1 to 255 or by
				1192	name from the file \verb\|/etc/iproute2/rt_tables\|. By default all normal
				1193	routes are inserted into the \verb\|main\| table (ID 254) and the kernel only uses
				1194	this table when calculating routes.
				1195
				1196	Actually, one other table always exists, which is invisible but
				1197	even more important. It is the \verb\|local\| table (ID 255). This table
				1198	consists of routes for local and broadcast addresses. The kernel maintains
				1199	this table automatically and the administrator usually need not modify it
				1200	or even look at it.
				1201
				1202	The multiple routing tables enter the game when {\em policy routing\/}
				1203	is used. See sec.\ref{IP-RULE}, p.\pageref{IP-RULE}.
				1204	In this case, the table identifier effectively becomes
				1205	one more parameter, which should be added to the triplet
				1206	\{prefix, tos, preference\} to uniquely identify the route.
				1207
				1208
				1209	\subsection{{\tt ip route add} --- add a new route\\
				1210	{\tt ip route change} --- change a route\\
				1211	{\tt ip route replace} --- change a route or add a new one}
				1212	\label{IP-ROUTE-ADD}
				1213
				1214	\paragraph{Abbreviations:} \verb\|add\|, \verb\|a\|; \verb\|change\|, \verb\|chg\|;
				1215	\verb\|replace\|, \verb\|repl\|.
				1216
				1217
				1218	\paragraph{Arguments:}
				1219	\begin{itemize}
				1220	\item \verb\|to PREFIX\| or \verb\|to TYPE PREFIX\| (default)
				1221
				1222	--- the destination prefix of the route. If \verb\|TYPE\| is omitted,
				1223	\verb\|ip\| assumes type \verb\|unicast\|. Other values of \verb\|TYPE\|
				1224	are listed above. \verb\|PREFIX\| is an IP or IPv6 address optionally followed
				1225	by a slash and the prefix length. If the length of the prefix is missing,
				1226	\verb\|ip\| assumes a full-length host route. There is also a special
				1227	\verb\|PREFIX\| --- \verb\|default\| --- which is equivalent to IP \verb\|0/0\| or
				1228	to IPv6 \verb\|::/0\|.
				1229
				1230	\item \verb\|tos TOS\| or \verb\|dsfield TOS\|
				1231
				1232	--- the Type Of Service (TOS) key. This key has no associated mask and
				1233	the longest match is understood as: First, compare the TOS
				1234	of the route and of the packet. If they are not equal, then the packet
				1235	may still match a route with a zero TOS. \verb\|TOS\| is either an 8 bit hexadecimal
				1236	number or an identifier from {\tt /etc/iproute2/rt\_dsfield}.
				1237
				1238
				1239	\item \verb\|metric NUMBER\| or \verb\|preference NUMBER\|
				1240
				1241	--- the preference value of the route. \verb\|NUMBER\| is an arbitrary 32bit number.
				1242
				1243	\item \verb\|table TABLEID\|
				1244
				1245	--- the table to add this route to.
				1246	\verb\|TABLEID\| may be a number or a string from the file
				1247	\verb\|/etc/iproute2/rt_tables\|. If this parameter is omitted,
				1248	\verb\|ip\| assumes the \verb\|main\| table, with the exception of
				1249	\verb\|local\|, \verb\|broadcast\| and \verb\|nat\| routes, which are
				1250	put into the \verb\|local\| table by default.
				1251
				1252	\item \verb\|dev NAME\|
				1253
				1254	--- the output device name.
				1255
				1256	\item \verb\|via ADDRESS\|
				1257
				1258	--- the address of the nexthop router. Actually, the sense of this field depends
				1259	on the route type. For normal \verb\|unicast\| routes it is either the true nexthop
				1260	router or, if it is a direct route installed in BSD compatibility mode,
				1261	it can be a local address of the interface.
				1262	For NAT routes it is the first address of the block of translated IP destinations.
				1263
				1264	\item \verb\|src ADDRESS\|
				1265
				1266	--- the source address to prefer when sending to the destinations
				1267	covered by the route prefix.
				1268
				1269	\item \verb\|realm REALMID\|
				1270
				1271	--- the realm to which this route is assigned.
				1272	\verb\|REALMID\| may be a number or a string from the file
				1273	\verb\|/etc/iproute2/rt_realms\|. Sec.\ref{RT-REALMS} (p.\pageref{RT-REALMS})
				1274	contains more information on realms.
				1275
				1276	\item \verb\|mtu MTU\| or \verb\|mtu lock MTU\|
				1277
				1278	--- the MTU along the path to the destination. If the modifier \verb\|lock\| is
				1279	not used, the MTU may be updated by the kernel due to Path MTU Discovery.
				1280	If the modifier \verb\|lock\| is used, no path MTU discovery will be tried,
				1281	all packets will be sent without the DF bit in the IPv4 case
				1282	or fragmented to MTU for IPv6.
				1283
				1284	\item \verb\|window NUMBER\|
				1285
				1286	--- the maximal window for TCP to advertise to these destinations,
				1287	measured in bytes. It limits maximal data bursts that our TCP
				1288	peers are allowed to send to us.
				1289
				1290	\item \verb\|rtt NUMBER\|
				1291
				1292	--- the initial RTT (``Round Trip Time'') estimate.
				1293
				1294
				1295	\item \verb\|rttvar NUMBER\|
				1296
				1297	--- \threeonly the initial RTT variance estimate.
				1298
				1299
				1300	\item \verb\|ssthresh NUMBER\|
				1301
				1302	--- \threeonly an estimate for the initial slow start threshold.
				1303
				1304
				1305	\item \verb\|cwnd NUMBER\|
				1306
				1307	--- \threeonly the clamp for congestion window. It is ignored if the \verb\|lock\|
				1308	flag is not used.
				1309
				1310
				1311	\item \verb\|advmss NUMBER\|
				1312
				1313	--- \threeonly the MSS (``Maximal Segment Size'') to advertise to these
				1314	destinations when establishing TCP connections. If it is not given,
				1315	Linux uses a default value calculated from the first hop device MTU.
				1316
				1317	\begin{NB}
				1318	If the path to these destinations is asymmetric, this guess may be wrong.
				1319	\end{NB}
				1320
				1321	\item \verb\|reordering NUMBER\|
				1322
				1323	--- \threeonly Maximal reordering on the path to this destination.
				1324	If it is not given, Linux uses the value selected with the \verb\|sysctl\|
				1325	variable \verb\|net/ipv4/tcp_reordering\|.
				1326
				1327	\item \verb\|hoplimit NUMBER\|
				1328
				1329	--- [2.5.74+ only] Maximum number of hops on the path to this destination.
				1330	The default is the value selected with the \verb\|sysctl\| variable
				1331	\verb\|net/ipv4/ip_default_ttl\|.
				1332
				1333	\item \verb\|initcwnd NUMBER\|
				1334	--- [2.5.70+ only] Initial congestion window size for connections to
				1335	this destination. Actual window size is this value multiplied by the
				1336	MSS (``Maximal Segment Size'') for the same connection. The default is
				1337	zero, meaning to use the values specified in~\cite{RFC2414}.
				1338
				1339	\item \verb\|initrwnd NUMBER\|
				1340
				1341	--- [2.6.33+ only] Initial receive window size for connections to
				1342	this destination. The actual window size is this value multiplied
				1343	by the MSS (``Maximal Segment Size'') of the connection. The default
				1344	value is zero, meaning to use the Slow Start value.
				1345
				1346	\item \verb\|nexthop NEXTHOP\|
				1347
				1348	--- the nexthop of a multipath route. \verb\|NEXTHOP\| is a complex value
				1349	with its own syntax similar to the top level argument lists:
				1350	\begin{itemize}
				1351	\item \verb\|via ADDRESS\| is the nexthop router.
				1352	\item \verb\|dev NAME\| is the output device.
				1353	\item \verb\|weight NUMBER\| is a weight for this element of a multipath
				1354	route reflecting its relative bandwidth or quality.
				1355	\end{itemize}
				1356
				1357	\item \verb\|scope SCOPE_VAL\|
				1358
				1359	--- the scope of the destinations covered by the route prefix.
				1360	\verb\|SCOPE_VAL\| may be a number or a string from the file
				1361	\verb\|/etc/iproute2/rt_scopes\|.
				1362	If this parameter is omitted,
				1363	\verb\|ip\| assumes scope \verb\|global\| for all gatewayed \verb\|unicast\|
				1364	routes, scope \verb\|link\| for direct \verb\|unicast\| and \verb\|broadcast\| routes
				1365	and scope \verb\|host\| for \verb\|local\| routes.
				1366
				1367	\item \verb\|protocol RTPROTO\|
				1368
				1369	--- the routing protocol identifier of this route.
				1370	\verb\|RTPROTO\| may be a number or a string from the file
				1371	\verb\|/etc/iproute2/rt_protos\|. If the routing protocol ID is
				1372	not given, \verb\|ip\| assumes protocol \verb\|boot\| (i.e.\
				1373	it assumes the route was added by someone who doesn't
				1374	understand what they are doing). Several protocol values have a fixed interpretation.
				1375	Namely:
				1376	\begin{itemize}
				1377	\item \verb\|redirect\| --- the route was installed due to an ICMP redirect.
				1378	\item \verb\|kernel\| --- the route was installed by the kernel during
				1379	autoconfiguration.
				1380	\item \verb\|boot\| --- the route was installed during the bootup sequence.
				1381	If a routing daemon starts, it will purge all of them.
				1382	\item \verb\|static\| --- the route was installed by the administrator
				1383	to override dynamic routing. Routing daemons will respect them
				1384	and, probably, even advertise them to its peers.
				1385	\item \verb\|ra\| --- the route was installed by the Router Discovery protocol.
				1386	\end{itemize}
				1387	The rest of the values are not reserved and the administrator is free
				1388	to assign (or not to assign) protocol tags. At least, routing
				1389	daemons should take care of setting some unique protocol values,
				1390	f.e.\ as they are assigned in \verb\|rtnetlink.h\| or in the \verb\|rt_protos\|
				1391	database.
				1392
				1393
				1394	\item \verb\|onlink\|
				1395
				1396	--- pretend that the nexthop is directly attached to this link,
				1397	even if it does not match any interface prefix. One application of this
				1398	option may be found in~\cite{IP-TUNNELS}.
				1399
				1400	\end{itemize}
				1401
				1402
				1403	\begin{NB}
				1404	Actually there are more commands: \verb\|prepend\| does the same
				1405	thing as classic \verb\|route add\|, i.e.\ adds a route, even if another
				1406	route to the same destination exists. Its opposite case is \verb\|append\|,
				1407	which adds the route to the end of the list. Avoid these
				1408	features.
				1409	\end{NB}
				1410	\begin{NB}
				1411	More sad news, IPv6 only understands the \verb\|append\| command correctly.
				1412	All the others are translated into \verb\|append\| commands. Certainly,
				1413	this will change in the future.
				1414	\end{NB}
				1415
				1416	\paragraph{Examples:}
				1417	\begin{itemize}
				1418	\item add a plain route to network 10.0.0/24 via gateway 193.233.7.65
				1419	\begin{verbatim}
				1420	ip route add 10.0.0/24 via 193.233.7.65
				1421	\end{verbatim}
				1422	\item change it to a direct route via the \verb\|dummy\| device
				1423	\begin{verbatim}
				1424	ip ro chg 10.0.0/24 dev dummy
				1425	\end{verbatim}
				1426	\item add a default multipath route splitting the load between \verb\|ppp0\|
				1427	and \verb\|ppp1\|
				1428	\begin{verbatim}
				1429	ip route add default scope global nexthop dev ppp0 \
				1430	nexthop dev ppp1
				1431	\end{verbatim}
				1432	Note the scope value. It is not necessary but it informs the kernel
				1433	that this route is gatewayed rather than direct. Actually, if you
				1434	know the addresses of remote endpoints it would be better to use the
				1435	\verb\|via\| parameter.
				1436	\item announce that the address 192.203.80.144 is not a real one, but
				1437	should be translated to 193.233.7.83 before forwarding
				1438	\begin{verbatim}
				1439	ip route add nat 192.203.80.144 via 193.233.7.83
				1440	\end{verbatim}
				1441	Backward translation is set up with policy rules described
				1442	in the following section (sec.\ref{IP-RULE}, p.\pageref{IP-RULE}).
				1443	\end{itemize}
				1444
				1445	\subsection{{\tt ip route delete} --- delete a route}
				1446
				1447	\paragraph{Abbreviations:} \verb\|delete\|, \verb\|del\|, \verb\|d\|.
				1448
				1449	\paragraph{Arguments:} \verb\|ip route del\| has the same arguments as
				1450	\verb\|ip route add\|, but their semantics are a bit different.
				1451
				1452	Key values (\verb\|to\|, \verb\|tos\|, \verb\|preference\| and \verb\|table\|)
				1453	select the route to delete. If optional attributes are present, \verb\|ip\|
				1454	verifies that they coincide with the attributes of the route to delete.
				1455	If no route with the given key and attributes was found, \verb\|ip route del\|
				1456	fails.
				1457	\begin{NB}
				1458	Linux-2.0 had the option to delete a route selected only by prefix address,
				1459	ignoring its length (i.e.\ netmask). This option no longer exists
				1460	because it was ambiguous. However, look at {\tt ip route flush}
				1461	(sec.\ref{IP-ROUTE-FLUSH}, p.\pageref{IP-ROUTE-FLUSH}) which
				1462	provides similar and even richer functionality.
				1463	\end{NB}
				1464
				1465	\paragraph{Example:}
				1466	\begin{itemize}
				1467	\item delete the multipath route created by the command in the previous subsection
				1468	\begin{verbatim}
				1469	ip route del default scope global nexthop dev ppp0 \
				1470	nexthop dev ppp1
				1471	\end{verbatim}
				1472	\end{itemize}
				1473
				1474
				1475
				1476	\subsection{{\tt ip route show} --- list routes}
				1477
				1478	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|sh\|, \verb\|ls\|, \verb\|l\|.
				1479
				1480	\paragraph{Description:} the command displays the contents of the routing tables
				1481	or the route(s) selected by some criteria.
				1482
				1483
				1484	\paragraph{Arguments:}
				1485	\begin{itemize}
				1486	\item \verb\|to SELECTOR\| (default)
				1487
				1488	--- only select routes from the given range of destinations. \verb\|SELECTOR\|
				1489	consists of an optional modifier (\verb\|root\|, \verb\|match\| or \verb\|exact\|)
				1490	and a prefix. \verb\|root PREFIX\| selects routes with prefixes not shorter
				1491	than \verb\|PREFIX\|. F.e.\ \verb\|root 0/0\| selects the entire routing table.
				1492	\verb\|match PREFIX\| selects routes with prefixes not longer than
				1493	\verb\|PREFIX\|. F.e.\ \verb\|match 10.0/16\| selects \verb\|10.0/16\|,
				1494	\verb\|10/8\| and \verb\|0/0\|, but it does not select \verb\|10.1/16\| and
				1495	\verb\|10.0.0/24\|. And \verb\|exact PREFIX\| (or just \verb\|PREFIX\|)
				1496	selects routes with this exact prefix. If none of these options
				1497	is present, \verb\|ip\| assumes \verb\|root 0/0\|, i.e.\ it lists the entire table.
				1498
				1499
				1500	\item \verb\|tos TOS\| or \verb\|dsfield TOS\|
				1501
				1502	--- only select routes with the given TOS.
				1503
				1504
				1505	\item \verb\|table TABLEID\|
				1506
				1507	--- show the routes from this table(s). The default setting is to show
				1508	\verb\|table\| \verb\|main\|. \verb\|TABLEID\| may either be the ID of a real table
				1509	or one of the special values:
				1510	\begin{itemize}
				1511	\item \verb\|all\| --- list all of the tables.
				1512	\item \verb\|cache\| --- dump the routing cache.
				1513	\end{itemize}
				1514	\begin{NB}
				1515	IPv6 has a single table. However, splitting it into \verb\|main\|, \verb\|local\|
				1516	and \verb\|cache\| is emulated by the \verb\|ip\| utility.
				1517	\end{NB}
				1518
				1519	\item \verb\|cloned\| or \verb\|cached\|
				1520
				1521	--- list cloned routes i.e.\ routes which were dynamically forked from
				1522	other routes because some route attribute (f.e.\ MTU) was updated.
				1523	Actually, it is equivalent to \verb\|table cache\|.
				1524
				1525	\item \verb\|from SELECTOR\|
				1526
				1527	--- the same syntax as for \verb\|to\|, but it binds the source address range
				1528	rather than destinations. Note that the \verb\|from\| option only works with
				1529	cloned routes.
				1530
				1531	\item \verb\|protocol RTPROTO\|
				1532
				1533	--- only list routes of this protocol.
				1534
				1535
				1536	\item \verb\|scope SCOPE_VAL\|
				1537
				1538	--- only list routes with this scope.
				1539
				1540	\item \verb\|type TYPE\|
				1541
				1542	--- only list routes of this type.
				1543
				1544	\item \verb\|dev NAME\|
				1545
				1546	--- only list routes going via this device.
				1547
				1548	\item \verb\|via PREFIX\|
				1549
				1550	--- only list routes going via the nexthop routers selected by \verb\|PREFIX\|.
				1551
				1552	\item \verb\|src PREFIX\|
				1553
				1554	--- only list routes with preferred source addresses selected
				1555	by \verb\|PREFIX\|.
				1556
				1557	\item \verb\|realm REALMID\| or \verb\|realms FROMREALM/TOREALM\|
				1558
				1559	--- only list routes with these realms.
				1560
				1561	\end{itemize}
				1562
				1563	\paragraph{Examples:} Let us count routes of protocol \verb\|gated/bgp\|
				1564	on a router:
				1565	\begin{verbatim}
				1566	kuznet@amber:~ $ ip ro ls proto gated/bgp \| wc
				1567	1413 9891 79010
				1568	kuznet@amber:~ $
				1569	\end{verbatim}
				1570	To count the size of the routing cache, we have to use the \verb\|-o\| option
				1571	because cached attributes can take more than one line of output:
				1572	\begin{verbatim}
				1573	kuznet@amber:~ $ ip -o ro ls cloned \| wc
				1574	159 2543 18707
				1575	kuznet@amber:~ $
				1576	\end{verbatim}
				1577
				1578
				1579	\paragraph{Output format:} The output of this command consists
				1580	of per route records separated by line feeds.
				1581	However, some records may consist
				1582	of more than one line: particularly, this is the case when the route
				1583	is cloned or you requested additional statistics. If the
				1584	\verb\|-o\| option was given, then line feeds separating lines inside
				1585	records are replaced with the backslash sign.
				1586
				1587	The output has the same syntax as arguments given to {\tt ip route add},
				1588	so that it can be understood easily. F.e.\
				1589	\begin{verbatim}
				1590	kuznet@amber:~ $ ip ro ls 193.233.7/24
				1591	193.233.7.0/24 dev eth0 proto gated/conn scope link \
				1592	src 193.233.7.65 realms inr.ac
				1593	kuznet@amber:~ $
				1594	\end{verbatim}
				1595
				1596	If you list cloned entries, the output contains other attributes which
				1597	are evaluated during route calculation and updated during route
				1598	lifetime. An example of the output is:
				1599	\begin{verbatim}
				1600	kuznet@amber:~ $ ip ro ls 193.233.7.82 tab cache
				1601	193.233.7.82 from 193.233.7.82 dev eth0 src 193.233.7.65 \
				1602	realms inr.ac/inr.ac
				1603	cache <src-direct,redirect> mtu 1500 rtt 300 iif eth0
				1604	193.233.7.82 dev eth0 src 193.233.7.65 realms inr.ac
				1605	cache mtu 1500 rtt 300
				1606	kuznet@amber:~ $
				1607	\end{verbatim}
				1608	\begin{NB}
				1609	\label{NB-strange-route}
				1610	The route looks a bit strange, doesn't it? Did you notice that
				1611	it is a path from 193.233.7.82 back to 193.233.7.82? Well, you will
				1612	see in the section on \verb\|ip route get\| (p.\pageref{NB-nature-of-strangeness})
				1613	how it appeared.
				1614	\end{NB}
				1615	The second line, starting with the word \verb\|cache\|, shows
				1616	additional attributes which normal routes do not possess.
				1617	Cached flags are summarized in angle brackets:
				1618	\begin{itemize}
				1619	\item \verb\|local\| --- packets are delivered locally.
				1620	It stands for loopback unicast routes, for broadcast routes
				1621	and for multicast routes, if this host is a member of the corresponding
				1622	group.
				1623
				1624	\item \verb\|reject\| --- the path is bad. Any attempt to use it results
				1625	in an error. See attribute \verb\|error\| below (p.\pageref{IP-ROUTE-GET-error}).
				1626
				1627	\item \verb\|mc\| --- the destination is multicast.
				1628
				1629	\item \verb\|brd\| --- the destination is broadcast.
				1630
				1631	\item \verb\|src-direct\| --- the source is on a directly connected
				1632	interface.
				1633
				1634	\item \verb\|redirected\| --- the route was created by an ICMP Redirect.
				1635
				1636	\item \verb\|redirect\| --- packets going via this route will
				1637	trigger an ICMP redirect.
				1638
				1639	\item \verb\|fastroute\| --- the route is eligible to be used for fastroute.
				1640
				1641	\item \verb\|equalize\| --- make packet by packet randomization
				1642	along this path.
				1643
				1644	\item \verb\|dst-nat\| --- the destination address requires translation.
				1645
				1646	\item \verb\|src-nat\| --- the source address requires translation.
				1647
				1648	\item \verb\|masq\| --- the source address requires masquerading.
				1649	This feature disappeared in linux-2.4.
				1650
				1651	\item \verb\|notify\| --- ({\em not implemented}) change/deletion
				1652	of this route will trigger RTNETLINK notification.
				1653	\end{itemize}
				1654
				1655	Then some optional attributes follow:
				1656	\begin{itemize}
				1657	\item \verb\|error\| --- on \verb\|reject\| routes it is the error code
				1658	returned to local senders when they try to use this route.
				1659	These error codes are translated into ICMP error codes, sent to remote
				1660	senders, according to the rules described above in the subsection
				1661	devoted to route types (p.\pageref{IP-ROUTE-TYPES}).
				1662	\label{IP-ROUTE-GET-error}
				1663
				1664	\item \verb\|expires\| --- this entry will expire after this timeout.
				1665
				1666	\item \verb\|iif\| --- the packets for this path are expected to arrive
				1667	on this interface.
				1668	\end{itemize}
				1669
				1670	\paragraph{Statistics:} With the \verb\|-statistics\| option, more
				1671	information about this route is shown:
				1672	\begin{itemize}
				1673	\item \verb\|users\| --- the number of users of this entry.
				1674	\item \verb\|age\| --- shows when this route was last used.
				1675	\item \verb\|used\| --- the number of lookups of this route since its creation.
				1676	\end{itemize}
				1677
				1678	\subsection{{\tt ip route save} --- save routing tables}
				1679	\label{IP-ROUTE-SAVE}
				1680
				1681	\paragraph{Description:} this command saves the contents of the routing
				1682	tables or the route(s) selected by some criteria to standard output.
				1683
				1684	\paragraph{Arguments:} \verb\|ip route save\| has the same arguments as
				1685	\verb\|ip route show\|.
				1686
				1687	\paragraph{Example:} This saves all the routes to the {\tt saved\_routes}
				1688	file:
				1689	\begin{verbatim}
				1690	dan@caffeine:~ # ip route save > saved_routes
				1691	\end{verbatim}
				1692
				1693	\paragraph{Output format:} The format of the data stream provided by
				1694	\verb\|ip route save\| is that of \verb\|rtnetlink\|. See
				1695	\verb\|rtnetlink(7)\| for more information.
				1696
				1697	\subsection{{\tt ip route restore} --- restore routing tables}
				1698	\label{IP-ROUTE-RESTORE}
				1699
				1700	\paragraph{Description:} this command restores the contents of the routing
				1701	tables according to a data stream as provided by \verb\|ip route save\| via
				1702	standard input. Note that any routes already in the table are left unchanged.
				1703	Any routes in the input stream that already exist in the tables are ignored.
				1704
				1705	\paragraph{Arguments:} This command takes no arguments.
				1706
				1707	\paragraph{Example:} This restores all routes that were saved to the
				1708	{\tt saved\_routes} file:
				1709
				1710	\begin{verbatim}
				1711	dan@caffeine:~ # ip route restore < saved_routes
				1712	\end{verbatim}
				1713
				1714	\subsection{{\tt ip route flush} --- flush routing tables}
				1715	\label{IP-ROUTE-FLUSH}
				1716
				1717	\paragraph{Abbreviations:} \verb\|flush\|, \verb\|f\|.
				1718
				1719	\paragraph{Description:} this command flushes routes selected
				1720	by some criteria.
				1721
				1722	\paragraph{Arguments:} the arguments have the same syntax and semantics
				1723	as the arguments of \verb\|ip route show\|, but routing tables are not
				1724	listed but purged. The only difference is the default action: \verb\|show\|
				1725	dumps the entire IP main routing table but \verb\|flush\| prints the helper page.
				1726	The reason for this difference does not require any explanation, does it?
				1727
				1728
				1729	\paragraph{Statistics:} With the \verb\|-statistics\| option, the command
				1730	becomes verbose. It prints out the number of deleted routes and the number
				1731	of rounds made to flush the routing table. If the option is given
				1732	twice, \verb\|ip route flush\| also dumps all the deleted routes
				1733	in the format described in the previous subsection.
				1734
				1735	\paragraph{Examples:} The first example flushes all the
				1736	gatewayed routes from the main table (f.e.\ after a routing daemon crash).
				1737	\begin{verbatim}
				1738	netadm@amber:~ # ip -4 ro flush scope global type unicast
				1739	\end{verbatim}
				1740	This option deserves to be put into a scriptlet \verb\|routef\|.
				1741	\begin{NB}
				1742	This option was described in the \verb\|route(8)\| man page borrowed
				1743	from BSD, but was never implemented in Linux.
				1744	\end{NB}
				1745
				1746	The second example flushes all IPv6 cloned routes:
				1747	\begin{verbatim}
				1748	netadm@amber:~ # ip -6 -s -s ro flush cache
				1749	3ffe:2400::220:afff:fef4:c5d1 via 3ffe:2400::220:afff:fef4:c5d1 \
				1750	dev eth0 metric 0
				1751	cache used 2 age 12sec mtu 1500 rtt 300
				1752	3ffe:2400::280:adff:feb7:8034 via 3ffe:2400::280:adff:feb7:8034 \
				1753	dev eth0 metric 0
				1754	cache used 2 age 15sec mtu 1500 rtt 300
				1755	3ffe:2400::280:c8ff:fe59:5bcc via 3ffe:2400::280:c8ff:fe59:5bcc \
				1756	dev eth0 metric 0
				1757	cache users 1 used 1 age 23sec mtu 1500 rtt 300
				1758	3ffe:2400:0:1:2a0:ccff:fe66:1878 via 3ffe:2400:0:1:2a0:ccff:fe66:1878 \
				1759	dev eth1 metric 0
				1760	cache used 2 age 20sec mtu 1500 rtt 300
				1761	3ffe:2400:0:1:a00:20ff:fe71:fb30 via 3ffe:2400:0:1:a00:20ff:fe71:fb30 \
				1762	dev eth1 metric 0
				1763	cache used 2 age 33sec mtu 1500 rtt 300
				1764	ff02::1 via ff02::1 dev eth1 metric 0
				1765	cache users 1 used 1 age 45sec mtu 1500 rtt 300
				1766
				1767	* Round 1, deleting 6 entries *
				1768	* Flush is complete after 1 round *
				1769	netadm@amber:~ # ip -6 -s -s ro flush cache
				1770	Nothing to flush.
				1771	netadm@amber:~ #
				1772	\end{verbatim}
				1773
				1774	The third example flushes BGP routing tables after a \verb\|gated\|
				1775	death.
				1776	\begin{verbatim}
				1777	netadm@amber:~ # ip ro ls proto gated/bgp \| wc
				1778	1408 9856 78730
				1779	netadm@amber:~ # ip -s ro f proto gated/bgp
				1780
				1781	* Round 1, deleting 1408 entries *
				1782	* Flush is complete after 1 round *
				1783	netadm@amber:~ # ip ro f proto gated/bgp
				1784	Nothing to flush.
				1785	netadm@amber:~ # ip ro ls proto gated/bgp
				1786	netadm@amber:~ #
				1787	\end{verbatim}
				1788
				1789
				1790	\subsection{{\tt ip route get} --- get a single route}
				1791	\label{IP-ROUTE-GET}
				1792
				1793	\paragraph{Abbreviations:} \verb\|get\|, \verb\|g\|.
				1794
				1795	\paragraph{Description:} this command gets a single route to a destination
				1796	and prints its contents exactly as the kernel sees it.
				1797
				1798	\paragraph{Arguments:}
				1799	\begin{itemize}
				1800	\item \verb\|to ADDRESS\| (default)
				1801
				1802	--- the destination address.
				1803
				1804	\item \verb\|from ADDRESS\|
				1805
				1806	--- the source address.
				1807
				1808	\item \verb\|tos TOS\| or \verb\|dsfield TOS\|
				1809
				1810	--- the Type Of Service.
				1811
				1812	\item \verb\|iif NAME\|
				1813
				1814	--- the device from which this packet is expected to arrive.
				1815
				1816	\item \verb\|oif NAME\|
				1817
				1818	--- force the output device on which this packet will be routed.
				1819
				1820	\item \verb\|connected\|
				1821
				1822	--- if no source address (option \verb\|from\|) was given, relookup
				1823	the route with the source set to the preferred address received from the first lookup.
				1824	If policy routing is used, it may be a different route.
				1825
				1826	\end{itemize}
				1827
				1828	Note that this operation is not equivalent to \verb\|ip route show\|.
				1829	\verb\|show\| shows existing routes. \verb\|get\| resolves them and
				1830	creates new clones if necessary. Essentially, \verb\|get\|
				1831	is equivalent to sending a packet along this path.
				1832	If the \verb\|iif\| argument is not given, the kernel creates a route
				1833	to output packets towards the requested destination.
				1834	This is equivalent to pinging the destination
				1835	with a subsequent {\tt ip route ls cache}, however, no packets are
				1836	actually sent. With the \verb\|iif\| argument, the kernel pretends
				1837	that a packet arrived from this interface and searches for
				1838	a path to forward the packet.
				1839
				1840	\paragraph{Output format:} This command outputs routes in the same
				1841	format as \verb\|ip route ls\|.
				1842
				1843	\paragraph{Examples:}
				1844	\begin{itemize}
				1845	\item Find a route to output packets to 193.233.7.82:
				1846	\begin{verbatim}
				1847	kuznet@amber:~ $ ip route get 193.233.7.82
				1848	193.233.7.82 dev eth0 src 193.233.7.65 realms inr.ac
				1849	cache mtu 1500 rtt 300
				1850	kuznet@amber:~ $
				1851	\end{verbatim}
				1852
				1853	\item Find a route to forward packets arriving on \verb\|eth0\|
				1854	from 193.233.7.82 and destined for 193.233.7.82:
				1855	\begin{verbatim}
				1856	kuznet@amber:~ $ ip r g 193.233.7.82 from 193.233.7.82 iif eth0
				1857	193.233.7.82 from 193.233.7.82 dev eth0 src 193.233.7.65 \
				1858	realms inr.ac/inr.ac
				1859	cache <src-direct,redirect> mtu 1500 rtt 300 iif eth0
				1860	kuznet@amber:~ $
				1861	\end{verbatim}
				1862	\begin{NB}
				1863	\label{NB-nature-of-strangeness}
				1864	This is the command that created the funny route from 193.233.7.82
				1865	looped back to 193.233.7.82 (cf.\ NB on~p.\pageref{NB-strange-route}).
				1866	Note the \verb\|redirect\| flag on it.
				1867	\end{NB}
				1868
				1869	\item Find a multicast route for packets arriving on \verb\|eth0\|
				1870	from host 193.233.7.82 and destined for multicast group 224.2.127.254
				1871	(it is assumed that a multicast routing daemon is running.
				1872	In this case, it is \verb\|pimd\|)
				1873	\begin{verbatim}
				1874	kuznet@amber:~ $ ip r g 224.2.127.254 from 193.233.7.82 iif eth0
				1875	multicast 224.2.127.254 from 193.233.7.82 dev lo \
				1876	src 193.233.7.65 realms inr.ac/cosmos
				1877	cache <mc> iif eth0 Oifs: eth1 pimreg
				1878	kuznet@amber:~ $
				1879	\end{verbatim}
				1880	This route differs from the ones seen before. It contains a ``normal'' part
				1881	and a ``multicast'' part. The normal part is used to deliver (or not to
				1882	deliver) the packet to local IP listeners. In this case the router
				1883	is not a member
				1884	of this group, so that route has no \verb\|local\| flag and only
				1885	forwards packets. The output device for such entries is always loopback.
				1886	The multicast part consists of an additional \verb\|Oifs:\| list showing
				1887	the output interfaces.
				1888	\end{itemize}
				1889
				1890
				1891	It is time for a more complicated example. Let us add an invalid
				1892	gatewayed route for a destination which is really directly connected:
				1893	\begin{verbatim}
				1894	netadm@alisa:~ # ip route add 193.233.7.98 via 193.233.7.254
				1895	netadm@alisa:~ # ip route get 193.233.7.98
				1896	193.233.7.98 via 193.233.7.254 dev eth0 src 193.233.7.90
				1897	cache mtu 1500 rtt 3072
				1898	netadm@alisa:~ #
				1899	\end{verbatim}
				1900	and probe it with ping:
				1901	\begin{verbatim}
				1902	netadm@alisa:~ # ping -n 193.233.7.98
				1903	PING 193.233.7.98 (193.233.7.98) from 193.233.7.90 : 56 data bytes
				1904	From 193.233.7.254: Redirect Host(New nexthop: 193.233.7.98)
				1905	64 bytes from 193.233.7.98: icmp_seq=0 ttl=255 time=3.5 ms
				1906	From 193.233.7.254: Redirect Host(New nexthop: 193.233.7.98)
				1907	64 bytes from 193.233.7.98: icmp_seq=1 ttl=255 time=2.2 ms
				1908	64 bytes from 193.233.7.98: icmp_seq=2 ttl=255 time=0.4 ms
				1909	64 bytes from 193.233.7.98: icmp_seq=3 ttl=255 time=0.4 ms
				1910	64 bytes from 193.233.7.98: icmp_seq=4 ttl=255 time=0.4 ms
				1911	^C
				1912	--- 193.233.7.98 ping statistics ---
				1913	5 packets transmitted, 5 packets received, 0% packet loss
				1914	round-trip min/avg/max = 0.4/1.3/3.5 ms
				1915	netadm@alisa:~ #
				1916	\end{verbatim}
				1917	What happened? Router 193.233.7.254 understood that we have a much
				1918	better path to the destination and sent us an ICMP redirect message.
				1919	We may retry \verb\|ip route get\| to see what we have in the routing
				1920	tables now:
				1921	\begin{verbatim}
				1922	netadm@alisa:~ # ip route get 193.233.7.98
				1923	193.233.7.98 dev eth0 src 193.233.7.90
				1924	cache <redirected> mtu 1500 rtt 3072
				1925	netadm@alisa:~ #
				1926	\end{verbatim}
				1927
				1928
				1929
				1930	\section{{\tt ip rule} --- routing policy database management}
				1931	\label{IP-RULE}
				1932
				1933	\paragraph{Abbreviations:} \verb\|rule\|, \verb\|ru\|.
				1934
				1935	\paragraph{Object:} \verb\|rule\|s in the routing policy database control
				1936	the route selection algorithm.
				1937
				1938	Classic routing algorithms used in the Internet make routing decisions
				1939	based only on the destination address of packets (and in theory,
				1940	but not in practice, on the TOS field). The seminal review of classic
				1941	routing algorithms and their modifications can be found in~\cite{RFC1812}.
				1942
				1943	In some circumstances we want to route packets differently depending not only
				1944	on destination addresses, but also on other packet fields: source address,
				1945	IP protocol, transport protocol ports or even packet payload.
				1946	This task is called ``policy routing''.
				1947
				1948	\begin{NB}
				1949	``policy routing'' $\neq$ ``routing policy''.
				1950
				1951	\noindent ``policy routing'' $=$ ``cunning routing''.
				1952
				1953	\noindent ``routing policy'' $=$ ``routing tactics'' or ``routing plan''.
				1954	\end{NB}
				1955
				1956	To solve this task, the conventional destination based routing table, ordered
				1957	according to the longest match rule, is replaced with a ``routing policy
				1958	database'' (or RPDB), which selects routes
				1959	by executing some set of rules. The rules may have lots of keys of different
				1960	natures and therefore they have no natural ordering, but one imposed
				1961	by the administrator. Linux-2.2 RPDB is a linear list of rules
				1962	ordered by numeric priority value.
				1963	RPDB explicitly allows matching a few packet fields:
				1964
				1965	\begin{itemize}
				1966	\item packet source address.
				1967	\item packet destination address.
				1968	\item TOS.
				1969	\item incoming interface (which is packet metadata, rather than a packet field).
				1970	\end{itemize}
				1971
				1972	Matching IP protocols and transport ports is also possible,
				1973	indirectly, via \verb\|ipchains\|, by exploiting their ability
				1974	to mark some classes of packets with \verb\|fwmark\|. Therefore,
				1975	\verb\|fwmark\| is also included in the set of keys checked by rules.
				1976
				1977	Each policy routing rule consists of a {\em selector\/} and an {\em action\/}
				1978	predicate. The RPDB is scanned in the order of increasing priority. The selector
				1979	of each rule is applied to \{source address, destination address, incoming
				1980	interface, tos, fwmark\} and, if the selector matches the packet,
				1981	the action is performed. The action predicate may return with success.
				1982	In this case, it will either give a route or failure indication
				1983	and the RPDB lookup is terminated. Otherwise, the RPDB program
				1984	continues on the next rule.
				1985
				1986	What is the action, semantically? The natural action is to select the
				1987	nexthop and the output device. This is what
				1988	Cisco IOS~\cite{IOS} does. Let us call it ``match \& set''.
				1989	The Linux-2.2 approach is more flexible. The action includes
				1990	lookups in destination-based routing tables and selecting
				1991	a route from these tables according to the classic longest match algorithm.
				1992	The ``match \& set'' approach is the simplest case of the Linux one. It is realized
				1993	when a second level routing table contains a single default route.
				1994	Recall that Linux-2.2 supports multiple tables
				1995	managed with the \verb\|ip route\| command, described in the previous section.
				1996
				1997	At startup time the kernel configures the default RPDB consisting of three
				1998	rules:
				1999
				2000	\begin{enumerate}
				2001	\item Priority: 0, Selector: match anything, Action: lookup routing
				2002	table \verb\|local\| (ID 255).
				2003	The \verb\|local\| table is a special routing table containing
				2004	high priority control routes for local and broadcast addresses.
				2005
				2006	Rule 0 is special. It cannot be deleted or overridden.
				2007
				2008
				2009	\item Priority: 32766, Selector: match anything, Action: lookup routing
				2010	table \verb\|main\| (ID 254).
				2011	The \verb\|main\| table is the normal routing table containing all non-policy
				2012	routes. This rule may be deleted and/or overridden with other
				2013	ones by the administrator.
				2014
				2015	\item Priority: 32767, Selector: match anything, Action: lookup routing
				2016	table \verb\|default\| (ID 253).
				2017	The \verb\|default\| table is empty. It is reserved for some
				2018	post-processing if no previous default rules selected the packet.
				2019	This rule may also be deleted.
				2020
				2021	\end{enumerate}
				2022
				2023	Do not confuse routing tables with rules: rules point to routing tables,
				2024	several rules may refer to one routing table and some routing tables
				2025	may have no rules pointing to them. If the administrator deletes all the rules
				2026	referring to a table, the table is not used, but it still exists
				2027	and will disappear only after all the routes contained in it are deleted.
				2028
				2029
				2030	\paragraph{Rule attributes:} Each RPDB entry has additional
				2031	attributes. F.e.\ each rule has a pointer to some routing
				2032	table. NAT and masquerading rules have an attribute to select a new IP
				2033	address to translate/masquerade. Besides that, rules have some
				2034	optional attributes, which routes have, namely \verb\|realms\|.
				2035	These values do not override those contained in the routing tables. They
				2036	are only used if the route did not select any attributes.
				2037
				2038
				2039	\paragraph{Rule types:} The RPDB may contain rules of the following
				2040	types:
				2041	\begin{itemize}
				2042	\item \verb\|unicast\| --- the rule prescribes to return the route found
				2043	in the routing table referenced by the rule.
				2044	\item \verb\|blackhole\| --- the rule prescribes to silently drop the packet.
				2045	\item \verb\|unreachable\| --- the rule prescribes to generate a ``Network
				2046	is unreachable'' error.
				2047	\item \verb\|prohibit\| --- the rule prescribes to generate
				2048	a ``Communication is administratively prohibited'' error.
				2049	\item \verb\|nat\| --- the rule prescribes to translate the source address
				2050	of the IP packet into some other value. More about NAT is
				2051	in Appendix~\ref{ROUTE-NAT}, p.\pageref{ROUTE-NAT}.
				2052	\end{itemize}
				2053
				2054
				2055	\paragraph{Commands:} \verb\|add\|, \verb\|delete\| and \verb\|show\|
				2056	(or \verb\|list\|).
				2057
				2058	\subsection{{\tt ip rule add} --- insert a new rule\\
				2059	{\tt ip rule delete} --- delete a rule}
				2060	\label{IP-RULE-ADD}
				2061
				2062	\paragraph{Abbreviations:} \verb\|add\|, \verb\|a\|; \verb\|delete\|, \verb\|del\|,
				2063	\verb\|d\|.
				2064
				2065	\paragraph{Arguments:}
				2066
				2067	\begin{itemize}
				2068	\item \verb\|type TYPE\| (default)
				2069
				2070	--- the type of this rule. The list of valid types was given in the previous
				2071	subsection.
				2072
				2073	\item \verb\|from PREFIX\|
				2074
				2075	--- select the source prefix to match.
				2076
				2077	\item \verb\|to PREFIX\|
				2078
				2079	--- select the destination prefix to match.
				2080
				2081	\item \verb\|iif NAME\|
				2082
				2083	--- select the incoming device to match. If the interface is loopback,
				2084	the rule only matches packets originating from this host. This means that you
				2085	may create separate routing tables for forwarded and local packets and,
				2086	hence, completely segregate them.
				2087
				2088	\item \verb\|tos TOS\| or \verb\|dsfield TOS\|
				2089
				2090	--- select the TOS value to match.
				2091
				2092	\item \verb\|fwmark MARK\|
				2093
				2094	--- select the \verb\|fwmark\| value to match.
				2095
				2096	\item \verb\|priority PREFERENCE\|
				2097
				2098	--- the priority of this rule. Each rule should have an explicitly
				2099	set {\em unique\/} priority value.
				2100	\begin{NB}
				2101	Really, for historical reasons \verb\|ip rule add\| does not require a
				2102	priority value and allows them to be non-unique.
				2103	If the user does not supply a priority, it is selected by the kernel.
				2104	If the user creates a rule with a priority value that
				2105	already exists, the kernel does not reject the request. It adds
				2106	the new rule before all old rules of the same priority.
				2107
				2108	It is a mistake in design, no more. And it will be fixed one day,
				2109	so do not rely on this feature. Use explicit priorities.
				2110	\end{NB}
				2111
				2112
				2113	\item \verb\|table TABLEID\|
				2114
				2115	--- the routing table identifier to look up if the rule selector matches.
				2116
				2117	\item \verb\|realms FROM/TO\|
				2118
				2119	--- Realms to select if the rule matched and the routing table lookup
				2120	succeeded. Realm \verb\|TO\| is only used if the route did not select
				2121	any realm.
				2122
				2123	\item \verb\|nat ADDRESS\|
				2124
				2125	--- The base of the IP address block to translate (for source addresses).
				2126	The \verb\|ADDRESS\| may be either the start of the block of NAT addresses
				2127	(selected by NAT routes) or in linux-2.2 a local host address (or even zero).
				2128	In the last case the router does not translate the packets,
				2129	but masquerades them to this address; this feature disappeared in 2.4.
				2130	More about NAT is in Appendix~\ref{ROUTE-NAT},
				2131	p.\pageref{ROUTE-NAT}.
				2132
				2133	\end{itemize}
				2134
				2135	\paragraph{Warning:} Changes to the RPDB made with these commands
				2136	do not become active immediately. It is assumed that after
				2137	a script finishes a batch of updates, it flushes the routing cache
				2138	with \verb\|ip route flush cache\|.
				2139
				2140	\paragraph{Examples:}
				2141	\begin{itemize}
				2142	\item Route packets with source addresses from 192.203.80/24
				2143	according to routing table \verb\|inr.ruhep\|:
				2144	\begin{verbatim}
				2145	ip ru add from 192.203.80.0/24 table inr.ruhep prio 220
				2146	\end{verbatim}
				2147
				2148	\item Translate packet source address 193.233.7.83 into 192.203.80.144
				2149	and route it according to table \#1 (actually, it is \verb\|inr.ruhep\|):
				2150	\begin{verbatim}
				2151	ip ru add from 193.233.7.83 nat 192.203.80.144 table 1 prio 320
				2152	\end{verbatim}
				2153
				2154	\item Delete the unused default rule:
				2155	\begin{verbatim}
				2156	ip ru del prio 32767
				2157	\end{verbatim}
				2158
				2159	\end{itemize}
				2160
				2161
				2162
				2163	\subsection{{\tt ip rule show} --- list rules}
				2164	\label{IP-RULE-SHOW}
				2165
				2166	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|sh\|, \verb\|ls\|, \verb\|l\|.
				2167
				2168
				2169	\paragraph{Arguments:} Good news, this is one command that has no arguments.
				2170
				2171	\paragraph{Output format:}
				2172
				2173	\begin{verbatim}
				2174	kuznet@amber:~ $ ip ru ls
				2175	0: from all lookup local
				2176	200: from 192.203.80.0/24 to 193.233.7.0/24 lookup main
				2177	210: from 192.203.80.0/24 to 192.203.80.0/24 lookup main
				2178	220: from 192.203.80.0/24 lookup inr.ruhep realms inr.ruhep/radio-msu
				2179	300: from 193.233.7.83 to 193.233.7.0/24 lookup main
				2180	310: from 193.233.7.83 to 192.203.80.0/24 lookup main
				2181	320: from 193.233.7.83 lookup inr.ruhep map-to 192.203.80.144
				2182	32766: from all lookup main
				2183	kuznet@amber:~ $
				2184	\end{verbatim}
				2185
				2186	In the first column is the rule priority value followed
				2187	by a colon. Then the selectors follow. Each key is prefixed
				2188	with the same keyword that was used to create the rule.
				2189
				2190	The keyword \verb\|lookup\| is followed by a routing table identifier,
				2191	as it is recorded in the file \verb\|/etc/iproute2/rt_tables\|.
				2192
				2193	If the rule does NAT (f.e.\ rule \#320), it is shown by the keyword
				2194	\verb\|map-to\| followed by the start of the block of addresses to map.
				2195
				2196	The sense of this example is pretty simple. The prefixes
				2197	192.203.80.0/24 and 193.233.7.0/24 form the internal network, but
				2198	they are routed differently when the packets leave it.
				2199	Besides that, the host 193.233.7.83 is translated into
				2200	another prefix to look like 192.203.80.144 when talking
				2201	to the outer world.
				2202
				2203
				2204
				2205	\section{{\tt ip maddress} --- multicast addresses management}
				2206	\label{IP-MADDR}
				2207
				2208	\paragraph{Object:} \verb\|maddress\| objects are multicast addresses.
				2209
				2210	\paragraph{Commands:} \verb\|add\|, \verb\|delete\|, \verb\|show\| (or \verb\|list\|).
				2211
				2212	\subsection{{\tt ip maddress show} --- list multicast addresses}
				2213
				2214	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|sh\|, \verb\|ls\|, \verb\|l\|.
				2215
				2216	\paragraph{Arguments:}
				2217
				2218	\begin{itemize}
				2219
				2220	\item \verb\|dev NAME\| (default)
				2221
				2222	--- the device name.
				2223
				2224	\end{itemize}
				2225
				2226	\paragraph{Output format:}
				2227
				2228	\begin{verbatim}
				2229	kuznet@alisa:~ $ ip maddr ls dummy
				2230	2: dummy
				2231	link 33:33:00:00:00:01
				2232	link 01:00:5e:00:00:01
				2233	inet 224.0.0.1 users 2
				2234	inet6 ff02::1
				2235	kuznet@alisa:~ $
				2236	\end{verbatim}
				2237
				2238	The first line of the output shows the interface index and its name.
				2239	Then the multicast address list follows. Each line starts with the
				2240	protocol identifier. The word \verb\|link\| denotes a link layer
				2241	multicast address.
				2242
				2243	If a multicast address has more than one user, the number
				2244	of users is shown after the \verb\|users\| keyword.
				2245
				2246	One additional feature not present in the example above
				2247	is the \verb\|static\| flag, which indicates that the address was joined
				2248	with \verb\|ip maddr add\|. See the following subsection.
				2249
				2250
				2251
				2252	\subsection{{\tt ip maddress add} --- add a multicast address\\
				2253	{\tt ip maddress delete} --- delete a multicast address}
				2254
				2255	\paragraph{Abbreviations:} \verb\|add\|, \verb\|a\|; \verb\|delete\|, \verb\|del\|, \verb\|d\|.
				2256
				2257	\paragraph{Description:} these commands attach/detach
				2258	a static link layer multicast address to listen on the interface.
				2259	Note that it is impossible to join protocol multicast groups
				2260	statically. This command only manages link layer addresses.
				2261
				2262
				2263	\paragraph{Arguments:}
				2264
				2265	\begin{itemize}
				2266	\item \verb\|address LLADDRESS\| (default)
				2267
				2268	--- the link layer multicast address.
				2269
				2270	\item \verb\|dev NAME\|
				2271
				2272	--- the device to join/leave this multicast address.
				2273
				2274	\end{itemize}
				2275
				2276
				2277	\paragraph{Example:} Let us continue with the example from the previous subsection.
				2278
				2279	\begin{verbatim}
				2280	netadm@alisa:~ # ip maddr add 33:33:00:00:00:01 dev dummy
				2281	netadm@alisa:~ # ip -0 maddr ls dummy
				2282	2: dummy
				2283	link 33:33:00:00:00:01 users 2 static
				2284	link 01:00:5e:00:00:01
				2285	netadm@alisa:~ # ip maddr del 33:33:00:00:00:01 dev dummy
				2286	\end{verbatim}
				2287
				2288	\begin{NB}
				2289	Neither \verb\|ip\| nor the kernel checks for multicast address validity.
				2290	Particularly, this means that you can try to load a unicast address
				2291	instead of a multicast address. Most drivers will ignore such addresses,
				2292	but several (f.e.\ Tulip) will load them into their on-board filter.
				2293	The effects may be strange. Namely, the addresses become additional
				2294	local link addresses and, if you loaded the address of another host
				2295	to the router, wait for duplicated packets on the wire.
				2296	It is not a bug, but rather a hole in the API and intra-kernel interfaces.
				2297	This feature is really more useful for traffic monitoring, but using it
				2298	with Linux-2.2 you {\em have to\/} be sure that the host is not
				2299	a router and, especially, that it is not a transparent proxy or masquerading
				2300	agent.
				2301	\end{NB}
				2302
				2303
				2304
				2305	\section{{\tt ip mroute} --- multicast routing cache management}
				2306	\label{IP-MROUTE}
				2307
				2308	\paragraph{Abbreviations:} \verb\|mroute\|, \verb\|mr\|.
				2309
				2310	\paragraph{Object:} \verb\|mroute\| objects are multicast routing cache
				2311	entries created by a user level mrouting daemon
				2312	(f.e.\ \verb\|pimd\| or \verb\|mrouted\|).
				2313
				2314	Due to the limitations of the current interface to the multicast routing
				2315	engine, it is impossible to change \verb\|mroute\| objects administratively,
				2316	so we may only display them. This limitation will be removed
				2317	in the future.
				2318
				2319	\paragraph{Commands:} \verb\|show\| (or \verb\|list\|).
				2320
				2321
				2322	\subsection{{\tt ip mroute show} --- list mroute cache entries}
				2323
				2324	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|sh\|, \verb\|ls\|, \verb\|l\|.
				2325
				2326	\paragraph{Arguments:}
				2327
				2328	\begin{itemize}
				2329	\item \verb\|to PREFIX\| (default)
				2330
				2331	--- the prefix selecting the destination multicast addresses to list.
				2332
				2333
				2334	\item \verb\|iif NAME\|
				2335
				2336	--- the interface on which multicast packets are received.
				2337
				2338
				2339	\item \verb\|from PREFIX\|
				2340
				2341	--- the prefix selecting the IP source addresses of the multicast route.
				2342
				2343
				2344	\end{itemize}
				2345
				2346	\paragraph{Output format:}
				2347
				2348	\begin{verbatim}
				2349	kuznet@amber:~ $ ip mroute ls
				2350	(193.232.127.6, 224.0.1.39) Iif: unresolved
				2351	(193.232.244.34, 224.0.1.40) Iif: unresolved
				2352	(193.233.7.65, 224.66.66.66) Iif: eth0 Oifs: pimreg
				2353	kuznet@amber:~ $
				2354	\end{verbatim}
				2355
				2356	Each line shows one (S,G) entry in the multicast routing cache,
				2357	where S is the source address and G is the multicast group. \verb\|Iif\| is
				2358	the interface on which multicast packets are expected to arrive.
				2359	If the word \verb\|unresolved\| is there instead of the interface name,
				2360	it means that the routing daemon still hasn't resolved this entry.
				2361	The keyword \verb\|Oifs\| is followed by a list of output interfaces, separated
				2362	by spaces. If a multicast routing entry is created with non-trivial
				2363	TTL scope, administrative distances are appended to the device names
				2364	in the \verb\|Oifs\| list.
				2365
				2366	\paragraph{Statistics:} The \verb\|-statistics\| option also prints the
				2367	number of packets and bytes forwarded along this route and
				2368	the number of packets that arrived on the wrong interface, if this number is not zero.
				2369
				2370	\begin{verbatim}
				2371	kuznet@amber:~ $ ip -s mr ls 224.66/16
				2372	(193.233.7.65, 224.66.66.66) Iif: eth0 Oifs: pimreg
				2373	9383 packets, 300256 bytes
				2374	kuznet@amber:~ $
				2375	\end{verbatim}
				2376
				2377
				2378	\section{{\tt ip tunnel} --- tunnel configuration}
				2379	\label{IP-TUNNEL}
				2380
				2381	\paragraph{Abbreviations:} \verb\|tunnel\|, \verb\|tunl\|.
				2382
				2383	\paragraph{Object:} \verb\|tunnel\| objects are tunnels, encapsulating
				2384	packets in IPv4 packets and then sending them over the IP infrastructure.
				2385
				2386	\paragraph{Commands:} \verb\|add\|, \verb\|delete\|, \verb\|change\|, \verb\|show\|
				2387	(or \verb\|list\|).
				2388
				2389	\paragraph{See also:} A more informal discussion of tunneling
				2390	over IP and the \verb\|ip tunnel\| command can be found in~\cite{IP-TUNNELS}.
				2391
				2392	\subsection{{\tt ip tunnel add} --- add a new tunnel\\
				2393	{\tt ip tunnel change} --- change an existing tunnel\\
				2394	{\tt ip tunnel delete} --- destroy a tunnel}
				2395
				2396	\paragraph{Abbreviations:} \verb\|add\|, \verb\|a\|; \verb\|change\|, \verb\|chg\|;
				2397	\verb\|delete\|, \verb\|del\|, \verb\|d\|.
				2398
				2399
				2400	\paragraph{Arguments:}
				2401
				2402	\begin{itemize}
				2403
				2404	\item \verb\|name NAME\| (default)
				2405
				2406	--- select the tunnel device name.
				2407
				2408	\item \verb\|mode MODE\|
				2409
				2410	--- set the tunnel mode. Three modes are currently available:
				2411	\verb\|ipip\|, \verb\|sit\| and \verb\|gre\|.
				2412
				2413	\item \verb\|remote ADDRESS\|
				2414
				2415	--- set the remote endpoint of the tunnel.
				2416
				2417	\item \verb\|local ADDRESS\|
				2418
				2419	--- set the fixed local address for tunneled packets.
				2420	It must be an address on another interface of this host.
				2421
				2422	\item \verb\|ttl N\|
				2423
				2424	--- set a fixed TTL \verb\|N\| on tunneled packets.
				2425	\verb\|N\| is a number in the range 1--255. 0 is a special value
				2426	meaning that packets inherit the TTL value.
				2427	The default value is: \verb\|inherit\|.
				2428
				2429	\item \verb\|tos T\| or \verb\|dsfield T\|
				2430
				2431	--- set a fixed TOS \verb\|T\| on tunneled packets.
				2432	The default value is: \verb\|inherit\|.
				2433
				2434
				2435
				2436	\item \verb\|dev NAME\|
				2437
				2438	--- bind the tunnel to the device \verb\|NAME\| so that
				2439	tunneled packets will only be routed via this device and will
				2440	not be able to escape to another device when the route to the endpoint changes.
				2441
				2442	\item \verb\|nopmtudisc\|
				2443
				2444	--- disable Path MTU Discovery on this tunnel.
				2445	It is enabled by default. Note that a fixed ttl is incompatible
				2446	with this option: tunneling with a fixed ttl always performs PMTU discovery.
				2447
				2448	\item \verb\|key K\|, \verb\|ikey K\|, \verb\|okey K\|
				2449
				2450	--- (only GRE tunnels) use keyed GRE with key \verb\|K\|. \verb\|K\| is
				2451	either a number or an IP address-like dotted quad.
				2452	The \verb\|key\| parameter sets the key to use in both directions.
				2453	The \verb\|ikey\| and \verb\|okey\| parameters set different keys for input and output.
				2454
				2455
				2456	\item \verb\|csum\|, \verb\|icsum\|, \verb\|ocsum\|
				2457
				2458	--- (only GRE tunnels) generate/require checksums for tunneled packets.
				2459	The \verb\|ocsum\| flag calculates checksums for outgoing packets.
				2460	The \verb\|icsum\| flag requires that all input packets have the correct
				2461	checksum. The \verb\|csum\| flag is equivalent to the combination
				2462	``\verb\|icsum\| \verb\|ocsum\|''.
				2463
				2464	\item \verb\|seq\|, \verb\|iseq\|, \verb\|oseq\|
				2465
				2466	--- (only GRE tunnels) serialize packets.
				2467	The \verb\|oseq\| flag enables sequencing of outgoing packets.
				2468	The \verb\|iseq\| flag requires that all input packets are serialized.
				2469	The \verb\|seq\| flag is equivalent to the combination ``\verb\|iseq\| \verb\|oseq\|''.
				2470
				2471	\begin{NB}
				2472	I think this option does not
				2473	work. At least, I did not test it, did not debug it and
				2474	do not even understand how it is supposed to work or for what
				2475	purpose Cisco planned to use it. Do not use it.
				2476	\end{NB}
				2477
				2478
				2479	\end{itemize}
				2480
				2481	\paragraph{Example:} Create a point-to-point IPv6 tunnel with a maximal TTL of 32.
				2482	\begin{verbatim}
				2483	netadm@amber:~ # ip tunl add Cisco mode sit remote 192.31.7.104 \
				2484	local 192.203.80.142 ttl 32
				2485	\end{verbatim}
				2486
				2487	\subsection{{\tt ip tunnel show} --- list tunnels}
				2488
				2489	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|sh\|, \verb\|ls\|, \verb\|l\|.
				2490
				2491
				2492	\paragraph{Arguments:} None.
				2493
				2494	\paragraph{Output format:}
				2495	\begin{verbatim}
				2496	kuznet@amber:~ $ ip tunl ls Cisco
				2497	Cisco: ipv6/ip remote 192.31.7.104 local 192.203.80.142 ttl 32
				2498	kuznet@amber:~ $
				2499	\end{verbatim}
				2500	The line starts with the tunnel device name followed by a colon.
				2501	Then the tunnel mode follows. The parameters of the tunnel are listed
				2502	with the same keywords that were used when creating the tunnel.
				2503
				2504	\paragraph{Statistics:}
				2505
				2506	\begin{verbatim}
				2507	kuznet@amber:~ $ ip -s tunl ls Cisco
				2508	Cisco: ipv6/ip remote 192.31.7.104 local 192.203.80.142 ttl 32
				2509	RX: Packets Bytes Errors CsumErrs OutOfSeq Mcasts
				2510	12566 1707516 0 0 0 0
				2511	TX: Packets Bytes Errors DeadLoop NoRoute NoBufs
				2512	13445 1879677 0 0 0 0
				2513	kuznet@amber:~ $
				2514	\end{verbatim}
				2515	Essentially, these numbers are the same as the numbers
				2516	printed with {\tt ip -s link show}
				2517	(sec.~\ref{IP-LINK-SHOW}, p.\pageref{IP-LINK-SHOW}) but the tags are different
				2518	to reflect that they are tunnel specific.
				2519	\begin{itemize}
				2520	\item \verb\|CsumErrs\| --- the total number of packets dropped
				2521	because of checksum failures for a GRE tunnel with checksumming enabled.
				2522	\item \verb\|OutOfSeq\| --- the total number of packets dropped
				2523	because they arrived out of sequence for a GRE tunnel with
				2524	serialization enabled.
				2525	\item \verb\|Mcasts\| --- the total number of multicast packets
				2526	received on a broadcast GRE tunnel.
				2527	\item \verb\|DeadLoop\| --- the total number of packets which were not
				2528	transmitted because the tunnel is looped back to itself.
				2529	\item \verb\|NoRoute\| --- the total number of packets which were not
				2530	transmitted because there is no IP route to the remote endpoint.
				2531	\item \verb\|NoBufs\| --- the total number of packets which were not
				2532	transmitted because the kernel failed to allocate a buffer.
				2533	\end{itemize}
				2534
				2535
				2536	\section{{\tt ip monitor} and {\tt rtmon} --- state monitoring}
				2537	\label{IP-MONITOR}
				2538
				2539	The \verb\|ip\| utility can monitor the state of devices, addresses
				2540	and routes continuously. This option has a slightly different format.
				2541	Namely,
				2542	the \verb\|monitor\| command is the first in the command line and then
				2543	the object list follows:
				2544	\begin{verbatim}
				2545	ip monitor [ file FILE ] [ all \| OBJECT-LIST ]
				2546	\end{verbatim}
				2547	\verb\|OBJECT-LIST\| is the list of object types that we want to monitor.
				2548	It may contain \verb\|link\|, \verb\|address\| and \verb\|route\|.
				2549	If no \verb\|file\| argument is given, \verb\|ip\| opens RTNETLINK,
				2550	listens on it and dumps state changes in the format described
				2551	in previous sections.
				2552
				2553	If a file name is given, it does not listen on RTNETLINK,
				2554	but opens the file containing RTNETLINK messages saved in binary format
				2555	and dumps them. Such a history file can be generated with the
				2556	\verb\|rtmon\| utility. This utility has a command line syntax similar to
				2557	\verb\|ip monitor\|.
				2558	Ideally, \verb\|rtmon\| should be started before
				2559	the first network configuration command is issued. F.e.\ if
				2560	you insert:
				2561	\begin{verbatim}
				2562	rtmon file /var/log/rtmon.log
				2563	\end{verbatim}
				2564	in a startup script, you will be able to view the full history
				2565	later.
				2566
				2567	Certainly, it is possible to start \verb\|rtmon\| at any time.
				2568	It prepends the history with the state snapshot dumped at the moment
				2569	of starting.
				2570
				2571
				2572	\section{Route realms and policy propagation, {\tt rtacct}}
				2573	\label{RT-REALMS}
				2574
				2575	On routers using OSPF ASE or, especially, the BGP protocol, routing
				2576	tables may be huge. If we want to classify or to account for the packets
				2577	per route, we will have to keep lots of information. Even worse, if we
				2578	want to distinguish the packets not only by their destination, but
				2579	also by their source, the task gets quadratic complexity and its solution
				2580	is physically impossible.
				2581
				2582	One approach to propagating the policy from routing protocols
				2583	to the forwarding engine has been proposed in~\cite{IOS-BGP-PP}.
				2584	Essentially, Cisco Policy Propagation via BGP is based on the fact
				2585	that dedicated routers all have the RIB (Routing Information Base)
				2586	close to the forwarding engine, so policy routing rules can
				2587	check all the route attributes, including ASPATH information
				2588	and community strings.
				2589
				2590	The Linux architecture, splitting the RIB (maintained by a user level
				2591	daemon) and the kernel based FIB (Forwarding Information Base),
				2592	does not allow such a simple approach.
				2593
				2594	This is fortunate for us, because there is another solution
				2595	which allows even more flexible policy and richer semantics.
				2596
				2597	Namely, routes can be clustered together in user space, based on their
				2598	attributes. F.e.\ a BGP router knows the route's ASPATH and its community;
				2599	an OSPF router knows the route tag or its area. The administrator, when adding
				2600	routes manually, also knows their nature. Provided that the number of such
				2601	aggregates (we call them {\em realms\/}) is low, the task of full
				2602	classification both by source and destination becomes quite manageable.
				2603
				2604	So each route may be assigned to a realm. It is assumed that
				2605	this identification is made by a routing daemon, but static routes
				2606	can also be handled manually with \verb\|ip route\| (see sec.~\ref{IP-ROUTE},
				2607	p.\pageref{IP-ROUTE}).
				2608	\begin{NB}
				2609	There is a patch to \verb\|gated\|, allowing classification of routes
				2610	to realms with the full set of policy rules implemented in \verb\|gated\|:
				2611	by prefix, by ASPATH, by origin, by tag etc.
				2612	\end{NB}
				2613
				2614	To facilitate the construction (f.e.\ in case the routing
				2615	daemon is not aware of realms), missing realms may be completed
				2616	with routing policy rules, see sec.~\ref{IP-RULE}, p.\pageref{IP-RULE}.
				2617
				2618	For each packet the kernel calculates a tuple of realms: source realm
				2619	and destination realm, using the following algorithm:
				2620
				2621	\begin{enumerate}
				2622	\item If the route has a realm, the destination realm of the packet is set to it.
				2623	\item If the rule has a source realm, the source realm of the packet is set to it.
				2624	If the destination realm was not inherited from the route and the rule has a destination realm,
				2625	it is also set.
				2626	\item If at least one of the realms is still unknown, the kernel finds
				2627	the reversed route to the source of the packet.
				2628	\item If the source realm is still unknown, get it from the reversed route.
				2629	\item If one of the realms is still unknown, swap the realms of reversed
				2630	routes and apply step 2 again.
				2631	\end{enumerate}
				2632
				2633	After this procedure is completed we know what realm the packet
				2634	arrived from and the realm where it is going to propagate to.
				2635	If some of the realms are unknown, they are initialized to zero
				2636	(or realm \verb\|unknown\|).
				2637
				2638	The main application of realms is the TC \verb\|route\| classifier~\cite{TC-CREF},
				2639	where they are used to help assign packets to traffic classes,
				2640	to account, police and schedule them according to this
				2641	classification.
				2642
				2643	A much simpler but still very useful application is incoming packet
				2644	accounting by realms. The kernel gathers a packet statistics summary
				2645	which can be viewed with the \verb\|rtacct\| utility.
				2646	\begin{verbatim}
				2647	kuznet@amber:~ $ rtacct russia
				2648	Realm BytesTo PktsTo BytesFrom PktsFrom
				2649	russia 20576778 169176 47080168 153805
				2650	kuznet@amber:~ $
				2651	\end{verbatim}
				2652	This shows that this router received 153805 packets from
				2653	the realm \verb\|russia\| and forwarded 169176 packets to \verb\|russia\|.
				2654	The realm \verb\|russia\| consists of routes with ASPATHs not leaving
				2655	Russia.
				2656
				2657	Note that locally originating packets are not accounted here,
				2658	\verb\|rtacct\| shows incoming packets only. Using the \verb\|route\|
				2659	classifier (see~\cite{TC-CREF}) you can get even more detailed
				2660	accounting information about outgoing packets, optionally
				2661	summarizing traffic not only by source or destination, but
				2662	by any pair of source and destination realms.
				2663
				2664
				2665	\begin{thebibliography}{99}
				2666	\addcontentsline{toc}{section}{References}
				2667	\bibitem{RFC-NDISC} T.~Narten, E.~Nordmark, W.~Simpson.
				2668	``Neighbor Discovery for IP Version 6 (IPv6)'', RFC-2461.
				2669
				2670	\bibitem{RFC-ADDRCONF} S.~Thomson, T.~Narten.
				2671	``IPv6 Stateless Address Autoconfiguration'', RFC-2462.
				2672
				2673	\bibitem{RFC1812} F.~Baker.
				2674	``Requirements for IP Version 4 Routers'', RFC-1812.
				2675
				2676	\bibitem{RFC1122} R.~T.~Braden.
				2677	``Requirements for Internet hosts --- communication layers'', RFC-1122.
				2678
				2679	\bibitem{IOS} ``Cisco IOS Release 12.0 Network Protocols
				2680	Command Reference, Part 1'' and
				2681	``Cisco IOS Release 12.0 Quality of Service Solutions
				2682	Configuration Guide: Configuring Policy-Based Routing'',\\
				2683	http://www.cisco.com/univercd/cc/td/doc/product/software/ios120.
				2684
				2685	\bibitem{IP-TUNNELS} A.~N.~Kuznetsov.
				2686	``Tunnels over IP in Linux-2.2'', \\
				2687	In: {\tt ftp://ftp.inr.ac.ru/ip-routing/iproute2-current.tar.gz}.
				2688
				2689	\bibitem{TC-CREF} A.~N.~Kuznetsov. ``TC Command Reference'',\\
				2690	In: {\tt ftp://ftp.inr.ac.ru/ip-routing/iproute2-current.tar.gz}.
				2691
				2692	\bibitem{IOS-BGP-PP} ``Cisco IOS Release 12.0 Quality of Service Solutions
				2693	Configuration Guide: Configuring QoS Policy Propagation via
				2694	Border Gateway Protocol'',\\
				2695	http://www.cisco.com/univercd/cc/td/doc/product/software/ios120.
				2696
				2697	\bibitem{RFC-DHCP} R.~Droms.
				2698	``Dynamic Host Configuration Protocol'', RFC-2131.
				2699
				2700	\bibitem{RFC2414} M.~Allman, S.~Floyd, C.~Partridge.
				2701	``Increasing TCP's Initial Window'', RFC-2414.
				2702
				2703	\end{thebibliography}
				2704
				2705
				2706
				2707
				2708	\appendix
				2709	\addcontentsline{toc}{section}{Appendix}
				2710
				2711	\section{Source address selection}
				2712	\label{ADDR-SEL}
				2713
				2714	When a host creates an IP packet, it must select some source
				2715	address. Correct source address selection is a critical procedure,
				2716	because it gives the receiver the information needed to deliver a
				2717	reply. If the source is selected incorrectly, in the best case,
				2718	the backward path may appear different to the forward one which
				2719	is harmful for performance. In the worst case, when the addresses
				2720	are administratively scoped, the reply may be lost entirely.
				2721
				2722	Linux-2.2 selects source addresses using the following algorithm:
				2723
				2724	\begin{itemize}
				2725	\item
				2726	The application may select a source address explicitly with the \verb\|bind(2)\|
				2727	syscall or by supplying it to \verb\|sendmsg(2)\| via the ancillary data object
				2728	\verb\|IP_PKTINFO\|. In this case the kernel only checks the validity
				2729	of the address and never tries to ``improve'' an incorrect user choice,
				2730	generating an error instead.
				2731	\begin{NB}
				2732	Never say ``Never''. The sysctl option \verb\|ip_dynaddr\| breaks
				2733	this axiom. It has been made deliberately with the purpose
				2734	of automatically reselecting the address on hosts with dynamic dial-out interfaces.
				2735	However, this hack {\em must not\/} be used on multihomed hosts
				2736	and especially on routers: it would break them.
				2737	\end{NB}
				2738
				2739
				2740	\item Otherwise, IP routing tables can contain an explicit source
				2741	address hint for this destination. The hint is set with the \verb\|src\| parameter
				2742	to the \verb\|ip route\| command, sec.~\ref{IP-ROUTE}, p.\pageref{IP-ROUTE}.
				2743
				2744
				2745	\item Otherwise, the kernel searches through the list of addresses
				2746	attached to the interface through which the packets will be routed.
				2747	The search strategies are different for IP and IPv6. Namely:
				2748
				2749	\begin{itemize}
				2750	\item IPv6 searches for the first valid, not deprecated address
				2751	with the same scope as the destination.
				2752
				2753	\item IP searches for the first valid address with a scope wider
				2754	than the scope of the destination but it prefers addresses
				2755	which fall into the same subnet as the nexthop of the route
				2756	to the destination. Unlike IPv6, the scopes of IPv4 destinations
				2757	are not encoded in their addresses but are supplied
				2758	in routing tables instead (the \verb\|scope\| parameter to the \verb\|ip route\| command,
				2759	sec.~\ref{IP-ROUTE}, p.\pageref{IP-ROUTE}).
				2760
				2761	\end{itemize}
				2762
				2763
				2764	\item Otherwise, if the scope of the destination is \verb\|link\| or \verb\|host\|,
				2765	the algorithm fails and returns a zero source address.
				2766
				2767	\item Otherwise, all interfaces are scanned to search for an address
				2768	with an appropriate scope. The loopback device \verb\|lo\| is always the first
				2769	in the search list, so that if an address with global scope (not 127.0.0.1!)
				2770	is configured on loopback, it is always preferred.
				2771
				2772	\end{itemize}
				2773
				2774
				2775	\section{Proxy ARP/NDISC}
				2776	\label{PROXY-NEIGH}
				2777
				2778	Routers may answer ARP/NDISC solicitations on behalf of other hosts.
				2779	In Linux-2.2 proxy ARP on an interface may be enabled
				2780	by setting the kernel \verb\|sysctl\| variable
				2781	\verb\|/proc/sys/net/ipv4/conf/<dev>/proxy_arp\| to 1. After this, the router
				2782	starts to answer ARP requests on the interface \verb\|<dev>\|, provided
				2783	the route to the requested destination does {\em not\/} go back via the same
				2784	device.
				2785
				2786	The variable \verb\|/proc/sys/net/ipv4/conf/all/proxy_arp\| enables proxy
				2787	ARP on all the IP devices.
				2788
				2789	However, this approach fails in the case of IPv6 because the router
				2790	must join the solicited node multicast address to listen for the corresponding
				2791	NDISC queries. It means that proxy NDISC is possible only on a per destination
				2792	basis.
				2793
				2794	Logically, proxy ARP/NDISC is not a kernel task. It can easily be implemented
				2795	in user space. However, similar functionality was present in BSD kernels
				2796	and in Linux-2.0, so we have to preserve it at least to the extent that
				2797	is standardized in BSD.
				2798	\begin{NB}
				2799	Linux-2.0 ARP had a feature called {\em subnet\/} proxy ARP.
				2800	It is replaced with the sysctl flag in Linux-2.2.
				2801	\end{NB}
				2802
				2803
				2804	The \verb\|ip\| utility provides a way to manage proxy ARP/NDISC
				2805	with the \verb\|ip neigh\| command, namely:
				2806	\begin{verbatim}
				2807	ip neigh add proxy ADDRESS [ dev NAME ]
				2808	\end{verbatim}
				2809	adds a new proxy ARP/NDISC record and
				2810	\begin{verbatim}
				2811	ip neigh del proxy ADDRESS [ dev NAME ]
				2812	\end{verbatim}
				2813	deletes it.
				2814
				2815	If the name of the device is not given, the router will answer solicitations
				2816	for address \verb\|ADDRESS\| on all devices, otherwise it will only serve
				2817	the device \verb\|NAME\|. Even if the proxy entry is created with
				2818	\verb\|ip neigh\|, the router {\em will not\/} answer a query if the route
				2819	to the destination goes back via the interface from which the solicitation
				2820	was received.
				2821
				2822	It is important to emphasize that proxy entries have {\em no\/}
				2823	parameters other than these (IP/IPv6 address and optional device).
				2824	Particularly, the entry does not store any link layer address.
				2825	It always advertises the station address of the interface
				2826	on which it sends advertisements (i.e.\ its own station address).
				2827
				2828	\section{Route NAT status}
				2829	\label{ROUTE-NAT}
				2830
				2831	NAT (or ``Network Address Translation'') remaps some parts
				2832	of the IP address space into other ones. Linux-2.2 route NAT is supposed
				2833	to be used to facilitate policy routing by rewriting addresses
				2834	to other routing domains or to help while renumbering sites
				2835	to another prefix.
				2836
				2837	\paragraph{What it is not:}
				2838	It is necessary to emphasize that {\em it is not supposed\/}
				2839	to be used to compress address space or to split load.
				2840	This is not missing functionality but a design principle.
				2841	Route NAT is {\em stateless\/}. It does not hold any state
				2842	about translated sessions. This means that it handles any number
				2843	of sessions flawlessly. But it also means that it is {\em static\/}.
				2844	It cannot detect the moment when the last TCP client stops
				2845	using an address. For the same reason, it will not help to split
				2846	load between several servers.
				2847	\begin{NB}
				2848	It is a pretty commonly held belief that it is useful to split load between
				2849	several servers with NAT. This is a mistake. All you get from this
				2850	is the requirement that the router keep the state of all the TCP connections
				2851	going via it. Well, if the router is so powerful, run apache on it. 8)
				2852	\end{NB}
				2853
				2854	The second feature: it does not touch packet payload,
				2855	does not try to ``improve'' broken protocols by looking
				2856	through its data and mangling it. It mangles IP addresses,
				2857	only IP addresses and nothing but IP addresses.
				2858	This, too, is not missing functionality.
				2859
				2860	To summarize: if you need to compress address space or keep
				2861	active FTP clients happy, your choice is not route NAT but masquerading,
				2862	port forwarding, NAPT etc.
				2863	\begin{NB}
				2864	By the way, you may also want to look at
				2865	http://www.suse.com/\~mha/HyperNews/get/linux-ip-nat.html
				2866	\end{NB}
				2867
				2868
				2869	\paragraph{How it works.}
				2870	Some part of the address space is reserved for dummy addresses
				2871	which will look for all the world like some host addresses
				2872	inside your network. No other hosts may use these addresses,
				2873	however other routers may also be configured to translate them.
				2874	\begin{NB}
				2875	A great advantage of route NAT is that it may be used not
				2876	only in stub networks but in environments with arbitrarily complicated
				2877	structure. It does not firewall, it {\em forwards.}
				2878	\end{NB}
				2879	These addresses are selected by the \verb\|ip route\| command
				2880	(sec.\ref{IP-ROUTE-ADD}, p.\pageref{IP-ROUTE-ADD}). F.e.\
				2881	\begin{verbatim}
				2882	ip route add nat 192.203.80.144 via 193.233.7.83
				2883	\end{verbatim}
				2884	states that the single address 192.203.80.144 is a dummy NAT address.
				2885	For all the world it looks like a host address inside our network.
				2886	For neighbouring hosts and routers it looks like the local address
				2887	of the translating router. The router answers ARP for it, advertises
				2888	this address as routed via it, {\em et al\/}. When the router
				2889	receives a packet destined for 192.203.80.144, it replaces
				2890	this address with 193.233.7.83 which is the address of some real
				2891	host and forwards the packet. If you need to remap
				2892	blocks of addresses, you may use a command like:
				2893	\begin{verbatim}
				2894	ip route add nat 192.203.80.192/26 via 193.233.7.64
				2895	\end{verbatim}
				2896	This command will map a block of 64 addresses 192.203.80.192-255 to
				2897	193.233.7.64-127.
				2898
				2899	When an internal host (193.233.7.83 in the example above)
				2900	sends something to the outer world and these packets are forwarded
				2901	by our router, it should translate the source address 193.233.7.83
				2902	into 192.203.80.144. This task is solved by setting a special
				2903	policy rule (sec.\ref{IP-RULE-ADD}, p.\pageref{IP-RULE-ADD}):
				2904	\begin{verbatim}
				2905	ip rule add prio 320 from 193.233.7.83 nat 192.203.80.144
				2906	\end{verbatim}
				2907	This rule says that the source address 193.233.7.83
				2908	should be translated into 192.203.80.144 before forwarding.
				2909	It is important that the address after the \verb\|nat\| keyword
				2910	is some NAT address, declared by {\tt ip route add nat}.
				2911	If it is just a random address the router will not map to it.
				2912	\begin{NB}
				2913	The exception is when the address is a local address of this
				2914	router (or 0.0.0.0) and masquerading is configured in the linux-2.2
				2915	kernel. In this case the router will masquerade the packets as this address.
				2916	If 0.0.0.0 is selected, the result is equivalent to one
				2917	obtained with firewalling rules. Otherwise, you have the way
				2918	to order Linux to masquerade to this fixed address.
				2919	The NAT mechanism used in linux-2.4 is more flexible than
				2920	masquerading, so this feature has lost its meaning and has been disabled.
				2921	\end{NB}
				2922
				2923	If the network has non-trivial internal structure, it is
				2924	useful and even necessary to add rules disabling translation
				2925	when a packet does not leave this network. Let us return to the
				2926	example from sec.\ref{IP-RULE-SHOW} (p.\pageref{IP-RULE-SHOW}).
				2927	\begin{verbatim}
				2928	300: from 193.233.7.83 to 193.233.7.0/24 lookup main
				2929	310: from 193.233.7.83 to 192.203.80.0/24 lookup main
				2930	320: from 193.233.7.83 lookup inr.ruhep map-to 192.203.80.144
				2931	\end{verbatim}
				2932	This block of rules causes normal forwarding when
				2933	packets from 193.233.7.83 do not leave networks 193.233.7/24
				2934	and 192.203.80/24. Also, if the \verb\|inr.ruhep\| table does not
				2935	contain a route to the destination (which means that the routing
				2936	domain owning addresses from 192.203.80/24 is dead), no translation
				2937	will occur. Otherwise, the packets are translated.
				2938
				2939	\paragraph{How to only translate selected ports:}
				2940	If you only want to translate selected ports (f.e.\ http)
				2941	and leave the rest intact, you may use \verb\|ipchains\|
				2942	to \verb\|fwmark\| a class of packets.
				2943	Suppose you did and all the packets from 193.233.7.83
				2944	destined for port 80 are marked with marker 0x1234 in input fwchain.
				2945	In this case you may replace rule \#320 with:
				2946	\begin{verbatim}
				2947	320: from 193.233.7.83 fwmark 1234 lookup main map-to 192.203.80.144
				2948	\end{verbatim}
				2949	and translation will only be enabled for outgoing http requests.
				2950
				2951	\section{Example: minimal host setup}
				2952	\label{EXAMPLE-SETUP}
				2953
				2954	The following script gives an example of a fault safe
				2955	setup of IP (and IPv6, if it is compiled into the kernel)
				2956	in the common case of a node attached to a single broadcast
				2957	network. A more advanced script, which may be used both on multihomed
				2958	hosts and on routers, is described in the following
				2959	section.
				2960
				2961	The utilities used in the script may be found in the
				2962	directory ftp://ftp.inr.ac.ru/ip-routing/:
				2963	\begin{enumerate}
				2964	\item \verb\|ip\| --- package \verb\|iproute2\|.
				2965	\item \verb\|arping\| --- package \verb\|iputils\|.
				2966	\item \verb\|rdisc\| --- package \verb\|iputils\|.
				2967	\end{enumerate}
				2968	\begin{NB}
				2969	It also refers to a DHCP client, \verb\|dhcpcd\|. I should refrain from
				2970	recommending a good DHCP client to use. All that I can
				2971	say is that ISC \verb\|dhcp-2.0b1pl6\| patched with the patch that
				2972	can be found in the \verb\|dhcp.bootp.rarp\| subdirectory of
				2973	the same ftp site {\em does\/} work,
				2974	at least on Ethernet and Token Ring.
				2975	\end{NB}
				2976
				2977	\begin{verbatim}
				2978	#! /bin/bash
				2979	\end{verbatim}
				2980	\begin{flushleft}
				2981	\# {\bf Usage: \verb\|ifone ADDRESS[/PREFIX-LENGTH] [DEVICE]\|}\\
				2982	\# {\bf Parameters:}\\
				2983	\# \$1 --- Static IP address, optionally followed by prefix length.\\
				2984	\# \$2 --- Device name. If it is missing, \verb|eth0| is assumed.\\
				2985	\# F.e. \verb\|ifone 193.233.7.90\|
				2986	\end{flushleft}
				2987	\begin{verbatim}
				2988	dev=$2
				2989	: ${dev:=eth0}
				2990	ipaddr=
				2991	\end{verbatim}
				2992	\# Parse IP address, splitting prefix length.
				2993	\begin{verbatim}
				2994	if [ "$1" != "" ]; then
				2995	ipaddr=${1%/*}
				2996	if [ "$1" != "$ipaddr" ]; then
				2997	pfxlen=${1#*/}
				2998	fi
				2999	: ${pfxlen:=24}
				3000	fi
				3001	pfx="${ipaddr}/${pfxlen}"
				3002	\end{verbatim}
				3003
				3004	\begin{flushleft}
				3005	\# {\bf Step 0} --- enable loopback.\\
				3006	\#\\
				3007	\# This step is necessary on any networked box before attempting\\
				3008	\# to configure any other device.\\
				3009	\end{flushleft}
				3010	\begin{verbatim}
				3011	ip link set up dev lo
				3012	ip addr add 127.0.0.1/8 dev lo brd + scope host
				3013	\end{verbatim}
				3014	\begin{flushleft}
				3015	\# IPv6 autoconfigures itself on loopback.\\
				3016	\#\\
				3017	\# If user gave loopback as device, we add the address as alias and exit.
				3018	\end{flushleft}
				3019	\begin{verbatim}
				3020	if [ "$dev" = "lo" ]; then
				3021	if [ "$ipaddr" != "" -a "$ipaddr" != "127.0.0.1" ]; then
				3022	ip address add $ipaddr dev $dev
				3023	exit $?
				3024	fi
				3025	exit 0
				3026	fi
				3027	\end{verbatim}
				3028
				3029	\noindent\# {\bf Step 1} --- enable device \verb\|$dev\|
				3030
				3031	\begin{verbatim}
				3032	if ! ip link set up dev $dev ; then
				3033	echo "Cannot enable interface $dev. Aborting." 1>&2
				3034	exit 1
				3035	fi
				3036	\end{verbatim}
				3037	\begin{flushleft}
				3038	\# The interface is \verb\|UP\|. IPv6 started stateless autoconfiguration itself,\\
				3039	\# and its configuration finishes here. However,\\
				3040	\# IP still needs some static preconfigured address.
				3041	\end{flushleft}
				3042	\begin{verbatim}
				3043	if [ "$ipaddr" = "" ]; then
				3044	echo "No address for $dev is configured, trying DHCP..." 1>&2
				3045	dhcpcd
				3046	exit $?
				3047	fi
				3048	\end{verbatim}
				3049
				3050	\begin{flushleft}
				3051	\# {\bf Step 2} --- IP Duplicate Address Detection~\cite{RFC-DHCP}.\\
				3052	\# Send two probes and wait for result for 3 seconds.\\
				3053	\# If the interface opens slower f.e.\ due to long media detection,\\
				3054	\# you want to increase the timeout.\\
				3055	\end{flushleft}
				3056	\begin{verbatim}
				3057	if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then
				3058	echo "Address $ipaddr is busy, trying DHCP..." 1>&2
				3059	dhcpcd
				3060	exit $?
				3061	fi
				3062	\end{verbatim}
				3063	\begin{flushleft}
				3064	\# OK, the address is unique, we may add it on the interface.\\
				3065	\#\\
				3066	\# {\bf Step 3} --- Configure the address on the interface.
				3067	\end{flushleft}
				3068
				3069	\begin{verbatim}
				3070	if ! ip address add $pfx brd + dev $dev; then
				3071	echo "Failed to add $pfx on $dev, trying DHCP..." 1>&2
				3072	dhcpcd
				3073	exit $?
				3074	fi
				3075	\end{verbatim}
				3076
				3077	\noindent\# {\bf Step 4} --- Announce our presence on the link.
				3078	\begin{verbatim}
				3079	arping -A -c 1 -I $dev $ipaddr
				3080	noarp=$?
				3081	( sleep 2;
				3082	arping -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null &
				3083	\end{verbatim}
				3084
				3085	\begin{flushleft}
				3086	\# {\bf Step 5} (optional) --- Add some control routes.\\
				3087	\#\\
				3088	\# 1. Prohibit link local multicast addresses.\\
				3089	\# 2. Prohibit link local (alias, limited) broadcast.\\
				3090	\# 3. Add default multicast route.
				3091	\end{flushleft}
				3092	\begin{verbatim}
				3093	ip route add unreachable 224.0.0.0/24
				3094	ip route add unreachable 255.255.255.255
				3095	if [ `ip link ls $dev | grep -c MULTICAST` -ge 1 ]; then
				3096	ip route add 224.0.0.0/4 dev $dev scope global
				3097	fi
				3098	\end{verbatim}
				3099
				3100	\begin{flushleft}
				3101	\# {\bf Step 6} --- Add fallback default route with huge metric.\\
				3102	\# If a proxy ARP server is present on the interface, we will be\\
				3103	\# able to talk to all the Internet without further configuration.\\
				3104	\# It is not so cheap though and we still hope that this route\\
				3105	\# will be overridden by a more correct one from rdisc.\\
				3106	\# Do not make this step if the device is not ARPable,\\
				3107	\# because dead nexthop detection does not work on them.
				3108	\end{flushleft}
				3109	\begin{verbatim}
				3110	if [ "$noarp" = "0" ]; then
				3111	ip ro add default dev $dev metric 30000 scope global
				3112	fi
				3113	\end{verbatim}
				3114
				3115	\begin{flushleft}
				3116	\# {\bf Step 7} --- Restart router discovery and exit.
				3117	\end{flushleft}
				3118	\begin{verbatim}
				3119	killall -HUP rdisc || rdisc -fs
				3120	exit 0
				3121	\end{verbatim}
				3122
				3123
				3124	\section{Example: {\protect\tt ifcfg} --- interface address management}
				3125	\label{EXAMPLE-IFCFG}
				3126
				3127	This is a simplistic script replacing one option of \verb\|ifconfig\|,
				3128	namely, IP address management. It not only adds
				3129	addresses, but also carries out Duplicate Address Detection~\cite{RFC-DHCP},
				3130	sends unsolicited ARP to update the caches of other hosts sharing
				3131	the interface, adds some control routes and restarts Router Discovery
				3132	when it is necessary.
				3133
				3134	I strongly recommend using it {\em instead\/} of \verb\|ifconfig\| both
				3135	on hosts and on routers.
				3136
				3137	\begin{verbatim}
				3138	#! /bin/bash
				3139	\end{verbatim}
				3140	\begin{flushleft}
				3141	\# {\bf Usage: \verb?ifcfg DEVICE[:ALIAS] [add|del] ADDRESS[/LENGTH] [PEER]?}\\
				3142	\# {\bf Parameters:}\\
				3143	\# ---Device name. It may have alias suffix, separated by colon.\\
				3144	\# ---Command: add, delete or stop.\\
				3145	\# ---IP address, optionally followed by prefix length.\\
				3146	\# ---Optional peer address for pointopoint interfaces.\\
				3147	\# F.e. \verb\|ifcfg eth0 193.233.7.90/24\|
				3148
				3149	\noindent\# This function determines whether this node is a router or a host.\\
				3150	\# It returns 0 if the host is apparently not a router.
				3151	\end{flushleft}
				3152	\begin{verbatim}
				3153	CheckForwarding () {
				3154	local sbase fwd
				3155	sbase=/proc/sys/net/ipv4/conf
				3156	fwd=0
				3157	if [ -d $sbase ]; then
				3158	for dir in $sbase/*/forwarding; do
				3159	fwd=$[$fwd + `cat $dir`]
				3160	done
				3161	else
				3162	fwd=2
				3163	fi
				3164	return $fwd
				3165	}
				3166	\end{verbatim}
				3167	\begin{flushleft}
				3168	\# This function restarts Router Discovery.\\
				3169	\end{flushleft}
				3170	\begin{verbatim}
				3171	RestartRDISC () {
				3172	killall -HUP rdisc || rdisc -fs
				3173	}
				3174	\end{verbatim}
				3175	\begin{flushleft}
				3176	\# Calculate ABC "natural" mask length\\
				3177	\# Arg: \$1 = dotquad address
				3178	\end{flushleft}
				3179	\begin{verbatim}
				3180	ABCMaskLen () {
				3181	local class;
				3182	class=${1%%.*}
				3183	if [ $class -eq 0 -o $class -ge 224 ]; then return 0
				3184	elif [ $class -ge 192 ]; then return 24
				3185	elif [ $class -ge 128 ]; then return 16
				3186	else return 8 ; fi
				3187	}
				3188	\end{verbatim}
				3189
				3190
				3191	\begin{flushleft}
				3192	\# {\bf MAIN()}\\
				3193	\#\\
				3194	\# Strip alias suffix separated by colon.
				3195	\end{flushleft}
				3196	\begin{verbatim}
				3197	label="label $1"
				3198	ldev=$1
				3199	dev=${1%:*}
				3200	if [ "$dev" = "" -o "$1" = "help" ]; then
				3201	echo "Usage: ifcfg DEV [[add|del [ADDR[/LEN]] [PEER] | stop]" 1>&2
				3202	echo " add - add new address" 1>&2
				3203	echo " del - delete address" 1>&2
				3204	echo " stop - completely disable IP" 1>&2
				3205	exit 1
				3206	fi
				3207	shift
				3208
				3209	CheckForwarding
				3210	fwd=$?
				3211	\end{verbatim}
				3212	\begin{flushleft}
				3213	\# Parse command. If it is ``stop'', flush and exit.
				3214	\end{flushleft}
				3215	\begin{verbatim}
				3216	deleting=0
				3217	case "$1" in
				3218	add) shift ;;
				3219	stop)
				3220	if [ "$ldev" != "$dev" ]; then
				3221	echo "Cannot stop alias $ldev" 1>&2
				3222	exit 1;
				3223	fi
				3224	ip -4 addr flush dev $dev $label || exit 1
				3225	if [ $fwd -eq 0 ]; then RestartRDISC; fi
				3226	exit 0 ;;
				3227	del*)
				3228	deleting=1; shift ;;
				3229	*)
				3230	esac
				3231	\end{verbatim}
				3232	\begin{flushleft}
				3233	\# Parse prefix, split prefix length, separated by slash.
				3234	\end{flushleft}
				3235	\begin{verbatim}
				3236	ipaddr=
				3237	pfxlen=
				3238	if [ "$1" != "" ]; then
				3239	ipaddr=${1%/*}
				3240	if [ "$1" != "$ipaddr" ]; then
				3241	pfxlen=${1#*/}
				3242	fi
				3243	if [ "$ipaddr" = "" ]; then
				3244	echo "$1 is bad IP address." 1>&2
				3245	exit 1
				3246	fi
				3247	fi
				3248	shift
				3249	\end{verbatim}
				3250	\begin{flushleft}
				3251	\# If peer address is present, prefix length is 32.\\
				3252	\# Otherwise, if prefix length was not given, guess it.
				3253	\end{flushleft}
				3254	\begin{verbatim}
				3255	peer=$1
				3256	if [ "$peer" != "" ]; then
				3257	if [ "$pfxlen" != "" -a "$pfxlen" != "32" ]; then
				3258	echo "Peer address with non-trivial netmask." 1>&2
				3259	exit 1
				3260	fi
				3261	pfx="$ipaddr peer $peer"
				3262	else
				3263	if [ "$pfxlen" = "" ]; then
				3264	ABCMaskLen $ipaddr
				3265	pfxlen=$?
				3266	fi
				3267	pfx="$ipaddr/$pfxlen"
				3268	fi
				3269	if [ "$ldev" = "$dev" -a "$ipaddr" != "" ]; then
				3270	label=
				3271	fi
				3272	\end{verbatim}
				3273	\begin{flushleft}
				3274	\# If deletion was requested, delete the address and restart RDISC
				3275	\end{flushleft}
				3276	\begin{verbatim}
				3277	if [ $deleting -ne 0 ]; then
				3278	ip addr del $pfx dev $dev $label || exit 1
				3279	if [ $fwd -eq 0 ]; then RestartRDISC; fi
				3280	exit 0
				3281	fi
				3282	\end{verbatim}
				3283	\begin{flushleft}
				3284	\# Start interface initialization.\\
				3285	\#\\
				3286	\# {\bf Step 0} --- enable device \verb\|$dev\|
				3287	\end{flushleft}
				3288	\begin{verbatim}
				3289	if ! ip link set up dev $dev ; then
				3290	echo "Error: cannot enable interface $dev." 1>&2
				3291	exit 1
				3292	fi
				3293	if [ "$ipaddr" = "" ]; then exit 0; fi
				3294	\end{verbatim}
				3295	\begin{flushleft}
				3296	\# {\bf Step 1} --- IP Duplicate Address Detection~\cite{RFC-DHCP}.\\
				3297	\# Send two probes and wait for result for 3 seconds.\\
				3298	\# If the interface opens slower f.e.\ due to long media detection,\\
				3299	\# you want to increase the timeout.\\
				3300	\end{flushleft}
				3301	\begin{verbatim}
				3302	if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then
				3303	echo "Error: some host already uses address $ipaddr on $dev." 1>&2
				3304	exit 1
				3305	fi
				3306	\end{verbatim}
				3307	\begin{flushleft}
				3308	\# OK, the address is unique. We may add it to the interface.\\
				3309	\#\\
				3310	\# {\bf Step 2} --- Configure the address on the interface.
				3311	\end{flushleft}
				3312	\begin{verbatim}
				3313	if ! ip address add $pfx brd + dev $dev $label; then
				3314	echo "Error: failed to add $pfx on $dev." 1>&2
				3315	exit 1
				3316	fi
				3317	\end{verbatim}
				3318	\noindent\# {\bf Step 3} --- Announce our presence on the link
				3319	\begin{verbatim}
				3320	arping -q -A -c 1 -I $dev $ipaddr
				3321	noarp=$?
				3322	( sleep 2 ;
				3323	arping -q -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null &
				3324	\end{verbatim}
				3325	\begin{flushleft}
				3326	\# {\bf Step 4} (optional) --- Add some control routes.\\
				3327	\#\\
				3328	\# 1. Prohibit link local multicast addresses.\\
				3329	\# 2. Prohibit link local (alias, limited) broadcast.\\
				3330	\# 3. Add default multicast route.
				3331	\end{flushleft}
				3332	\begin{verbatim}
				3333	ip route add unreachable 224.0.0.0/24 >& /dev/null
				3334	ip route add unreachable 255.255.255.255 >& /dev/null
				3335	if [ `ip link ls $dev | grep -c MULTICAST` -ge 1 ]; then
				3336	ip route add 224.0.0.0/4 dev $dev scope global >& /dev/null
				3337	fi
				3338	\end{verbatim}
				3339	\begin{flushleft}
				3340	\# {\bf Step 5} --- Add fallback default route with huge metric.\\
				3341	\# If a proxy ARP server is present on the interface, we will be\\
				3342	\# able to talk to all the Internet without further configuration.\\
				3343	\# Do not make this step on a router or if the device is not ARPable,\\
				3344	\# because dead nexthop detection does not work on them.
				3345	\end{flushleft}
				3346	\begin{verbatim}
				3347	if [ $fwd -eq 0 ]; then
				3348	if [ $noarp -eq 0 ]; then
				3349	ip ro append default dev $dev metric 30000 scope global
				3350	elif [ "$peer" != "" ]; then
				3351	if ping -q -c 2 -w 4 $peer ; then
				3352	ip ro append default via $peer dev $dev metric 30001
				3353	fi
				3354	fi
				3355	RestartRDISC
				3356	fi
				3357
				3358	exit 0
				3359	\end{verbatim}
				3360	\begin{flushleft}
				3361	\# End of {\bf MAIN()}
				3362	\end{flushleft}
				3363
				3364
				3365	\end{document}