Blame - src/kernel/linux/v4.19/fs/btrfs/send.c - T800

blob: 931a7d1ddc951475aa224752470ae78b42e782ef [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* Copyright (C) 2012 Alexander Block. All rights reserved.
				4	*/
				5
				6	#include <linux/bsearch.h>
				7	#include <linux/fs.h>
				8	#include <linux/file.h>
				9	#include <linux/sort.h>
				10	#include <linux/mount.h>
				11	#include <linux/xattr.h>
				12	#include <linux/posix_acl_xattr.h>
				13	#include <linux/radix-tree.h>
				14	#include <linux/vmalloc.h>
				15	#include <linux/string.h>
				16	#include <linux/compat.h>
				17	#include <linux/crc32c.h>
				18
				19	#include "send.h"
				20	#include "backref.h"
				21	#include "locking.h"
				22	#include "disk-io.h"
				23	#include "btrfs_inode.h"
				24	#include "transaction.h"
				25	#include "compression.h"
				26
				27	/*
				28	* Maximum number of references an extent can have in order for us to attempt to
				29	* issue clone operations instead of write operations. This currently exists to
				30	* avoid hitting limitations of the backreference walking code (taking a lot of
				31	* time and using too much memory for extents with large number of references).
				32	*/
				33	#define SEND_MAX_EXTENT_REFS 64
				34
				35	/*
				36	* A fs_path is a helper to dynamically build path names with unknown size.
				37	* It reallocates the internal buffer on demand.
				38	* It allows fast adding of path elements on the right side (normal path) and
				39	* fast adding to the left side (reversed path). A reversed path can also be
				40	* unreversed if needed.
				41	*/
				42	struct fs_path {
				43	union {
				44	struct {
				45	char *start;
				46	char *end;
				47
				48	char *buf;
				49	unsigned short buf_len:15;
				50	unsigned short reversed:1;
				51	char inline_buf[];
				52	};
				53	/*
				54	* Average path length does not exceed 200 bytes, we'll have
				55	* better packing in the slab and higher chance to satisfy
				56	* a allocation later during send.
				57	*/
				58	char pad[256];
				59	};
				60	};
				61	#define FS_PATH_INLINE_SIZE \
				62	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
				63
				64
				65	/* reused for each extent */
				66	struct clone_root {
				67	struct btrfs_root *root;
				68	u64 ino;
				69	u64 offset;
				70
				71	u64 found_refs;
				72	};
				73
				74	#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
				75	#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
				76
				77	struct send_ctx {
				78	struct file *send_filp;
				79	loff_t send_off;
				80	char *send_buf;
				81	u32 send_size;
				82	u32 send_max_size;
				83	u64 total_send_size;
				84	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
				85	u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */
				86
				87	struct btrfs_root *send_root;
				88	struct btrfs_root *parent_root;
				89	struct clone_root *clone_roots;
				90	int clone_roots_cnt;
				91
				92	/* current state of the compare_tree call */
				93	struct btrfs_path *left_path;
				94	struct btrfs_path *right_path;
				95	struct btrfs_key *cmp_key;
				96
				97	/*
				98	* infos of the currently processed inode. In case of deleted inodes,
				99	* these are the values from the deleted inode.
				100	*/
				101	u64 cur_ino;
				102	u64 cur_inode_gen;
				103	int cur_inode_new;
				104	int cur_inode_new_gen;
				105	int cur_inode_deleted;
				106	u64 cur_inode_size;
				107	u64 cur_inode_mode;
				108	u64 cur_inode_rdev;
				109	u64 cur_inode_last_extent;
				110	u64 cur_inode_next_write_offset;
				111	bool ignore_cur_inode;
				112
				113	u64 send_progress;
				114
				115	struct list_head new_refs;
				116	struct list_head deleted_refs;
				117
				118	struct radix_tree_root name_cache;
				119	struct list_head name_cache_list;
				120	int name_cache_size;
				121
				122	struct file_ra_state ra;
				123
				124	char *read_buf;
				125
				126	/*
				127	* We process inodes by their increasing order, so if before an
				128	* incremental send we reverse the parent/child relationship of
				129	* directories such that a directory with a lower inode number was
				130	* the parent of a directory with a higher inode number, and the one
				131	* becoming the new parent got renamed too, we can't rename/move the
				132	* directory with lower inode number when we finish processing it - we
				133	* must process the directory with higher inode number first, then
				134	* rename/move it and then rename/move the directory with lower inode
				135	* number. Example follows.
				136	*
				137	* Tree state when the first send was performed:
				138	*
				139	* .
				140	* \|-- a (ino 257)
				141	* \|-- b (ino 258)
				142	* \|
				143	* \|
				144	* \|-- c (ino 259)
				145	* \| \|-- d (ino 260)
				146	* \|
				147	* \|-- c2 (ino 261)
				148	*
				149	* Tree state when the second (incremental) send is performed:
				150	*
				151	* .
				152	* \|-- a (ino 257)
				153	* \|-- b (ino 258)
				154	* \|-- c2 (ino 261)
				155	* \|-- d2 (ino 260)
				156	* \|-- cc (ino 259)
				157	*
				158	* The sequence of steps that lead to the second state was:
				159	*
				160	* mv /a/b/c/d /a/b/c2/d2
				161	* mv /a/b/c /a/b/c2/d2/cc
				162	*
				163	* "c" has lower inode number, but we can't move it (2nd mv operation)
				164	* before we move "d", which has higher inode number.
				165	*
				166	* So we just memorize which move/rename operations must be performed
				167	* later when their respective parent is processed and moved/renamed.
				168	*/
				169
				170	/* Indexed by parent directory inode number. */
				171	struct rb_root pending_dir_moves;
				172
				173	/*
				174	* Reverse index, indexed by the inode number of a directory that
				175	* is waiting for the move/rename of its immediate parent before its
				176	* own move/rename can be performed.
				177	*/
				178	struct rb_root waiting_dir_moves;
				179
				180	/*
				181	* A directory that is going to be rm'ed might have a child directory
				182	* which is in the pending directory moves index above. In this case,
				183	* the directory can only be removed after the move/rename of its child
				184	* is performed. Example:
				185	*
				186	* Parent snapshot:
				187	*
				188	* . (ino 256)
				189	* \|-- a/ (ino 257)
				190	* \|-- b/ (ino 258)
				191	* \|-- c/ (ino 259)
				192	* \| \|-- x/ (ino 260)
				193	* \|
				194	* \|-- y/ (ino 261)
				195	*
				196	* Send snapshot:
				197	*
				198	* . (ino 256)
				199	* \|-- a/ (ino 257)
				200	* \|-- b/ (ino 258)
				201	* \|-- YY/ (ino 261)
				202	* \|-- x/ (ino 260)
				203	*
				204	* Sequence of steps that lead to the send snapshot:
				205	* rm -f /a/b/c/foo.txt
				206	* mv /a/b/y /a/b/YY
				207	* mv /a/b/c/x /a/b/YY
				208	* rmdir /a/b/c
				209	*
				210	* When the child is processed, its move/rename is delayed until its
				211	* parent is processed (as explained above), but all other operations
				212	* like update utimes, chown, chgrp, etc, are performed and the paths
				213	* that it uses for those operations must use the orphanized name of
				214	* its parent (the directory we're going to rm later), so we need to
				215	* memorize that name.
				216	*
				217	* Indexed by the inode number of the directory to be deleted.
				218	*/
				219	struct rb_root orphan_dirs;
				220	};
				221
				222	struct pending_dir_move {
				223	struct rb_node node;
				224	struct list_head list;
				225	u64 parent_ino;
				226	u64 ino;
				227	u64 gen;
				228	struct list_head update_refs;
				229	};
				230
				231	struct waiting_dir_move {
				232	struct rb_node node;
				233	u64 ino;
				234	/*
				235	* There might be some directory that could not be removed because it
				236	* was waiting for this directory inode to be moved first. Therefore
				237	* after this directory is moved, we can try to rmdir the ino rmdir_ino.
				238	*/
				239	u64 rmdir_ino;
				240	bool orphanized;
				241	};
				242
				243	struct orphan_dir_info {
				244	struct rb_node node;
				245	u64 ino;
				246	u64 gen;
				247	u64 last_dir_index_offset;
				248	};
				249
				250	struct name_cache_entry {
				251	struct list_head list;
				252	/*
				253	* radix_tree has only 32bit entries but we need to handle 64bit inums.
				254	* We use the lower 32bit of the 64bit inum to store it in the tree. If
				255	* more then one inum would fall into the same entry, we use radix_list
				256	* to store the additional entries. radix_list is also used to store
				257	* entries where two entries have the same inum but different
				258	* generations.
				259	*/
				260	struct list_head radix_list;
				261	u64 ino;
				262	u64 gen;
				263	u64 parent_ino;
				264	u64 parent_gen;
				265	int ret;
				266	int need_later_update;
				267	int name_len;
				268	char name[];
				269	};
				270
				271	__cold
				272	static void inconsistent_snapshot_error(struct send_ctx *sctx,
				273	enum btrfs_compare_tree_result result,
				274	const char *what)
				275	{
				276	const char *result_string;
				277
				278	switch (result) {
				279	case BTRFS_COMPARE_TREE_NEW:
				280	result_string = "new";
				281	break;
				282	case BTRFS_COMPARE_TREE_DELETED:
				283	result_string = "deleted";
				284	break;
				285	case BTRFS_COMPARE_TREE_CHANGED:
				286	result_string = "updated";
				287	break;
				288	case BTRFS_COMPARE_TREE_SAME:
				289	ASSERT(0);
				290	result_string = "unchanged";
				291	break;
				292	default:
				293	ASSERT(0);
				294	result_string = "unexpected";
				295	}
				296
				297	btrfs_err(sctx->send_root->fs_info,
				298	"Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
				299	result_string, what, sctx->cmp_key->objectid,
				300	sctx->send_root->root_key.objectid,
				301	(sctx->parent_root ?
				302	sctx->parent_root->root_key.objectid : 0));
				303	}
				304
				305	static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
				306
				307	static struct waiting_dir_move *
				308	get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
				309
				310	static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);
				311
				312	static int need_send_hole(struct send_ctx *sctx)
				313	{
				314	return (sctx->parent_root && !sctx->cur_inode_new &&
				315	!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
				316	S_ISREG(sctx->cur_inode_mode));
				317	}
				318
				319	static void fs_path_reset(struct fs_path *p)
				320	{
				321	if (p->reversed) {
				322	p->start = p->buf + p->buf_len - 1;
				323	p->end = p->start;
				324	*p->start = 0;
				325	} else {
				326	p->start = p->buf;
				327	p->end = p->start;
				328	*p->start = 0;
				329	}
				330	}
				331
				332	static struct fs_path *fs_path_alloc(void)
				333	{
				334	struct fs_path *p;
				335
				336	p = kmalloc(sizeof(*p), GFP_KERNEL);
				337	if (!p)
				338	return NULL;
				339	p->reversed = 0;
				340	p->buf = p->inline_buf;
				341	p->buf_len = FS_PATH_INLINE_SIZE;
				342	fs_path_reset(p);
				343	return p;
				344	}
				345
				346	static struct fs_path *fs_path_alloc_reversed(void)
				347	{
				348	struct fs_path *p;
				349
				350	p = fs_path_alloc();
				351	if (!p)
				352	return NULL;
				353	p->reversed = 1;
				354	fs_path_reset(p);
				355	return p;
				356	}
				357
				358	static void fs_path_free(struct fs_path *p)
				359	{
				360	if (!p)
				361	return;
				362	if (p->buf != p->inline_buf)
				363	kfree(p->buf);
				364	kfree(p);
				365	}
				366
				367	static int fs_path_len(struct fs_path *p)
				368	{
				369	return p->end - p->start;
				370	}
				371
				372	static int fs_path_ensure_buf(struct fs_path *p, int len)
				373	{
				374	char *tmp_buf;
				375	int path_len;
				376	int old_buf_len;
				377
				378	len++;
				379
				380	if (p->buf_len >= len)
				381	return 0;
				382
				383	if (len > PATH_MAX) {
				384	WARN_ON(1);
				385	return -ENOMEM;
				386	}
				387
				388	path_len = p->end - p->start;
				389	old_buf_len = p->buf_len;
				390
				391	/*
				392	* First time the inline_buf does not suffice
				393	*/
				394	if (p->buf == p->inline_buf) {
				395	tmp_buf = kmalloc(len, GFP_KERNEL);
				396	if (tmp_buf)
				397	memcpy(tmp_buf, p->buf, old_buf_len);
				398	} else {
				399	tmp_buf = krealloc(p->buf, len, GFP_KERNEL);
				400	}
				401	if (!tmp_buf)
				402	return -ENOMEM;
				403	p->buf = tmp_buf;
				404	/*
				405	* The real size of the buffer is bigger, this will let the fast path
				406	* happen most of the time
				407	*/
				408	p->buf_len = ksize(p->buf);
				409
				410	if (p->reversed) {
				411	tmp_buf = p->buf + old_buf_len - path_len - 1;
				412	p->end = p->buf + p->buf_len - 1;
				413	p->start = p->end - path_len;
				414	memmove(p->start, tmp_buf, path_len + 1);
				415	} else {
				416	p->start = p->buf;
				417	p->end = p->start + path_len;
				418	}
				419	return 0;
				420	}
				421
				422	static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
				423	char **prepared)
				424	{
				425	int ret;
				426	int new_len;
				427
				428	new_len = p->end - p->start + name_len;
				429	if (p->start != p->end)
				430	new_len++;
				431	ret = fs_path_ensure_buf(p, new_len);
				432	if (ret < 0)
				433	goto out;
				434
				435	if (p->reversed) {
				436	if (p->start != p->end)
				437	*--p->start = '/';
				438	p->start -= name_len;
				439	*prepared = p->start;
				440	} else {
				441	if (p->start != p->end)
				442	*p->end++ = '/';
				443	*prepared = p->end;
				444	p->end += name_len;
				445	*p->end = 0;
				446	}
				447
				448	out:
				449	return ret;
				450	}
				451
				452	static int fs_path_add(struct fs_path p, const char name, int name_len)
				453	{
				454	int ret;
				455	char *prepared;
				456
				457	ret = fs_path_prepare_for_add(p, name_len, &prepared);
				458	if (ret < 0)
				459	goto out;
				460	memcpy(prepared, name, name_len);
				461
				462	out:
				463	return ret;
				464	}
				465
				466	static int fs_path_add_path(struct fs_path p, struct fs_path p2)
				467	{
				468	int ret;
				469	char *prepared;
				470
				471	ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
				472	if (ret < 0)
				473	goto out;
				474	memcpy(prepared, p2->start, p2->end - p2->start);
				475
				476	out:
				477	return ret;
				478	}
				479
				480	static int fs_path_add_from_extent_buffer(struct fs_path *p,
				481	struct extent_buffer *eb,
				482	unsigned long off, int len)
				483	{
				484	int ret;
				485	char *prepared;
				486
				487	ret = fs_path_prepare_for_add(p, len, &prepared);
				488	if (ret < 0)
				489	goto out;
				490
				491	read_extent_buffer(eb, prepared, off, len);
				492
				493	out:
				494	return ret;
				495	}
				496
				497	static int fs_path_copy(struct fs_path p, struct fs_path from)
				498	{
				499	int ret;
				500
				501	p->reversed = from->reversed;
				502	fs_path_reset(p);
				503
				504	ret = fs_path_add_path(p, from);
				505
				506	return ret;
				507	}
				508
				509
				510	static void fs_path_unreverse(struct fs_path *p)
				511	{
				512	char *tmp;
				513	int len;
				514
				515	if (!p->reversed)
				516	return;
				517
				518	tmp = p->start;
				519	len = p->end - p->start;
				520	p->start = p->buf;
				521	p->end = p->start + len;
				522	memmove(p->start, tmp, len + 1);
				523	p->reversed = 0;
				524	}
				525
				526	static struct btrfs_path *alloc_path_for_send(void)
				527	{
				528	struct btrfs_path *path;
				529
				530	path = btrfs_alloc_path();
				531	if (!path)
				532	return NULL;
				533	path->search_commit_root = 1;
				534	path->skip_locking = 1;
				535	path->need_commit_sem = 1;
				536	return path;
				537	}
				538
				539	static int write_buf(struct file filp, const void buf, u32 len, loff_t *off)
				540	{
				541	int ret;
				542	u32 pos = 0;
				543
				544	while (pos < len) {
				545	ret = kernel_write(filp, buf + pos, len - pos, off);
				546	/* TODO handle that correctly */
				547	/*if (ret == -ERESTARTSYS) {
				548	continue;
				549	}*/
				550	if (ret < 0)
				551	return ret;
				552	if (ret == 0) {
				553	return -EIO;
				554	}
				555	pos += ret;
				556	}
				557
				558	return 0;
				559	}
				560
				561	static int tlv_put(struct send_ctx sctx, u16 attr, const void data, int len)
				562	{
				563	struct btrfs_tlv_header *hdr;
				564	int total_len = sizeof(*hdr) + len;
				565	int left = sctx->send_max_size - sctx->send_size;
				566
				567	if (unlikely(left < total_len))
				568	return -EOVERFLOW;
				569
				570	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
				571	hdr->tlv_type = cpu_to_le16(attr);
				572	hdr->tlv_len = cpu_to_le16(len);
				573	memcpy(hdr + 1, data, len);
				574	sctx->send_size += total_len;
				575
				576	return 0;
				577	}
				578
				579	#define TLV_PUT_DEFINE_INT(bits) \
				580	static int tlv_put_u##bits(struct send_ctx *sctx, \
				581	u##bits attr, u##bits value) \
				582	{ \
				583	__le##bits __tmp = cpu_to_le##bits(value); \
				584	return tlv_put(sctx, attr, &__tmp, sizeof(__tmp)); \
				585	}
				586
				587	TLV_PUT_DEFINE_INT(64)
				588
				589	static int tlv_put_string(struct send_ctx *sctx, u16 attr,
				590	const char *str, int len)
				591	{
				592	if (len == -1)
				593	len = strlen(str);
				594	return tlv_put(sctx, attr, str, len);
				595	}
				596
				597	static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
				598	const u8 *uuid)
				599	{
				600	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
				601	}
				602
				603	static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
				604	struct extent_buffer *eb,
				605	struct btrfs_timespec *ts)
				606	{
				607	struct btrfs_timespec bts;
				608	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
				609	return tlv_put(sctx, attr, &bts, sizeof(bts));
				610	}
				611
				612
				613	#define TLV_PUT(sctx, attrtype, data, attrlen) \
				614	do { \
				615	ret = tlv_put(sctx, attrtype, data, attrlen); \
				616	if (ret < 0) \
				617	goto tlv_put_failure; \
				618	} while (0)
				619
				620	#define TLV_PUT_INT(sctx, attrtype, bits, value) \
				621	do { \
				622	ret = tlv_put_u##bits(sctx, attrtype, value); \
				623	if (ret < 0) \
				624	goto tlv_put_failure; \
				625	} while (0)
				626
				627	#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
				628	#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
				629	#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
				630	#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
				631	#define TLV_PUT_STRING(sctx, attrtype, str, len) \
				632	do { \
				633	ret = tlv_put_string(sctx, attrtype, str, len); \
				634	if (ret < 0) \
				635	goto tlv_put_failure; \
				636	} while (0)
				637	#define TLV_PUT_PATH(sctx, attrtype, p) \
				638	do { \
				639	ret = tlv_put_string(sctx, attrtype, p->start, \
				640	p->end - p->start); \
				641	if (ret < 0) \
				642	goto tlv_put_failure; \
				643	} while(0)
				644	#define TLV_PUT_UUID(sctx, attrtype, uuid) \
				645	do { \
				646	ret = tlv_put_uuid(sctx, attrtype, uuid); \
				647	if (ret < 0) \
				648	goto tlv_put_failure; \
				649	} while (0)
				650	#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
				651	do { \
				652	ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
				653	if (ret < 0) \
				654	goto tlv_put_failure; \
				655	} while (0)
				656
				657	static int send_header(struct send_ctx *sctx)
				658	{
				659	struct btrfs_stream_header hdr;
				660
				661	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
				662	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
				663
				664	return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
				665	&sctx->send_off);
				666	}
				667
				668	/*
				669	* For each command/item we want to send to userspace, we call this function.
				670	*/
				671	static int begin_cmd(struct send_ctx *sctx, int cmd)
				672	{
				673	struct btrfs_cmd_header *hdr;
				674
				675	if (WARN_ON(!sctx->send_buf))
				676	return -EINVAL;
				677
				678	BUG_ON(sctx->send_size);
				679
				680	sctx->send_size += sizeof(*hdr);
				681	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
				682	hdr->cmd = cpu_to_le16(cmd);
				683
				684	return 0;
				685	}
				686
				687	static int send_cmd(struct send_ctx *sctx)
				688	{
				689	int ret;
				690	struct btrfs_cmd_header *hdr;
				691	u32 crc;
				692
				693	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
				694	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
				695	hdr->crc = 0;
				696
				697	crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
				698	hdr->crc = cpu_to_le32(crc);
				699
				700	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
				701	&sctx->send_off);
				702
				703	sctx->total_send_size += sctx->send_size;
				704	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
				705	sctx->send_size = 0;
				706
				707	return ret;
				708	}
				709
				710	/*
				711	* Sends a move instruction to user space
				712	*/
				713	static int send_rename(struct send_ctx *sctx,
				714	struct fs_path from, struct fs_path to)
				715	{
				716	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
				717	int ret;
				718
				719	btrfs_debug(fs_info, "send_rename %s -> %s", from->start, to->start);
				720
				721	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
				722	if (ret < 0)
				723	goto out;
				724
				725	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
				726	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
				727
				728	ret = send_cmd(sctx);
				729
				730	tlv_put_failure:
				731	out:
				732	return ret;
				733	}
				734
				735	/*
				736	* Sends a link instruction to user space
				737	*/
				738	static int send_link(struct send_ctx *sctx,
				739	struct fs_path path, struct fs_path lnk)
				740	{
				741	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
				742	int ret;
				743
				744	btrfs_debug(fs_info, "send_link %s -> %s", path->start, lnk->start);
				745
				746	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
				747	if (ret < 0)
				748	goto out;
				749
				750	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
				751	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
				752
				753	ret = send_cmd(sctx);
				754
				755	tlv_put_failure:
				756	out:
				757	return ret;
				758	}
				759
				760	/*
				761	* Sends an unlink instruction to user space
				762	*/
				763	static int send_unlink(struct send_ctx sctx, struct fs_path path)
				764	{
				765	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
				766	int ret;
				767
				768	btrfs_debug(fs_info, "send_unlink %s", path->start);
				769
				770	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
				771	if (ret < 0)
				772	goto out;
				773
				774	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
				775
				776	ret = send_cmd(sctx);
				777
				778	tlv_put_failure:
				779	out:
				780	return ret;
				781	}
				782
				783	/*
				784	* Sends a rmdir instruction to user space
				785	*/
				786	static int send_rmdir(struct send_ctx sctx, struct fs_path path)
				787	{
				788	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
				789	int ret;
				790
				791	btrfs_debug(fs_info, "send_rmdir %s", path->start);
				792
				793	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
				794	if (ret < 0)
				795	goto out;
				796
				797	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
				798
				799	ret = send_cmd(sctx);
				800
				801	tlv_put_failure:
				802	out:
				803	return ret;
				804	}
				805
				806	/*
				807	* Helper function to retrieve some fields from an inode item.
				808	*/
				809	static int __get_inode_info(struct btrfs_root root, struct btrfs_path path,
				810	u64 ino, u64 size, u64 gen, u64 mode, u64 uid,
				811	u64 gid, u64 rdev)
				812	{
				813	int ret;
				814	struct btrfs_inode_item *ii;
				815	struct btrfs_key key;
				816
				817	key.objectid = ino;
				818	key.type = BTRFS_INODE_ITEM_KEY;
				819	key.offset = 0;
				820	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				821	if (ret) {
				822	if (ret > 0)
				823	ret = -ENOENT;
				824	return ret;
				825	}
				826
				827	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
				828	struct btrfs_inode_item);
				829	if (size)
				830	*size = btrfs_inode_size(path->nodes[0], ii);
				831	if (gen)
				832	*gen = btrfs_inode_generation(path->nodes[0], ii);
				833	if (mode)
				834	*mode = btrfs_inode_mode(path->nodes[0], ii);
				835	if (uid)
				836	*uid = btrfs_inode_uid(path->nodes[0], ii);
				837	if (gid)
				838	*gid = btrfs_inode_gid(path->nodes[0], ii);
				839	if (rdev)
				840	*rdev = btrfs_inode_rdev(path->nodes[0], ii);
				841
				842	return ret;
				843	}
				844
				845	static int get_inode_info(struct btrfs_root *root,
				846	u64 ino, u64 size, u64 gen,
				847	u64 mode, u64 uid, u64 *gid,
				848	u64 *rdev)
				849	{
				850	struct btrfs_path *path;
				851	int ret;
				852
				853	path = alloc_path_for_send();
				854	if (!path)
				855	return -ENOMEM;
				856	ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
				857	rdev);
				858	btrfs_free_path(path);
				859	return ret;
				860	}
				861
				862	typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
				863	struct fs_path *p,
				864	void *ctx);
				865
				866	/*
				867	* Helper function to iterate the entries in ONE btrfs_inode_ref or
				868	* btrfs_inode_extref.
				869	* The iterate callback may return a non zero value to stop iteration. This can
				870	* be a negative value for error codes or 1 to simply stop it.
				871	*
				872	* path must point to the INODE_REF or INODE_EXTREF when called.
				873	*/
				874	static int iterate_inode_ref(struct btrfs_root root, struct btrfs_path path,
				875	struct btrfs_key *found_key, int resolve,
				876	iterate_inode_ref_t iterate, void *ctx)
				877	{
				878	struct extent_buffer *eb = path->nodes[0];
				879	struct btrfs_item *item;
				880	struct btrfs_inode_ref *iref;
				881	struct btrfs_inode_extref *extref;
				882	struct btrfs_path *tmp_path;
				883	struct fs_path *p;
				884	u32 cur = 0;
				885	u32 total;
				886	int slot = path->slots[0];
				887	u32 name_len;
				888	char *start;
				889	int ret = 0;
				890	int num = 0;
				891	int index;
				892	u64 dir;
				893	unsigned long name_off;
				894	unsigned long elem_size;
				895	unsigned long ptr;
				896
				897	p = fs_path_alloc_reversed();
				898	if (!p)
				899	return -ENOMEM;
				900
				901	tmp_path = alloc_path_for_send();
				902	if (!tmp_path) {
				903	fs_path_free(p);
				904	return -ENOMEM;
				905	}
				906
				907
				908	if (found_key->type == BTRFS_INODE_REF_KEY) {
				909	ptr = (unsigned long)btrfs_item_ptr(eb, slot,
				910	struct btrfs_inode_ref);
				911	item = btrfs_item_nr(slot);
				912	total = btrfs_item_size(eb, item);
				913	elem_size = sizeof(*iref);
				914	} else {
				915	ptr = btrfs_item_ptr_offset(eb, slot);
				916	total = btrfs_item_size_nr(eb, slot);
				917	elem_size = sizeof(*extref);
				918	}
				919
				920	while (cur < total) {
				921	fs_path_reset(p);
				922
				923	if (found_key->type == BTRFS_INODE_REF_KEY) {
				924	iref = (struct btrfs_inode_ref *)(ptr + cur);
				925	name_len = btrfs_inode_ref_name_len(eb, iref);
				926	name_off = (unsigned long)(iref + 1);
				927	index = btrfs_inode_ref_index(eb, iref);
				928	dir = found_key->offset;
				929	} else {
				930	extref = (struct btrfs_inode_extref *)(ptr + cur);
				931	name_len = btrfs_inode_extref_name_len(eb, extref);
				932	name_off = (unsigned long)&extref->name;
				933	index = btrfs_inode_extref_index(eb, extref);
				934	dir = btrfs_inode_extref_parent(eb, extref);
				935	}
				936
				937	if (resolve) {
				938	start = btrfs_ref_to_path(root, tmp_path, name_len,
				939	name_off, eb, dir,
				940	p->buf, p->buf_len);
				941	if (IS_ERR(start)) {
				942	ret = PTR_ERR(start);
				943	goto out;
				944	}
				945	if (start < p->buf) {
				946	/* overflow , try again with larger buffer */
				947	ret = fs_path_ensure_buf(p,
				948	p->buf_len + p->buf - start);
				949	if (ret < 0)
				950	goto out;
				951	start = btrfs_ref_to_path(root, tmp_path,
				952	name_len, name_off,
				953	eb, dir,
				954	p->buf, p->buf_len);
				955	if (IS_ERR(start)) {
				956	ret = PTR_ERR(start);
				957	goto out;
				958	}
				959	BUG_ON(start < p->buf);
				960	}
				961	p->start = start;
				962	} else {
				963	ret = fs_path_add_from_extent_buffer(p, eb, name_off,
				964	name_len);
				965	if (ret < 0)
				966	goto out;
				967	}
				968
				969	cur += elem_size + name_len;
				970	ret = iterate(num, dir, index, p, ctx);
				971	if (ret)
				972	goto out;
				973	num++;
				974	}
				975
				976	out:
				977	btrfs_free_path(tmp_path);
				978	fs_path_free(p);
				979	return ret;
				980	}
				981
				982	typedef int (iterate_dir_item_t)(int num, struct btrfs_key di_key,
				983	const char *name, int name_len,
				984	const char *data, int data_len,
				985	u8 type, void *ctx);
				986
				987	/*
				988	* Helper function to iterate the entries in ONE btrfs_dir_item.
				989	* The iterate callback may return a non zero value to stop iteration. This can
				990	* be a negative value for error codes or 1 to simply stop it.
				991	*
				992	* path must point to the dir item when called.
				993	*/
				994	static int iterate_dir_item(struct btrfs_root root, struct btrfs_path path,
				995	iterate_dir_item_t iterate, void *ctx)
				996	{
				997	int ret = 0;
				998	struct extent_buffer *eb;
				999	struct btrfs_item *item;
				1000	struct btrfs_dir_item *di;
				1001	struct btrfs_key di_key;
				1002	char *buf = NULL;
				1003	int buf_len;
				1004	u32 name_len;
				1005	u32 data_len;
				1006	u32 cur;
				1007	u32 len;
				1008	u32 total;
				1009	int slot;
				1010	int num;
				1011	u8 type;
				1012
				1013	/*
				1014	* Start with a small buffer (1 page). If later we end up needing more
				1015	* space, which can happen for xattrs on a fs with a leaf size greater
				1016	* then the page size, attempt to increase the buffer. Typically xattr
				1017	* values are small.
				1018	*/
				1019	buf_len = PATH_MAX;
				1020	buf = kmalloc(buf_len, GFP_KERNEL);
				1021	if (!buf) {
				1022	ret = -ENOMEM;
				1023	goto out;
				1024	}
				1025
				1026	eb = path->nodes[0];
				1027	slot = path->slots[0];
				1028	item = btrfs_item_nr(slot);
				1029	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
				1030	cur = 0;
				1031	len = 0;
				1032	total = btrfs_item_size(eb, item);
				1033
				1034	num = 0;
				1035	while (cur < total) {
				1036	name_len = btrfs_dir_name_len(eb, di);
				1037	data_len = btrfs_dir_data_len(eb, di);
				1038	type = btrfs_dir_type(eb, di);
				1039	btrfs_dir_item_key_to_cpu(eb, di, &di_key);
				1040
				1041	if (type == BTRFS_FT_XATTR) {
				1042	if (name_len > XATTR_NAME_MAX) {
				1043	ret = -ENAMETOOLONG;
				1044	goto out;
				1045	}
				1046	if (name_len + data_len >
				1047	BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
				1048	ret = -E2BIG;
				1049	goto out;
				1050	}
				1051	} else {
				1052	/*
				1053	* Path too long
				1054	*/
				1055	if (name_len + data_len > PATH_MAX) {
				1056	ret = -ENAMETOOLONG;
				1057	goto out;
				1058	}
				1059	}
				1060
				1061	if (name_len + data_len > buf_len) {
				1062	buf_len = name_len + data_len;
				1063	if (is_vmalloc_addr(buf)) {
				1064	vfree(buf);
				1065	buf = NULL;
				1066	} else {
				1067	char *tmp = krealloc(buf, buf_len,
				1068	GFP_KERNEL \| __GFP_NOWARN);
				1069
				1070	if (!tmp)
				1071	kfree(buf);
				1072	buf = tmp;
				1073	}
				1074	if (!buf) {
				1075	buf = kvmalloc(buf_len, GFP_KERNEL);
				1076	if (!buf) {
				1077	ret = -ENOMEM;
				1078	goto out;
				1079	}
				1080	}
				1081	}
				1082
				1083	read_extent_buffer(eb, buf, (unsigned long)(di + 1),
				1084	name_len + data_len);
				1085
				1086	len = sizeof(*di) + name_len + data_len;
				1087	di = (struct btrfs_dir_item )((char )di + len);
				1088	cur += len;
				1089
				1090	ret = iterate(num, &di_key, buf, name_len, buf + name_len,
				1091	data_len, type, ctx);
				1092	if (ret < 0)
				1093	goto out;
				1094	if (ret) {
				1095	ret = 0;
				1096	goto out;
				1097	}
				1098
				1099	num++;
				1100	}
				1101
				1102	out:
				1103	kvfree(buf);
				1104	return ret;
				1105	}
				1106
				1107	static int __copy_first_ref(int num, u64 dir, int index,
				1108	struct fs_path p, void ctx)
				1109	{
				1110	int ret;
				1111	struct fs_path *pt = ctx;
				1112
				1113	ret = fs_path_copy(pt, p);
				1114	if (ret < 0)
				1115	return ret;
				1116
				1117	/* we want the first only */
				1118	return 1;
				1119	}
				1120
				1121	/*
				1122	* Retrieve the first path of an inode. If an inode has more then one
				1123	* ref/hardlink, this is ignored.
				1124	*/
				1125	static int get_inode_path(struct btrfs_root *root,
				1126	u64 ino, struct fs_path *path)
				1127	{
				1128	int ret;
				1129	struct btrfs_key key, found_key;
				1130	struct btrfs_path *p;
				1131
				1132	p = alloc_path_for_send();
				1133	if (!p)
				1134	return -ENOMEM;
				1135
				1136	fs_path_reset(path);
				1137
				1138	key.objectid = ino;
				1139	key.type = BTRFS_INODE_REF_KEY;
				1140	key.offset = 0;
				1141
				1142	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
				1143	if (ret < 0)
				1144	goto out;
				1145	if (ret) {
				1146	ret = 1;
				1147	goto out;
				1148	}
				1149	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
				1150	if (found_key.objectid != ino \|\|
				1151	(found_key.type != BTRFS_INODE_REF_KEY &&
				1152	found_key.type != BTRFS_INODE_EXTREF_KEY)) {
				1153	ret = -ENOENT;
				1154	goto out;
				1155	}
				1156
				1157	ret = iterate_inode_ref(root, p, &found_key, 1,
				1158	__copy_first_ref, path);
				1159	if (ret < 0)
				1160	goto out;
				1161	ret = 0;
				1162
				1163	out:
				1164	btrfs_free_path(p);
				1165	return ret;
				1166	}
				1167
				1168	struct backref_ctx {
				1169	struct send_ctx *sctx;
				1170
				1171	struct btrfs_path *path;
				1172	/* number of total found references */
				1173	u64 found;
				1174
				1175	/*
				1176	* used for clones found in send_root. clones found behind cur_objectid
				1177	* and cur_offset are not considered as allowed clones.
				1178	*/
				1179	u64 cur_objectid;
				1180	u64 cur_offset;
				1181
				1182	/* may be truncated in case it's the last extent in a file */
				1183	u64 extent_len;
				1184
				1185	/* data offset in the file extent item */
				1186	u64 data_offset;
				1187
				1188	/* Just to check for bugs in backref resolving */
				1189	int found_itself;
				1190	};
				1191
				1192	static int __clone_root_cmp_bsearch(const void key, const void elt)
				1193	{
				1194	u64 root = (u64)(uintptr_t)key;
				1195	struct clone_root cr = (struct clone_root )elt;
				1196
				1197	if (root < cr->root->objectid)
				1198	return -1;
				1199	if (root > cr->root->objectid)
				1200	return 1;
				1201	return 0;
				1202	}
				1203
				1204	static int __clone_root_cmp_sort(const void e1, const void e2)
				1205	{
				1206	struct clone_root cr1 = (struct clone_root )e1;
				1207	struct clone_root cr2 = (struct clone_root )e2;
				1208
				1209	if (cr1->root->objectid < cr2->root->objectid)
				1210	return -1;
				1211	if (cr1->root->objectid > cr2->root->objectid)
				1212	return 1;
				1213	return 0;
				1214	}
				1215
				1216	/*
				1217	* Called for every backref that is found for the current extent.
				1218	* Results are collected in sctx->clone_roots->ino/offset/found_refs
				1219	*/
				1220	static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
				1221	{
				1222	struct backref_ctx *bctx = ctx_;
				1223	struct clone_root *found;
				1224	int ret;
				1225	u64 i_size;
				1226
				1227	/* First check if the root is in the list of accepted clone sources */
				1228	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
				1229	bctx->sctx->clone_roots_cnt,
				1230	sizeof(struct clone_root),
				1231	__clone_root_cmp_bsearch);
				1232	if (!found)
				1233	return 0;
				1234
				1235	if (found->root == bctx->sctx->send_root &&
				1236	ino == bctx->cur_objectid &&
				1237	offset == bctx->cur_offset) {
				1238	bctx->found_itself = 1;
				1239	}
				1240
				1241	/*
				1242	* There are inodes that have extents that lie behind its i_size. Don't
				1243	* accept clones from these extents.
				1244	*/
				1245	ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
				1246	NULL, NULL, NULL);
				1247	btrfs_release_path(bctx->path);
				1248	if (ret < 0)
				1249	return ret;
				1250
				1251	if (offset + bctx->data_offset + bctx->extent_len > i_size)
				1252	return 0;
				1253
				1254	/*
				1255	* Make sure we don't consider clones from send_root that are
				1256	* behind the current inode/offset.
				1257	*/
				1258	if (found->root == bctx->sctx->send_root) {
				1259	/*
				1260	* TODO for the moment we don't accept clones from the inode
				1261	* that is currently send. We may change this when
				1262	* BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
				1263	* file.
				1264	*/
				1265	if (ino >= bctx->cur_objectid)
				1266	return 0;
				1267	}
				1268
				1269	bctx->found++;
				1270	found->found_refs++;
				1271	if (ino < found->ino) {
				1272	found->ino = ino;
				1273	found->offset = offset;
				1274	} else if (found->ino == ino) {
				1275	/*
				1276	* same extent found more then once in the same file.
				1277	*/
				1278	if (found->offset > offset + bctx->extent_len)
				1279	found->offset = offset;
				1280	}
				1281
				1282	return 0;
				1283	}
				1284
				1285	/*
				1286	* Given an inode, offset and extent item, it finds a good clone for a clone
				1287	* instruction. Returns -ENOENT when none could be found. The function makes
				1288	* sure that the returned clone is usable at the point where sending is at the
				1289	* moment. This means, that no clones are accepted which lie behind the current
				1290	* inode+offset.
				1291	*
				1292	* path must point to the extent item when called.
				1293	*/
				1294	static int find_extent_clone(struct send_ctx *sctx,
				1295	struct btrfs_path *path,
				1296	u64 ino, u64 data_offset,
				1297	u64 ino_size,
				1298	struct clone_root **found)
				1299	{
				1300	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
				1301	int ret;
				1302	int extent_type;
				1303	u64 logical;
				1304	u64 disk_byte;
				1305	u64 num_bytes;
				1306	u64 extent_item_pos;
				1307	u64 flags = 0;
				1308	struct btrfs_file_extent_item *fi;
				1309	struct extent_buffer *eb = path->nodes[0];
				1310	struct backref_ctx *backref_ctx = NULL;
				1311	struct clone_root *cur_clone_root;
				1312	struct btrfs_key found_key;
				1313	struct btrfs_path *tmp_path;
				1314	struct btrfs_extent_item *ei;
				1315	int compressed;
				1316	u32 i;
				1317
				1318	tmp_path = alloc_path_for_send();
				1319	if (!tmp_path)
				1320	return -ENOMEM;
				1321
				1322	/* We only use this path under the commit sem */
				1323	tmp_path->need_commit_sem = 0;
				1324
				1325	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL);
				1326	if (!backref_ctx) {
				1327	ret = -ENOMEM;
				1328	goto out;
				1329	}
				1330
				1331	backref_ctx->path = tmp_path;
				1332
				1333	if (data_offset >= ino_size) {
				1334	/*
				1335	* There may be extents that lie behind the file's size.
				1336	* I at least had this in combination with snapshotting while
				1337	* writing large files.
				1338	*/
				1339	ret = 0;
				1340	goto out;
				1341	}
				1342
				1343	fi = btrfs_item_ptr(eb, path->slots[0],
				1344	struct btrfs_file_extent_item);
				1345	extent_type = btrfs_file_extent_type(eb, fi);
				1346	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				1347	ret = -ENOENT;
				1348	goto out;
				1349	}
				1350	compressed = btrfs_file_extent_compression(eb, fi);
				1351
				1352	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
				1353	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
				1354	if (disk_byte == 0) {
				1355	ret = -ENOENT;
				1356	goto out;
				1357	}
				1358	logical = disk_byte + btrfs_file_extent_offset(eb, fi);
				1359
				1360	down_read(&fs_info->commit_root_sem);
				1361	ret = extent_from_logical(fs_info, disk_byte, tmp_path,
				1362	&found_key, &flags);
				1363	up_read(&fs_info->commit_root_sem);
				1364
				1365	if (ret < 0)
				1366	goto out;
				1367	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
				1368	ret = -EIO;
				1369	goto out;
				1370	}
				1371
				1372	ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0],
				1373	struct btrfs_extent_item);
				1374	/*
				1375	* Backreference walking (iterate_extent_inodes() below) is currently
				1376	* too expensive when an extent has a large number of references, both
				1377	* in time spent and used memory. So for now just fallback to write
				1378	* operations instead of clone operations when an extent has more than
				1379	* a certain amount of references.
				1380	*/
				1381	if (btrfs_extent_refs(tmp_path->nodes[0], ei) > SEND_MAX_EXTENT_REFS) {
				1382	ret = -ENOENT;
				1383	goto out;
				1384	}
				1385	btrfs_release_path(tmp_path);
				1386
				1387	/*
				1388	* Setup the clone roots.
				1389	*/
				1390	for (i = 0; i < sctx->clone_roots_cnt; i++) {
				1391	cur_clone_root = sctx->clone_roots + i;
				1392	cur_clone_root->ino = (u64)-1;
				1393	cur_clone_root->offset = 0;
				1394	cur_clone_root->found_refs = 0;
				1395	}
				1396
				1397	backref_ctx->sctx = sctx;
				1398	backref_ctx->found = 0;
				1399	backref_ctx->cur_objectid = ino;
				1400	backref_ctx->cur_offset = data_offset;
				1401	backref_ctx->found_itself = 0;
				1402	backref_ctx->extent_len = num_bytes;
				1403	/*
				1404	* For non-compressed extents iterate_extent_inodes() gives us extent
				1405	* offsets that already take into account the data offset, but not for
				1406	* compressed extents, since the offset is logical and not relative to
				1407	* the physical extent locations. We must take this into account to
				1408	* avoid sending clone offsets that go beyond the source file's size,
				1409	* which would result in the clone ioctl failing with -EINVAL on the
				1410	* receiving end.
				1411	*/
				1412	if (compressed == BTRFS_COMPRESS_NONE)
				1413	backref_ctx->data_offset = 0;
				1414	else
				1415	backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi);
				1416
				1417	/*
				1418	* The last extent of a file may be too large due to page alignment.
				1419	* We need to adjust extent_len in this case so that the checks in
				1420	* __iterate_backrefs work.
				1421	*/
				1422	if (data_offset + num_bytes >= ino_size)
				1423	backref_ctx->extent_len = ino_size - data_offset;
				1424
				1425	/*
				1426	* Now collect all backrefs.
				1427	*/
				1428	if (compressed == BTRFS_COMPRESS_NONE)
				1429	extent_item_pos = logical - found_key.objectid;
				1430	else
				1431	extent_item_pos = 0;
				1432	ret = iterate_extent_inodes(fs_info, found_key.objectid,
				1433	extent_item_pos, 1, __iterate_backrefs,
				1434	backref_ctx, false);
				1435
				1436	if (ret < 0)
				1437	goto out;
				1438
				1439	if (!backref_ctx->found_itself) {
				1440	/* found a bug in backref code? */
				1441	ret = -EIO;
				1442	btrfs_err(fs_info,
				1443	"did not find backref in send_root. inode=%llu, offset=%llu, disk_byte=%llu found extent=%llu",
				1444	ino, data_offset, disk_byte, found_key.objectid);
				1445	goto out;
				1446	}
				1447
				1448	btrfs_debug(fs_info,
				1449	"find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu",
				1450	data_offset, ino, num_bytes, logical);
				1451
				1452	if (!backref_ctx->found)
				1453	btrfs_debug(fs_info, "no clones found");
				1454
				1455	cur_clone_root = NULL;
				1456	for (i = 0; i < sctx->clone_roots_cnt; i++) {
				1457	if (sctx->clone_roots[i].found_refs) {
				1458	if (!cur_clone_root)
				1459	cur_clone_root = sctx->clone_roots + i;
				1460	else if (sctx->clone_roots[i].root == sctx->send_root)
				1461	/* prefer clones from send_root over others */
				1462	cur_clone_root = sctx->clone_roots + i;
				1463	}
				1464
				1465	}
				1466
				1467	if (cur_clone_root) {
				1468	*found = cur_clone_root;
				1469	ret = 0;
				1470	} else {
				1471	ret = -ENOENT;
				1472	}
				1473
				1474	out:
				1475	btrfs_free_path(tmp_path);
				1476	kfree(backref_ctx);
				1477	return ret;
				1478	}
				1479
				1480	static int read_symlink(struct btrfs_root *root,
				1481	u64 ino,
				1482	struct fs_path *dest)
				1483	{
				1484	int ret;
				1485	struct btrfs_path *path;
				1486	struct btrfs_key key;
				1487	struct btrfs_file_extent_item *ei;
				1488	u8 type;
				1489	u8 compression;
				1490	unsigned long off;
				1491	int len;
				1492
				1493	path = alloc_path_for_send();
				1494	if (!path)
				1495	return -ENOMEM;
				1496
				1497	key.objectid = ino;
				1498	key.type = BTRFS_EXTENT_DATA_KEY;
				1499	key.offset = 0;
				1500	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				1501	if (ret < 0)
				1502	goto out;
				1503	if (ret) {
				1504	/*
				1505	* An empty symlink inode. Can happen in rare error paths when
				1506	* creating a symlink (transaction committed before the inode
				1507	* eviction handler removed the symlink inode items and a crash
				1508	* happened in between or the subvol was snapshoted in between).
				1509	* Print an informative message to dmesg/syslog so that the user
				1510	* can delete the symlink.
				1511	*/
				1512	btrfs_err(root->fs_info,
				1513	"Found empty symlink inode %llu at root %llu",
				1514	ino, root->root_key.objectid);
				1515	ret = -EIO;
				1516	goto out;
				1517	}
				1518
				1519	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
				1520	struct btrfs_file_extent_item);
				1521	type = btrfs_file_extent_type(path->nodes[0], ei);
				1522	compression = btrfs_file_extent_compression(path->nodes[0], ei);
				1523	BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
				1524	BUG_ON(compression);
				1525
				1526	off = btrfs_file_extent_inline_start(ei);
				1527	len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
				1528
				1529	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
				1530
				1531	out:
				1532	btrfs_free_path(path);
				1533	return ret;
				1534	}
				1535
				1536	/*
				1537	* Helper function to generate a file name that is unique in the root of
				1538	* send_root and parent_root. This is used to generate names for orphan inodes.
				1539	*/
				1540	static int gen_unique_name(struct send_ctx *sctx,
				1541	u64 ino, u64 gen,
				1542	struct fs_path *dest)
				1543	{
				1544	int ret = 0;
				1545	struct btrfs_path *path;
				1546	struct btrfs_dir_item *di;
				1547	char tmp[64];
				1548	int len;
				1549	u64 idx = 0;
				1550
				1551	path = alloc_path_for_send();
				1552	if (!path)
				1553	return -ENOMEM;
				1554
				1555	while (1) {
				1556	len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
				1557	ino, gen, idx);
				1558	ASSERT(len < sizeof(tmp));
				1559
				1560	di = btrfs_lookup_dir_item(NULL, sctx->send_root,
				1561	path, BTRFS_FIRST_FREE_OBJECTID,
				1562	tmp, strlen(tmp), 0);
				1563	btrfs_release_path(path);
				1564	if (IS_ERR(di)) {
				1565	ret = PTR_ERR(di);
				1566	goto out;
				1567	}
				1568	if (di) {
				1569	/* not unique, try again */
				1570	idx++;
				1571	continue;
				1572	}
				1573
				1574	if (!sctx->parent_root) {
				1575	/* unique */
				1576	ret = 0;
				1577	break;
				1578	}
				1579
				1580	di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
				1581	path, BTRFS_FIRST_FREE_OBJECTID,
				1582	tmp, strlen(tmp), 0);
				1583	btrfs_release_path(path);
				1584	if (IS_ERR(di)) {
				1585	ret = PTR_ERR(di);
				1586	goto out;
				1587	}
				1588	if (di) {
				1589	/* not unique, try again */
				1590	idx++;
				1591	continue;
				1592	}
				1593	/* unique */
				1594	break;
				1595	}
				1596
				1597	ret = fs_path_add(dest, tmp, strlen(tmp));
				1598
				1599	out:
				1600	btrfs_free_path(path);
				1601	return ret;
				1602	}
				1603
				/*
				 * Result of get_cur_inode_state(): how an inode/generation pair relates to
				 * the send and parent roots, and whether send_progress has passed it.
				 */
				enum inode_state {
					/* generation matches in both the send root and the parent root */
					inode_state_no_change = 0,
					/* matches only in the send root, ino >= send_progress */
					inode_state_will_create,
					/* matches only in the send root, ino < send_progress */
					inode_state_did_create,
					/* matches only in the parent root, ino >= send_progress */
					inode_state_will_delete,
					/* matches only in the parent root, ino < send_progress */
					inode_state_did_delete,
				};
				1611
				1612	static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
				1613	{
				1614	int ret;
				1615	int left_ret;
				1616	int right_ret;
				1617	u64 left_gen;
				1618	u64 right_gen;
				1619
				1620	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
				1621	NULL, NULL);
				1622	if (ret < 0 && ret != -ENOENT)
				1623	goto out;
				1624	left_ret = ret;
				1625
				1626	if (!sctx->parent_root) {
				1627	right_ret = -ENOENT;
				1628	} else {
				1629	ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
				1630	NULL, NULL, NULL, NULL);
				1631	if (ret < 0 && ret != -ENOENT)
				1632	goto out;
				1633	right_ret = ret;
				1634	}
				1635
				1636	if (!left_ret && !right_ret) {
				1637	if (left_gen == gen && right_gen == gen) {
				1638	ret = inode_state_no_change;
				1639	} else if (left_gen == gen) {
				1640	if (ino < sctx->send_progress)
				1641	ret = inode_state_did_create;
				1642	else
				1643	ret = inode_state_will_create;
				1644	} else if (right_gen == gen) {
				1645	if (ino < sctx->send_progress)
				1646	ret = inode_state_did_delete;
				1647	else
				1648	ret = inode_state_will_delete;
				1649	} else {
				1650	ret = -ENOENT;
				1651	}
				1652	} else if (!left_ret) {
				1653	if (left_gen == gen) {
				1654	if (ino < sctx->send_progress)
				1655	ret = inode_state_did_create;
				1656	else
				1657	ret = inode_state_will_create;
				1658	} else {
				1659	ret = -ENOENT;
				1660	}
				1661	} else if (!right_ret) {
				1662	if (right_gen == gen) {
				1663	if (ino < sctx->send_progress)
				1664	ret = inode_state_did_delete;
				1665	else
				1666	ret = inode_state_will_delete;
				1667	} else {
				1668	ret = -ENOENT;
				1669	}
				1670	} else {
				1671	ret = -ENOENT;
				1672	}
				1673
				1674	out:
				1675	return ret;
				1676	}
				1677
				1678	static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
				1679	{
				1680	int ret;
				1681
				1682	if (ino == BTRFS_FIRST_FREE_OBJECTID)
				1683	return 1;
				1684
				1685	ret = get_cur_inode_state(sctx, ino, gen);
				1686	if (ret < 0)
				1687	goto out;
				1688
				1689	if (ret == inode_state_no_change \|\|
				1690	ret == inode_state_did_create \|\|
				1691	ret == inode_state_will_delete)
				1692	ret = 1;
				1693	else
				1694	ret = 0;
				1695
				1696	out:
				1697	return ret;
				1698	}
				1699
				1700	/*
				1701	* Helper function to lookup a dir item in a dir.
				1702	*/
				1703	static int lookup_dir_item_inode(struct btrfs_root *root,
				1704	u64 dir, const char *name, int name_len,
				1705	u64 *found_inode,
				1706	u8 *found_type)
				1707	{
				1708	int ret = 0;
				1709	struct btrfs_dir_item *di;
				1710	struct btrfs_key key;
				1711	struct btrfs_path *path;
				1712
				1713	path = alloc_path_for_send();
				1714	if (!path)
				1715	return -ENOMEM;
				1716
				1717	di = btrfs_lookup_dir_item(NULL, root, path,
				1718	dir, name, name_len, 0);
				1719	if (!di) {
				1720	ret = -ENOENT;
				1721	goto out;
				1722	}
				1723	if (IS_ERR(di)) {
				1724	ret = PTR_ERR(di);
				1725	goto out;
				1726	}
				1727	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
				1728	if (key.type == BTRFS_ROOT_ITEM_KEY) {
				1729	ret = -ENOENT;
				1730	goto out;
				1731	}
				1732	*found_inode = key.objectid;
				1733	*found_type = btrfs_dir_type(path->nodes[0], di);
				1734
				1735	out:
				1736	btrfs_free_path(path);
				1737	return ret;
				1738	}
				1739
				1740	/*
				1741	* Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
				1742	* generation of the parent dir and the name of the dir entry.
				1743	*/
				1744	static int get_first_ref(struct btrfs_root *root, u64 ino,
				1745	u64 dir, u64 dir_gen, struct fs_path *name)
				1746	{
				1747	int ret;
				1748	struct btrfs_key key;
				1749	struct btrfs_key found_key;
				1750	struct btrfs_path *path;
				1751	int len;
				1752	u64 parent_dir;
				1753
				1754	path = alloc_path_for_send();
				1755	if (!path)
				1756	return -ENOMEM;
				1757
				1758	key.objectid = ino;
				1759	key.type = BTRFS_INODE_REF_KEY;
				1760	key.offset = 0;
				1761
				1762	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
				1763	if (ret < 0)
				1764	goto out;
				1765	if (!ret)
				1766	btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				1767	path->slots[0]);
				1768	if (ret \|\| found_key.objectid != ino \|\|
				1769	(found_key.type != BTRFS_INODE_REF_KEY &&
				1770	found_key.type != BTRFS_INODE_EXTREF_KEY)) {
				1771	ret = -ENOENT;
				1772	goto out;
				1773	}
				1774
				1775	if (found_key.type == BTRFS_INODE_REF_KEY) {
				1776	struct btrfs_inode_ref *iref;
				1777	iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				1778	struct btrfs_inode_ref);
				1779	len = btrfs_inode_ref_name_len(path->nodes[0], iref);
				1780	ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
				1781	(unsigned long)(iref + 1),
				1782	len);
				1783	parent_dir = found_key.offset;
				1784	} else {
				1785	struct btrfs_inode_extref *extref;
				1786	extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				1787	struct btrfs_inode_extref);
				1788	len = btrfs_inode_extref_name_len(path->nodes[0], extref);
				1789	ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
				1790	(unsigned long)&extref->name, len);
				1791	parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
				1792	}
				1793	if (ret < 0)
				1794	goto out;
				1795	btrfs_release_path(path);
				1796
				1797	if (dir_gen) {
				1798	ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL,
				1799	NULL, NULL, NULL);
				1800	if (ret < 0)
				1801	goto out;
				1802	}
				1803
				1804	*dir = parent_dir;
				1805
				1806	out:
				1807	btrfs_free_path(path);
				1808	return ret;
				1809	}
				1810
				1811	static int is_first_ref(struct btrfs_root *root,
				1812	u64 ino, u64 dir,
				1813	const char *name, int name_len)
				1814	{
				1815	int ret;
				1816	struct fs_path *tmp_name;
				1817	u64 tmp_dir;
				1818
				1819	tmp_name = fs_path_alloc();
				1820	if (!tmp_name)
				1821	return -ENOMEM;
				1822
				1823	ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
				1824	if (ret < 0)
				1825	goto out;
				1826
				1827	if (dir != tmp_dir \|\| name_len != fs_path_len(tmp_name)) {
				1828	ret = 0;
				1829	goto out;
				1830	}
				1831
				1832	ret = !memcmp(tmp_name->start, name, name_len);
				1833
				1834	out:
				1835	fs_path_free(tmp_name);
				1836	return ret;
				1837	}
				1838
				1839	/*
				1840	* Used by process_recorded_refs to determine if a new ref would overwrite an
				1841	* already existing ref. In case it detects an overwrite, it returns the
				1842	* inode/gen in who_ino/who_gen.
				1843	* When an overwrite is detected, process_recorded_refs does proper orphanizing
				1844	* to make sure later references to the overwritten inode are possible.
				1845	* Orphanizing is however only required for the first ref of an inode.
				1846	* process_recorded_refs does an additional is_first_ref check to see if
				1847	* orphanizing is really required.
				1848	*/
				1849	static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
				1850	const char *name, int name_len,
				1851	u64 who_ino, u64 who_gen, u64 *who_mode)
				1852	{
				1853	int ret = 0;
				1854	u64 gen;
				1855	u64 other_inode = 0;
				1856	u8 other_type = 0;
				1857
				1858	if (!sctx->parent_root)
				1859	goto out;
				1860
				1861	ret = is_inode_existent(sctx, dir, dir_gen);
				1862	if (ret <= 0)
				1863	goto out;
				1864
				1865	/*
				1866	* If we have a parent root we need to verify that the parent dir was
				1867	* not deleted and then re-created, if it was then we have no overwrite
				1868	* and we can just unlink this entry.
				1869	*/
				1870	if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID) {
				1871	ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
				1872	NULL, NULL, NULL);
				1873	if (ret < 0 && ret != -ENOENT)
				1874	goto out;
				1875	if (ret) {
				1876	ret = 0;
				1877	goto out;
				1878	}
				1879	if (gen != dir_gen)
				1880	goto out;
				1881	}
				1882
				1883	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
				1884	&other_inode, &other_type);
				1885	if (ret < 0 && ret != -ENOENT)
				1886	goto out;
				1887	if (ret) {
				1888	ret = 0;
				1889	goto out;
				1890	}
				1891
				1892	/*
				1893	* Check if the overwritten ref was already processed. If yes, the ref
				1894	* was already unlinked/moved, so we can safely assume that we will not
				1895	* overwrite anything at this point in time.
				1896	*/
				1897	if (other_inode > sctx->send_progress \|\|
				1898	is_waiting_for_move(sctx, other_inode)) {
				1899	ret = get_inode_info(sctx->parent_root, other_inode, NULL,
				1900	who_gen, who_mode, NULL, NULL, NULL);
				1901	if (ret < 0)
				1902	goto out;
				1903
				1904	ret = 1;
				1905	*who_ino = other_inode;
				1906	} else {
				1907	ret = 0;
				1908	}
				1909
				1910	out:
				1911	return ret;
				1912	}
				1913
				1914	/*
				1915	* Checks if the ref was overwritten by an already processed inode. This is
				1916	* used by __get_cur_name_and_parent to find out if the ref was orphanized and
				1917	* thus the orphan name needs be used.
				1918	* process_recorded_refs also uses it to avoid unlinking of refs that were
				1919	* overwritten.
				1920	*/
				1921	static int did_overwrite_ref(struct send_ctx *sctx,
				1922	u64 dir, u64 dir_gen,
				1923	u64 ino, u64 ino_gen,
				1924	const char *name, int name_len)
				1925	{
				1926	int ret = 0;
				1927	u64 gen;
				1928	u64 ow_inode;
				1929	u8 other_type;
				1930
				1931	if (!sctx->parent_root)
				1932	goto out;
				1933
				1934	ret = is_inode_existent(sctx, dir, dir_gen);
				1935	if (ret <= 0)
				1936	goto out;
				1937
				1938	if (dir != BTRFS_FIRST_FREE_OBJECTID) {
				1939	ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL,
				1940	NULL, NULL, NULL);
				1941	if (ret < 0 && ret != -ENOENT)
				1942	goto out;
				1943	if (ret) {
				1944	ret = 0;
				1945	goto out;
				1946	}
				1947	if (gen != dir_gen)
				1948	goto out;
				1949	}
				1950
				1951	/* check if the ref was overwritten by another ref */
				1952	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
				1953	&ow_inode, &other_type);
				1954	if (ret < 0 && ret != -ENOENT)
				1955	goto out;
				1956	if (ret) {
				1957	/* was never and will never be overwritten */
				1958	ret = 0;
				1959	goto out;
				1960	}
				1961
				1962	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
				1963	NULL, NULL);
				1964	if (ret < 0)
				1965	goto out;
				1966
				1967	if (ow_inode == ino && gen == ino_gen) {
				1968	ret = 0;
				1969	goto out;
				1970	}
				1971
				1972	/*
				1973	* We know that it is or will be overwritten. Check this now.
				1974	* The current inode being processed might have been the one that caused
				1975	* inode 'ino' to be orphanized, therefore check if ow_inode matches
				1976	* the current inode being processed.
				1977	*/
				1978	if ((ow_inode < sctx->send_progress) \|\|
				1979	(ino != sctx->cur_ino && ow_inode == sctx->cur_ino &&
				1980	gen == sctx->cur_inode_gen))
				1981	ret = 1;
				1982	else
				1983	ret = 0;
				1984
				1985	out:
				1986	return ret;
				1987	}
				1988
				1989	/*
				1990	* Same as did_overwrite_ref, but also checks if it is the first ref of an inode
				1991	* that got overwritten. This is used by process_recorded_refs to determine
				1992	* if it has to use the path as returned by get_cur_path or the orphan name.
				1993	*/
				1994	static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
				1995	{
				1996	int ret = 0;
				1997	struct fs_path *name = NULL;
				1998	u64 dir;
				1999	u64 dir_gen;
				2000
				2001	if (!sctx->parent_root)
				2002	goto out;
				2003
				2004	name = fs_path_alloc();
				2005	if (!name)
				2006	return -ENOMEM;
				2007
				2008	ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
				2009	if (ret < 0)
				2010	goto out;
				2011
				2012	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
				2013	name->start, fs_path_len(name));
				2014
				2015	out:
				2016	fs_path_free(name);
				2017	return ret;
				2018	}
				2019
				2020	/*
				2021	* Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
				2022	* so we need to do some special handling in case we have clashes. This function
				2023	* takes care of this with the help of name_cache_entry::radix_list.
				2024	* In case of error, nce is kfreed.
				2025	*/
				2026	static int name_cache_insert(struct send_ctx *sctx,
				2027	struct name_cache_entry *nce)
				2028	{
				2029	int ret = 0;
				2030	struct list_head *nce_head;
				2031
				2032	nce_head = radix_tree_lookup(&sctx->name_cache,
				2033	(unsigned long)nce->ino);
				2034	if (!nce_head) {
				2035	nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
				2036	if (!nce_head) {
				2037	kfree(nce);
				2038	return -ENOMEM;
				2039	}
				2040	INIT_LIST_HEAD(nce_head);
				2041
				2042	ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
				2043	if (ret < 0) {
				2044	kfree(nce_head);
				2045	kfree(nce);
				2046	return ret;
				2047	}
				2048	}
				2049	list_add_tail(&nce->radix_list, nce_head);
				2050	list_add_tail(&nce->list, &sctx->name_cache_list);
				2051	sctx->name_cache_size++;
				2052
				2053	return ret;
				2054	}
				2055
				2056	static void name_cache_delete(struct send_ctx *sctx,
				2057	struct name_cache_entry *nce)
				2058	{
				2059	struct list_head *nce_head;
				2060
				2061	nce_head = radix_tree_lookup(&sctx->name_cache,
				2062	(unsigned long)nce->ino);
				2063	if (!nce_head) {
				2064	btrfs_err(sctx->send_root->fs_info,
				2065	"name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
				2066	nce->ino, sctx->name_cache_size);
				2067	}
				2068
				2069	list_del(&nce->radix_list);
				2070	list_del(&nce->list);
				2071	sctx->name_cache_size--;
				2072
				2073	/*
				2074	* We may not get to the final release of nce_head if the lookup fails
				2075	*/
				2076	if (nce_head && list_empty(nce_head)) {
				2077	radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
				2078	kfree(nce_head);
				2079	}
				2080	}
				2081
				2082	static struct name_cache_entry name_cache_search(struct send_ctx sctx,
				2083	u64 ino, u64 gen)
				2084	{
				2085	struct list_head *nce_head;
				2086	struct name_cache_entry *cur;
				2087
				2088	nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
				2089	if (!nce_head)
				2090	return NULL;
				2091
				2092	list_for_each_entry(cur, nce_head, radix_list) {
				2093	if (cur->ino == ino && cur->gen == gen)
				2094	return cur;
				2095	}
				2096	return NULL;
				2097	}
				2098
				2099	/*
				2100	* Removes the entry from the list and adds it back to the end. This marks the
				2101	* entry as recently used so that name_cache_clean_unused does not remove it.
				2102	*/
				2103	static void name_cache_used(struct send_ctx sctx, struct name_cache_entry nce)
				2104	{
				2105	list_del(&nce->list);
				2106	list_add_tail(&nce->list, &sctx->name_cache_list);
				2107	}
				2108
				2109	/*
				2110	* Remove some entries from the beginning of name_cache_list.
				2111	*/
				2112	static void name_cache_clean_unused(struct send_ctx *sctx)
				2113	{
				2114	struct name_cache_entry *nce;
				2115
				2116	if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
				2117	return;
				2118
				2119	while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
				2120	nce = list_entry(sctx->name_cache_list.next,
				2121	struct name_cache_entry, list);
				2122	name_cache_delete(sctx, nce);
				2123	kfree(nce);
				2124	}
				2125	}
				2126
				2127	static void name_cache_free(struct send_ctx *sctx)
				2128	{
				2129	struct name_cache_entry *nce;
				2130
				2131	while (!list_empty(&sctx->name_cache_list)) {
				2132	nce = list_entry(sctx->name_cache_list.next,
				2133	struct name_cache_entry, list);
				2134	name_cache_delete(sctx, nce);
				2135	kfree(nce);
				2136	}
				2137	}
				2138
				2139	/*
				2140	* Used by get_cur_path for each ref up to the root.
				2141	* Returns 0 if it succeeded.
				2142	* Returns 1 if the inode is not existent or got overwritten. In that case, the
				2143	* name is an orphan name. This instructs get_cur_path to stop iterating. If 1
				2144	* is returned, parent_ino/parent_gen are not guaranteed to be valid.
				2145	* Returns <0 in case of error.
				2146	*/
				2147	static int __get_cur_name_and_parent(struct send_ctx *sctx,
				2148	u64 ino, u64 gen,
				2149	u64 *parent_ino,
				2150	u64 *parent_gen,
				2151	struct fs_path *dest)
				2152	{
				2153	int ret;
				2154	int nce_ret;
				2155	struct name_cache_entry *nce = NULL;
				2156
				2157	/*
				2158	* First check if we already did a call to this function with the same
				2159	* ino/gen. If yes, check if the cache entry is still up-to-date. If yes
				2160	* return the cached result.
				2161	*/
				2162	nce = name_cache_search(sctx, ino, gen);
				2163	if (nce) {
				2164	if (ino < sctx->send_progress && nce->need_later_update) {
				2165	name_cache_delete(sctx, nce);
				2166	kfree(nce);
				2167	nce = NULL;
				2168	} else {
				2169	name_cache_used(sctx, nce);
				2170	*parent_ino = nce->parent_ino;
				2171	*parent_gen = nce->parent_gen;
				2172	ret = fs_path_add(dest, nce->name, nce->name_len);
				2173	if (ret < 0)
				2174	goto out;
				2175	ret = nce->ret;
				2176	goto out;
				2177	}
				2178	}
				2179
				2180	/*
				2181	* If the inode is not existent yet, add the orphan name and return 1.
				2182	* This should only happen for the parent dir that we determine in
				2183	* __record_new_ref
				2184	*/
				2185	ret = is_inode_existent(sctx, ino, gen);
				2186	if (ret < 0)
				2187	goto out;
				2188
				2189	if (!ret) {
				2190	ret = gen_unique_name(sctx, ino, gen, dest);
				2191	if (ret < 0)
				2192	goto out;
				2193	ret = 1;
				2194	goto out_cache;
				2195	}
				2196
				2197	/*
				2198	* Depending on whether the inode was already processed or not, use
				2199	* send_root or parent_root for ref lookup.
				2200	*/
				2201	if (ino < sctx->send_progress)
				2202	ret = get_first_ref(sctx->send_root, ino,
				2203	parent_ino, parent_gen, dest);
				2204	else
				2205	ret = get_first_ref(sctx->parent_root, ino,
				2206	parent_ino, parent_gen, dest);
				2207	if (ret < 0)
				2208	goto out;
				2209
				2210	/*
				2211	* Check if the ref was overwritten by an inode's ref that was processed
				2212	* earlier. If yes, treat as orphan and return 1.
				2213	*/
				2214	ret = did_overwrite_ref(sctx, parent_ino, parent_gen, ino, gen,
				2215	dest->start, dest->end - dest->start);
				2216	if (ret < 0)
				2217	goto out;
				2218	if (ret) {
				2219	fs_path_reset(dest);
				2220	ret = gen_unique_name(sctx, ino, gen, dest);
				2221	if (ret < 0)
				2222	goto out;
				2223	ret = 1;
				2224	}
				2225
				2226	out_cache:
				2227	/*
				2228	* Store the result of the lookup in the name cache.
				2229	*/
				2230	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
				2231	if (!nce) {
				2232	ret = -ENOMEM;
				2233	goto out;
				2234	}
				2235
				2236	nce->ino = ino;
				2237	nce->gen = gen;
				2238	nce->parent_ino = *parent_ino;
				2239	nce->parent_gen = *parent_gen;
				2240	nce->name_len = fs_path_len(dest);
				2241	nce->ret = ret;
				2242	strcpy(nce->name, dest->start);
				2243
				2244	if (ino < sctx->send_progress)
				2245	nce->need_later_update = 0;
				2246	else
				2247	nce->need_later_update = 1;
				2248
				2249	nce_ret = name_cache_insert(sctx, nce);
				2250	if (nce_ret < 0)
				2251	ret = nce_ret;
				2252	name_cache_clean_unused(sctx);
				2253
				2254	out:
				2255	return ret;
				2256	}
				2257
				2258	/*
				2259	* Magic happens here. This function returns the first ref to an inode as it
				2260	* would look like while receiving the stream at this point in time.
				2261	* We walk the path up to the root. For every inode in between, we check if it
				2262	* was already processed/sent. If yes, we continue with the parent as found
				2263	* in send_root. If not, we continue with the parent as found in parent_root.
				2264	* If we encounter an inode that was deleted at this point in time, we use the
				2265	* inodes "orphan" name instead of the real name and stop. Same with new inodes
				2266	* that were not created yet and overwritten inodes/refs.
				2267	*
				2268	* When do we have have orphan inodes:
				2269	* 1. When an inode is freshly created and thus no valid refs are available yet
				2270	* 2. When a directory lost all it's refs (deleted) but still has dir items
				2271	* inside which were not processed yet (pending for move/delete). If anyone
				2272	* tried to get the path to the dir items, it would get a path inside that
				2273	* orphan directory.
				2274	* 3. When an inode is moved around or gets new links, it may overwrite the ref
				2275	* of an unprocessed inode. If in that case the first ref would be
				2276	* overwritten, the overwritten inode gets "orphanized". Later when we
				2277	* process this overwritten inode, it is restored at a new place by moving
				2278	* the orphan inode.
				2279	*
				2280	* sctx->send_progress tells this function at which point in time receiving
				2281	* would be.
				2282	*/
				2283	static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
				2284	struct fs_path *dest)
				2285	{
				2286	int ret = 0;
				2287	struct fs_path *name = NULL;
				2288	u64 parent_inode = 0;
				2289	u64 parent_gen = 0;
				2290	int stop = 0;
				2291
				2292	name = fs_path_alloc();
				2293	if (!name) {
				2294	ret = -ENOMEM;
				2295	goto out;
				2296	}
				2297
				2298	dest->reversed = 1;
				2299	fs_path_reset(dest);
				2300
				2301	while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
				2302	struct waiting_dir_move *wdm;
				2303
				2304	fs_path_reset(name);
				2305
				2306	if (is_waiting_for_rm(sctx, ino)) {
				2307	ret = gen_unique_name(sctx, ino, gen, name);
				2308	if (ret < 0)
				2309	goto out;
				2310	ret = fs_path_add_path(dest, name);
				2311	break;
				2312	}
				2313
				2314	wdm = get_waiting_dir_move(sctx, ino);
				2315	if (wdm && wdm->orphanized) {
				2316	ret = gen_unique_name(sctx, ino, gen, name);
				2317	stop = 1;
				2318	} else if (wdm) {
				2319	ret = get_first_ref(sctx->parent_root, ino,
				2320	&parent_inode, &parent_gen, name);
				2321	} else {
				2322	ret = __get_cur_name_and_parent(sctx, ino, gen,
				2323	&parent_inode,
				2324	&parent_gen, name);
				2325	if (ret)
				2326	stop = 1;
				2327	}
				2328
				2329	if (ret < 0)
				2330	goto out;
				2331
				2332	ret = fs_path_add_path(dest, name);
				2333	if (ret < 0)
				2334	goto out;
				2335
				2336	ino = parent_inode;
				2337	gen = parent_gen;
				2338	}
				2339
				2340	out:
				2341	fs_path_free(name);
				2342	if (!ret)
				2343	fs_path_unreverse(dest);
				2344	return ret;
				2345	}
				2346
				2347	/*
				2348	* Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
				2349	*/
				2350	static int send_subvol_begin(struct send_ctx *sctx)
				2351	{
				2352	int ret;
				2353	struct btrfs_root *send_root = sctx->send_root;
				2354	struct btrfs_root *parent_root = sctx->parent_root;
				2355	struct btrfs_path *path;
				2356	struct btrfs_key key;
				2357	struct btrfs_root_ref *ref;
				2358	struct extent_buffer *leaf;
				2359	char *name = NULL;
				2360	int namelen;
				2361
				2362	path = btrfs_alloc_path();
				2363	if (!path)
				2364	return -ENOMEM;
				2365
				2366	name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
				2367	if (!name) {
				2368	btrfs_free_path(path);
				2369	return -ENOMEM;
				2370	}
				2371
				2372	key.objectid = send_root->objectid;
				2373	key.type = BTRFS_ROOT_BACKREF_KEY;
				2374	key.offset = 0;
				2375
				2376	ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
				2377	&key, path, 1, 0);
				2378	if (ret < 0)
				2379	goto out;
				2380	if (ret) {
				2381	ret = -ENOENT;
				2382	goto out;
				2383	}
				2384
				2385	leaf = path->nodes[0];
				2386	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
				2387	if (key.type != BTRFS_ROOT_BACKREF_KEY \|\|
				2388	key.objectid != send_root->objectid) {
				2389	ret = -ENOENT;
				2390	goto out;
				2391	}
				2392	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
				2393	namelen = btrfs_root_ref_name_len(leaf, ref);
				2394	read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
				2395	btrfs_release_path(path);
				2396
				2397	if (parent_root) {
				2398	ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
				2399	if (ret < 0)
				2400	goto out;
				2401	} else {
				2402	ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
				2403	if (ret < 0)
				2404	goto out;
				2405	}
				2406
				2407	TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
				2408
				2409	if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
				2410	TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
				2411	sctx->send_root->root_item.received_uuid);
				2412	else
				2413	TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
				2414	sctx->send_root->root_item.uuid);
				2415
				2416	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
				2417	le64_to_cpu(sctx->send_root->root_item.ctransid));
				2418	if (parent_root) {
				2419	if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
				2420	TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
				2421	parent_root->root_item.received_uuid);
				2422	else
				2423	TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
				2424	parent_root->root_item.uuid);
				2425	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
				2426	le64_to_cpu(sctx->parent_root->root_item.ctransid));
				2427	}
				2428
				2429	ret = send_cmd(sctx);
				2430
				2431	tlv_put_failure:
				2432	out:
				2433	btrfs_free_path(path);
				2434	kfree(name);
				2435	return ret;
				2436	}
				2437
				2438	static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
				2439	{
				2440	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
				2441	int ret = 0;
				2442	struct fs_path *p;
				2443
				2444	btrfs_debug(fs_info, "send_truncate %llu size=%llu", ino, size);
				2445
				2446	p = fs_path_alloc();
				2447	if (!p)
				2448	return -ENOMEM;
				2449
				2450	ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
				2451	if (ret < 0)
				2452	goto out;
				2453
				2454	ret = get_cur_path(sctx, ino, gen, p);
				2455	if (ret < 0)
				2456	goto out;
				2457	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
				2458	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
				2459
				2460	ret = send_cmd(sctx);
				2461
				2462	tlv_put_failure:
				2463	out:
				2464	fs_path_free(p);
				2465	return ret;
				2466	}
				2467
				2468	static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
				2469	{
				2470	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
				2471	int ret = 0;
				2472	struct fs_path *p;
				2473
				2474	btrfs_debug(fs_info, "send_chmod %llu mode=%llu", ino, mode);
				2475
				2476	p = fs_path_alloc();
				2477	if (!p)
				2478	return -ENOMEM;
				2479
				2480	ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
				2481	if (ret < 0)
				2482	goto out;
				2483
				2484	ret = get_cur_path(sctx, ino, gen, p);
				2485	if (ret < 0)
				2486	goto out;
				2487	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
				2488	TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
				2489
				2490	ret = send_cmd(sctx);
				2491
				2492	tlv_put_failure:
				2493	out:
				2494	fs_path_free(p);
				2495	return ret;
				2496	}
				2497
				2498	static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
				2499	{
				2500	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
				2501	int ret = 0;
				2502	struct fs_path *p;
				2503
				2504	btrfs_debug(fs_info, "send_chown %llu uid=%llu, gid=%llu",
				2505	ino, uid, gid);
				2506
				2507	p = fs_path_alloc();
				2508	if (!p)
				2509	return -ENOMEM;
				2510
				2511	ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
				2512	if (ret < 0)
				2513	goto out;
				2514
				2515	ret = get_cur_path(sctx, ino, gen, p);
				2516	if (ret < 0)
				2517	goto out;
				2518	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
				2519	TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
				2520	TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
				2521
				2522	ret = send_cmd(sctx);
				2523
				2524	tlv_put_failure:
				2525	out:
				2526	fs_path_free(p);
				2527	return ret;
				2528	}
				2529
				2530	static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
				2531	{
				2532	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
				2533	int ret = 0;
				2534	struct fs_path *p = NULL;
				2535	struct btrfs_inode_item *ii;
				2536	struct btrfs_path *path = NULL;
				2537	struct extent_buffer *eb;
				2538	struct btrfs_key key;
				2539	int slot;
				2540
				2541	btrfs_debug(fs_info, "send_utimes %llu", ino);
				2542
				2543	p = fs_path_alloc();
				2544	if (!p)
				2545	return -ENOMEM;
				2546
				2547	path = alloc_path_for_send();
				2548	if (!path) {
				2549	ret = -ENOMEM;
				2550	goto out;
				2551	}
				2552
				2553	key.objectid = ino;
				2554	key.type = BTRFS_INODE_ITEM_KEY;
				2555	key.offset = 0;
				2556	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
				2557	if (ret > 0)
				2558	ret = -ENOENT;
				2559	if (ret < 0)
				2560	goto out;
				2561
				2562	eb = path->nodes[0];
				2563	slot = path->slots[0];
				2564	ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
				2565
				2566	ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
				2567	if (ret < 0)
				2568	goto out;
				2569
				2570	ret = get_cur_path(sctx, ino, gen, p);
				2571	if (ret < 0)
				2572	goto out;
				2573	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
				2574	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
				2575	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
				2576	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
				2577	/* TODO Add otime support when the otime patches get into upstream */
				2578
				2579	ret = send_cmd(sctx);
				2580
				2581	tlv_put_failure:
				2582	out:
				2583	fs_path_free(p);
				2584	btrfs_free_path(path);
				2585	return ret;
				2586	}
				2587
				2588	/*
				2589	* Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
				2590	* a valid path yet because we did not process the refs yet. So, the inode
				2591	* is created as orphan.
				2592	*/
				2593	static int send_create_inode(struct send_ctx *sctx, u64 ino)
				2594	{
				2595	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
				2596	int ret = 0;
				2597	struct fs_path *p;
				2598	int cmd;
				2599	u64 gen;
				2600	u64 mode;
				2601	u64 rdev;
				2602
				2603	btrfs_debug(fs_info, "send_create_inode %llu", ino);
				2604
				2605	p = fs_path_alloc();
				2606	if (!p)
				2607	return -ENOMEM;
				2608
				2609	if (ino != sctx->cur_ino) {
				2610	ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode,
				2611	NULL, NULL, &rdev);
				2612	if (ret < 0)
				2613	goto out;
				2614	} else {
				2615	gen = sctx->cur_inode_gen;
				2616	mode = sctx->cur_inode_mode;
				2617	rdev = sctx->cur_inode_rdev;
				2618	}
				2619
				2620	if (S_ISREG(mode)) {
				2621	cmd = BTRFS_SEND_C_MKFILE;
				2622	} else if (S_ISDIR(mode)) {
				2623	cmd = BTRFS_SEND_C_MKDIR;
				2624	} else if (S_ISLNK(mode)) {
				2625	cmd = BTRFS_SEND_C_SYMLINK;
				2626	} else if (S_ISCHR(mode) \|\| S_ISBLK(mode)) {
				2627	cmd = BTRFS_SEND_C_MKNOD;
				2628	} else if (S_ISFIFO(mode)) {
				2629	cmd = BTRFS_SEND_C_MKFIFO;
				2630	} else if (S_ISSOCK(mode)) {
				2631	cmd = BTRFS_SEND_C_MKSOCK;
				2632	} else {
				2633	btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
				2634	(int)(mode & S_IFMT));
				2635	ret = -EOPNOTSUPP;
				2636	goto out;
				2637	}
				2638
				2639	ret = begin_cmd(sctx, cmd);
				2640	if (ret < 0)
				2641	goto out;
				2642
				2643	ret = gen_unique_name(sctx, ino, gen, p);
				2644	if (ret < 0)
				2645	goto out;
				2646
				2647	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
				2648	TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
				2649
				2650	if (S_ISLNK(mode)) {
				2651	fs_path_reset(p);
				2652	ret = read_symlink(sctx->send_root, ino, p);
				2653	if (ret < 0)
				2654	goto out;
				2655	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
				2656	} else if (S_ISCHR(mode) \|\| S_ISBLK(mode) \|\|
				2657	S_ISFIFO(mode) \|\| S_ISSOCK(mode)) {
				2658	TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
				2659	TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
				2660	}
				2661
				2662	ret = send_cmd(sctx);
				2663	if (ret < 0)
				2664	goto out;
				2665
				2666
				2667	tlv_put_failure:
				2668	out:
				2669	fs_path_free(p);
				2670	return ret;
				2671	}
				2672
				2673	/*
				2674	* We need some special handling for inodes that get processed before the parent
				2675	* directory got created. See process_recorded_refs for details.
				2676	* This function does the check if we already created the dir out of order.
				2677	*/
				2678	static int did_create_dir(struct send_ctx *sctx, u64 dir)
				2679	{
				2680	int ret = 0;
				2681	struct btrfs_path *path = NULL;
				2682	struct btrfs_key key;
				2683	struct btrfs_key found_key;
				2684	struct btrfs_key di_key;
				2685	struct extent_buffer *eb;
				2686	struct btrfs_dir_item *di;
				2687	int slot;
				2688
				2689	path = alloc_path_for_send();
				2690	if (!path) {
				2691	ret = -ENOMEM;
				2692	goto out;
				2693	}
				2694
				2695	key.objectid = dir;
				2696	key.type = BTRFS_DIR_INDEX_KEY;
				2697	key.offset = 0;
				2698	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
				2699	if (ret < 0)
				2700	goto out;
				2701
				2702	while (1) {
				2703	eb = path->nodes[0];
				2704	slot = path->slots[0];
				2705	if (slot >= btrfs_header_nritems(eb)) {
				2706	ret = btrfs_next_leaf(sctx->send_root, path);
				2707	if (ret < 0) {
				2708	goto out;
				2709	} else if (ret > 0) {
				2710	ret = 0;
				2711	break;
				2712	}
				2713	continue;
				2714	}
				2715
				2716	btrfs_item_key_to_cpu(eb, &found_key, slot);
				2717	if (found_key.objectid != key.objectid \|\|
				2718	found_key.type != key.type) {
				2719	ret = 0;
				2720	goto out;
				2721	}
				2722
				2723	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
				2724	btrfs_dir_item_key_to_cpu(eb, di, &di_key);
				2725
				2726	if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
				2727	di_key.objectid < sctx->send_progress) {
				2728	ret = 1;
				2729	goto out;
				2730	}
				2731
				2732	path->slots[0]++;
				2733	}
				2734
				2735	out:
				2736	btrfs_free_path(path);
				2737	return ret;
				2738	}
				2739
				2740	/*
				2741	* Only creates the inode if it is:
				2742	* 1. Not a directory
				2743	* 2. Or a directory which was not created already due to out of order
				2744	* directories. See did_create_dir and process_recorded_refs for details.
				2745	*/
				2746	static int send_create_inode_if_needed(struct send_ctx *sctx)
				2747	{
				2748	int ret;
				2749
				2750	if (S_ISDIR(sctx->cur_inode_mode)) {
				2751	ret = did_create_dir(sctx, sctx->cur_ino);
				2752	if (ret < 0)
				2753	goto out;
				2754	if (ret) {
				2755	ret = 0;
				2756	goto out;
				2757	}
				2758	}
				2759
				2760	ret = send_create_inode(sctx, sctx->cur_ino);
				2761	if (ret < 0)
				2762	goto out;
				2763
				2764	out:
				2765	return ret;
				2766	}
				2767
				2768	struct recorded_ref {
				2769	struct list_head list;
				2770	char *name;
				2771	struct fs_path *full_path;
				2772	u64 dir;
				2773	u64 dir_gen;
				2774	int name_len;
				2775	};
				2776
				2777	static void set_ref_path(struct recorded_ref ref, struct fs_path path)
				2778	{
				2779	ref->full_path = path;
				2780	ref->name = (char *)kbasename(ref->full_path->start);
				2781	ref->name_len = ref->full_path->end - ref->name;
				2782	}
				2783
				2784	/*
				2785	* We need to process new refs before deleted refs, but compare_tree gives us
				2786	* everything mixed. So we first record all refs and later process them.
				2787	* This function is a helper to record one ref.
				2788	*/
				2789	static int __record_ref(struct list_head *head, u64 dir,
				2790	u64 dir_gen, struct fs_path *path)
				2791	{
				2792	struct recorded_ref *ref;
				2793
				2794	ref = kmalloc(sizeof(*ref), GFP_KERNEL);
				2795	if (!ref)
				2796	return -ENOMEM;
				2797
				2798	ref->dir = dir;
				2799	ref->dir_gen = dir_gen;
				2800	set_ref_path(ref, path);
				2801	list_add_tail(&ref->list, head);
				2802	return 0;
				2803	}
				2804
				2805	static int dup_ref(struct recorded_ref ref, struct list_head list)
				2806	{
				2807	struct recorded_ref *new;
				2808
				2809	new = kmalloc(sizeof(*ref), GFP_KERNEL);
				2810	if (!new)
				2811	return -ENOMEM;
				2812
				2813	new->dir = ref->dir;
				2814	new->dir_gen = ref->dir_gen;
				2815	new->full_path = NULL;
				2816	INIT_LIST_HEAD(&new->list);
				2817	list_add_tail(&new->list, list);
				2818	return 0;
				2819	}
				2820
				2821	static void __free_recorded_refs(struct list_head *head)
				2822	{
				2823	struct recorded_ref *cur;
				2824
				2825	while (!list_empty(head)) {
				2826	cur = list_entry(head->next, struct recorded_ref, list);
				2827	fs_path_free(cur->full_path);
				2828	list_del(&cur->list);
				2829	kfree(cur);
				2830	}
				2831	}
				2832
				2833	static void free_recorded_refs(struct send_ctx *sctx)
				2834	{
				2835	__free_recorded_refs(&sctx->new_refs);
				2836	__free_recorded_refs(&sctx->deleted_refs);
				2837	}
				2838
				2839	/*
				2840	* Renames/moves a file/dir to its orphan name. Used when the first
				2841	* ref of an unprocessed inode gets overwritten and for all non empty
				2842	* directories.
				2843	*/
				2844	static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
				2845	struct fs_path *path)
				2846	{
				2847	int ret;
				2848	struct fs_path *orphan;
				2849
				2850	orphan = fs_path_alloc();
				2851	if (!orphan)
				2852	return -ENOMEM;
				2853
				2854	ret = gen_unique_name(sctx, ino, gen, orphan);
				2855	if (ret < 0)
				2856	goto out;
				2857
				2858	ret = send_rename(sctx, path, orphan);
				2859
				2860	out:
				2861	fs_path_free(orphan);
				2862	return ret;
				2863	}
				2864
				2865	static struct orphan_dir_info *
				2866	add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
				2867	{
				2868	struct rb_node **p = &sctx->orphan_dirs.rb_node;
				2869	struct rb_node *parent = NULL;
				2870	struct orphan_dir_info entry, odi;
				2871
				2872	while (*p) {
				2873	parent = *p;
				2874	entry = rb_entry(parent, struct orphan_dir_info, node);
				2875	if (dir_ino < entry->ino) {
				2876	p = &(*p)->rb_left;
				2877	} else if (dir_ino > entry->ino) {
				2878	p = &(*p)->rb_right;
				2879	} else {
				2880	return entry;
				2881	}
				2882	}
				2883
				2884	odi = kmalloc(sizeof(*odi), GFP_KERNEL);
				2885	if (!odi)
				2886	return ERR_PTR(-ENOMEM);
				2887	odi->ino = dir_ino;
				2888	odi->gen = 0;
				2889	odi->last_dir_index_offset = 0;
				2890
				2891	rb_link_node(&odi->node, parent, p);
				2892	rb_insert_color(&odi->node, &sctx->orphan_dirs);
				2893	return odi;
				2894	}
				2895
				2896	static struct orphan_dir_info *
				2897	get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
				2898	{
				2899	struct rb_node *n = sctx->orphan_dirs.rb_node;
				2900	struct orphan_dir_info *entry;
				2901
				2902	while (n) {
				2903	entry = rb_entry(n, struct orphan_dir_info, node);
				2904	if (dir_ino < entry->ino)
				2905	n = n->rb_left;
				2906	else if (dir_ino > entry->ino)
				2907	n = n->rb_right;
				2908	else
				2909	return entry;
				2910	}
				2911	return NULL;
				2912	}
				2913
				2914	static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino)
				2915	{
				2916	struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino);
				2917
				2918	return odi != NULL;
				2919	}
				2920
				2921	static void free_orphan_dir_info(struct send_ctx *sctx,
				2922	struct orphan_dir_info *odi)
				2923	{
				2924	if (!odi)
				2925	return;
				2926	rb_erase(&odi->node, &sctx->orphan_dirs);
				2927	kfree(odi);
				2928	}
				2929
				2930	/*
				2931	* Returns 1 if a directory can be removed at this point in time.
				2932	* We check this by iterating all dir items and checking if the inode behind
				2933	* the dir item was already processed.
				2934	*/
				2935	static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
				2936	u64 send_progress)
				2937	{
				2938	int ret = 0;
				2939	struct btrfs_root *root = sctx->parent_root;
				2940	struct btrfs_path *path;
				2941	struct btrfs_key key;
				2942	struct btrfs_key found_key;
				2943	struct btrfs_key loc;
				2944	struct btrfs_dir_item *di;
				2945	struct orphan_dir_info *odi = NULL;
				2946
				2947	/*
				2948	* Don't try to rmdir the top/root subvolume dir.
				2949	*/
				2950	if (dir == BTRFS_FIRST_FREE_OBJECTID)
				2951	return 0;
				2952
				2953	path = alloc_path_for_send();
				2954	if (!path)
				2955	return -ENOMEM;
				2956
				2957	key.objectid = dir;
				2958	key.type = BTRFS_DIR_INDEX_KEY;
				2959	key.offset = 0;
				2960
				2961	odi = get_orphan_dir_info(sctx, dir);
				2962	if (odi)
				2963	key.offset = odi->last_dir_index_offset;
				2964
				2965	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				2966	if (ret < 0)
				2967	goto out;
				2968
				2969	while (1) {
				2970	struct waiting_dir_move *dm;
				2971
				2972	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
				2973	ret = btrfs_next_leaf(root, path);
				2974	if (ret < 0)
				2975	goto out;
				2976	else if (ret > 0)
				2977	break;
				2978	continue;
				2979	}
				2980	btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				2981	path->slots[0]);
				2982	if (found_key.objectid != key.objectid \|\|
				2983	found_key.type != key.type)
				2984	break;
				2985
				2986	di = btrfs_item_ptr(path->nodes[0], path->slots[0],
				2987	struct btrfs_dir_item);
				2988	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
				2989
				2990	dm = get_waiting_dir_move(sctx, loc.objectid);
				2991	if (dm) {
				2992	odi = add_orphan_dir_info(sctx, dir);
				2993	if (IS_ERR(odi)) {
				2994	ret = PTR_ERR(odi);
				2995	goto out;
				2996	}
				2997	odi->gen = dir_gen;
				2998	odi->last_dir_index_offset = found_key.offset;
				2999	dm->rmdir_ino = dir;
				3000	ret = 0;
				3001	goto out;
				3002	}
				3003
				3004	if (loc.objectid > send_progress) {
				3005	odi = add_orphan_dir_info(sctx, dir);
				3006	if (IS_ERR(odi)) {
				3007	ret = PTR_ERR(odi);
				3008	goto out;
				3009	}
				3010	odi->gen = dir_gen;
				3011	odi->last_dir_index_offset = found_key.offset;
				3012	ret = 0;
				3013	goto out;
				3014	}
				3015
				3016	path->slots[0]++;
				3017	}
				3018	free_orphan_dir_info(sctx, odi);
				3019
				3020	ret = 1;
				3021
				3022	out:
				3023	btrfs_free_path(path);
				3024	return ret;
				3025	}
				3026
				3027	static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
				3028	{
				3029	struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino);
				3030
				3031	return entry != NULL;
				3032	}
				3033
				3034	static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
				3035	{
				3036	struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
				3037	struct rb_node *parent = NULL;
				3038	struct waiting_dir_move entry, dm;
				3039
				3040	dm = kmalloc(sizeof(*dm), GFP_KERNEL);
				3041	if (!dm)
				3042	return -ENOMEM;
				3043	dm->ino = ino;
				3044	dm->rmdir_ino = 0;
				3045	dm->orphanized = orphanized;
				3046
				3047	while (*p) {
				3048	parent = *p;
				3049	entry = rb_entry(parent, struct waiting_dir_move, node);
				3050	if (ino < entry->ino) {
				3051	p = &(*p)->rb_left;
				3052	} else if (ino > entry->ino) {
				3053	p = &(*p)->rb_right;
				3054	} else {
				3055	kfree(dm);
				3056	return -EEXIST;
				3057	}
				3058	}
				3059
				3060	rb_link_node(&dm->node, parent, p);
				3061	rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
				3062	return 0;
				3063	}
				3064
				3065	static struct waiting_dir_move *
				3066	get_waiting_dir_move(struct send_ctx *sctx, u64 ino)
				3067	{
				3068	struct rb_node *n = sctx->waiting_dir_moves.rb_node;
				3069	struct waiting_dir_move *entry;
				3070
				3071	while (n) {
				3072	entry = rb_entry(n, struct waiting_dir_move, node);
				3073	if (ino < entry->ino)
				3074	n = n->rb_left;
				3075	else if (ino > entry->ino)
				3076	n = n->rb_right;
				3077	else
				3078	return entry;
				3079	}
				3080	return NULL;
				3081	}
				3082
				3083	static void free_waiting_dir_move(struct send_ctx *sctx,
				3084	struct waiting_dir_move *dm)
				3085	{
				3086	if (!dm)
				3087	return;
				3088	rb_erase(&dm->node, &sctx->waiting_dir_moves);
				3089	kfree(dm);
				3090	}
				3091
				3092	static int add_pending_dir_move(struct send_ctx *sctx,
				3093	u64 ino,
				3094	u64 ino_gen,
				3095	u64 parent_ino,
				3096	struct list_head *new_refs,
				3097	struct list_head *deleted_refs,
				3098	const bool is_orphan)
				3099	{
				3100	struct rb_node **p = &sctx->pending_dir_moves.rb_node;
				3101	struct rb_node *parent = NULL;
				3102	struct pending_dir_move entry = NULL, pm;
				3103	struct recorded_ref *cur;
				3104	int exists = 0;
				3105	int ret;
				3106
				3107	pm = kmalloc(sizeof(*pm), GFP_KERNEL);
				3108	if (!pm)
				3109	return -ENOMEM;
				3110	pm->parent_ino = parent_ino;
				3111	pm->ino = ino;
				3112	pm->gen = ino_gen;
				3113	INIT_LIST_HEAD(&pm->list);
				3114	INIT_LIST_HEAD(&pm->update_refs);
				3115	RB_CLEAR_NODE(&pm->node);
				3116
				3117	while (*p) {
				3118	parent = *p;
				3119	entry = rb_entry(parent, struct pending_dir_move, node);
				3120	if (parent_ino < entry->parent_ino) {
				3121	p = &(*p)->rb_left;
				3122	} else if (parent_ino > entry->parent_ino) {
				3123	p = &(*p)->rb_right;
				3124	} else {
				3125	exists = 1;
				3126	break;
				3127	}
				3128	}
				3129
				3130	list_for_each_entry(cur, deleted_refs, list) {
				3131	ret = dup_ref(cur, &pm->update_refs);
				3132	if (ret < 0)
				3133	goto out;
				3134	}
				3135	list_for_each_entry(cur, new_refs, list) {
				3136	ret = dup_ref(cur, &pm->update_refs);
				3137	if (ret < 0)
				3138	goto out;
				3139	}
				3140
				3141	ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
				3142	if (ret)
				3143	goto out;
				3144
				3145	if (exists) {
				3146	list_add_tail(&pm->list, &entry->list);
				3147	} else {
				3148	rb_link_node(&pm->node, parent, p);
				3149	rb_insert_color(&pm->node, &sctx->pending_dir_moves);
				3150	}
				3151	ret = 0;
				3152	out:
				3153	if (ret) {
				3154	__free_recorded_refs(&pm->update_refs);
				3155	kfree(pm);
				3156	}
				3157	return ret;
				3158	}
				3159
				3160	static struct pending_dir_move get_pending_dir_moves(struct send_ctx sctx,
				3161	u64 parent_ino)
				3162	{
				3163	struct rb_node *n = sctx->pending_dir_moves.rb_node;
				3164	struct pending_dir_move *entry;
				3165
				3166	while (n) {
				3167	entry = rb_entry(n, struct pending_dir_move, node);
				3168	if (parent_ino < entry->parent_ino)
				3169	n = n->rb_left;
				3170	else if (parent_ino > entry->parent_ino)
				3171	n = n->rb_right;
				3172	else
				3173	return entry;
				3174	}
				3175	return NULL;
				3176	}
				3177
				3178	static int path_loop(struct send_ctx sctx, struct fs_path name,
				3179	u64 ino, u64 gen, u64 *ancestor_ino)
				3180	{
				3181	int ret = 0;
				3182	u64 parent_inode = 0;
				3183	u64 parent_gen = 0;
				3184	u64 start_ino = ino;
				3185
				3186	*ancestor_ino = 0;
				3187	while (ino != BTRFS_FIRST_FREE_OBJECTID) {
				3188	fs_path_reset(name);
				3189
				3190	if (is_waiting_for_rm(sctx, ino))
				3191	break;
				3192	if (is_waiting_for_move(sctx, ino)) {
				3193	if (*ancestor_ino == 0)
				3194	*ancestor_ino = ino;
				3195	ret = get_first_ref(sctx->parent_root, ino,
				3196	&parent_inode, &parent_gen, name);
				3197	} else {
				3198	ret = __get_cur_name_and_parent(sctx, ino, gen,
				3199	&parent_inode,
				3200	&parent_gen, name);
				3201	if (ret > 0) {
				3202	ret = 0;
				3203	break;
				3204	}
				3205	}
				3206	if (ret < 0)
				3207	break;
				3208	if (parent_inode == start_ino) {
				3209	ret = 1;
				3210	if (*ancestor_ino == 0)
				3211	*ancestor_ino = ino;
				3212	break;
				3213	}
				3214	ino = parent_inode;
				3215	gen = parent_gen;
				3216	}
				3217	return ret;
				3218	}
				3219
				3220	static int apply_dir_move(struct send_ctx sctx, struct pending_dir_move pm)
				3221	{
				3222	struct fs_path *from_path = NULL;
				3223	struct fs_path *to_path = NULL;
				3224	struct fs_path *name = NULL;
				3225	u64 orig_progress = sctx->send_progress;
				3226	struct recorded_ref *cur;
				3227	u64 parent_ino, parent_gen;
				3228	struct waiting_dir_move *dm = NULL;
				3229	u64 rmdir_ino = 0;
				3230	u64 ancestor;
				3231	bool is_orphan;
				3232	int ret;
				3233
				3234	name = fs_path_alloc();
				3235	from_path = fs_path_alloc();
				3236	if (!name \|\| !from_path) {
				3237	ret = -ENOMEM;
				3238	goto out;
				3239	}
				3240
				3241	dm = get_waiting_dir_move(sctx, pm->ino);
				3242	ASSERT(dm);
				3243	rmdir_ino = dm->rmdir_ino;
				3244	is_orphan = dm->orphanized;
				3245	free_waiting_dir_move(sctx, dm);
				3246
				3247	if (is_orphan) {
				3248	ret = gen_unique_name(sctx, pm->ino,
				3249	pm->gen, from_path);
				3250	} else {
				3251	ret = get_first_ref(sctx->parent_root, pm->ino,
				3252	&parent_ino, &parent_gen, name);
				3253	if (ret < 0)
				3254	goto out;
				3255	ret = get_cur_path(sctx, parent_ino, parent_gen,
				3256	from_path);
				3257	if (ret < 0)
				3258	goto out;
				3259	ret = fs_path_add_path(from_path, name);
				3260	}
				3261	if (ret < 0)
				3262	goto out;
				3263
				3264	sctx->send_progress = sctx->cur_ino + 1;
				3265	ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
				3266	if (ret < 0)
				3267	goto out;
				3268	if (ret) {
				3269	LIST_HEAD(deleted_refs);
				3270	ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
				3271	ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
				3272	&pm->update_refs, &deleted_refs,
				3273	is_orphan);
				3274	if (ret < 0)
				3275	goto out;
				3276	if (rmdir_ino) {
				3277	dm = get_waiting_dir_move(sctx, pm->ino);
				3278	ASSERT(dm);
				3279	dm->rmdir_ino = rmdir_ino;
				3280	}
				3281	goto out;
				3282	}
				3283	fs_path_reset(name);
				3284	to_path = name;
				3285	name = NULL;
				3286	ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
				3287	if (ret < 0)
				3288	goto out;
				3289
				3290	ret = send_rename(sctx, from_path, to_path);
				3291	if (ret < 0)
				3292	goto out;
				3293
				3294	if (rmdir_ino) {
				3295	struct orphan_dir_info *odi;
				3296	u64 gen;
				3297
				3298	odi = get_orphan_dir_info(sctx, rmdir_ino);
				3299	if (!odi) {
				3300	/* already deleted */
				3301	goto finish;
				3302	}
				3303	gen = odi->gen;
				3304
				3305	ret = can_rmdir(sctx, rmdir_ino, gen, sctx->cur_ino);
				3306	if (ret < 0)
				3307	goto out;
				3308	if (!ret)
				3309	goto finish;
				3310
				3311	name = fs_path_alloc();
				3312	if (!name) {
				3313	ret = -ENOMEM;
				3314	goto out;
				3315	}
				3316	ret = get_cur_path(sctx, rmdir_ino, gen, name);
				3317	if (ret < 0)
				3318	goto out;
				3319	ret = send_rmdir(sctx, name);
				3320	if (ret < 0)
				3321	goto out;
				3322	}
				3323
				3324	finish:
				3325	ret = send_utimes(sctx, pm->ino, pm->gen);
				3326	if (ret < 0)
				3327	goto out;
				3328
				3329	/*
				3330	* After rename/move, need to update the utimes of both new parent(s)
				3331	* and old parent(s).
				3332	*/
				3333	list_for_each_entry(cur, &pm->update_refs, list) {
				3334	/*
				3335	* The parent inode might have been deleted in the send snapshot
				3336	*/
				3337	ret = get_inode_info(sctx->send_root, cur->dir, NULL,
				3338	NULL, NULL, NULL, NULL, NULL);
				3339	if (ret == -ENOENT) {
				3340	ret = 0;
				3341	continue;
				3342	}
				3343	if (ret < 0)
				3344	goto out;
				3345
				3346	ret = send_utimes(sctx, cur->dir, cur->dir_gen);
				3347	if (ret < 0)
				3348	goto out;
				3349	}
				3350
				3351	out:
				3352	fs_path_free(name);
				3353	fs_path_free(from_path);
				3354	fs_path_free(to_path);
				3355	sctx->send_progress = orig_progress;
				3356
				3357	return ret;
				3358	}
				3359
				3360	static void free_pending_move(struct send_ctx sctx, struct pending_dir_move m)
				3361	{
				3362	if (!list_empty(&m->list))
				3363	list_del(&m->list);
				3364	if (!RB_EMPTY_NODE(&m->node))
				3365	rb_erase(&m->node, &sctx->pending_dir_moves);
				3366	__free_recorded_refs(&m->update_refs);
				3367	kfree(m);
				3368	}
				3369
				3370	static void tail_append_pending_moves(struct send_ctx *sctx,
				3371	struct pending_dir_move *moves,
				3372	struct list_head *stack)
				3373	{
				3374	if (list_empty(&moves->list)) {
				3375	list_add_tail(&moves->list, stack);
				3376	} else {
				3377	LIST_HEAD(list);
				3378	list_splice_init(&moves->list, &list);
				3379	list_add_tail(&moves->list, stack);
				3380	list_splice_tail(&list, stack);
				3381	}
				3382	if (!RB_EMPTY_NODE(&moves->node)) {
				3383	rb_erase(&moves->node, &sctx->pending_dir_moves);
				3384	RB_CLEAR_NODE(&moves->node);
				3385	}
				3386	}
				3387
				3388	static int apply_children_dir_moves(struct send_ctx *sctx)
				3389	{
				3390	struct pending_dir_move *pm;
				3391	struct list_head stack;
				3392	u64 parent_ino = sctx->cur_ino;
				3393	int ret = 0;
				3394
				3395	pm = get_pending_dir_moves(sctx, parent_ino);
				3396	if (!pm)
				3397	return 0;
				3398
				3399	INIT_LIST_HEAD(&stack);
				3400	tail_append_pending_moves(sctx, pm, &stack);
				3401
				3402	while (!list_empty(&stack)) {
				3403	pm = list_first_entry(&stack, struct pending_dir_move, list);
				3404	parent_ino = pm->ino;
				3405	ret = apply_dir_move(sctx, pm);
				3406	free_pending_move(sctx, pm);
				3407	if (ret)
				3408	goto out;
				3409	pm = get_pending_dir_moves(sctx, parent_ino);
				3410	if (pm)
				3411	tail_append_pending_moves(sctx, pm, &stack);
				3412	}
				3413	return 0;
				3414
				3415	out:
				3416	while (!list_empty(&stack)) {
				3417	pm = list_first_entry(&stack, struct pending_dir_move, list);
				3418	free_pending_move(sctx, pm);
				3419	}
				3420	return ret;
				3421	}
				3422
				3423	/*
				3424	* We might need to delay a directory rename even when no ancestor directory
				3425	* (in the send root) with a higher inode number than ours (sctx->cur_ino) was
				3426	* renamed. This happens when we rename a directory to the old name (the name
				3427	* in the parent root) of some other unrelated directory that got its rename
				3428	* delayed due to some ancestor with higher number that got renamed.
				3429	*
				3430	* Example:
				3431	*
				3432	* Parent snapshot:
				3433	* . (ino 256)
				3434	* \|---- a/ (ino 257)
				3435	* \| \|---- file (ino 260)
				3436	* \|
				3437	* \|---- b/ (ino 258)
				3438	* \|---- c/ (ino 259)
				3439	*
				3440	* Send snapshot:
				3441	* . (ino 256)
				3442	* \|---- a/ (ino 258)
				3443	* \|---- x/ (ino 259)
				3444	* \|---- y/ (ino 257)
				3445	* \|----- file (ino 260)
				3446	*
				3447	* Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
				3448	* from 'a' to 'x/y' happening first, which in turn depends on the rename of
				3449	* inode 259 from 'c' to 'x'. So the order of rename commands the send stream
				3450	* must issue is:
				3451	*
				3452	* 1 - rename 259 from 'c' to 'x'
				3453	* 2 - rename 257 from 'a' to 'x/y'
				3454	* 3 - rename 258 from 'b' to 'a'
				3455	*
				3456	* Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
				3457	* be done right away and < 0 on error.
				3458	*/
				3459	static int wait_for_dest_dir_move(struct send_ctx *sctx,
				3460	struct recorded_ref *parent_ref,
				3461	const bool is_orphan)
				3462	{
				3463	struct btrfs_fs_info *fs_info = sctx->parent_root->fs_info;
				3464	struct btrfs_path *path;
				3465	struct btrfs_key key;
				3466	struct btrfs_key di_key;
				3467	struct btrfs_dir_item *di;
				3468	u64 left_gen;
				3469	u64 right_gen;
				3470	int ret = 0;
				3471	struct waiting_dir_move *wdm;
				3472
				3473	if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
				3474	return 0;
				3475
				3476	path = alloc_path_for_send();
				3477	if (!path)
				3478	return -ENOMEM;
				3479
				3480	key.objectid = parent_ref->dir;
				3481	key.type = BTRFS_DIR_ITEM_KEY;
				3482	key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
				3483
				3484	ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
				3485	if (ret < 0) {
				3486	goto out;
				3487	} else if (ret > 0) {
				3488	ret = 0;
				3489	goto out;
				3490	}
				3491
				3492	di = btrfs_match_dir_item_name(fs_info, path, parent_ref->name,
				3493	parent_ref->name_len);
				3494	if (!di) {
				3495	ret = 0;
				3496	goto out;
				3497	}
				3498	/*
				3499	* di_key.objectid has the number of the inode that has a dentry in the
				3500	* parent directory with the same name that sctx->cur_ino is being
				3501	* renamed to. We need to check if that inode is in the send root as
				3502	* well and if it is currently marked as an inode with a pending rename,
				3503	* if it is, we need to delay the rename of sctx->cur_ino as well, so
				3504	* that it happens after that other inode is renamed.
				3505	*/
				3506	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
				3507	if (di_key.type != BTRFS_INODE_ITEM_KEY) {
				3508	ret = 0;
				3509	goto out;
				3510	}
				3511
				3512	ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
				3513	&left_gen, NULL, NULL, NULL, NULL);
				3514	if (ret < 0)
				3515	goto out;
				3516	ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
				3517	&right_gen, NULL, NULL, NULL, NULL);
				3518	if (ret < 0) {
				3519	if (ret == -ENOENT)
				3520	ret = 0;
				3521	goto out;
				3522	}
				3523
				3524	/* Different inode, no need to delay the rename of sctx->cur_ino */
				3525	if (right_gen != left_gen) {
				3526	ret = 0;
				3527	goto out;
				3528	}
				3529
				3530	wdm = get_waiting_dir_move(sctx, di_key.objectid);
				3531	if (wdm && !wdm->orphanized) {
				3532	ret = add_pending_dir_move(sctx,
				3533	sctx->cur_ino,
				3534	sctx->cur_inode_gen,
				3535	di_key.objectid,
				3536	&sctx->new_refs,
				3537	&sctx->deleted_refs,
				3538	is_orphan);
				3539	if (!ret)
				3540	ret = 1;
				3541	}
				3542	out:
				3543	btrfs_free_path(path);
				3544	return ret;
				3545	}
				3546
				3547	/*
				3548	* Check if inode ino2, or any of its ancestors, is inode ino1.
				3549	* Return 1 if true, 0 if false and < 0 on error.
				3550	*/
				3551	static int check_ino_in_path(struct btrfs_root *root,
				3552	const u64 ino1,
				3553	const u64 ino1_gen,
				3554	const u64 ino2,
				3555	const u64 ino2_gen,
				3556	struct fs_path *fs_path)
				3557	{
				3558	u64 ino = ino2;
				3559
				3560	if (ino1 == ino2)
				3561	return ino1_gen == ino2_gen;
				3562
				3563	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
				3564	u64 parent;
				3565	u64 parent_gen;
				3566	int ret;
				3567
				3568	fs_path_reset(fs_path);
				3569	ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
				3570	if (ret < 0)
				3571	return ret;
				3572	if (parent == ino1)
				3573	return parent_gen == ino1_gen;
				3574	ino = parent;
				3575	}
				3576	return 0;
				3577	}
				3578
				3579	/*
				3580	* Check if ino ino1 is an ancestor of inode ino2 in the given root for any
				3581	* possible path (in case ino2 is not a directory and has multiple hard links).
				3582	* Return 1 if true, 0 if false and < 0 on error.
				3583	*/
				3584	static int is_ancestor(struct btrfs_root *root,
				3585	const u64 ino1,
				3586	const u64 ino1_gen,
				3587	const u64 ino2,
				3588	struct fs_path *fs_path)
				3589	{
				3590	bool free_fs_path = false;
				3591	int ret = 0;
				3592	struct btrfs_path *path = NULL;
				3593	struct btrfs_key key;
				3594
				3595	if (!fs_path) {
				3596	fs_path = fs_path_alloc();
				3597	if (!fs_path)
				3598	return -ENOMEM;
				3599	free_fs_path = true;
				3600	}
				3601
				3602	path = alloc_path_for_send();
				3603	if (!path) {
				3604	ret = -ENOMEM;
				3605	goto out;
				3606	}
				3607
				3608	key.objectid = ino2;
				3609	key.type = BTRFS_INODE_REF_KEY;
				3610	key.offset = 0;
				3611
				3612	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				3613	if (ret < 0)
				3614	goto out;
				3615
				3616	while (true) {
				3617	struct extent_buffer *leaf = path->nodes[0];
				3618	int slot = path->slots[0];
				3619	u32 cur_offset = 0;
				3620	u32 item_size;
				3621
				3622	if (slot >= btrfs_header_nritems(leaf)) {
				3623	ret = btrfs_next_leaf(root, path);
				3624	if (ret < 0)
				3625	goto out;
				3626	if (ret > 0)
				3627	break;
				3628	continue;
				3629	}
				3630
				3631	btrfs_item_key_to_cpu(leaf, &key, slot);
				3632	if (key.objectid != ino2)
				3633	break;
				3634	if (key.type != BTRFS_INODE_REF_KEY &&
				3635	key.type != BTRFS_INODE_EXTREF_KEY)
				3636	break;
				3637
				3638	item_size = btrfs_item_size_nr(leaf, slot);
				3639	while (cur_offset < item_size) {
				3640	u64 parent;
				3641	u64 parent_gen;
				3642
				3643	if (key.type == BTRFS_INODE_EXTREF_KEY) {
				3644	unsigned long ptr;
				3645	struct btrfs_inode_extref *extref;
				3646
				3647	ptr = btrfs_item_ptr_offset(leaf, slot);
				3648	extref = (struct btrfs_inode_extref *)
				3649	(ptr + cur_offset);
				3650	parent = btrfs_inode_extref_parent(leaf,
				3651	extref);
				3652	cur_offset += sizeof(*extref);
				3653	cur_offset += btrfs_inode_extref_name_len(leaf,
				3654	extref);
				3655	} else {
				3656	parent = key.offset;
				3657	cur_offset = item_size;
				3658	}
				3659
				3660	ret = get_inode_info(root, parent, NULL, &parent_gen,
				3661	NULL, NULL, NULL, NULL);
				3662	if (ret < 0)
				3663	goto out;
				3664	ret = check_ino_in_path(root, ino1, ino1_gen,
				3665	parent, parent_gen, fs_path);
				3666	if (ret)
				3667	goto out;
				3668	}
				3669	path->slots[0]++;
				3670	}
				3671	ret = 0;
				3672	out:
				3673	btrfs_free_path(path);
				3674	if (free_fs_path)
				3675	fs_path_free(fs_path);
				3676	return ret;
				3677	}
				3678
				3679	static int wait_for_parent_move(struct send_ctx *sctx,
				3680	struct recorded_ref *parent_ref,
				3681	const bool is_orphan)
				3682	{
				3683	int ret = 0;
				3684	u64 ino = parent_ref->dir;
				3685	u64 ino_gen = parent_ref->dir_gen;
				3686	u64 parent_ino_before, parent_ino_after;
				3687	struct fs_path *path_before = NULL;
				3688	struct fs_path *path_after = NULL;
				3689	int len1, len2;
				3690
				3691	path_after = fs_path_alloc();
				3692	path_before = fs_path_alloc();
				3693	if (!path_after \|\| !path_before) {
				3694	ret = -ENOMEM;
				3695	goto out;
				3696	}
				3697
				3698	/*
				3699	* Our current directory inode may not yet be renamed/moved because some
				3700	* ancestor (immediate or not) has to be renamed/moved first. So find if
				3701	* such ancestor exists and make sure our own rename/move happens after
				3702	* that ancestor is processed to avoid path build infinite loops (done
				3703	* at get_cur_path()).
				3704	*/
				3705	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
				3706	u64 parent_ino_after_gen;
				3707
				3708	if (is_waiting_for_move(sctx, ino)) {
				3709	/*
				3710	* If the current inode is an ancestor of ino in the
				3711	* parent root, we need to delay the rename of the
				3712	* current inode, otherwise don't delayed the rename
				3713	* because we can end up with a circular dependency
				3714	* of renames, resulting in some directories never
				3715	* getting the respective rename operations issued in
				3716	* the send stream or getting into infinite path build
				3717	* loops.
				3718	*/
				3719	ret = is_ancestor(sctx->parent_root,
				3720	sctx->cur_ino, sctx->cur_inode_gen,
				3721	ino, path_before);
				3722	if (ret)
				3723	break;
				3724	}
				3725
				3726	fs_path_reset(path_before);
				3727	fs_path_reset(path_after);
				3728
				3729	ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
				3730	&parent_ino_after_gen, path_after);
				3731	if (ret < 0)
				3732	goto out;
				3733	ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
				3734	NULL, path_before);
				3735	if (ret < 0 && ret != -ENOENT) {
				3736	goto out;
				3737	} else if (ret == -ENOENT) {
				3738	ret = 0;
				3739	break;
				3740	}
				3741
				3742	len1 = fs_path_len(path_before);
				3743	len2 = fs_path_len(path_after);
				3744	if (ino > sctx->cur_ino &&
				3745	(parent_ino_before != parent_ino_after \|\| len1 != len2 \|\|
				3746	memcmp(path_before->start, path_after->start, len1))) {
				3747	u64 parent_ino_gen;
				3748
				3749	ret = get_inode_info(sctx->parent_root, ino, NULL,
				3750	&parent_ino_gen, NULL, NULL, NULL,
				3751	NULL);
				3752	if (ret < 0)
				3753	goto out;
				3754	if (ino_gen == parent_ino_gen) {
				3755	ret = 1;
				3756	break;
				3757	}
				3758	}
				3759	ino = parent_ino_after;
				3760	ino_gen = parent_ino_after_gen;
				3761	}
				3762
				3763	out:
				3764	fs_path_free(path_before);
				3765	fs_path_free(path_after);
				3766
				3767	if (ret == 1) {
				3768	ret = add_pending_dir_move(sctx,
				3769	sctx->cur_ino,
				3770	sctx->cur_inode_gen,
				3771	ino,
				3772	&sctx->new_refs,
				3773	&sctx->deleted_refs,
				3774	is_orphan);
				3775	if (!ret)
				3776	ret = 1;
				3777	}
				3778
				3779	return ret;
				3780	}
				3781
				3782	static int update_ref_path(struct send_ctx sctx, struct recorded_ref ref)
				3783	{
				3784	int ret;
				3785	struct fs_path *new_path;
				3786
				3787	/*
				3788	* Our reference's name member points to its full_path member string, so
				3789	* we use here a new path.
				3790	*/
				3791	new_path = fs_path_alloc();
				3792	if (!new_path)
				3793	return -ENOMEM;
				3794
				3795	ret = get_cur_path(sctx, ref->dir, ref->dir_gen, new_path);
				3796	if (ret < 0) {
				3797	fs_path_free(new_path);
				3798	return ret;
				3799	}
				3800	ret = fs_path_add(new_path, ref->name, ref->name_len);
				3801	if (ret < 0) {
				3802	fs_path_free(new_path);
				3803	return ret;
				3804	}
				3805
				3806	fs_path_free(ref->full_path);
				3807	set_ref_path(ref, new_path);
				3808
				3809	return 0;
				3810	}
				3811
				3812	/*
				3813	* This does all the move/link/unlink/rmdir magic.
				3814	*/
				3815	static int process_recorded_refs(struct send_ctx sctx, int pending_move)
				3816	{
				3817	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
				3818	int ret = 0;
				3819	struct recorded_ref *cur;
				3820	struct recorded_ref *cur2;
				3821	struct list_head check_dirs;
				3822	struct fs_path *valid_path = NULL;
				3823	u64 ow_inode = 0;
				3824	u64 ow_gen;
				3825	u64 ow_mode;
				3826	int did_overwrite = 0;
				3827	int is_orphan = 0;
				3828	u64 last_dir_ino_rm = 0;
				3829	bool can_rename = true;
				3830	bool orphanized_dir = false;
				3831	bool orphanized_ancestor = false;
				3832
				3833	btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino);
				3834
				3835	/*
				3836	* This should never happen as the root dir always has the same ref
				3837	* which is always '..'
				3838	*/
				3839	BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
				3840	INIT_LIST_HEAD(&check_dirs);
				3841
				3842	valid_path = fs_path_alloc();
				3843	if (!valid_path) {
				3844	ret = -ENOMEM;
				3845	goto out;
				3846	}
				3847
				3848	/*
				3849	* First, check if the first ref of the current inode was overwritten
				3850	* before. If yes, we know that the current inode was already orphanized
				3851	* and thus use the orphan name. If not, we can use get_cur_path to
				3852	* get the path of the first ref as it would like while receiving at
				3853	* this point in time.
				3854	* New inodes are always orphan at the beginning, so force to use the
				3855	* orphan name in this case.
				3856	* The first ref is stored in valid_path and will be updated if it
				3857	* gets moved around.
				3858	*/
				3859	if (!sctx->cur_inode_new) {
				3860	ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
				3861	sctx->cur_inode_gen);
				3862	if (ret < 0)
				3863	goto out;
				3864	if (ret)
				3865	did_overwrite = 1;
				3866	}
				3867	if (sctx->cur_inode_new \|\| did_overwrite) {
				3868	ret = gen_unique_name(sctx, sctx->cur_ino,
				3869	sctx->cur_inode_gen, valid_path);
				3870	if (ret < 0)
				3871	goto out;
				3872	is_orphan = 1;
				3873	} else {
				3874	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				3875	valid_path);
				3876	if (ret < 0)
				3877	goto out;
				3878	}
				3879
				3880	list_for_each_entry(cur, &sctx->new_refs, list) {
				3881	/*
				3882	* We may have refs where the parent directory does not exist
				3883	* yet. This happens if the parent directories inum is higher
				3884	* the the current inum. To handle this case, we create the
				3885	* parent directory out of order. But we need to check if this
				3886	* did already happen before due to other refs in the same dir.
				3887	*/
				3888	ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
				3889	if (ret < 0)
				3890	goto out;
				3891	if (ret == inode_state_will_create) {
				3892	ret = 0;
				3893	/*
				3894	* First check if any of the current inodes refs did
				3895	* already create the dir.
				3896	*/
				3897	list_for_each_entry(cur2, &sctx->new_refs, list) {
				3898	if (cur == cur2)
				3899	break;
				3900	if (cur2->dir == cur->dir) {
				3901	ret = 1;
				3902	break;
				3903	}
				3904	}
				3905
				3906	/*
				3907	* If that did not happen, check if a previous inode
				3908	* did already create the dir.
				3909	*/
				3910	if (!ret)
				3911	ret = did_create_dir(sctx, cur->dir);
				3912	if (ret < 0)
				3913	goto out;
				3914	if (!ret) {
				3915	ret = send_create_inode(sctx, cur->dir);
				3916	if (ret < 0)
				3917	goto out;
				3918	}
				3919	}
				3920
				3921	/*
				3922	* Check if this new ref would overwrite the first ref of
				3923	* another unprocessed inode. If yes, orphanize the
				3924	* overwritten inode. If we find an overwritten ref that is
				3925	* not the first ref, simply unlink it.
				3926	*/
				3927	ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
				3928	cur->name, cur->name_len,
				3929	&ow_inode, &ow_gen, &ow_mode);
				3930	if (ret < 0)
				3931	goto out;
				3932	if (ret) {
				3933	ret = is_first_ref(sctx->parent_root,
				3934	ow_inode, cur->dir, cur->name,
				3935	cur->name_len);
				3936	if (ret < 0)
				3937	goto out;
				3938	if (ret) {
				3939	struct name_cache_entry *nce;
				3940	struct waiting_dir_move *wdm;
				3941
				3942	ret = orphanize_inode(sctx, ow_inode, ow_gen,
				3943	cur->full_path);
				3944	if (ret < 0)
				3945	goto out;
				3946	if (S_ISDIR(ow_mode))
				3947	orphanized_dir = true;
				3948
				3949	/*
				3950	* If ow_inode has its rename operation delayed
				3951	* make sure that its orphanized name is used in
				3952	* the source path when performing its rename
				3953	* operation.
				3954	*/
				3955	if (is_waiting_for_move(sctx, ow_inode)) {
				3956	wdm = get_waiting_dir_move(sctx,
				3957	ow_inode);
				3958	ASSERT(wdm);
				3959	wdm->orphanized = true;
				3960	}
				3961
				3962	/*
				3963	* Make sure we clear our orphanized inode's
				3964	* name from the name cache. This is because the
				3965	* inode ow_inode might be an ancestor of some
				3966	* other inode that will be orphanized as well
				3967	* later and has an inode number greater than
				3968	* sctx->send_progress. We need to prevent
				3969	* future name lookups from using the old name
				3970	* and get instead the orphan name.
				3971	*/
				3972	nce = name_cache_search(sctx, ow_inode, ow_gen);
				3973	if (nce) {
				3974	name_cache_delete(sctx, nce);
				3975	kfree(nce);
				3976	}
				3977
				3978	/*
				3979	* ow_inode might currently be an ancestor of
				3980	* cur_ino, therefore compute valid_path (the
				3981	* current path of cur_ino) again because it
				3982	* might contain the pre-orphanization name of
				3983	* ow_inode, which is no longer valid.
				3984	*/
				3985	ret = is_ancestor(sctx->parent_root,
				3986	ow_inode, ow_gen,
				3987	sctx->cur_ino, NULL);
				3988	if (ret > 0) {
				3989	orphanized_ancestor = true;
				3990	fs_path_reset(valid_path);
				3991	ret = get_cur_path(sctx, sctx->cur_ino,
				3992	sctx->cur_inode_gen,
				3993	valid_path);
				3994	}
				3995	if (ret < 0)
				3996	goto out;
				3997	} else {
				3998	ret = send_unlink(sctx, cur->full_path);
				3999	if (ret < 0)
				4000	goto out;
				4001	}
				4002	}
				4003
				4004	if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
				4005	ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
				4006	if (ret < 0)
				4007	goto out;
				4008	if (ret == 1) {
				4009	can_rename = false;
				4010	*pending_move = 1;
				4011	}
				4012	}
				4013
				4014	if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
				4015	can_rename) {
				4016	ret = wait_for_parent_move(sctx, cur, is_orphan);
				4017	if (ret < 0)
				4018	goto out;
				4019	if (ret == 1) {
				4020	can_rename = false;
				4021	*pending_move = 1;
				4022	}
				4023	}
				4024
				4025	/*
				4026	* link/move the ref to the new place. If we have an orphan
				4027	* inode, move it and update valid_path. If not, link or move
				4028	* it depending on the inode mode.
				4029	*/
				4030	if (is_orphan && can_rename) {
				4031	ret = send_rename(sctx, valid_path, cur->full_path);
				4032	if (ret < 0)
				4033	goto out;
				4034	is_orphan = 0;
				4035	ret = fs_path_copy(valid_path, cur->full_path);
				4036	if (ret < 0)
				4037	goto out;
				4038	} else if (can_rename) {
				4039	if (S_ISDIR(sctx->cur_inode_mode)) {
				4040	/*
				4041	* Dirs can't be linked, so move it. For moved
				4042	* dirs, we always have one new and one deleted
				4043	* ref. The deleted ref is ignored later.
				4044	*/
				4045	ret = send_rename(sctx, valid_path,
				4046	cur->full_path);
				4047	if (!ret)
				4048	ret = fs_path_copy(valid_path,
				4049	cur->full_path);
				4050	if (ret < 0)
				4051	goto out;
				4052	} else {
				4053	/*
				4054	* We might have previously orphanized an inode
				4055	* which is an ancestor of our current inode,
				4056	* so our reference's full path, which was
				4057	* computed before any such orphanizations, must
				4058	* be updated.
				4059	*/
				4060	if (orphanized_dir) {
				4061	ret = update_ref_path(sctx, cur);
				4062	if (ret < 0)
				4063	goto out;
				4064	}
				4065	ret = send_link(sctx, cur->full_path,
				4066	valid_path);
				4067	if (ret < 0)
				4068	goto out;
				4069	}
				4070	}
				4071	ret = dup_ref(cur, &check_dirs);
				4072	if (ret < 0)
				4073	goto out;
				4074	}
				4075
				4076	if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
				4077	/*
				4078	* Check if we can already rmdir the directory. If not,
				4079	* orphanize it. For every dir item inside that gets deleted
				4080	* later, we do this check again and rmdir it then if possible.
				4081	* See the use of check_dirs for more details.
				4082	*/
				4083	ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				4084	sctx->cur_ino);
				4085	if (ret < 0)
				4086	goto out;
				4087	if (ret) {
				4088	ret = send_rmdir(sctx, valid_path);
				4089	if (ret < 0)
				4090	goto out;
				4091	} else if (!is_orphan) {
				4092	ret = orphanize_inode(sctx, sctx->cur_ino,
				4093	sctx->cur_inode_gen, valid_path);
				4094	if (ret < 0)
				4095	goto out;
				4096	is_orphan = 1;
				4097	}
				4098
				4099	list_for_each_entry(cur, &sctx->deleted_refs, list) {
				4100	ret = dup_ref(cur, &check_dirs);
				4101	if (ret < 0)
				4102	goto out;
				4103	}
				4104	} else if (S_ISDIR(sctx->cur_inode_mode) &&
				4105	!list_empty(&sctx->deleted_refs)) {
				4106	/*
				4107	* We have a moved dir. Add the old parent to check_dirs
				4108	*/
				4109	cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
				4110	list);
				4111	ret = dup_ref(cur, &check_dirs);
				4112	if (ret < 0)
				4113	goto out;
				4114	} else if (!S_ISDIR(sctx->cur_inode_mode)) {
				4115	/*
				4116	* We have a non dir inode. Go through all deleted refs and
				4117	* unlink them if they were not already overwritten by other
				4118	* inodes.
				4119	*/
				4120	list_for_each_entry(cur, &sctx->deleted_refs, list) {
				4121	ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
				4122	sctx->cur_ino, sctx->cur_inode_gen,
				4123	cur->name, cur->name_len);
				4124	if (ret < 0)
				4125	goto out;
				4126	if (!ret) {
				4127	/*
				4128	* If we orphanized any ancestor before, we need
				4129	* to recompute the full path for deleted names,
				4130	* since any such path was computed before we
				4131	* processed any references and orphanized any
				4132	* ancestor inode.
				4133	*/
				4134	if (orphanized_ancestor) {
				4135	ret = update_ref_path(sctx, cur);
				4136	if (ret < 0)
				4137	goto out;
				4138	}
				4139	ret = send_unlink(sctx, cur->full_path);
				4140	if (ret < 0)
				4141	goto out;
				4142	}
				4143	ret = dup_ref(cur, &check_dirs);
				4144	if (ret < 0)
				4145	goto out;
				4146	}
				4147	/*
				4148	* If the inode is still orphan, unlink the orphan. This may
				4149	* happen when a previous inode did overwrite the first ref
				4150	* of this inode and no new refs were added for the current
				4151	* inode. Unlinking does not mean that the inode is deleted in
				4152	* all cases. There may still be links to this inode in other
				4153	* places.
				4154	*/
				4155	if (is_orphan) {
				4156	ret = send_unlink(sctx, valid_path);
				4157	if (ret < 0)
				4158	goto out;
				4159	}
				4160	}
				4161
				4162	/*
				4163	* We did collect all parent dirs where cur_inode was once located. We
				4164	* now go through all these dirs and check if they are pending for
				4165	* deletion and if it's finally possible to perform the rmdir now.
				4166	* We also update the inode stats of the parent dirs here.
				4167	*/
				4168	list_for_each_entry(cur, &check_dirs, list) {
				4169	/*
				4170	* In case we had refs into dirs that were not processed yet,
				4171	* we don't need to do the utime and rmdir logic for these dirs.
				4172	* The dir will be processed later.
				4173	*/
				4174	if (cur->dir > sctx->cur_ino)
				4175	continue;
				4176
				4177	ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
				4178	if (ret < 0)
				4179	goto out;
				4180
				4181	if (ret == inode_state_did_create \|\|
				4182	ret == inode_state_no_change) {
				4183	/* TODO delayed utimes */
				4184	ret = send_utimes(sctx, cur->dir, cur->dir_gen);
				4185	if (ret < 0)
				4186	goto out;
				4187	} else if (ret == inode_state_did_delete &&
				4188	cur->dir != last_dir_ino_rm) {
				4189	ret = can_rmdir(sctx, cur->dir, cur->dir_gen,
				4190	sctx->cur_ino);
				4191	if (ret < 0)
				4192	goto out;
				4193	if (ret) {
				4194	ret = get_cur_path(sctx, cur->dir,
				4195	cur->dir_gen, valid_path);
				4196	if (ret < 0)
				4197	goto out;
				4198	ret = send_rmdir(sctx, valid_path);
				4199	if (ret < 0)
				4200	goto out;
				4201	last_dir_ino_rm = cur->dir;
				4202	}
				4203	}
				4204	}
				4205
				4206	ret = 0;
				4207
				4208	out:
				4209	__free_recorded_refs(&check_dirs);
				4210	free_recorded_refs(sctx);
				4211	fs_path_free(valid_path);
				4212	return ret;
				4213	}
				4214
				4215	static int record_ref(struct btrfs_root root, u64 dir, struct fs_path name,
				4216	void ctx, struct list_head refs)
				4217	{
				4218	int ret = 0;
				4219	struct send_ctx *sctx = ctx;
				4220	struct fs_path *p;
				4221	u64 gen;
				4222
				4223	p = fs_path_alloc();
				4224	if (!p)
				4225	return -ENOMEM;
				4226
				4227	ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL,
				4228	NULL, NULL);
				4229	if (ret < 0)
				4230	goto out;
				4231
				4232	ret = get_cur_path(sctx, dir, gen, p);
				4233	if (ret < 0)
				4234	goto out;
				4235	ret = fs_path_add_path(p, name);
				4236	if (ret < 0)
				4237	goto out;
				4238
				4239	ret = __record_ref(refs, dir, gen, p);
				4240
				4241	out:
				4242	if (ret)
				4243	fs_path_free(p);
				4244	return ret;
				4245	}
				4246
				4247	static int __record_new_ref(int num, u64 dir, int index,
				4248	struct fs_path *name,
				4249	void *ctx)
				4250	{
				4251	struct send_ctx *sctx = ctx;
				4252	return record_ref(sctx->send_root, dir, name, ctx, &sctx->new_refs);
				4253	}
				4254
				4255
				4256	static int __record_deleted_ref(int num, u64 dir, int index,
				4257	struct fs_path *name,
				4258	void *ctx)
				4259	{
				4260	struct send_ctx *sctx = ctx;
				4261	return record_ref(sctx->parent_root, dir, name, ctx,
				4262	&sctx->deleted_refs);
				4263	}
				4264
				4265	static int record_new_ref(struct send_ctx *sctx)
				4266	{
				4267	int ret;
				4268
				4269	ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
				4270	sctx->cmp_key, 0, __record_new_ref, sctx);
				4271	if (ret < 0)
				4272	goto out;
				4273	ret = 0;
				4274
				4275	out:
				4276	return ret;
				4277	}
				4278
				4279	static int record_deleted_ref(struct send_ctx *sctx)
				4280	{
				4281	int ret;
				4282
				4283	ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
				4284	sctx->cmp_key, 0, __record_deleted_ref, sctx);
				4285	if (ret < 0)
				4286	goto out;
				4287	ret = 0;
				4288
				4289	out:
				4290	return ret;
				4291	}
				4292
				4293	struct find_ref_ctx {
				4294	u64 dir;
				4295	u64 dir_gen;
				4296	struct btrfs_root *root;
				4297	struct fs_path *name;
				4298	int found_idx;
				4299	};
				4300
				4301	static int __find_iref(int num, u64 dir, int index,
				4302	struct fs_path *name,
				4303	void *ctx_)
				4304	{
				4305	struct find_ref_ctx *ctx = ctx_;
				4306	u64 dir_gen;
				4307	int ret;
				4308
				4309	if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
				4310	strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
				4311	/*
				4312	* To avoid doing extra lookups we'll only do this if everything
				4313	* else matches.
				4314	*/
				4315	ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
				4316	NULL, NULL, NULL);
				4317	if (ret)
				4318	return ret;
				4319	if (dir_gen != ctx->dir_gen)
				4320	return 0;
				4321	ctx->found_idx = num;
				4322	return 1;
				4323	}
				4324	return 0;
				4325	}
				4326
				4327	static int find_iref(struct btrfs_root *root,
				4328	struct btrfs_path *path,
				4329	struct btrfs_key *key,
				4330	u64 dir, u64 dir_gen, struct fs_path *name)
				4331	{
				4332	int ret;
				4333	struct find_ref_ctx ctx;
				4334
				4335	ctx.dir = dir;
				4336	ctx.name = name;
				4337	ctx.dir_gen = dir_gen;
				4338	ctx.found_idx = -1;
				4339	ctx.root = root;
				4340
				4341	ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
				4342	if (ret < 0)
				4343	return ret;
				4344
				4345	if (ctx.found_idx == -1)
				4346	return -ENOENT;
				4347
				4348	return ctx.found_idx;
				4349	}
				4350
				4351	static int __record_changed_new_ref(int num, u64 dir, int index,
				4352	struct fs_path *name,
				4353	void *ctx)
				4354	{
				4355	u64 dir_gen;
				4356	int ret;
				4357	struct send_ctx *sctx = ctx;
				4358
				4359	ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
				4360	NULL, NULL, NULL);
				4361	if (ret)
				4362	return ret;
				4363
				4364	ret = find_iref(sctx->parent_root, sctx->right_path,
				4365	sctx->cmp_key, dir, dir_gen, name);
				4366	if (ret == -ENOENT)
				4367	ret = __record_new_ref(num, dir, index, name, sctx);
				4368	else if (ret > 0)
				4369	ret = 0;
				4370
				4371	return ret;
				4372	}
				4373
				4374	static int __record_changed_deleted_ref(int num, u64 dir, int index,
				4375	struct fs_path *name,
				4376	void *ctx)
				4377	{
				4378	u64 dir_gen;
				4379	int ret;
				4380	struct send_ctx *sctx = ctx;
				4381
				4382	ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
				4383	NULL, NULL, NULL);
				4384	if (ret)
				4385	return ret;
				4386
				4387	ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
				4388	dir, dir_gen, name);
				4389	if (ret == -ENOENT)
				4390	ret = __record_deleted_ref(num, dir, index, name, sctx);
				4391	else if (ret > 0)
				4392	ret = 0;
				4393
				4394	return ret;
				4395	}
				4396
				4397	static int record_changed_ref(struct send_ctx *sctx)
				4398	{
				4399	int ret = 0;
				4400
				4401	ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
				4402	sctx->cmp_key, 0, __record_changed_new_ref, sctx);
				4403	if (ret < 0)
				4404	goto out;
				4405	ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
				4406	sctx->cmp_key, 0, __record_changed_deleted_ref, sctx);
				4407	if (ret < 0)
				4408	goto out;
				4409	ret = 0;
				4410
				4411	out:
				4412	return ret;
				4413	}
				4414
				4415	/*
				4416	* Record and process all refs at once. Needed when an inode changes the
				4417	* generation number, which means that it was deleted and recreated.
				4418	*/
				4419	static int process_all_refs(struct send_ctx *sctx,
				4420	enum btrfs_compare_tree_result cmd)
				4421	{
				4422	int ret;
				4423	struct btrfs_root *root;
				4424	struct btrfs_path *path;
				4425	struct btrfs_key key;
				4426	struct btrfs_key found_key;
				4427	struct extent_buffer *eb;
				4428	int slot;
				4429	iterate_inode_ref_t cb;
				4430	int pending_move = 0;
				4431
				4432	path = alloc_path_for_send();
				4433	if (!path)
				4434	return -ENOMEM;
				4435
				4436	if (cmd == BTRFS_COMPARE_TREE_NEW) {
				4437	root = sctx->send_root;
				4438	cb = __record_new_ref;
				4439	} else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
				4440	root = sctx->parent_root;
				4441	cb = __record_deleted_ref;
				4442	} else {
				4443	btrfs_err(sctx->send_root->fs_info,
				4444	"Wrong command %d in process_all_refs", cmd);
				4445	ret = -EINVAL;
				4446	goto out;
				4447	}
				4448
				4449	key.objectid = sctx->cmp_key->objectid;
				4450	key.type = BTRFS_INODE_REF_KEY;
				4451	key.offset = 0;
				4452	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				4453	if (ret < 0)
				4454	goto out;
				4455
				4456	while (1) {
				4457	eb = path->nodes[0];
				4458	slot = path->slots[0];
				4459	if (slot >= btrfs_header_nritems(eb)) {
				4460	ret = btrfs_next_leaf(root, path);
				4461	if (ret < 0)
				4462	goto out;
				4463	else if (ret > 0)
				4464	break;
				4465	continue;
				4466	}
				4467
				4468	btrfs_item_key_to_cpu(eb, &found_key, slot);
				4469
				4470	if (found_key.objectid != key.objectid \|\|
				4471	(found_key.type != BTRFS_INODE_REF_KEY &&
				4472	found_key.type != BTRFS_INODE_EXTREF_KEY))
				4473	break;
				4474
				4475	ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
				4476	if (ret < 0)
				4477	goto out;
				4478
				4479	path->slots[0]++;
				4480	}
				4481	btrfs_release_path(path);
				4482
				4483	/*
				4484	* We don't actually care about pending_move as we are simply
				4485	* re-creating this inode and will be rename'ing it into place once we
				4486	* rename the parent directory.
				4487	*/
				4488	ret = process_recorded_refs(sctx, &pending_move);
				4489	out:
				4490	btrfs_free_path(path);
				4491	return ret;
				4492	}
				4493
				4494	static int send_set_xattr(struct send_ctx *sctx,
				4495	struct fs_path *path,
				4496	const char *name, int name_len,
				4497	const char *data, int data_len)
				4498	{
				4499	int ret = 0;
				4500
				4501	ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
				4502	if (ret < 0)
				4503	goto out;
				4504
				4505	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
				4506	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
				4507	TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
				4508
				4509	ret = send_cmd(sctx);
				4510
				4511	tlv_put_failure:
				4512	out:
				4513	return ret;
				4514	}
				4515
				4516	static int send_remove_xattr(struct send_ctx *sctx,
				4517	struct fs_path *path,
				4518	const char *name, int name_len)
				4519	{
				4520	int ret = 0;
				4521
				4522	ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
				4523	if (ret < 0)
				4524	goto out;
				4525
				4526	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
				4527	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
				4528
				4529	ret = send_cmd(sctx);
				4530
				4531	tlv_put_failure:
				4532	out:
				4533	return ret;
				4534	}
				4535
				4536	static int __process_new_xattr(int num, struct btrfs_key *di_key,
				4537	const char *name, int name_len,
				4538	const char *data, int data_len,
				4539	u8 type, void *ctx)
				4540	{
				4541	int ret;
				4542	struct send_ctx *sctx = ctx;
				4543	struct fs_path *p;
				4544	struct posix_acl_xattr_header dummy_acl;
				4545
				4546	p = fs_path_alloc();
				4547	if (!p)
				4548	return -ENOMEM;
				4549
				4550	/*
				4551	* This hack is needed because empty acls are stored as zero byte
				4552	* data in xattrs. Problem with that is, that receiving these zero byte
				4553	* acls will fail later. To fix this, we send a dummy acl list that
				4554	* only contains the version number and no entries.
				4555	*/
				4556	if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) \|\|
				4557	!strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
				4558	if (data_len == 0) {
				4559	dummy_acl.a_version =
				4560	cpu_to_le32(POSIX_ACL_XATTR_VERSION);
				4561	data = (char *)&dummy_acl;
				4562	data_len = sizeof(dummy_acl);
				4563	}
				4564	}
				4565
				4566	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
				4567	if (ret < 0)
				4568	goto out;
				4569
				4570	ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
				4571
				4572	out:
				4573	fs_path_free(p);
				4574	return ret;
				4575	}
				4576
				4577	static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
				4578	const char *name, int name_len,
				4579	const char *data, int data_len,
				4580	u8 type, void *ctx)
				4581	{
				4582	int ret;
				4583	struct send_ctx *sctx = ctx;
				4584	struct fs_path *p;
				4585
				4586	p = fs_path_alloc();
				4587	if (!p)
				4588	return -ENOMEM;
				4589
				4590	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
				4591	if (ret < 0)
				4592	goto out;
				4593
				4594	ret = send_remove_xattr(sctx, p, name, name_len);
				4595
				4596	out:
				4597	fs_path_free(p);
				4598	return ret;
				4599	}
				4600
				4601	static int process_new_xattr(struct send_ctx *sctx)
				4602	{
				4603	int ret = 0;
				4604
				4605	ret = iterate_dir_item(sctx->send_root, sctx->left_path,
				4606	__process_new_xattr, sctx);
				4607
				4608	return ret;
				4609	}
				4610
				4611	static int process_deleted_xattr(struct send_ctx *sctx)
				4612	{
				4613	return iterate_dir_item(sctx->parent_root, sctx->right_path,
				4614	__process_deleted_xattr, sctx);
				4615	}
				4616
				4617	struct find_xattr_ctx {
				4618	const char *name;
				4619	int name_len;
				4620	int found_idx;
				4621	char *found_data;
				4622	int found_data_len;
				4623	};
				4624
				4625	static int __find_xattr(int num, struct btrfs_key *di_key,
				4626	const char *name, int name_len,
				4627	const char *data, int data_len,
				4628	u8 type, void *vctx)
				4629	{
				4630	struct find_xattr_ctx *ctx = vctx;
				4631
				4632	if (name_len == ctx->name_len &&
				4633	strncmp(name, ctx->name, name_len) == 0) {
				4634	ctx->found_idx = num;
				4635	ctx->found_data_len = data_len;
				4636	ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
				4637	if (!ctx->found_data)
				4638	return -ENOMEM;
				4639	return 1;
				4640	}
				4641	return 0;
				4642	}
				4643
				4644	static int find_xattr(struct btrfs_root *root,
				4645	struct btrfs_path *path,
				4646	struct btrfs_key *key,
				4647	const char *name, int name_len,
				4648	char *data, int data_len)
				4649	{
				4650	int ret;
				4651	struct find_xattr_ctx ctx;
				4652
				4653	ctx.name = name;
				4654	ctx.name_len = name_len;
				4655	ctx.found_idx = -1;
				4656	ctx.found_data = NULL;
				4657	ctx.found_data_len = 0;
				4658
				4659	ret = iterate_dir_item(root, path, __find_xattr, &ctx);
				4660	if (ret < 0)
				4661	return ret;
				4662
				4663	if (ctx.found_idx == -1)
				4664	return -ENOENT;
				4665	if (data) {
				4666	*data = ctx.found_data;
				4667	*data_len = ctx.found_data_len;
				4668	} else {
				4669	kfree(ctx.found_data);
				4670	}
				4671	return ctx.found_idx;
				4672	}
				4673
				4674
				4675	static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
				4676	const char *name, int name_len,
				4677	const char *data, int data_len,
				4678	u8 type, void *ctx)
				4679	{
				4680	int ret;
				4681	struct send_ctx *sctx = ctx;
				4682	char *found_data = NULL;
				4683	int found_data_len = 0;
				4684
				4685	ret = find_xattr(sctx->parent_root, sctx->right_path,
				4686	sctx->cmp_key, name, name_len, &found_data,
				4687	&found_data_len);
				4688	if (ret == -ENOENT) {
				4689	ret = __process_new_xattr(num, di_key, name, name_len, data,
				4690	data_len, type, ctx);
				4691	} else if (ret >= 0) {
				4692	if (data_len != found_data_len \|\|
				4693	memcmp(data, found_data, data_len)) {
				4694	ret = __process_new_xattr(num, di_key, name, name_len,
				4695	data, data_len, type, ctx);
				4696	} else {
				4697	ret = 0;
				4698	}
				4699	}
				4700
				4701	kfree(found_data);
				4702	return ret;
				4703	}
				4704
				4705	static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
				4706	const char *name, int name_len,
				4707	const char *data, int data_len,
				4708	u8 type, void *ctx)
				4709	{
				4710	int ret;
				4711	struct send_ctx *sctx = ctx;
				4712
				4713	ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
				4714	name, name_len, NULL, NULL);
				4715	if (ret == -ENOENT)
				4716	ret = __process_deleted_xattr(num, di_key, name, name_len, data,
				4717	data_len, type, ctx);
				4718	else if (ret >= 0)
				4719	ret = 0;
				4720
				4721	return ret;
				4722	}
				4723
				4724	static int process_changed_xattr(struct send_ctx *sctx)
				4725	{
				4726	int ret = 0;
				4727
				4728	ret = iterate_dir_item(sctx->send_root, sctx->left_path,
				4729	__process_changed_new_xattr, sctx);
				4730	if (ret < 0)
				4731	goto out;
				4732	ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
				4733	__process_changed_deleted_xattr, sctx);
				4734
				4735	out:
				4736	return ret;
				4737	}
				4738
				4739	static int process_all_new_xattrs(struct send_ctx *sctx)
				4740	{
				4741	int ret;
				4742	struct btrfs_root *root;
				4743	struct btrfs_path *path;
				4744	struct btrfs_key key;
				4745	struct btrfs_key found_key;
				4746	struct extent_buffer *eb;
				4747	int slot;
				4748
				4749	path = alloc_path_for_send();
				4750	if (!path)
				4751	return -ENOMEM;
				4752
				4753	root = sctx->send_root;
				4754
				4755	key.objectid = sctx->cmp_key->objectid;
				4756	key.type = BTRFS_XATTR_ITEM_KEY;
				4757	key.offset = 0;
				4758	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				4759	if (ret < 0)
				4760	goto out;
				4761
				4762	while (1) {
				4763	eb = path->nodes[0];
				4764	slot = path->slots[0];
				4765	if (slot >= btrfs_header_nritems(eb)) {
				4766	ret = btrfs_next_leaf(root, path);
				4767	if (ret < 0) {
				4768	goto out;
				4769	} else if (ret > 0) {
				4770	ret = 0;
				4771	break;
				4772	}
				4773	continue;
				4774	}
				4775
				4776	btrfs_item_key_to_cpu(eb, &found_key, slot);
				4777	if (found_key.objectid != key.objectid \|\|
				4778	found_key.type != key.type) {
				4779	ret = 0;
				4780	goto out;
				4781	}
				4782
				4783	ret = iterate_dir_item(root, path, __process_new_xattr, sctx);
				4784	if (ret < 0)
				4785	goto out;
				4786
				4787	path->slots[0]++;
				4788	}
				4789
				4790	out:
				4791	btrfs_free_path(path);
				4792	return ret;
				4793	}
				4794
				4795	static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
				4796	{
				4797	struct btrfs_root *root = sctx->send_root;
				4798	struct btrfs_fs_info *fs_info = root->fs_info;
				4799	struct inode *inode;
				4800	struct page *page;
				4801	char *addr;
				4802	struct btrfs_key key;
				4803	pgoff_t index = offset >> PAGE_SHIFT;
				4804	pgoff_t last_index;
				4805	unsigned pg_offset = offset & ~PAGE_MASK;
				4806	ssize_t ret = 0;
				4807
				4808	key.objectid = sctx->cur_ino;
				4809	key.type = BTRFS_INODE_ITEM_KEY;
				4810	key.offset = 0;
				4811
				4812	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
				4813	if (IS_ERR(inode))
				4814	return PTR_ERR(inode);
				4815
				4816	if (offset + len > i_size_read(inode)) {
				4817	if (offset > i_size_read(inode))
				4818	len = 0;
				4819	else
				4820	len = offset - i_size_read(inode);
				4821	}
				4822	if (len == 0)
				4823	goto out;
				4824
				4825	last_index = (offset + len - 1) >> PAGE_SHIFT;
				4826
				4827	/* initial readahead */
				4828	memset(&sctx->ra, 0, sizeof(struct file_ra_state));
				4829	file_ra_state_init(&sctx->ra, inode->i_mapping);
				4830
				4831	while (index <= last_index) {
				4832	unsigned cur_len = min_t(unsigned, len,
				4833	PAGE_SIZE - pg_offset);
				4834
				4835	page = find_lock_page(inode->i_mapping, index);
				4836	if (!page) {
				4837	page_cache_sync_readahead(inode->i_mapping, &sctx->ra,
				4838	NULL, index, last_index + 1 - index);
				4839
				4840	page = find_or_create_page(inode->i_mapping, index,
				4841	GFP_KERNEL);
				4842	if (!page) {
				4843	ret = -ENOMEM;
				4844	break;
				4845	}
				4846	}
				4847
				4848	if (PageReadahead(page)) {
				4849	page_cache_async_readahead(inode->i_mapping, &sctx->ra,
				4850	NULL, page, index, last_index + 1 - index);
				4851	}
				4852
				4853	if (!PageUptodate(page)) {
				4854	btrfs_readpage(NULL, page);
				4855	lock_page(page);
				4856	if (!PageUptodate(page)) {
				4857	unlock_page(page);
				4858	put_page(page);
				4859	ret = -EIO;
				4860	break;
				4861	}
				4862	}
				4863
				4864	addr = kmap(page);
				4865	memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
				4866	kunmap(page);
				4867	unlock_page(page);
				4868	put_page(page);
				4869	index++;
				4870	pg_offset = 0;
				4871	len -= cur_len;
				4872	ret += cur_len;
				4873	}
				4874	out:
				4875	iput(inode);
				4876	return ret;
				4877	}
				4878
				4879	/*
				4880	* Read some bytes from the current inode/file and send a write command to
				4881	* user space.
				4882	*/
				4883	static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
				4884	{
				4885	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
				4886	int ret = 0;
				4887	struct fs_path *p;
				4888	ssize_t num_read = 0;
				4889
				4890	p = fs_path_alloc();
				4891	if (!p)
				4892	return -ENOMEM;
				4893
				4894	btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len);
				4895
				4896	num_read = fill_read_buf(sctx, offset, len);
				4897	if (num_read <= 0) {
				4898	if (num_read < 0)
				4899	ret = num_read;
				4900	goto out;
				4901	}
				4902
				4903	ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
				4904	if (ret < 0)
				4905	goto out;
				4906
				4907	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
				4908	if (ret < 0)
				4909	goto out;
				4910
				4911	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
				4912	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
				4913	TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read);
				4914
				4915	ret = send_cmd(sctx);
				4916
				4917	tlv_put_failure:
				4918	out:
				4919	fs_path_free(p);
				4920	if (ret < 0)
				4921	return ret;
				4922	return num_read;
				4923	}
				4924
				4925	/*
				4926	* Send a clone command to user space.
				4927	*/
				4928	static int send_clone(struct send_ctx *sctx,
				4929	u64 offset, u32 len,
				4930	struct clone_root *clone_root)
				4931	{
				4932	int ret = 0;
				4933	struct fs_path *p;
				4934	u64 gen;
				4935
				4936	btrfs_debug(sctx->send_root->fs_info,
				4937	"send_clone offset=%llu, len=%d, clone_root=%llu, clone_inode=%llu, clone_offset=%llu",
				4938	offset, len, clone_root->root->objectid, clone_root->ino,
				4939	clone_root->offset);
				4940
				4941	p = fs_path_alloc();
				4942	if (!p)
				4943	return -ENOMEM;
				4944
				4945	ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
				4946	if (ret < 0)
				4947	goto out;
				4948
				4949	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
				4950	if (ret < 0)
				4951	goto out;
				4952
				4953	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
				4954	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
				4955	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
				4956
				4957	if (clone_root->root == sctx->send_root) {
				4958	ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
				4959	&gen, NULL, NULL, NULL, NULL);
				4960	if (ret < 0)
				4961	goto out;
				4962	ret = get_cur_path(sctx, clone_root->ino, gen, p);
				4963	} else {
				4964	ret = get_inode_path(clone_root->root, clone_root->ino, p);
				4965	}
				4966	if (ret < 0)
				4967	goto out;
				4968
				4969	/*
				4970	* If the parent we're using has a received_uuid set then use that as
				4971	* our clone source as that is what we will look for when doing a
				4972	* receive.
				4973	*
				4974	* This covers the case that we create a snapshot off of a received
				4975	* subvolume and then use that as the parent and try to receive on a
				4976	* different host.
				4977	*/
				4978	if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
				4979	TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
				4980	clone_root->root->root_item.received_uuid);
				4981	else
				4982	TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
				4983	clone_root->root->root_item.uuid);
				4984	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
				4985	le64_to_cpu(clone_root->root->root_item.ctransid));
				4986	TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
				4987	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
				4988	clone_root->offset);
				4989
				4990	ret = send_cmd(sctx);
				4991
				4992	tlv_put_failure:
				4993	out:
				4994	fs_path_free(p);
				4995	return ret;
				4996	}
				4997
				4998	/*
				4999	* Send an update extent command to user space.
				5000	*/
				5001	static int send_update_extent(struct send_ctx *sctx,
				5002	u64 offset, u32 len)
				5003	{
				5004	int ret = 0;
				5005	struct fs_path *p;
				5006
				5007	p = fs_path_alloc();
				5008	if (!p)
				5009	return -ENOMEM;
				5010
				5011	ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
				5012	if (ret < 0)
				5013	goto out;
				5014
				5015	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
				5016	if (ret < 0)
				5017	goto out;
				5018
				5019	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
				5020	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
				5021	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
				5022
				5023	ret = send_cmd(sctx);
				5024
				5025	tlv_put_failure:
				5026	out:
				5027	fs_path_free(p);
				5028	return ret;
				5029	}
				5030
				5031	static int send_hole(struct send_ctx *sctx, u64 end)
				5032	{
				5033	struct fs_path *p = NULL;
				5034	u64 offset = sctx->cur_inode_last_extent;
				5035	u64 len;
				5036	int ret = 0;
				5037
				5038	/*
				5039	* A hole that starts at EOF or beyond it. Since we do not yet support
				5040	* fallocate (for extent preallocation and hole punching), sending a
				5041	* write of zeroes starting at EOF or beyond would later require issuing
				5042	* a truncate operation which would undo the write and achieve nothing.
				5043	*/
				5044	if (offset >= sctx->cur_inode_size)
				5045	return 0;
				5046
				5047	/*
				5048	* Don't go beyond the inode's i_size due to prealloc extents that start
				5049	* after the i_size.
				5050	*/
				5051	end = min_t(u64, end, sctx->cur_inode_size);
				5052
				5053	if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
				5054	return send_update_extent(sctx, offset, end - offset);
				5055
				5056	p = fs_path_alloc();
				5057	if (!p)
				5058	return -ENOMEM;
				5059	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
				5060	if (ret < 0)
				5061	goto tlv_put_failure;
				5062	memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
				5063	while (offset < end) {
				5064	len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
				5065
				5066	ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
				5067	if (ret < 0)
				5068	break;
				5069	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
				5070	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
				5071	TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
				5072	ret = send_cmd(sctx);
				5073	if (ret < 0)
				5074	break;
				5075	offset += len;
				5076	}
				5077	sctx->cur_inode_next_write_offset = offset;
				5078	tlv_put_failure:
				5079	fs_path_free(p);
				5080	return ret;
				5081	}
				5082
				5083	static int send_extent_data(struct send_ctx *sctx,
				5084	const u64 offset,
				5085	const u64 len)
				5086	{
				5087	u64 sent = 0;
				5088
				5089	if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
				5090	return send_update_extent(sctx, offset, len);
				5091
				5092	while (sent < len) {
				5093	u64 size = len - sent;
				5094	int ret;
				5095
				5096	if (size > BTRFS_SEND_READ_SIZE)
				5097	size = BTRFS_SEND_READ_SIZE;
				5098	ret = send_write(sctx, offset + sent, size);
				5099	if (ret < 0)
				5100	return ret;
				5101	if (!ret)
				5102	break;
				5103	sent += ret;
				5104	}
				5105	return 0;
				5106	}
				5107
				5108	static int clone_range(struct send_ctx *sctx,
				5109	struct clone_root *clone_root,
				5110	const u64 disk_byte,
				5111	u64 data_offset,
				5112	u64 offset,
				5113	u64 len)
				5114	{
				5115	struct btrfs_path *path;
				5116	struct btrfs_key key;
				5117	int ret;
				5118
				5119	/*
				5120	* Prevent cloning from a zero offset with a length matching the sector
				5121	* size because in some scenarios this will make the receiver fail.
				5122	*
				5123	* For example, if in the source filesystem the extent at offset 0
				5124	* has a length of sectorsize and it was written using direct IO, then
				5125	* it can never be an inline extent (even if compression is enabled).
				5126	* Then this extent can be cloned in the original filesystem to a non
				5127	* zero file offset, but it may not be possible to clone in the
				5128	* destination filesystem because it can be inlined due to compression
				5129	* on the destination filesystem (as the receiver's write operations are
				5130	* always done using buffered IO). The same happens when the original
				5131	* filesystem does not have compression enabled but the destination
				5132	* filesystem has.
				5133	*/
				5134	if (clone_root->offset == 0 &&
				5135	len == sctx->send_root->fs_info->sectorsize)
				5136	return send_extent_data(sctx, offset, len);
				5137
				5138	path = alloc_path_for_send();
				5139	if (!path)
				5140	return -ENOMEM;
				5141
				5142	/*
				5143	* We can't send a clone operation for the entire range if we find
				5144	* extent items in the respective range in the source file that
				5145	* refer to different extents or if we find holes.
				5146	* So check for that and do a mix of clone and regular write/copy
				5147	* operations if needed.
				5148	*
				5149	* Example:
				5150	*
				5151	* mkfs.btrfs -f /dev/sda
				5152	* mount /dev/sda /mnt
				5153	* xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
				5154	* cp --reflink=always /mnt/foo /mnt/bar
				5155	* xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
				5156	* btrfs subvolume snapshot -r /mnt /mnt/snap
				5157	*
				5158	* If when we send the snapshot and we are processing file bar (which
				5159	* has a higher inode number than foo) we blindly send a clone operation
				5160	* for the [0, 100K[ range from foo to bar, the receiver ends up getting
				5161	* a file bar that matches the content of file foo - iow, doesn't match
				5162	* the content from bar in the original filesystem.
				5163	*/
				5164	key.objectid = clone_root->ino;
				5165	key.type = BTRFS_EXTENT_DATA_KEY;
				5166	key.offset = clone_root->offset;
				5167	ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
				5168	if (ret < 0)
				5169	goto out;
				5170	if (ret > 0 && path->slots[0] > 0) {
				5171	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
				5172	if (key.objectid == clone_root->ino &&
				5173	key.type == BTRFS_EXTENT_DATA_KEY)
				5174	path->slots[0]--;
				5175	}
				5176
				5177	while (true) {
				5178	struct extent_buffer *leaf = path->nodes[0];
				5179	int slot = path->slots[0];
				5180	struct btrfs_file_extent_item *ei;
				5181	u8 type;
				5182	u64 ext_len;
				5183	u64 clone_len;
				5184
				5185	if (slot >= btrfs_header_nritems(leaf)) {
				5186	ret = btrfs_next_leaf(clone_root->root, path);
				5187	if (ret < 0)
				5188	goto out;
				5189	else if (ret > 0)
				5190	break;
				5191	continue;
				5192	}
				5193
				5194	btrfs_item_key_to_cpu(leaf, &key, slot);
				5195
				5196	/*
				5197	* We might have an implicit trailing hole (NO_HOLES feature
				5198	* enabled). We deal with it after leaving this loop.
				5199	*/
				5200	if (key.objectid != clone_root->ino \|\|
				5201	key.type != BTRFS_EXTENT_DATA_KEY)
				5202	break;
				5203
				5204	ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
				5205	type = btrfs_file_extent_type(leaf, ei);
				5206	if (type == BTRFS_FILE_EXTENT_INLINE) {
				5207	ext_len = btrfs_file_extent_ram_bytes(leaf, ei);
				5208	ext_len = PAGE_ALIGN(ext_len);
				5209	} else {
				5210	ext_len = btrfs_file_extent_num_bytes(leaf, ei);
				5211	}
				5212
				5213	if (key.offset + ext_len <= clone_root->offset)
				5214	goto next;
				5215
				5216	if (key.offset > clone_root->offset) {
				5217	/* Implicit hole, NO_HOLES feature enabled. */
				5218	u64 hole_len = key.offset - clone_root->offset;
				5219
				5220	if (hole_len > len)
				5221	hole_len = len;
				5222	ret = send_extent_data(sctx, offset, hole_len);
				5223	if (ret < 0)
				5224	goto out;
				5225
				5226	len -= hole_len;
				5227	if (len == 0)
				5228	break;
				5229	offset += hole_len;
				5230	clone_root->offset += hole_len;
				5231	data_offset += hole_len;
				5232	}
				5233
				5234	if (key.offset >= clone_root->offset + len)
				5235	break;
				5236
				5237	clone_len = min_t(u64, ext_len, len);
				5238
				5239	if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
				5240	btrfs_file_extent_offset(leaf, ei) == data_offset)
				5241	ret = send_clone(sctx, offset, clone_len, clone_root);
				5242	else
				5243	ret = send_extent_data(sctx, offset, clone_len);
				5244
				5245	if (ret < 0)
				5246	goto out;
				5247
				5248	len -= clone_len;
				5249	if (len == 0)
				5250	break;
				5251	offset += clone_len;
				5252	clone_root->offset += clone_len;
				5253	data_offset += clone_len;
				5254	next:
				5255	path->slots[0]++;
				5256	}
				5257
				5258	if (len > 0)
				5259	ret = send_extent_data(sctx, offset, len);
				5260	else
				5261	ret = 0;
				5262	out:
				5263	btrfs_free_path(path);
				5264	return ret;
				5265	}
				5266
				5267	static int send_write_or_clone(struct send_ctx *sctx,
				5268	struct btrfs_path *path,
				5269	struct btrfs_key *key,
				5270	struct clone_root *clone_root)
				5271	{
				5272	int ret = 0;
				5273	struct btrfs_file_extent_item *ei;
				5274	u64 offset = key->offset;
				5275	u64 len;
				5276	u8 type;
				5277	u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
				5278
				5279	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
				5280	struct btrfs_file_extent_item);
				5281	type = btrfs_file_extent_type(path->nodes[0], ei);
				5282	if (type == BTRFS_FILE_EXTENT_INLINE) {
				5283	len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
				5284	/*
				5285	* it is possible the inline item won't cover the whole page,
				5286	* but there may be items after this page. Make
				5287	* sure to send the whole thing
				5288	*/
				5289	len = PAGE_ALIGN(len);
				5290	} else {
				5291	len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
				5292	}
				5293
				5294	if (offset >= sctx->cur_inode_size) {
				5295	ret = 0;
				5296	goto out;
				5297	}
				5298	if (offset + len > sctx->cur_inode_size)
				5299	len = sctx->cur_inode_size - offset;
				5300	if (len == 0) {
				5301	ret = 0;
				5302	goto out;
				5303	}
				5304
				5305	if (clone_root && IS_ALIGNED(offset + len, bs)) {
				5306	u64 disk_byte;
				5307	u64 data_offset;
				5308
				5309	disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
				5310	data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
				5311	ret = clone_range(sctx, clone_root, disk_byte, data_offset,
				5312	offset, len);
				5313	} else {
				5314	ret = send_extent_data(sctx, offset, len);
				5315	}
				5316	sctx->cur_inode_next_write_offset = offset + len;
				5317	out:
				5318	return ret;
				5319	}
				5320
				5321	static int is_extent_unchanged(struct send_ctx *sctx,
				5322	struct btrfs_path *left_path,
				5323	struct btrfs_key *ekey)
				5324	{
				5325	int ret = 0;
				5326	struct btrfs_key key;
				5327	struct btrfs_path *path = NULL;
				5328	struct extent_buffer *eb;
				5329	int slot;
				5330	struct btrfs_key found_key;
				5331	struct btrfs_file_extent_item *ei;
				5332	u64 left_disknr;
				5333	u64 right_disknr;
				5334	u64 left_offset;
				5335	u64 right_offset;
				5336	u64 left_offset_fixed;
				5337	u64 left_len;
				5338	u64 right_len;
				5339	u64 left_gen;
				5340	u64 right_gen;
				5341	u8 left_type;
				5342	u8 right_type;
				5343
				5344	path = alloc_path_for_send();
				5345	if (!path)
				5346	return -ENOMEM;
				5347
				5348	eb = left_path->nodes[0];
				5349	slot = left_path->slots[0];
				5350	ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
				5351	left_type = btrfs_file_extent_type(eb, ei);
				5352
				5353	if (left_type != BTRFS_FILE_EXTENT_REG) {
				5354	ret = 0;
				5355	goto out;
				5356	}
				5357	left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
				5358	left_len = btrfs_file_extent_num_bytes(eb, ei);
				5359	left_offset = btrfs_file_extent_offset(eb, ei);
				5360	left_gen = btrfs_file_extent_generation(eb, ei);
				5361
				5362	/*
				5363	* Following comments will refer to these graphics. L is the left
				5364	* extents which we are checking at the moment. 1-8 are the right
				5365	* extents that we iterate.
				5366	*
				5367	* \|-----L-----\|
				5368	* \|-1-\|-2a-\|-3-\|-4-\|-5-\|-6-\|
				5369	*
				5370	* \|-----L-----\|
				5371	* \|--1--\|-2b-\|...(same as above)
				5372	*
				5373	* Alternative situation. Happens on files where extents got split.
				5374	* \|-----L-----\|
				5375	* \|-----------7-----------\|-6-\|
				5376	*
				5377	* Alternative situation. Happens on files which got larger.
				5378	* \|-----L-----\|
				5379	* \|-8-\|
				5380	* Nothing follows after 8.
				5381	*/
				5382
				5383	key.objectid = ekey->objectid;
				5384	key.type = BTRFS_EXTENT_DATA_KEY;
				5385	key.offset = ekey->offset;
				5386	ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
				5387	if (ret < 0)
				5388	goto out;
				5389	if (ret) {
				5390	ret = 0;
				5391	goto out;
				5392	}
				5393
				5394	/*
				5395	* Handle special case where the right side has no extents at all.
				5396	*/
				5397	eb = path->nodes[0];
				5398	slot = path->slots[0];
				5399	btrfs_item_key_to_cpu(eb, &found_key, slot);
				5400	if (found_key.objectid != key.objectid \|\|
				5401	found_key.type != key.type) {
				5402	/* If we're a hole then just pretend nothing changed */
				5403	ret = (left_disknr) ? 0 : 1;
				5404	goto out;
				5405	}
				5406
				5407	/*
				5408	* We're now on 2a, 2b or 7.
				5409	*/
				5410	key = found_key;
				5411	while (key.offset < ekey->offset + left_len) {
				5412	ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
				5413	right_type = btrfs_file_extent_type(eb, ei);
				5414	if (right_type != BTRFS_FILE_EXTENT_REG &&
				5415	right_type != BTRFS_FILE_EXTENT_INLINE) {
				5416	ret = 0;
				5417	goto out;
				5418	}
				5419
				5420	if (right_type == BTRFS_FILE_EXTENT_INLINE) {
				5421	right_len = btrfs_file_extent_ram_bytes(eb, ei);
				5422	right_len = PAGE_ALIGN(right_len);
				5423	} else {
				5424	right_len = btrfs_file_extent_num_bytes(eb, ei);
				5425	}
				5426
				5427	/*
				5428	* Are we at extent 8? If yes, we know the extent is changed.
				5429	* This may only happen on the first iteration.
				5430	*/
				5431	if (found_key.offset + right_len <= ekey->offset) {
				5432	/* If we're a hole just pretend nothing changed */
				5433	ret = (left_disknr) ? 0 : 1;
				5434	goto out;
				5435	}
				5436
				5437	/*
				5438	* We just wanted to see if when we have an inline extent, what
				5439	* follows it is a regular extent (wanted to check the above
				5440	* condition for inline extents too). This should normally not
				5441	* happen but it's possible for example when we have an inline
				5442	* compressed extent representing data with a size matching
				5443	* the page size (currently the same as sector size).
				5444	*/
				5445	if (right_type == BTRFS_FILE_EXTENT_INLINE) {
				5446	ret = 0;
				5447	goto out;
				5448	}
				5449
				5450	right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
				5451	right_offset = btrfs_file_extent_offset(eb, ei);
				5452	right_gen = btrfs_file_extent_generation(eb, ei);
				5453
				5454	left_offset_fixed = left_offset;
				5455	if (key.offset < ekey->offset) {
				5456	/* Fix the right offset for 2a and 7. */
				5457	right_offset += ekey->offset - key.offset;
				5458	} else {
				5459	/* Fix the left offset for all behind 2a and 2b */
				5460	left_offset_fixed += key.offset - ekey->offset;
				5461	}
				5462
				5463	/*
				5464	* Check if we have the same extent.
				5465	*/
				5466	if (left_disknr != right_disknr \|\|
				5467	left_offset_fixed != right_offset \|\|
				5468	left_gen != right_gen) {
				5469	ret = 0;
				5470	goto out;
				5471	}
				5472
				5473	/*
				5474	* Go to the next extent.
				5475	*/
				5476	ret = btrfs_next_item(sctx->parent_root, path);
				5477	if (ret < 0)
				5478	goto out;
				5479	if (!ret) {
				5480	eb = path->nodes[0];
				5481	slot = path->slots[0];
				5482	btrfs_item_key_to_cpu(eb, &found_key, slot);
				5483	}
				5484	if (ret \|\| found_key.objectid != key.objectid \|\|
				5485	found_key.type != key.type) {
				5486	key.offset += right_len;
				5487	break;
				5488	}
				5489	if (found_key.offset != key.offset + right_len) {
				5490	ret = 0;
				5491	goto out;
				5492	}
				5493	key = found_key;
				5494	}
				5495
				5496	/*
				5497	* We're now behind the left extent (treat as unchanged) or at the end
				5498	* of the right side (treat as changed).
				5499	*/
				5500	if (key.offset >= ekey->offset + left_len)
				5501	ret = 1;
				5502	else
				5503	ret = 0;
				5504
				5505
				5506	out:
				5507	btrfs_free_path(path);
				5508	return ret;
				5509	}
				5510
				5511	static int get_last_extent(struct send_ctx *sctx, u64 offset)
				5512	{
				5513	struct btrfs_path *path;
				5514	struct btrfs_root *root = sctx->send_root;
				5515	struct btrfs_file_extent_item *fi;
				5516	struct btrfs_key key;
				5517	u64 extent_end;
				5518	u8 type;
				5519	int ret;
				5520
				5521	path = alloc_path_for_send();
				5522	if (!path)
				5523	return -ENOMEM;
				5524
				5525	sctx->cur_inode_last_extent = 0;
				5526
				5527	key.objectid = sctx->cur_ino;
				5528	key.type = BTRFS_EXTENT_DATA_KEY;
				5529	key.offset = offset;
				5530	ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
				5531	if (ret < 0)
				5532	goto out;
				5533	ret = 0;
				5534	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
				5535	if (key.objectid != sctx->cur_ino \|\| key.type != BTRFS_EXTENT_DATA_KEY)
				5536	goto out;
				5537
				5538	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
				5539	struct btrfs_file_extent_item);
				5540	type = btrfs_file_extent_type(path->nodes[0], fi);
				5541	if (type == BTRFS_FILE_EXTENT_INLINE) {
				5542	u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
				5543	extent_end = ALIGN(key.offset + size,
				5544	sctx->send_root->fs_info->sectorsize);
				5545	} else {
				5546	extent_end = key.offset +
				5547	btrfs_file_extent_num_bytes(path->nodes[0], fi);
				5548	}
				5549	sctx->cur_inode_last_extent = extent_end;
				5550	out:
				5551	btrfs_free_path(path);
				5552	return ret;
				5553	}
				5554
				5555	static int range_is_hole_in_parent(struct send_ctx *sctx,
				5556	const u64 start,
				5557	const u64 end)
				5558	{
				5559	struct btrfs_path *path;
				5560	struct btrfs_key key;
				5561	struct btrfs_root *root = sctx->parent_root;
				5562	u64 search_start = start;
				5563	int ret;
				5564
				5565	path = alloc_path_for_send();
				5566	if (!path)
				5567	return -ENOMEM;
				5568
				5569	key.objectid = sctx->cur_ino;
				5570	key.type = BTRFS_EXTENT_DATA_KEY;
				5571	key.offset = search_start;
				5572	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				5573	if (ret < 0)
				5574	goto out;
				5575	if (ret > 0 && path->slots[0] > 0)
				5576	path->slots[0]--;
				5577
				5578	while (search_start < end) {
				5579	struct extent_buffer *leaf = path->nodes[0];
				5580	int slot = path->slots[0];
				5581	struct btrfs_file_extent_item *fi;
				5582	u64 extent_end;
				5583
				5584	if (slot >= btrfs_header_nritems(leaf)) {
				5585	ret = btrfs_next_leaf(root, path);
				5586	if (ret < 0)
				5587	goto out;
				5588	else if (ret > 0)
				5589	break;
				5590	continue;
				5591	}
				5592
				5593	btrfs_item_key_to_cpu(leaf, &key, slot);
				5594	if (key.objectid < sctx->cur_ino \|\|
				5595	key.type < BTRFS_EXTENT_DATA_KEY)
				5596	goto next;
				5597	if (key.objectid > sctx->cur_ino \|\|
				5598	key.type > BTRFS_EXTENT_DATA_KEY \|\|
				5599	key.offset >= end)
				5600	break;
				5601
				5602	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
				5603	if (btrfs_file_extent_type(leaf, fi) ==
				5604	BTRFS_FILE_EXTENT_INLINE) {
				5605	u64 size = btrfs_file_extent_ram_bytes(leaf, fi);
				5606
				5607	extent_end = ALIGN(key.offset + size,
				5608	root->fs_info->sectorsize);
				5609	} else {
				5610	extent_end = key.offset +
				5611	btrfs_file_extent_num_bytes(leaf, fi);
				5612	}
				5613	if (extent_end <= start)
				5614	goto next;
				5615	if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
				5616	search_start = extent_end;
				5617	goto next;
				5618	}
				5619	ret = 0;
				5620	goto out;
				5621	next:
				5622	path->slots[0]++;
				5623	}
				5624	ret = 1;
				5625	out:
				5626	btrfs_free_path(path);
				5627	return ret;
				5628	}
				5629
				5630	static int maybe_send_hole(struct send_ctx sctx, struct btrfs_path path,
				5631	struct btrfs_key *key)
				5632	{
				5633	struct btrfs_file_extent_item *fi;
				5634	u64 extent_end;
				5635	u8 type;
				5636	int ret = 0;
				5637
				5638	if (sctx->cur_ino != key->objectid \|\| !need_send_hole(sctx))
				5639	return 0;
				5640
				5641	if (sctx->cur_inode_last_extent == (u64)-1) {
				5642	ret = get_last_extent(sctx, key->offset - 1);
				5643	if (ret)
				5644	return ret;
				5645	}
				5646
				5647	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
				5648	struct btrfs_file_extent_item);
				5649	type = btrfs_file_extent_type(path->nodes[0], fi);
				5650	if (type == BTRFS_FILE_EXTENT_INLINE) {
				5651	u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
				5652	extent_end = ALIGN(key->offset + size,
				5653	sctx->send_root->fs_info->sectorsize);
				5654	} else {
				5655	extent_end = key->offset +
				5656	btrfs_file_extent_num_bytes(path->nodes[0], fi);
				5657	}
				5658
				5659	if (path->slots[0] == 0 &&
				5660	sctx->cur_inode_last_extent < key->offset) {
				5661	/*
				5662	* We might have skipped entire leafs that contained only
				5663	* file extent items for our current inode. These leafs have
				5664	* a generation number smaller (older) than the one in the
				5665	* current leaf and the leaf our last extent came from, and
				5666	* are located between these 2 leafs.
				5667	*/
				5668	ret = get_last_extent(sctx, key->offset - 1);
				5669	if (ret)
				5670	return ret;
				5671	}
				5672
				5673	if (sctx->cur_inode_last_extent < key->offset) {
				5674	ret = range_is_hole_in_parent(sctx,
				5675	sctx->cur_inode_last_extent,
				5676	key->offset);
				5677	if (ret < 0)
				5678	return ret;
				5679	else if (ret == 0)
				5680	ret = send_hole(sctx, key->offset);
				5681	else
				5682	ret = 0;
				5683	}
				5684	sctx->cur_inode_last_extent = extent_end;
				5685	return ret;
				5686	}
				5687
				5688	static int process_extent(struct send_ctx *sctx,
				5689	struct btrfs_path *path,
				5690	struct btrfs_key *key)
				5691	{
				5692	struct clone_root *found_clone = NULL;
				5693	int ret = 0;
				5694
				5695	if (S_ISLNK(sctx->cur_inode_mode))
				5696	return 0;
				5697
				5698	if (sctx->parent_root && !sctx->cur_inode_new) {
				5699	ret = is_extent_unchanged(sctx, path, key);
				5700	if (ret < 0)
				5701	goto out;
				5702	if (ret) {
				5703	ret = 0;
				5704	goto out_hole;
				5705	}
				5706	} else {
				5707	struct btrfs_file_extent_item *ei;
				5708	u8 type;
				5709
				5710	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
				5711	struct btrfs_file_extent_item);
				5712	type = btrfs_file_extent_type(path->nodes[0], ei);
				5713	if (type == BTRFS_FILE_EXTENT_PREALLOC \|\|
				5714	type == BTRFS_FILE_EXTENT_REG) {
				5715	/*
				5716	* The send spec does not have a prealloc command yet,
				5717	* so just leave a hole for prealloc'ed extents until
				5718	* we have enough commands queued up to justify rev'ing
				5719	* the send spec.
				5720	*/
				5721	if (type == BTRFS_FILE_EXTENT_PREALLOC) {
				5722	ret = 0;
				5723	goto out;
				5724	}
				5725
				5726	/* Have a hole, just skip it. */
				5727	if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
				5728	ret = 0;
				5729	goto out;
				5730	}
				5731	}
				5732	}
				5733
				5734	ret = find_extent_clone(sctx, path, key->objectid, key->offset,
				5735	sctx->cur_inode_size, &found_clone);
				5736	if (ret != -ENOENT && ret < 0)
				5737	goto out;
				5738
				5739	ret = send_write_or_clone(sctx, path, key, found_clone);
				5740	if (ret)
				5741	goto out;
				5742	out_hole:
				5743	ret = maybe_send_hole(sctx, path, key);
				5744	out:
				5745	return ret;
				5746	}
				5747
				5748	static int process_all_extents(struct send_ctx *sctx)
				5749	{
				5750	int ret;
				5751	struct btrfs_root *root;
				5752	struct btrfs_path *path;
				5753	struct btrfs_key key;
				5754	struct btrfs_key found_key;
				5755	struct extent_buffer *eb;
				5756	int slot;
				5757
				5758	root = sctx->send_root;
				5759	path = alloc_path_for_send();
				5760	if (!path)
				5761	return -ENOMEM;
				5762
				5763	key.objectid = sctx->cmp_key->objectid;
				5764	key.type = BTRFS_EXTENT_DATA_KEY;
				5765	key.offset = 0;
				5766	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				5767	if (ret < 0)
				5768	goto out;
				5769
				5770	while (1) {
				5771	eb = path->nodes[0];
				5772	slot = path->slots[0];
				5773
				5774	if (slot >= btrfs_header_nritems(eb)) {
				5775	ret = btrfs_next_leaf(root, path);
				5776	if (ret < 0) {
				5777	goto out;
				5778	} else if (ret > 0) {
				5779	ret = 0;
				5780	break;
				5781	}
				5782	continue;
				5783	}
				5784
				5785	btrfs_item_key_to_cpu(eb, &found_key, slot);
				5786
				5787	if (found_key.objectid != key.objectid \|\|
				5788	found_key.type != key.type) {
				5789	ret = 0;
				5790	goto out;
				5791	}
				5792
				5793	ret = process_extent(sctx, path, &found_key);
				5794	if (ret < 0)
				5795	goto out;
				5796
				5797	path->slots[0]++;
				5798	}
				5799
				5800	out:
				5801	btrfs_free_path(path);
				5802	return ret;
				5803	}
				5804
				5805	static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
				5806	int *pending_move,
				5807	int *refs_processed)
				5808	{
				5809	int ret = 0;
				5810
				5811	if (sctx->cur_ino == 0)
				5812	goto out;
				5813	if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
				5814	sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
				5815	goto out;
				5816	if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
				5817	goto out;
				5818
				5819	ret = process_recorded_refs(sctx, pending_move);
				5820	if (ret < 0)
				5821	goto out;
				5822
				5823	*refs_processed = 1;
				5824	out:
				5825	return ret;
				5826	}
				5827
				5828	static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
				5829	{
				5830	int ret = 0;
				5831	u64 left_mode;
				5832	u64 left_uid;
				5833	u64 left_gid;
				5834	u64 right_mode;
				5835	u64 right_uid;
				5836	u64 right_gid;
				5837	int need_chmod = 0;
				5838	int need_chown = 0;
				5839	int need_truncate = 1;
				5840	int pending_move = 0;
				5841	int refs_processed = 0;
				5842
				5843	if (sctx->ignore_cur_inode)
				5844	return 0;
				5845
				5846	ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
				5847	&refs_processed);
				5848	if (ret < 0)
				5849	goto out;
				5850
				5851	/*
				5852	* We have processed the refs and thus need to advance send_progress.
				5853	* Now, calls to get_cur_xxx will take the updated refs of the current
				5854	* inode into account.
				5855	*
				5856	* On the other hand, if our current inode is a directory and couldn't
				5857	* be moved/renamed because its parent was renamed/moved too and it has
				5858	* a higher inode number, we can only move/rename our current inode
				5859	* after we moved/renamed its parent. Therefore in this case operate on
				5860	* the old path (pre move/rename) of our current inode, and the
				5861	* move/rename will be performed later.
				5862	*/
				5863	if (refs_processed && !pending_move)
				5864	sctx->send_progress = sctx->cur_ino + 1;
				5865
				5866	if (sctx->cur_ino == 0 \|\| sctx->cur_inode_deleted)
				5867	goto out;
				5868	if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
				5869	goto out;
				5870
				5871	ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
				5872	&left_mode, &left_uid, &left_gid, NULL);
				5873	if (ret < 0)
				5874	goto out;
				5875
				5876	if (!sctx->parent_root \|\| sctx->cur_inode_new) {
				5877	need_chown = 1;
				5878	if (!S_ISLNK(sctx->cur_inode_mode))
				5879	need_chmod = 1;
				5880	if (sctx->cur_inode_next_write_offset == sctx->cur_inode_size)
				5881	need_truncate = 0;
				5882	} else {
				5883	u64 old_size;
				5884
				5885	ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
				5886	&old_size, NULL, &right_mode, &right_uid,
				5887	&right_gid, NULL);
				5888	if (ret < 0)
				5889	goto out;
				5890
				5891	if (left_uid != right_uid \|\| left_gid != right_gid)
				5892	need_chown = 1;
				5893	if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
				5894	need_chmod = 1;
				5895	if ((old_size == sctx->cur_inode_size) \|\|
				5896	(sctx->cur_inode_size > old_size &&
				5897	sctx->cur_inode_next_write_offset == sctx->cur_inode_size))
				5898	need_truncate = 0;
				5899	}
				5900
				5901	if (S_ISREG(sctx->cur_inode_mode)) {
				5902	if (need_send_hole(sctx)) {
				5903	if (sctx->cur_inode_last_extent == (u64)-1 \|\|
				5904	sctx->cur_inode_last_extent <
				5905	sctx->cur_inode_size) {
				5906	ret = get_last_extent(sctx, (u64)-1);
				5907	if (ret)
				5908	goto out;
				5909	}
				5910	if (sctx->cur_inode_last_extent <
				5911	sctx->cur_inode_size) {
				5912	ret = send_hole(sctx, sctx->cur_inode_size);
				5913	if (ret)
				5914	goto out;
				5915	}
				5916	}
				5917	if (need_truncate) {
				5918	ret = send_truncate(sctx, sctx->cur_ino,
				5919	sctx->cur_inode_gen,
				5920	sctx->cur_inode_size);
				5921	if (ret < 0)
				5922	goto out;
				5923	}
				5924	}
				5925
				5926	if (need_chown) {
				5927	ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				5928	left_uid, left_gid);
				5929	if (ret < 0)
				5930	goto out;
				5931	}
				5932	if (need_chmod) {
				5933	ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				5934	left_mode);
				5935	if (ret < 0)
				5936	goto out;
				5937	}
				5938
				5939	/*
				5940	* If other directory inodes depended on our current directory
				5941	* inode's move/rename, now do their move/rename operations.
				5942	*/
				5943	if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
				5944	ret = apply_children_dir_moves(sctx);
				5945	if (ret)
				5946	goto out;
				5947	/*
				5948	* Need to send that every time, no matter if it actually
				5949	* changed between the two trees as we have done changes to
				5950	* the inode before. If our inode is a directory and it's
				5951	* waiting to be moved/renamed, we will send its utimes when
				5952	* it's moved/renamed, therefore we don't need to do it here.
				5953	*/
				5954	sctx->send_progress = sctx->cur_ino + 1;
				5955	ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
				5956	if (ret < 0)
				5957	goto out;
				5958	}
				5959
				5960	out:
				5961	return ret;
				5962	}
				5963
				/*
				 * Context handed to record_parent_ref() through the void *ctx argument of
				 * the inode ref iteration callback.
				 */
				struct parent_paths_ctx {
					/* List the recorded refs are added to. */
					struct list_head *refs;
					/* Send context the refs are recorded for. */
					struct send_ctx *sctx;
				};
				5968
				5969	static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name,
				5970	void *ctx)
				5971	{
				5972	struct parent_paths_ctx *ppctx = ctx;
				5973
				5974	return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx,
				5975	ppctx->refs);
				5976	}
				5977
				5978	/*
				5979	* Issue unlink operations for all paths of the current inode found in the
				5980	* parent snapshot.
				5981	*/
				5982	static int btrfs_unlink_all_paths(struct send_ctx *sctx)
				5983	{
				5984	LIST_HEAD(deleted_refs);
				5985	struct btrfs_path *path;
				5986	struct btrfs_key key;
				5987	struct parent_paths_ctx ctx;
				5988	int ret;
				5989
				5990	path = alloc_path_for_send();
				5991	if (!path)
				5992	return -ENOMEM;
				5993
				5994	key.objectid = sctx->cur_ino;
				5995	key.type = BTRFS_INODE_REF_KEY;
				5996	key.offset = 0;
				5997	ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
				5998	if (ret < 0)
				5999	goto out;
				6000
				6001	ctx.refs = &deleted_refs;
				6002	ctx.sctx = sctx;
				6003
				6004	while (true) {
				6005	struct extent_buffer *eb = path->nodes[0];
				6006	int slot = path->slots[0];
				6007
				6008	if (slot >= btrfs_header_nritems(eb)) {
				6009	ret = btrfs_next_leaf(sctx->parent_root, path);
				6010	if (ret < 0)
				6011	goto out;
				6012	else if (ret > 0)
				6013	break;
				6014	continue;
				6015	}
				6016
				6017	btrfs_item_key_to_cpu(eb, &key, slot);
				6018	if (key.objectid != sctx->cur_ino)
				6019	break;
				6020	if (key.type != BTRFS_INODE_REF_KEY &&
				6021	key.type != BTRFS_INODE_EXTREF_KEY)
				6022	break;
				6023
				6024	ret = iterate_inode_ref(sctx->parent_root, path, &key, 1,
				6025	record_parent_ref, &ctx);
				6026	if (ret < 0)
				6027	goto out;
				6028
				6029	path->slots[0]++;
				6030	}
				6031
				6032	while (!list_empty(&deleted_refs)) {
				6033	struct recorded_ref *ref;
				6034
				6035	ref = list_first_entry(&deleted_refs, struct recorded_ref, list);
				6036	ret = send_unlink(sctx, ref->full_path);
				6037	if (ret < 0)
				6038	goto out;
				6039	fs_path_free(ref->full_path);
				6040	list_del(&ref->list);
				6041	kfree(ref);
				6042	}
				6043	ret = 0;
				6044	out:
				6045	btrfs_free_path(path);
				6046	if (ret)
				6047	__free_recorded_refs(&deleted_refs);
				6048	return ret;
				6049	}
				6050
				6051	static int changed_inode(struct send_ctx *sctx,
				6052	enum btrfs_compare_tree_result result)
				6053	{
				6054	int ret = 0;
				6055	struct btrfs_key *key = sctx->cmp_key;
				6056	struct btrfs_inode_item *left_ii = NULL;
				6057	struct btrfs_inode_item *right_ii = NULL;
				6058	u64 left_gen = 0;
				6059	u64 right_gen = 0;
				6060
				6061	sctx->cur_ino = key->objectid;
				6062	sctx->cur_inode_new_gen = 0;
				6063	sctx->cur_inode_last_extent = (u64)-1;
				6064	sctx->cur_inode_next_write_offset = 0;
				6065	sctx->ignore_cur_inode = false;
				6066
				6067	/*
				6068	* Set send_progress to current inode. This will tell all get_cur_xxx
				6069	* functions that the current inode's refs are not updated yet. Later,
				6070	* when process_recorded_refs is finished, it is set to cur_ino + 1.
				6071	*/
				6072	sctx->send_progress = sctx->cur_ino;
				6073
				6074	if (result == BTRFS_COMPARE_TREE_NEW \|\|
				6075	result == BTRFS_COMPARE_TREE_CHANGED) {
				6076	left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
				6077	sctx->left_path->slots[0],
				6078	struct btrfs_inode_item);
				6079	left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
				6080	left_ii);
				6081	} else {
				6082	right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
				6083	sctx->right_path->slots[0],
				6084	struct btrfs_inode_item);
				6085	right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
				6086	right_ii);
				6087	}
				6088	if (result == BTRFS_COMPARE_TREE_CHANGED) {
				6089	right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
				6090	sctx->right_path->slots[0],
				6091	struct btrfs_inode_item);
				6092
				6093	right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
				6094	right_ii);
				6095
				6096	/*
				6097	* The cur_ino = root dir case is special here. We can't treat
				6098	* the inode as deleted+reused because it would generate a
				6099	* stream that tries to delete/mkdir the root dir.
				6100	*/
				6101	if (left_gen != right_gen &&
				6102	sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
				6103	sctx->cur_inode_new_gen = 1;
				6104	}
				6105
				6106	/*
				6107	* Normally we do not find inodes with a link count of zero (orphans)
				6108	* because the most common case is to create a snapshot and use it
				6109	* for a send operation. However other less common use cases involve
				6110	* using a subvolume and send it after turning it to RO mode just
				6111	* after deleting all hard links of a file while holding an open
				6112	* file descriptor against it or turning a RO snapshot into RW mode,
				6113	* keep an open file descriptor against a file, delete it and then
				6114	* turn the snapshot back to RO mode before using it for a send
				6115	* operation. So if we find such cases, ignore the inode and all its
				6116	* items completely if it's a new inode, or if it's a changed inode
				6117	* make sure all its previous paths (from the parent snapshot) are all
				6118	* unlinked and all other the inode items are ignored.
				6119	*/
				6120	if (result == BTRFS_COMPARE_TREE_NEW \|\|
				6121	result == BTRFS_COMPARE_TREE_CHANGED) {
				6122	u32 nlinks;
				6123
				6124	nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
				6125	if (nlinks == 0) {
				6126	sctx->ignore_cur_inode = true;
				6127	if (result == BTRFS_COMPARE_TREE_CHANGED)
				6128	ret = btrfs_unlink_all_paths(sctx);
				6129	goto out;
				6130	}
				6131	}
				6132
				6133	if (result == BTRFS_COMPARE_TREE_NEW) {
				6134	sctx->cur_inode_gen = left_gen;
				6135	sctx->cur_inode_new = 1;
				6136	sctx->cur_inode_deleted = 0;
				6137	sctx->cur_inode_size = btrfs_inode_size(
				6138	sctx->left_path->nodes[0], left_ii);
				6139	sctx->cur_inode_mode = btrfs_inode_mode(
				6140	sctx->left_path->nodes[0], left_ii);
				6141	sctx->cur_inode_rdev = btrfs_inode_rdev(
				6142	sctx->left_path->nodes[0], left_ii);
				6143	if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
				6144	ret = send_create_inode_if_needed(sctx);
				6145	} else if (result == BTRFS_COMPARE_TREE_DELETED) {
				6146	sctx->cur_inode_gen = right_gen;
				6147	sctx->cur_inode_new = 0;
				6148	sctx->cur_inode_deleted = 1;
				6149	sctx->cur_inode_size = btrfs_inode_size(
				6150	sctx->right_path->nodes[0], right_ii);
				6151	sctx->cur_inode_mode = btrfs_inode_mode(
				6152	sctx->right_path->nodes[0], right_ii);
				6153	} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
				6154	/*
				6155	* We need to do some special handling in case the inode was
				6156	* reported as changed with a changed generation number. This
				6157	* means that the original inode was deleted and new inode
				6158	* reused the same inum. So we have to treat the old inode as
				6159	* deleted and the new one as new.
				6160	*/
				6161	if (sctx->cur_inode_new_gen) {
				6162	/*
				6163	* First, process the inode as if it was deleted.
				6164	*/
				6165	sctx->cur_inode_gen = right_gen;
				6166	sctx->cur_inode_new = 0;
				6167	sctx->cur_inode_deleted = 1;
				6168	sctx->cur_inode_size = btrfs_inode_size(
				6169	sctx->right_path->nodes[0], right_ii);
				6170	sctx->cur_inode_mode = btrfs_inode_mode(
				6171	sctx->right_path->nodes[0], right_ii);
				6172	ret = process_all_refs(sctx,
				6173	BTRFS_COMPARE_TREE_DELETED);
				6174	if (ret < 0)
				6175	goto out;
				6176
				6177	/*
				6178	* Now process the inode as if it was new.
				6179	*/
				6180	sctx->cur_inode_gen = left_gen;
				6181	sctx->cur_inode_new = 1;
				6182	sctx->cur_inode_deleted = 0;
				6183	sctx->cur_inode_size = btrfs_inode_size(
				6184	sctx->left_path->nodes[0], left_ii);
				6185	sctx->cur_inode_mode = btrfs_inode_mode(
				6186	sctx->left_path->nodes[0], left_ii);
				6187	sctx->cur_inode_rdev = btrfs_inode_rdev(
				6188	sctx->left_path->nodes[0], left_ii);
				6189	ret = send_create_inode_if_needed(sctx);
				6190	if (ret < 0)
				6191	goto out;
				6192
				6193	ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
				6194	if (ret < 0)
				6195	goto out;
				6196	/*
				6197	* Advance send_progress now as we did not get into
				6198	* process_recorded_refs_if_needed in the new_gen case.
				6199	*/
				6200	sctx->send_progress = sctx->cur_ino + 1;
				6201
				6202	/*
				6203	* Now process all extents and xattrs of the inode as if
				6204	* they were all new.
				6205	*/
				6206	ret = process_all_extents(sctx);
				6207	if (ret < 0)
				6208	goto out;
				6209	ret = process_all_new_xattrs(sctx);
				6210	if (ret < 0)
				6211	goto out;
				6212	} else {
				6213	sctx->cur_inode_gen = left_gen;
				6214	sctx->cur_inode_new = 0;
				6215	sctx->cur_inode_new_gen = 0;
				6216	sctx->cur_inode_deleted = 0;
				6217	sctx->cur_inode_size = btrfs_inode_size(
				6218	sctx->left_path->nodes[0], left_ii);
				6219	sctx->cur_inode_mode = btrfs_inode_mode(
				6220	sctx->left_path->nodes[0], left_ii);
				6221	}
				6222	}
				6223
				6224	out:
				6225	return ret;
				6226	}
				6227
				6228	/*
				6229	* We have to process new refs before deleted refs, but compare_trees gives us
				6230	* the new and deleted refs mixed. To fix this, we record the new/deleted refs
				6231	* first and later process them in process_recorded_refs.
				6232	* For the cur_inode_new_gen case, we skip recording completely because
				6233	* changed_inode did already initiate processing of refs. The reason for this is
				6234	* that in this case, compare_tree actually compares the refs of 2 different
				6235	* inodes. To fix this, process_all_refs is used in changed_inode to handle all
				6236	* refs of the right tree as deleted and all refs of the left tree as new.
				6237	*/
				6238	static int changed_ref(struct send_ctx *sctx,
				6239	enum btrfs_compare_tree_result result)
				6240	{
				6241	int ret = 0;
				6242
				6243	if (sctx->cur_ino != sctx->cmp_key->objectid) {
				6244	inconsistent_snapshot_error(sctx, result, "reference");
				6245	return -EIO;
				6246	}
				6247
				6248	if (!sctx->cur_inode_new_gen &&
				6249	sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
				6250	if (result == BTRFS_COMPARE_TREE_NEW)
				6251	ret = record_new_ref(sctx);
				6252	else if (result == BTRFS_COMPARE_TREE_DELETED)
				6253	ret = record_deleted_ref(sctx);
				6254	else if (result == BTRFS_COMPARE_TREE_CHANGED)
				6255	ret = record_changed_ref(sctx);
				6256	}
				6257
				6258	return ret;
				6259	}
				6260
				6261	/*
				6262	* Process new/deleted/changed xattrs. We skip processing in the
				6263	* cur_inode_new_gen case because changed_inode did already initiate processing
				6264	* of xattrs. The reason is the same as in changed_ref
				6265	*/
				6266	static int changed_xattr(struct send_ctx *sctx,
				6267	enum btrfs_compare_tree_result result)
				6268	{
				6269	int ret = 0;
				6270
				6271	if (sctx->cur_ino != sctx->cmp_key->objectid) {
				6272	inconsistent_snapshot_error(sctx, result, "xattr");
				6273	return -EIO;
				6274	}
				6275
				6276	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
				6277	if (result == BTRFS_COMPARE_TREE_NEW)
				6278	ret = process_new_xattr(sctx);
				6279	else if (result == BTRFS_COMPARE_TREE_DELETED)
				6280	ret = process_deleted_xattr(sctx);
				6281	else if (result == BTRFS_COMPARE_TREE_CHANGED)
				6282	ret = process_changed_xattr(sctx);
				6283	}
				6284
				6285	return ret;
				6286	}
				6287
				6288	/*
				6289	* Process new/deleted/changed extents. We skip processing in the
				6290	* cur_inode_new_gen case because changed_inode did already initiate processing
				6291	* of extents. The reason is the same as in changed_ref
				6292	*/
				6293	static int changed_extent(struct send_ctx *sctx,
				6294	enum btrfs_compare_tree_result result)
				6295	{
				6296	int ret = 0;
				6297
				6298	/*
				6299	* We have found an extent item that changed without the inode item
				6300	* having changed. This can happen either after relocation (where the
				6301	* disk_bytenr of an extent item is replaced at
				6302	* relocation.c:replace_file_extents()) or after deduplication into a
				6303	* file in both the parent and send snapshots (where an extent item can
				6304	* get modified or replaced with a new one). Note that deduplication
				6305	* updates the inode item, but it only changes the iversion (sequence
				6306	* field in the inode item) of the inode, so if a file is deduplicated
				6307	* the same amount of times in both the parent and send snapshots, its
				6308	* iversion becames the same in both snapshots, whence the inode item is
				6309	* the same on both snapshots.
				6310	*/
				6311	if (sctx->cur_ino != sctx->cmp_key->objectid)
				6312	return 0;
				6313
				6314	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
				6315	if (result != BTRFS_COMPARE_TREE_DELETED)
				6316	ret = process_extent(sctx, sctx->left_path,
				6317	sctx->cmp_key);
				6318	}
				6319
				6320	return ret;
				6321	}
				6322
				6323	static int dir_changed(struct send_ctx *sctx, u64 dir)
				6324	{
				6325	u64 orig_gen, new_gen;
				6326	int ret;
				6327
				6328	ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
				6329	NULL, NULL);
				6330	if (ret)
				6331	return ret;
				6332
				6333	ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
				6334	NULL, NULL, NULL);
				6335	if (ret)
				6336	return ret;
				6337
				6338	return (orig_gen != new_gen) ? 1 : 0;
				6339	}
				6340
				6341	static int compare_refs(struct send_ctx sctx, struct btrfs_path path,
				6342	struct btrfs_key *key)
				6343	{
				6344	struct btrfs_inode_extref *extref;
				6345	struct extent_buffer *leaf;
				6346	u64 dirid = 0, last_dirid = 0;
				6347	unsigned long ptr;
				6348	u32 item_size;
				6349	u32 cur_offset = 0;
				6350	int ref_name_len;
				6351	int ret = 0;
				6352
				6353	/* Easy case, just check this one dirid */
				6354	if (key->type == BTRFS_INODE_REF_KEY) {
				6355	dirid = key->offset;
				6356
				6357	ret = dir_changed(sctx, dirid);
				6358	goto out;
				6359	}
				6360
				6361	leaf = path->nodes[0];
				6362	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
				6363	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
				6364	while (cur_offset < item_size) {
				6365	extref = (struct btrfs_inode_extref *)(ptr +
				6366	cur_offset);
				6367	dirid = btrfs_inode_extref_parent(leaf, extref);
				6368	ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
				6369	cur_offset += ref_name_len + sizeof(*extref);
				6370	if (dirid == last_dirid)
				6371	continue;
				6372	ret = dir_changed(sctx, dirid);
				6373	if (ret)
				6374	break;
				6375	last_dirid = dirid;
				6376	}
				6377	out:
				6378	return ret;
				6379	}
				6380
				6381	/*
				6382	* Updates compare related fields in sctx and simply forwards to the actual
				6383	* changed_xxx functions.
				6384	*/
				6385	static int changed_cb(struct btrfs_path *left_path,
				6386	struct btrfs_path *right_path,
				6387	struct btrfs_key *key,
				6388	enum btrfs_compare_tree_result result,
				6389	void *ctx)
				6390	{
				6391	int ret = 0;
				6392	struct send_ctx *sctx = ctx;
				6393
				6394	if (result == BTRFS_COMPARE_TREE_SAME) {
				6395	if (key->type == BTRFS_INODE_REF_KEY \|\|
				6396	key->type == BTRFS_INODE_EXTREF_KEY) {
				6397	ret = compare_refs(sctx, left_path, key);
				6398	if (!ret)
				6399	return 0;
				6400	if (ret < 0)
				6401	return ret;
				6402	} else if (key->type == BTRFS_EXTENT_DATA_KEY) {
				6403	return maybe_send_hole(sctx, left_path, key);
				6404	} else {
				6405	return 0;
				6406	}
				6407	result = BTRFS_COMPARE_TREE_CHANGED;
				6408	ret = 0;
				6409	}
				6410
				6411	sctx->left_path = left_path;
				6412	sctx->right_path = right_path;
				6413	sctx->cmp_key = key;
				6414
				6415	ret = finish_inode_if_needed(sctx, 0);
				6416	if (ret < 0)
				6417	goto out;
				6418
				6419	/* Ignore non-FS objects */
				6420	if (key->objectid == BTRFS_FREE_INO_OBJECTID \|\|
				6421	key->objectid == BTRFS_FREE_SPACE_OBJECTID)
				6422	goto out;
				6423
				6424	if (key->type == BTRFS_INODE_ITEM_KEY) {
				6425	ret = changed_inode(sctx, result);
				6426	} else if (!sctx->ignore_cur_inode) {
				6427	if (key->type == BTRFS_INODE_REF_KEY \|\|
				6428	key->type == BTRFS_INODE_EXTREF_KEY)
				6429	ret = changed_ref(sctx, result);
				6430	else if (key->type == BTRFS_XATTR_ITEM_KEY)
				6431	ret = changed_xattr(sctx, result);
				6432	else if (key->type == BTRFS_EXTENT_DATA_KEY)
				6433	ret = changed_extent(sctx, result);
				6434	}
				6435
				6436	out:
				6437	return ret;
				6438	}
				6439
				6440	static int full_send_tree(struct send_ctx *sctx)
				6441	{
				6442	int ret;
				6443	struct btrfs_root *send_root = sctx->send_root;
				6444	struct btrfs_key key;
				6445	struct btrfs_path *path;
				6446	struct extent_buffer *eb;
				6447	int slot;
				6448
				6449	path = alloc_path_for_send();
				6450	if (!path)
				6451	return -ENOMEM;
				6452
				6453	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
				6454	key.type = BTRFS_INODE_ITEM_KEY;
				6455	key.offset = 0;
				6456
				6457	ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
				6458	if (ret < 0)
				6459	goto out;
				6460	if (ret)
				6461	goto out_finish;
				6462
				6463	while (1) {
				6464	eb = path->nodes[0];
				6465	slot = path->slots[0];
				6466	btrfs_item_key_to_cpu(eb, &key, slot);
				6467
				6468	ret = changed_cb(path, NULL, &key,
				6469	BTRFS_COMPARE_TREE_NEW, sctx);
				6470	if (ret < 0)
				6471	goto out;
				6472
				6473	ret = btrfs_next_item(send_root, path);
				6474	if (ret < 0)
				6475	goto out;
				6476	if (ret) {
				6477	ret = 0;
				6478	break;
				6479	}
				6480	}
				6481
				6482	out_finish:
				6483	ret = finish_inode_if_needed(sctx, 1);
				6484
				6485	out:
				6486	btrfs_free_path(path);
				6487	return ret;
				6488	}
				6489
				6490	static int send_subvol(struct send_ctx *sctx)
				6491	{
				6492	int ret;
				6493
				6494	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
				6495	ret = send_header(sctx);
				6496	if (ret < 0)
				6497	goto out;
				6498	}
				6499
				6500	ret = send_subvol_begin(sctx);
				6501	if (ret < 0)
				6502	goto out;
				6503
				6504	if (sctx->parent_root) {
				6505	ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
				6506	changed_cb, sctx);
				6507	if (ret < 0)
				6508	goto out;
				6509	ret = finish_inode_if_needed(sctx, 1);
				6510	if (ret < 0)
				6511	goto out;
				6512	} else {
				6513	ret = full_send_tree(sctx);
				6514	if (ret < 0)
				6515	goto out;
				6516	}
				6517
				6518	out:
				6519	free_recorded_refs(sctx);
				6520	return ret;
				6521	}
				6522
				6523	/*
				6524	* If orphan cleanup did remove any orphans from a root, it means the tree
				6525	* was modified and therefore the commit root is not the same as the current
				6526	* root anymore. This is a problem, because send uses the commit root and
				6527	* therefore can see inode items that don't exist in the current root anymore,
				6528	* and for example make calls to btrfs_iget, which will do tree lookups based
				6529	* on the current root and not on the commit root. Those lookups will fail,
				6530	* returning a -ESTALE error, and making send fail with that error. So make
				6531	* sure a send does not see any orphans we have just removed, and that it will
				6532	* see the same inodes regardless of whether a transaction commit happened
				6533	* before it started (meaning that the commit root will be the same as the
				6534	* current root) or not.
				6535	*/
				6536	static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
				6537	{
				6538	int i;
				6539	struct btrfs_trans_handle *trans = NULL;
				6540
				6541	again:
				6542	if (sctx->parent_root &&
				6543	sctx->parent_root->node != sctx->parent_root->commit_root)
				6544	goto commit_trans;
				6545
				6546	for (i = 0; i < sctx->clone_roots_cnt; i++)
				6547	if (sctx->clone_roots[i].root->node !=
				6548	sctx->clone_roots[i].root->commit_root)
				6549	goto commit_trans;
				6550
				6551	if (trans)
				6552	return btrfs_end_transaction(trans);
				6553
				6554	return 0;
				6555
				6556	commit_trans:
				6557	/* Use any root, all fs roots will get their commit roots updated. */
				6558	if (!trans) {
				6559	trans = btrfs_join_transaction(sctx->send_root);
				6560	if (IS_ERR(trans))
				6561	return PTR_ERR(trans);
				6562	goto again;
				6563	}
				6564
				6565	return btrfs_commit_transaction(trans);
				6566	}
				6567
				6568	/*
				6569	* Make sure any existing dellaloc is flushed for any root used by a send
				6570	* operation so that we do not miss any data and we do not race with writeback
				6571	* finishing and changing a tree while send is using the tree. This could
				6572	* happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
				6573	* a send operation then uses the subvolume.
				6574	* After flushing delalloc ensure_commit_roots_uptodate() must be called.
				6575	*/
				6576	static int flush_delalloc_roots(struct send_ctx *sctx)
				6577	{
				6578	struct btrfs_root *root = sctx->parent_root;
				6579	int ret;
				6580	int i;
				6581
				6582	if (root) {
				6583	ret = btrfs_start_delalloc_snapshot(root);
				6584	if (ret)
				6585	return ret;
				6586	btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
				6587	}
				6588
				6589	for (i = 0; i < sctx->clone_roots_cnt; i++) {
				6590	root = sctx->clone_roots[i].root;
				6591	ret = btrfs_start_delalloc_snapshot(root);
				6592	if (ret)
				6593	return ret;
				6594	btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
				6595	}
				6596
				6597	return 0;
				6598	}
				6599
				6600	static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
				6601	{
				6602	spin_lock(&root->root_item_lock);
				6603	root->send_in_progress--;
				6604	/*
				6605	* Not much left to do, we don't know why it's unbalanced and
				6606	* can't blindly reset it to 0.
				6607	*/
				6608	if (root->send_in_progress < 0)
				6609	btrfs_err(root->fs_info,
				6610	"send_in_progress unbalanced %d root %llu",
				6611	root->send_in_progress, root->root_key.objectid);
				6612	spin_unlock(&root->root_item_lock);
				6613	}
				6614
				6615	long btrfs_ioctl_send(struct file mnt_file, struct btrfs_ioctl_send_args arg)
				6616	{
				6617	int ret = 0;
				6618	struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
				6619	struct btrfs_fs_info *fs_info = send_root->fs_info;
				6620	struct btrfs_root *clone_root;
				6621	struct btrfs_key key;
				6622	struct send_ctx *sctx = NULL;
				6623	u32 i;
				6624	u64 *clone_sources_tmp = NULL;
				6625	int clone_sources_to_rollback = 0;
				6626	unsigned alloc_size;
				6627	int sort_clone_roots = 0;
				6628	int index;
				6629
				6630	if (!capable(CAP_SYS_ADMIN))
				6631	return -EPERM;
				6632
				6633	/*
				6634	* The subvolume must remain read-only during send, protect against
				6635	* making it RW. This also protects against deletion.
				6636	*/
				6637	spin_lock(&send_root->root_item_lock);
				6638	send_root->send_in_progress++;
				6639	spin_unlock(&send_root->root_item_lock);
				6640
				6641	/*
				6642	* Userspace tools do the checks and warn the user if it's
				6643	* not RO.
				6644	*/
				6645	if (!btrfs_root_readonly(send_root)) {
				6646	ret = -EPERM;
				6647	goto out;
				6648	}
				6649
				6650	/*
				6651	* Check that we don't overflow at later allocations, we request
				6652	* clone_sources_count + 1 items, and compare to unsigned long inside
				6653	* access_ok.
				6654	*/
				6655	if (arg->clone_sources_count >
				6656	ULONG_MAX / sizeof(struct clone_root) - 1) {
				6657	ret = -EINVAL;
				6658	goto out;
				6659	}
				6660
				6661	if (!access_ok(VERIFY_READ, arg->clone_sources,
				6662	sizeof(arg->clone_sources)
				6663	arg->clone_sources_count)) {
				6664	ret = -EFAULT;
				6665	goto out;
				6666	}
				6667
				6668	if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
				6669	ret = -EINVAL;
				6670	goto out;
				6671	}
				6672
				6673	sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL);
				6674	if (!sctx) {
				6675	ret = -ENOMEM;
				6676	goto out;
				6677	}
				6678
				6679	INIT_LIST_HEAD(&sctx->new_refs);
				6680	INIT_LIST_HEAD(&sctx->deleted_refs);
				6681	INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
				6682	INIT_LIST_HEAD(&sctx->name_cache_list);
				6683
				6684	sctx->flags = arg->flags;
				6685
				6686	sctx->send_filp = fget(arg->send_fd);
				6687	if (!sctx->send_filp) {
				6688	ret = -EBADF;
				6689	goto out;
				6690	}
				6691
				6692	sctx->send_root = send_root;
				6693	/*
				6694	* Unlikely but possible, if the subvolume is marked for deletion but
				6695	* is slow to remove the directory entry, send can still be started
				6696	*/
				6697	if (btrfs_root_dead(sctx->send_root)) {
				6698	ret = -EPERM;
				6699	goto out;
				6700	}
				6701
				6702	sctx->clone_roots_cnt = arg->clone_sources_count;
				6703
				6704	sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
				6705	sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL);
				6706	if (!sctx->send_buf) {
				6707	ret = -ENOMEM;
				6708	goto out;
				6709	}
				6710
				6711	sctx->read_buf = kvmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL);
				6712	if (!sctx->read_buf) {
				6713	ret = -ENOMEM;
				6714	goto out;
				6715	}
				6716
				6717	sctx->pending_dir_moves = RB_ROOT;
				6718	sctx->waiting_dir_moves = RB_ROOT;
				6719	sctx->orphan_dirs = RB_ROOT;
				6720
				6721	alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1);
				6722
				6723	sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL);
				6724	if (!sctx->clone_roots) {
				6725	ret = -ENOMEM;
				6726	goto out;
				6727	}
				6728
				6729	alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources);
				6730
				6731	if (arg->clone_sources_count) {
				6732	clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL);
				6733	if (!clone_sources_tmp) {
				6734	ret = -ENOMEM;
				6735	goto out;
				6736	}
				6737
				6738	ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
				6739	alloc_size);
				6740	if (ret) {
				6741	ret = -EFAULT;
				6742	goto out;
				6743	}
				6744
				6745	for (i = 0; i < arg->clone_sources_count; i++) {
				6746	key.objectid = clone_sources_tmp[i];
				6747	key.type = BTRFS_ROOT_ITEM_KEY;
				6748	key.offset = (u64)-1;
				6749
				6750	index = srcu_read_lock(&fs_info->subvol_srcu);
				6751
				6752	clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
				6753	if (IS_ERR(clone_root)) {
				6754	srcu_read_unlock(&fs_info->subvol_srcu, index);
				6755	ret = PTR_ERR(clone_root);
				6756	goto out;
				6757	}
				6758	spin_lock(&clone_root->root_item_lock);
				6759	if (!btrfs_root_readonly(clone_root) \|\|
				6760	btrfs_root_dead(clone_root)) {
				6761	spin_unlock(&clone_root->root_item_lock);
				6762	srcu_read_unlock(&fs_info->subvol_srcu, index);
				6763	ret = -EPERM;
				6764	goto out;
				6765	}
				6766	clone_root->send_in_progress++;
				6767	spin_unlock(&clone_root->root_item_lock);
				6768	srcu_read_unlock(&fs_info->subvol_srcu, index);
				6769
				6770	sctx->clone_roots[i].root = clone_root;
				6771	clone_sources_to_rollback = i + 1;
				6772	}
				6773	kvfree(clone_sources_tmp);
				6774	clone_sources_tmp = NULL;
				6775	}
				6776
				6777	if (arg->parent_root) {
				6778	key.objectid = arg->parent_root;
				6779	key.type = BTRFS_ROOT_ITEM_KEY;
				6780	key.offset = (u64)-1;
				6781
				6782	index = srcu_read_lock(&fs_info->subvol_srcu);
				6783
				6784	sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
				6785	if (IS_ERR(sctx->parent_root)) {
				6786	srcu_read_unlock(&fs_info->subvol_srcu, index);
				6787	ret = PTR_ERR(sctx->parent_root);
				6788	goto out;
				6789	}
				6790
				6791	spin_lock(&sctx->parent_root->root_item_lock);
				6792	sctx->parent_root->send_in_progress++;
				6793	if (!btrfs_root_readonly(sctx->parent_root) \|\|
				6794	btrfs_root_dead(sctx->parent_root)) {
				6795	spin_unlock(&sctx->parent_root->root_item_lock);
				6796	srcu_read_unlock(&fs_info->subvol_srcu, index);
				6797	ret = -EPERM;
				6798	goto out;
				6799	}
				6800	spin_unlock(&sctx->parent_root->root_item_lock);
				6801
				6802	srcu_read_unlock(&fs_info->subvol_srcu, index);
				6803	}
				6804
				6805	/*
				6806	* Clones from send_root are allowed, but only if the clone source
				6807	* is behind the current send position. This is checked while searching
				6808	* for possible clone sources.
				6809	*/
				6810	sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;
				6811
				6812	/* We do a bsearch later */
				6813	sort(sctx->clone_roots, sctx->clone_roots_cnt,
				6814	sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
				6815	NULL);
				6816	sort_clone_roots = 1;
				6817
				6818	ret = flush_delalloc_roots(sctx);
				6819	if (ret)
				6820	goto out;
				6821
				6822	ret = ensure_commit_roots_uptodate(sctx);
				6823	if (ret)
				6824	goto out;
				6825
				6826	current->journal_info = BTRFS_SEND_TRANS_STUB;
				6827	ret = send_subvol(sctx);
				6828	current->journal_info = NULL;
				6829	if (ret < 0)
				6830	goto out;
				6831
				6832	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
				6833	ret = begin_cmd(sctx, BTRFS_SEND_C_END);
				6834	if (ret < 0)
				6835	goto out;
				6836	ret = send_cmd(sctx);
				6837	if (ret < 0)
				6838	goto out;
				6839	}
				6840
				6841	out:
				6842	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
				6843	while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
				6844	struct rb_node *n;
				6845	struct pending_dir_move *pm;
				6846
				6847	n = rb_first(&sctx->pending_dir_moves);
				6848	pm = rb_entry(n, struct pending_dir_move, node);
				6849	while (!list_empty(&pm->list)) {
				6850	struct pending_dir_move *pm2;
				6851
				6852	pm2 = list_first_entry(&pm->list,
				6853	struct pending_dir_move, list);
				6854	free_pending_move(sctx, pm2);
				6855	}
				6856	free_pending_move(sctx, pm);
				6857	}
				6858
				6859	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
				6860	while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
				6861	struct rb_node *n;
				6862	struct waiting_dir_move *dm;
				6863
				6864	n = rb_first(&sctx->waiting_dir_moves);
				6865	dm = rb_entry(n, struct waiting_dir_move, node);
				6866	rb_erase(&dm->node, &sctx->waiting_dir_moves);
				6867	kfree(dm);
				6868	}
				6869
				6870	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs));
				6871	while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) {
				6872	struct rb_node *n;
				6873	struct orphan_dir_info *odi;
				6874
				6875	n = rb_first(&sctx->orphan_dirs);
				6876	odi = rb_entry(n, struct orphan_dir_info, node);
				6877	free_orphan_dir_info(sctx, odi);
				6878	}
				6879
				6880	if (sort_clone_roots) {
				6881	for (i = 0; i < sctx->clone_roots_cnt; i++)
				6882	btrfs_root_dec_send_in_progress(
				6883	sctx->clone_roots[i].root);
				6884	} else {
				6885	for (i = 0; sctx && i < clone_sources_to_rollback; i++)
				6886	btrfs_root_dec_send_in_progress(
				6887	sctx->clone_roots[i].root);
				6888
				6889	btrfs_root_dec_send_in_progress(send_root);
				6890	}
				6891	if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
				6892	btrfs_root_dec_send_in_progress(sctx->parent_root);
				6893
				6894	kvfree(clone_sources_tmp);
				6895
				6896	if (sctx) {
				6897	if (sctx->send_filp)
				6898	fput(sctx->send_filp);
				6899
				6900	kvfree(sctx->clone_roots);
				6901	kvfree(sctx->send_buf);
				6902	kvfree(sctx->read_buf);
				6903
				6904	name_cache_free(sctx);
				6905
				6906	kfree(sctx);
				6907	}
				6908
				6909	return ret;
				6910	}