# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import array
import copy
import functools
import heapq
import itertools
import multiprocessing
import os
import os.path
import re
import subprocess
import sys
import threading
from collections import deque, OrderedDict
from hashlib import sha1

import common
from rangelib import RangeSet


__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]

def compute_patch(srcfile, tgtfile, imgdiff=False):
  patchfile = common.MakeTempFile(prefix='patch-')

  cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff']
  cmd.extend([srcfile, tgtfile, patchfile])

  # Don't dump the bsdiff/imgdiff commands, which are not useful for this
  # case, since they only contain temp filenames.
  p = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                 stderr=subprocess.STDOUT)
  output, _ = p.communicate()

  if p.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return f.read()

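# A minimal usage sketch for compute_patch() (hypothetical filenames; it
# assumes the bsdiff/imgdiff host tools are available on PATH, which is what
# common.Run() expects):
#
#   src_file = common.MakeTempFile(prefix="src-")
#   tgt_file = common.MakeTempFile(prefix="tgt-")
#   ...  # write the source/target block data into the two files
#   patch_data = compute_patch(src_file, tgt_file, imgdiff=False)
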
class Image(object):
  def RangeSha1(self, ranges):
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    raise NotImplementedError


class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.total_blocks = 0
    self.file_map = {}

  def RangeSha1(self, ranges):
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")


class DataImage(Image):
  """An image wrapped around a single string of data."""

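  # A minimal usage sketch (hypothetical data): wrapping two blocks' worth of
  # zero bytes yields an image whose blocks all fall into the "__ZERO" domain:
  #
  #   img = DataImage('\0' * 8192)
  #   img.total_blocks        # -> 2
  #   sorted(img.file_map)    # -> ['__ZERO']
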
102 def __init__(self, data, trim=False, pad=False):
103 self.data = data
104 self.blocksize = 4096
105
106 assert not (trim and pad)
107
108 partial = len(self.data) % self.blocksize
109 padded = False
110 if partial > 0:
111 if trim:
112 self.data = self.data[:-partial]
113 elif pad:
114 self.data += '\0' * (self.blocksize - partial)
115 padded = True
116 else:
117 raise ValueError(("data for DataImage must be multiple of %d bytes "
118 "unless trim or pad is specified") %
119 (self.blocksize,))
120
121 assert len(self.data) % self.blocksize == 0
122
123 self.total_blocks = len(self.data) / self.blocksize
124 self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block, even for
    # incremental OTAs. Otherwise the last block may be skipped if it is
    # unchanged in an incremental update, but it would then fail the
    # post-install verification if the padding bytes have non-zero contents.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = RangeSet(data=clobbered_blocks)
135 self.extended = RangeSet()
136
137 zero_blocks = []
138 nonzero_blocks = []
139 reference = '\0' * self.blocksize
140
141 for i in range(self.total_blocks-1 if padded else self.total_blocks):
142 d = self.data[i*self.blocksize : (i+1)*self.blocksize]
143 if d == reference:
144 zero_blocks.append(i)
145 zero_blocks.append(i+1)
146 else:
147 nonzero_blocks.append(i)
148 nonzero_blocks.append(i+1)
149
150 assert zero_blocks or nonzero_blocks or clobbered_blocks
151
152 self.file_map = dict()
153 if zero_blocks:
154 self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
155 if nonzero_blocks:
156 self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
157 if clobbered_blocks:
158 self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)
159
160 def _GetRangeData(self, ranges):
161 for s, e in ranges:
162 yield self.data[s*self.blocksize:e*self.blocksize]
163
164 def RangeSha1(self, ranges):
165 h = sha1()
166 for data in self._GetRangeData(ranges):
167 h.update(data)
168 return h.hexdigest()
169
170 def ReadRangeSet(self, ranges):
171 return [self._GetRangeData(ranges)]
172
173 def TotalSha1(self, include_clobbered_blocks=False):
174 if not include_clobbered_blocks:
175 return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
176 else:
177 return sha1(self.data).hexdigest()
178
179 def WriteRangeDataToFd(self, ranges, fd):
180 for data in self._GetRangeData(ranges):
181 fd.write(data)
182
183
184class Transfer(object):
185 def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
186 src_sha1, style, by_id):
187 self.tgt_name = tgt_name
188 self.src_name = src_name
189 self.tgt_ranges = tgt_ranges
190 self.src_ranges = src_ranges
191 self.tgt_sha1 = tgt_sha1
192 self.src_sha1 = src_sha1
193 self.style = style
194
195 # We use OrderedDict rather than dict so that the output is repeatable;
196 # otherwise it would depend on the hash values of the Transfer objects.
197 self.goes_before = OrderedDict()
198 self.goes_after = OrderedDict()
199
200 self.stash_before = []
201 self.use_stash = []
202
203 self.id = len(by_id)
204 by_id.append(self)
205
206 self._patch = None
207
208 @property
209 def patch(self):
210 return self._patch
211
212 @patch.setter
213 def patch(self, patch):
214 if patch:
215 assert self.style == "diff"
216 self._patch = patch
217
218 def NetStashChange(self):
219 return (sum(sr.size() for (_, sr) in self.stash_before) -
220 sum(sr.size() for (_, sr) in self.use_stash))
221
222 def ConvertToNew(self):
223 assert self.style != "new"
224 self.use_stash = []
225 self.style = "new"
226 self.src_ranges = RangeSet()
227 self.patch = None
228
229 def __str__(self):
230 return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
231 " to " + str(self.tgt_ranges) + ">")
232
233
234@functools.total_ordering
235class HeapItem(object):
236 def __init__(self, item):
237 self.item = item
238 # Negate the score since python's heap is a min-heap and we want the
239 # maximum score.
240 self.score = -item.score
241
242 def clear(self):
243 self.item = None
244
245 def __bool__(self):
246 return self.item is not None
247
248 # Python 2 uses __nonzero__, while Python 3 uses __bool__.
249 __nonzero__ = __bool__
250
  # The remaining comparison operations are generated by the
  # functools.total_ordering decorator.
252 def __eq__(self, other):
253 return self.score == other.score
254
255 def __le__(self, other):
256 return self.score <= other.score
257
258
class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files to which imgdiff will be applied while generating
  a BlockImageDiff. It also logs the files that cannot use imgdiff, along with
  the specific reasons. The stats are only meaningful when imgdiff is not
  disabled by the caller of BlockImageDiff. In addition, only files with
  supported types (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to
  be logged.
  """
268
269 USED_IMGDIFF = "APK files diff'd with imgdiff"
270 USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"
271
272 # Reasons for not applying imgdiff on APKs.
273 SKIPPED_TRIMMED = "Not used imgdiff due to trimmed RangeSet"
274 SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
275 SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
276 SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"
277
278 # The list of valid reasons, which will also be the dumped order in a report.
279 REASONS = (
280 USED_IMGDIFF,
281 USED_IMGDIFF_LARGE_APK,
282 SKIPPED_TRIMMED,
283 SKIPPED_NONMONOTONIC,
284 SKIPPED_SHARED_BLOCKS,
285 SKIPPED_INCOMPLETE,
286 )
287
288 def __init__(self):
289 self.stats = {}
290
291 def Log(self, filename, reason):
292 """Logs why imgdiff can or cannot be applied to the given filename.
293
294 Args:
295 filename: The filename string.
296 reason: One of the reason constants listed in REASONS.
297
298 Raises:
299 AssertionError: On unsupported filetypes or invalid reason.
300 """
301 assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
302 assert reason in self.REASONS
303
304 if reason not in self.stats:
305 self.stats[reason] = set()
306 self.stats[reason].add(filename)
307
308 def Report(self):
309 """Prints a report of the collected imgdiff stats."""
310
311 def print_header(header, separator):
312 print(header)
313 print(separator * len(header) + '\n')
314
315 print_header(' Imgdiff Stats Report ', '=')
316 for key in self.REASONS:
317 if key not in self.stats:
318 continue
319 values = self.stats[key]
320 section_header = ' {} (count: {}) '.format(key, len(values))
321 print_header(section_header, '-')
322 print(''.join([' {}\n'.format(name) for name in values]))
323
324
# BlockImageDiff works on two image objects. An image object is
# anything that provides the following attributes:
#
#    blocksize: the size in bytes of a block, currently must be 4096.
#
#    total_blocks: the total size of the partition/image, in blocks.
#
#    care_map: a RangeSet containing which blocks (in the range [0,
#      total_blocks) we actually care about; i.e. which blocks contain
#      data.
#
#    file_map: a dict that partitions the blocks contained in care_map
#      into smaller domains that are useful for doing diffs on.
#      (Typically a domain is a file, and the key in file_map is the
#      pathname.)
#
#    clobbered_blocks: a RangeSet containing which blocks contain data
#      but may be altered by the FS. They need to be excluded when
#      verifying the partition integrity.
#
#    ReadRangeSet(): a function that takes a RangeSet and returns the
#      data contained in the image blocks of that RangeSet. The data
#      is returned as a list or tuple of strings; concatenating the
#      elements together should produce the requested data.
#      Implementations are free to break up the data into list/tuple
#      elements in any way that is convenient.
#
#    RangeSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the specified range.
#
#    TotalSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the image (i.e. all the blocks in the
#      care_map minus clobbered_blocks, or including the clobbered
#      blocks if include_clobbered_blocks is True).
#
# When creating a BlockImageDiff, the src image may be None, in which
# case the list of transfers produced will never read from the
# original image.

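# A typical driver sketch (hedged; 'tgt_img' and 'src_img' stand for any
# objects providing the attributes above, e.g. sparse_img.SparseImage):
#
#   b = BlockImageDiff(tgt_img, src_img, version=4)
#   b.Compute("out/system")   # writes out/system.transfer.list,
#                             # out/system.new.dat and out/system.patch.dat
#
# Compute() also expects common.OPTIONS.cache_size (and stash_threshold) to
# have been set by the caller.
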
class BlockImageDiff(object):
  def __init__(self, tgt, src=None, threads=None, version=4,
               disable_imgdiff=False):
    if threads is None:
      threads = multiprocessing.cpu_count() // 2
      if threads == 0:
        threads = 1
    self.threads = threads
    self.version = version
    self.transfers = []
    self.src_basenames = {}
    self.src_numpatterns = {}
    self._max_stashed_size = 0
    self.touched_src_ranges = RangeSet()
    self.touched_src_sha1 = None
    self.disable_imgdiff = disable_imgdiff
    self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None

    assert version in (3, 4)

    self.tgt = tgt
    if src is None:
      src = EmptyImage()
    self.src = src

    # The updater code that installs the patch always uses 4k blocks.
    assert tgt.blocksize == 4096
    assert src.blocksize == 4096

    # The range sets in each filemap should comprise a partition of
    # the care map.
    self.AssertPartition(src.care_map, src.file_map.values())
    self.AssertPartition(tgt.care_map, tgt.file_map.values())

398 @property
399 def max_stashed_size(self):
400 return self._max_stashed_size
401
402 @staticmethod
403 def FileTypeSupportedByImgdiff(filename):
404 """Returns whether the file type is supported by imgdiff."""
405 return filename.lower().endswith(('.apk', '.jar', '.zip'))
406
  def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
    """Checks whether we can apply imgdiff for the given RangeSets.

    For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
    'imgdiff -z' if possible, because it usually produces significantly
    smaller patches than bsdiff.

    This is permissible if all of the following conditions hold.
      - imgdiff hasn't been disabled by the caller (e.g. for squashfs);
      - The file type is supported by imgdiff;
      - The source and target blocks are monotonic (i.e. the data is stored
        with blocks in increasing order);
      - Neither file contains shared blocks;
      - Both files have complete lists of blocks;
      - We haven't removed any blocks from the source set.

    If all these conditions are satisfied, concatenating all the blocks in the
    RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
    in the last block). imgdiff is fine with extra zeros at the end of the
    file.

    Args:
      name: The filename to be diff'd.
      tgt_ranges: The target RangeSet.
      src_ranges: The source RangeSet.
      large_apk: Whether this is to split a large APK.

    Returns:
      A boolean result.
    """
436 if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
437 return False
438
439 if not tgt_ranges.monotonic or not src_ranges.monotonic:
440 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
441 return False
442
443 if (tgt_ranges.extra.get('uses_shared_blocks') or
444 src_ranges.extra.get('uses_shared_blocks')):
445 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
446 return False
447
448 if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
449 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
450 return False
451
452 if tgt_ranges.extra.get('trimmed') or src_ranges.extra.get('trimmed'):
453 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_TRIMMED)
454 return False
455
456 reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
457 else ImgdiffStats.USED_IMGDIFF)
458 self.imgdiff_stats.Log(name, reason)
459 return True
460
461 def Compute(self, prefix):
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match, if available, otherwise
    #   2) an exact basename match, if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#", if available, otherwise
    #   4) we have no source for this target.
469 self.AbbreviateSourceNames()
470 self.FindTransfers()
471
472 # Find the ordering dependencies among transfers (this is O(n^2)
473 # in the number of transfers).
474 self.GenerateDigraph()
475 # Find a sequence of transfers that satisfies as many ordering
476 # dependencies as possible (heuristically).
477 self.FindVertexSequence()
478 # Fix up the ordering dependencies that the sequence didn't
479 # satisfy.
480 self.ReverseBackwardEdges()
481 self.ImproveVertexSequence()
482
483 # Ensure the runtime stash size is under the limit.
484 if common.OPTIONS.cache_size is not None:
485 self.ReviseStashSize()
486
487 # Double-check our work.
488 self.AssertSequenceGood()
489 self.AssertSha1Good()
490
491 self.ComputePatches(prefix)
492 self.WriteTransfers(prefix)
493
494 # Report the imgdiff stats.
495 if common.OPTIONS.verbose and not self.disable_imgdiff:
496 self.imgdiff_stats.Report()
497
498 def WriteTransfers(self, prefix):
499 def WriteSplitTransfers(out, style, target_blocks):
500 """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.
501
502 This prevents the target size of one command from being too large; and
503 might help to avoid fsync errors on some devices."""
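      # For example (illustrative), a 2500-block 'new' target would be emitted
      # as three commands covering 1024, 1024 and 452 blocks respectively.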
504
505 assert style == "new" or style == "zero"
506 blocks_limit = 1024
507 total = 0
508 while target_blocks:
509 blocks_to_write = target_blocks.first(blocks_limit)
510 out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
511 total += blocks_to_write.size()
512 target_blocks = target_blocks.subtract(blocks_to_write)
513 return total
514
515 out = []
516 total = 0
517
    # In BBOTA v3+, the hash of the stashed blocks is used as the stash slot
    # id. 'stashes' maps each hash to its reference count. The stash will be
    # freed only when the count drops to zero.
521 stashes = {}
522 stashed_blocks = 0
523 max_stashed_blocks = 0
524
525 for xf in self.transfers:
526
527 for _, sr in xf.stash_before:
528 sh = self.src.RangeSha1(sr)
529 if sh in stashes:
530 stashes[sh] += 1
531 else:
532 stashes[sh] = 1
533 stashed_blocks += sr.size()
534 self.touched_src_ranges = self.touched_src_ranges.union(sr)
535 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
536
537 if stashed_blocks > max_stashed_blocks:
538 max_stashed_blocks = stashed_blocks
539
540 free_string = []
541 free_size = 0
542
      # <# blocks> <src ranges>
      #    OR
      # <# blocks> <src ranges> <src locs> <stash refs...>
      #    OR
      # <# blocks> - <stash refs...>
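      # For example (illustrative values), "15 2,0,15" reads 15 blocks
      # directly from the source, while "15 - 9a3f...:2,0,15" takes all 15
      # blocks from a previously stashed range keyed by its SHA-1.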
548
549 size = xf.src_ranges.size()
550 src_str_buffer = [str(size)]
551
552 unstashed_src_ranges = xf.src_ranges
553 mapped_stashes = []
554 for _, sr in xf.use_stash:
555 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
556 sh = self.src.RangeSha1(sr)
557 sr = xf.src_ranges.map_within(sr)
558 mapped_stashes.append(sr)
559 assert sh in stashes
560 src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
561 stashes[sh] -= 1
562 if stashes[sh] == 0:
563 free_string.append("free %s\n" % (sh,))
564 free_size += sr.size()
565 stashes.pop(sh)
566
567 if unstashed_src_ranges:
568 src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
569 if xf.use_stash:
570 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
571 src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
572 mapped_stashes.append(mapped_unstashed)
573 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
574 else:
575 src_str_buffer.insert(1, "-")
576 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
577
578 src_str = " ".join(src_str_buffer)
579
580 # version 3+:
581 # zero <rangeset>
582 # new <rangeset>
583 # erase <rangeset>
584 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
585 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
586 # move hash <tgt rangeset> <src_str>
587
588 tgt_size = xf.tgt_ranges.size()
589
590 if xf.style == "new":
591 assert xf.tgt_ranges
592 assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
593 total += tgt_size
594 elif xf.style == "move":
595 assert xf.tgt_ranges
596 assert xf.src_ranges.size() == tgt_size
597 if xf.src_ranges != xf.tgt_ranges:
598 # take into account automatic stashing of overlapping blocks
599 if xf.src_ranges.overlaps(xf.tgt_ranges):
600 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
601 if temp_stash_usage > max_stashed_blocks:
602 max_stashed_blocks = temp_stash_usage
603
604 self.touched_src_ranges = self.touched_src_ranges.union(
605 xf.src_ranges)
606
607 out.append("%s %s %s %s\n" % (
608 xf.style,
609 xf.tgt_sha1,
610 xf.tgt_ranges.to_string_raw(), src_str))
611 total += tgt_size
612 elif xf.style in ("bsdiff", "imgdiff"):
613 assert xf.tgt_ranges
614 assert xf.src_ranges
615 # take into account automatic stashing of overlapping blocks
616 if xf.src_ranges.overlaps(xf.tgt_ranges):
617 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
618 if temp_stash_usage > max_stashed_blocks:
619 max_stashed_blocks = temp_stash_usage
620
621 self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)
622
623 out.append("%s %d %d %s %s %s %s\n" % (
624 xf.style,
625 xf.patch_start, xf.patch_len,
626 xf.src_sha1,
627 xf.tgt_sha1,
628 xf.tgt_ranges.to_string_raw(), src_str))
629 total += tgt_size
630 elif xf.style == "zero":
631 assert xf.tgt_ranges
632 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
633 assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
634 total += to_zero.size()
635 else:
636 raise ValueError("unknown transfer style '%s'\n" % xf.style)
637
638 if free_string:
639 out.append("".join(free_string))
640 stashed_blocks -= free_size
641
642 if common.OPTIONS.cache_size is not None:
      # Sanity check: abort if we're going to need more stash space than
      # the allowed size (cache_size * threshold). There are two purposes of
      # having a threshold here: a) part of the cache may have been occupied
      # by some recovery logs; b) it buys us some time to deal with the
      # oversize issue.
648 cache_size = common.OPTIONS.cache_size
649 stash_threshold = common.OPTIONS.stash_threshold
650 max_allowed = cache_size * stash_threshold
651 assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
652 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
653 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
654 self.tgt.blocksize, max_allowed, cache_size,
655 stash_threshold)
656
657 self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)
658
659 # Zero out extended blocks as a workaround for bug 20881595.
660 if self.tgt.extended:
661 assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
662 self.tgt.extended.size())
663 total += self.tgt.extended.size()
664
    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image and b) will not be touched by dm-verity. Out of
    # those blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest will be erased at the end. This is to
    # work around an eMMC issue observed on some devices, which may otherwise
    # become starved of clean blocks and thus fail the update. (b/28347095)
671 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
672 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
673 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
674
675 erase_first = new_dontcare.subtract(self.touched_src_ranges)
676 if erase_first:
677 out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))
678
679 erase_last = new_dontcare.subtract(erase_first)
680 if erase_last:
681 out.append("erase %s\n" % (erase_last.to_string_raw(),))
682
683 out.insert(0, "%d\n" % (self.version,)) # format version number
684 out.insert(1, "%d\n" % (total,))
685 # v3+: the number of stash slots is unused.
686 out.insert(2, "0\n")
687 out.insert(3, str(max_stashed_blocks) + "\n")
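    # The emitted transfer list therefore starts with four header lines, e.g.
    # (illustrative values):
    #   4         <- transfer list version
    #   123456    <- total blocks written
    #   0         <- number of stash slots (unused in v3+)
    #   512       <- maximum number of blocks stashed simultaneously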
688
689 with open(prefix + ".transfer.list", "wb") as f:
690 for i in out:
691 f.write(i)
692
693 self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
694 OPTIONS = common.OPTIONS
695 if OPTIONS.cache_size is not None:
696 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
697 print("max stashed blocks: %d (%d bytes), "
698 "limit: %d bytes (%.2f%%)\n" % (
699 max_stashed_blocks, self._max_stashed_size, max_allowed,
700 self._max_stashed_size * 100.0 / max_allowed))
701 else:
702 print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
703 max_stashed_blocks, self._max_stashed_size))
704
705 def ReviseStashSize(self):
706 print("Revising stash size...")
707 stash_map = {}
708
709 # Create the map between a stash and its def/use points. For example, for a
710 # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
711 for xf in self.transfers:
712 # Command xf defines (stores) all the stashes in stash_before.
713 for stash_raw_id, sr in xf.stash_before:
714 stash_map[stash_raw_id] = (sr, xf)
715
716 # Record all the stashes command xf uses.
717 for stash_raw_id, _ in xf.use_stash:
718 stash_map[stash_raw_id] += (xf,)
719
720 # Compute the maximum blocks available for stash based on /cache size and
721 # the threshold.
722 cache_size = common.OPTIONS.cache_size
723 stash_threshold = common.OPTIONS.stash_threshold
724 max_allowed = cache_size * stash_threshold / self.tgt.blocksize
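    # For example (illustrative numbers), a 128 MiB /cache partition with a
    # 0.8 stash threshold allows 0.8 * 128 MiB / 4096 = ~26214 blocks
    # (about 102 MiB) to be stashed at any point in time.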
725
726 # See the comments for 'stashes' in WriteTransfers().
727 stashes = {}
728 stashed_blocks = 0
729 new_blocks = 0
730
    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires more stash than is available, we delete the
    # stash by replacing the command that uses it with a "new" command
    # instead.
735 for xf in self.transfers:
736 replaced_cmds = []
737
738 # xf.stash_before generates explicit stash commands.
739 for stash_raw_id, sr in xf.stash_before:
740 # Check the post-command stashed_blocks.
741 stashed_blocks_after = stashed_blocks
742 sh = self.src.RangeSha1(sr)
743 if sh not in stashes:
744 stashed_blocks_after += sr.size()
745
746 if stashed_blocks_after > max_allowed:
747 # We cannot stash this one for a later command. Find out the command
748 # that will use this stash and replace the command with "new".
749 use_cmd = stash_map[stash_raw_id][2]
750 replaced_cmds.append(use_cmd)
751 print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
752 else:
753 # Update the stashes map.
754 if sh in stashes:
755 stashes[sh] += 1
756 else:
757 stashes[sh] = 1
758 stashed_blocks = stashed_blocks_after
759
760 # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
761 # ComputePatches(), they both have the style of "diff".
762 if xf.style == "diff":
763 assert xf.tgt_ranges and xf.src_ranges
764 if xf.src_ranges.overlaps(xf.tgt_ranges):
765 if stashed_blocks + xf.src_ranges.size() > max_allowed:
766 replaced_cmds.append(xf)
767 print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
768
769 # Replace the commands in replaced_cmds with "new"s.
770 for cmd in replaced_cmds:
771 # It no longer uses any commands in "use_stash". Remove the def points
772 # for all those stashes.
773 for stash_raw_id, sr in cmd.use_stash:
774 def_cmd = stash_map[stash_raw_id][1]
775 assert (stash_raw_id, sr) in def_cmd.stash_before
776 def_cmd.stash_before.remove((stash_raw_id, sr))
777
        # Add up the blocks that violate the space limit; the total is
        # printed to screen later.
780 new_blocks += cmd.tgt_ranges.size()
781 cmd.ConvertToNew()
782
783 # xf.use_stash may generate free commands.
784 for _, sr in xf.use_stash:
785 sh = self.src.RangeSha1(sr)
786 assert sh in stashes
787 stashes[sh] -= 1
788 if stashes[sh] == 0:
789 stashed_blocks -= sr.size()
790 stashes.pop(sh)
791
792 num_of_bytes = new_blocks * self.tgt.blocksize
793 print(" Total %d blocks (%d bytes) are packed as new blocks due to "
794 "insufficient cache size." % (new_blocks, num_of_bytes))
795 return new_blocks
796
797 def ComputePatches(self, prefix):
798 print("Reticulating splines...")
799 diff_queue = []
800 patch_num = 0
801 with open(prefix + ".new.dat", "wb") as new_f:
802 for index, xf in enumerate(self.transfers):
803 if xf.style == "zero":
804 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
805 print("%10d %10d (%6.2f%%) %7s %s %s" % (
806 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
807 str(xf.tgt_ranges)))
808
809 elif xf.style == "new":
810 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
811 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
812 print("%10d %10d (%6.2f%%) %7s %s %s" % (
813 tgt_size, tgt_size, 100.0, xf.style,
814 xf.tgt_name, str(xf.tgt_ranges)))
815
816 elif xf.style == "diff":
817 # We can't compare src and tgt directly because they may have
818 # the same content but be broken up into blocks differently, eg:
819 #
820 # ["he", "llo"] vs ["h", "ello"]
821 #
822 # We want those to compare equal, ideally without having to
823 # actually concatenate the strings (these may be tens of
824 # megabytes).
825 if xf.src_sha1 == xf.tgt_sha1:
826 # These are identical; we don't need to generate a patch,
827 # just issue copy commands on the device.
828 xf.style = "move"
829 xf.patch = None
830 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
831 if xf.src_ranges != xf.tgt_ranges:
832 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
833 tgt_size, tgt_size, 100.0, xf.style,
834 xf.tgt_name if xf.tgt_name == xf.src_name else (
835 xf.tgt_name + " (from " + xf.src_name + ")"),
836 str(xf.tgt_ranges), str(xf.src_ranges)))
837 else:
838 if xf.patch:
839 # We have already generated the patch with imgdiff. Check if the
840 # transfer is intact.
841 assert not self.disable_imgdiff
842 imgdiff = True
843 if (xf.src_ranges.extra.get('trimmed') or
844 xf.tgt_ranges.extra.get('trimmed')):
845 imgdiff = False
846 xf.patch = None
847 else:
848 imgdiff = self.CanUseImgdiff(
849 xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
850 xf.style = "imgdiff" if imgdiff else "bsdiff"
851 diff_queue.append((index, imgdiff, patch_num))
852 patch_num += 1
853
854 else:
855 assert False, "unknown style " + xf.style
856
857 if diff_queue:
858 if self.threads > 1:
859 print("Computing patches (using %d threads)..." % (self.threads,))
860 else:
861 print("Computing patches...")
862
863 diff_total = len(diff_queue)
864 patches = [None] * diff_total
865 error_messages = []
866
      # Using multiprocessing doesn't give additional benefits, due to the
      # pattern of the code. The diffing work is done by subprocess.call,
      # which already runs in a separate process (not affected much by the
      # GIL - Global Interpreter Lock). Using multiprocessing also requires
      # either a) writing the diff input files in the main process before
      # forking, or b) reopening the image file (SparseImage) in the worker
      # processes. Neither of those would improve the performance further.
874 lock = threading.Lock()
875 def diff_worker():
876 while True:
877 with lock:
878 if not diff_queue:
879 return
880 xf_index, imgdiff, patch_index = diff_queue.pop()
881 xf = self.transfers[xf_index]
882
883 if sys.stdout.isatty():
884 diff_left = len(diff_queue)
885 progress = (diff_total - diff_left) * 100 / diff_total
886 # '\033[K' is to clear to EOL.
887 print(' [%3d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
888 sys.stdout.flush()
889
890 patch = xf.patch
891 if not patch:
892 src_ranges = xf.src_ranges
893 tgt_ranges = xf.tgt_ranges
894
895 src_file = common.MakeTempFile(prefix="src-")
896 with open(src_file, "wb") as fd:
897 self.src.WriteRangeDataToFd(src_ranges, fd)
898
899 tgt_file = common.MakeTempFile(prefix="tgt-")
900 with open(tgt_file, "wb") as fd:
901 self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
902
903 message = []
904 try:
905 patch = compute_patch(src_file, tgt_file, imgdiff)
906 except ValueError as e:
907 message.append(
908 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
909 "imgdiff" if imgdiff else "bsdiff",
910 xf.tgt_name if xf.tgt_name == xf.src_name else
911 xf.tgt_name + " (from " + xf.src_name + ")",
912 xf.tgt_ranges, xf.src_ranges, e.message))
913 if message:
914 with lock:
915 error_messages.extend(message)
916
917 with lock:
918 patches[patch_index] = (xf_index, patch)
919
920 threads = [threading.Thread(target=diff_worker)
921 for _ in range(self.threads)]
922 for th in threads:
923 th.start()
924 while threads:
925 threads.pop().join()
926
927 if sys.stdout.isatty():
928 print('\n')
929
930 if error_messages:
931 print('ERROR:')
932 print('\n'.join(error_messages))
933 print('\n\n\n')
934 sys.exit(1)
935 else:
936 patches = []
937
938 offset = 0
939 with open(prefix + ".patch.dat", "wb") as patch_fd:
940 for index, patch in patches:
941 xf = self.transfers[index]
942 xf.patch_len = len(patch)
943 xf.patch_start = offset
944 offset += xf.patch_len
945 patch_fd.write(patch)
946
947 if common.OPTIONS.verbose:
948 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
949 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
950 xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
951 xf.style,
952 xf.tgt_name if xf.tgt_name == xf.src_name else (
953 xf.tgt_name + " (from " + xf.src_name + ")"),
954 xf.tgt_ranges, xf.src_ranges))
955
  def AssertSha1Good(self):
    """Checks the SHA-1 of the src & tgt blocks in the transfer list.

    Double-checks the SHA-1 values to avoid the issue in b/71908713, where
    SparseImage.RangeSha1() messed up the hash calculation in a multi-threaded
    environment. That specific problem has been fixed by protecting the
    underlying generator function 'SparseImage._GetRangeData()' with a lock.
    """
964 for xf in self.transfers:
965 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
966 assert xf.tgt_sha1 == tgt_sha1
967 if xf.style == "diff":
968 src_sha1 = self.src.RangeSha1(xf.src_ranges)
969 assert xf.src_sha1 == src_sha1
970
971 def AssertSequenceGood(self):
972 # Simulate the sequences of transfers we will output, and check that:
973 # - we never read a block after writing it, and
974 # - we write every block we care about exactly once.
975
976 # Start with no blocks having been touched yet.
977 touched = array.array("B", "\0" * self.tgt.total_blocks)
978
979 # Imagine processing the transfers in order.
980 for xf in self.transfers:
981 # Check that the input blocks for this transfer haven't yet been touched.
982
983 x = xf.src_ranges
984 for _, sr in xf.use_stash:
985 x = x.subtract(sr)
986
987 for s, e in x:
        # The source image could be larger. Don't check the blocks that are in
        # the source image only, since they are not in 'touched' and won't
        # ever be touched.
991 for i in range(s, min(e, self.tgt.total_blocks)):
992 assert touched[i] == 0
993
994 # Check that the output blocks for this transfer haven't yet
995 # been touched, and touch all the blocks written by this
996 # transfer.
997 for s, e in xf.tgt_ranges:
998 for i in range(s, e):
999 assert touched[i] == 0
1000 touched[i] = 1
1001
1002 # Check that we've written every target block.
1003 for s, e in self.tgt.care_map:
1004 for i in range(s, e):
1005 assert touched[i] == 1
1006
1007 def ImproveVertexSequence(self):
1008 print("Improving vertex order...")
1009
1010 # At this point our digraph is acyclic; we reversed any edges that
1011 # were backwards in the heuristically-generated sequence. The
1012 # previously-generated order is still acceptable, but we hope to
1013 # find a better order that needs less memory for stashed data.
1014 # Now we do a topological sort to generate a new vertex order,
1015 # using a greedy algorithm to choose which vertex goes next
1016 # whenever we have a choice.
1017
1018 # Make a copy of the edge set; this copy will get destroyed by the
1019 # algorithm.
1020 for xf in self.transfers:
1021 xf.incoming = xf.goes_after.copy()
1022 xf.outgoing = xf.goes_before.copy()
1023
1024 L = [] # the new vertex order
1025
1026 # S is the set of sources in the remaining graph; we always choose
1027 # the one that leaves the least amount of stashed data after it's
1028 # executed.
1029 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
1030 if not u.incoming]
1031 heapq.heapify(S)
1032
1033 while S:
1034 _, _, xf = heapq.heappop(S)
1035 L.append(xf)
1036 for u in xf.outgoing:
1037 del u.incoming[xf]
1038 if not u.incoming:
1039 heapq.heappush(S, (u.NetStashChange(), u.order, u))
1040
1041 # if this fails then our graph had a cycle.
1042 assert len(L) == len(self.transfers)
1043
1044 self.transfers = L
1045 for i, xf in enumerate(L):
1046 xf.order = i
1047
1048 def RemoveBackwardEdges(self):
1049 print("Removing backward edges...")
1050 in_order = 0
1051 out_of_order = 0
1052 lost_source = 0
1053
1054 for xf in self.transfers:
1055 lost = 0
1056 size = xf.src_ranges.size()
1057 for u in xf.goes_before:
1058 # xf should go before u
1059 if xf.order < u.order:
1060 # it does, hurray!
1061 in_order += 1
1062 else:
1063 # it doesn't, boo. trim the blocks that u writes from xf's
1064 # source, so that xf can go after u.
1065 out_of_order += 1
1066 assert xf.src_ranges.overlaps(u.tgt_ranges)
1067 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
1068 xf.src_ranges.extra['trimmed'] = True
1069
1070 if xf.style == "diff" and not xf.src_ranges:
1071 # nothing left to diff from; treat as new data
1072 xf.style = "new"
1073
1074 lost = size - xf.src_ranges.size()
1075 lost_source += lost
1076
1077 print((" %d/%d dependencies (%.2f%%) were violated; "
1078 "%d source blocks removed.") %
1079 (out_of_order, in_order + out_of_order,
1080 (out_of_order * 100.0 / (in_order + out_of_order))
1081 if (in_order + out_of_order) else 0.0,
1082 lost_source))
1083
  def ReverseBackwardEdges(self):
    """Reverses unsatisfied edges and computes pairs of stashed blocks.

    For each transfer, make sure it properly stashes the blocks it touches that
    will be used by later transfers. It uses pairs of (stash_raw_id, range) to
    record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
    identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
    it is possible to have multiple pairs with different 'stash_raw_id's. Each
    'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
    blocks will be written to the same stash slot in WriteTransfers().
    """
1095
1096 print("Reversing backward edges...")
1097 in_order = 0
1098 out_of_order = 0
1099 stash_raw_id = 0
1100 stash_size = 0
1101
1102 for xf in self.transfers:
1103 for u in xf.goes_before.copy():
1104 # xf should go before u
1105 if xf.order < u.order:
1106 # it does, hurray!
1107 in_order += 1
1108 else:
1109 # it doesn't, boo. modify u to stash the blocks that it
1110 # writes that xf wants to read, and then require u to go
1111 # before xf.
1112 out_of_order += 1
1113
1114 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1115 assert overlap
1116
1117 u.stash_before.append((stash_raw_id, overlap))
1118 xf.use_stash.append((stash_raw_id, overlap))
1119 stash_raw_id += 1
1120 stash_size += overlap.size()
1121
1122 # reverse the edge direction; now xf must go after u
1123 del xf.goes_before[u]
1124 del u.goes_after[xf]
1125 xf.goes_after[u] = None # value doesn't matter
1126 u.goes_before[xf] = None
1127
1128 print((" %d/%d dependencies (%.2f%%) were violated; "
1129 "%d source blocks stashed.") %
1130 (out_of_order, in_order + out_of_order,
1131 (out_of_order * 100.0 / (in_order + out_of_order))
1132 if (in_order + out_of_order) else 0.0,
1133 stash_size))
1134
1135 def FindVertexSequence(self):
1136 print("Finding vertex sequence...")
1137
1138 # This is based on "A Fast & Effective Heuristic for the Feedback
1139 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
1140 # it as starting with the digraph G and moving all the vertices to
1141 # be on a horizontal line in some order, trying to minimize the
1142 # number of edges that end up pointing to the left. Left-pointing
1143 # edges will get removed to turn the digraph into a DAG. In this
1144 # case each edge has a weight which is the number of source blocks
1145 # we'll lose if that edge is removed; we try to minimize the total
1146 # weight rather than just the number of edges.
1147
1148 # Make a copy of the edge set; this copy will get destroyed by the
1149 # algorithm.
1150 for xf in self.transfers:
1151 xf.incoming = xf.goes_after.copy()
1152 xf.outgoing = xf.goes_before.copy()
1153 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
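      # For example (illustrative sizes), a transfer whose outgoing edges total
      # 100 blocks and whose incoming edges total 30 blocks gets score +70,
      # making it a good candidate to place early in the sequence.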
1154
1155 # We use an OrderedDict instead of just a set so that the output
1156 # is repeatable; otherwise it would depend on the hash values of
1157 # the transfer objects.
1158 G = OrderedDict()
1159 for xf in self.transfers:
1160 G[xf] = None
1161 s1 = deque() # the left side of the sequence, built from left to right
1162 s2 = deque() # the right side of the sequence, built from right to left
1163
1164 heap = []
1165 for xf in self.transfers:
1166 xf.heap_item = HeapItem(xf)
1167 heap.append(xf.heap_item)
1168 heapq.heapify(heap)
1169
1170 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1171 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1172 # { key1: None, key2: None, ... }.
1173 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1174 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
1175
1176 def adjust_score(iu, delta):
1177 iu.score += delta
1178 iu.heap_item.clear()
1179 iu.heap_item = HeapItem(iu)
1180 heapq.heappush(heap, iu.heap_item)
1181
1182 while G:
1183 # Put all sinks at the end of the sequence.
1184 while sinks:
1185 new_sinks = OrderedDict()
1186 for u in sinks:
1187 if u not in G:
1188 continue
1189 s2.appendleft(u)
1190 del G[u]
1191 for iu in u.incoming:
1192 adjust_score(iu, -iu.outgoing.pop(u))
1193 if not iu.outgoing:
1194 new_sinks[iu] = None
1195 sinks = new_sinks
1196
1197 # Put all the sources at the beginning of the sequence.
1198 while sources:
1199 new_sources = OrderedDict()
1200 for u in sources:
1201 if u not in G:
1202 continue
1203 s1.append(u)
1204 del G[u]
1205 for iu in u.outgoing:
1206 adjust_score(iu, +iu.incoming.pop(u))
1207 if not iu.incoming:
1208 new_sources[iu] = None
1209 sources = new_sources
1210
1211 if not G:
1212 break
1213
1214 # Find the "best" vertex to put next. "Best" is the one that
1215 # maximizes the net difference in source blocks saved we get by
1216 # pretending it's a source rather than a sink.
1217
1218 while True:
1219 u = heapq.heappop(heap)
1220 if u and u.item in G:
1221 u = u.item
1222 break
1223
1224 s1.append(u)
1225 del G[u]
1226 for iu in u.outgoing:
1227 adjust_score(iu, +iu.incoming.pop(u))
1228 if not iu.incoming:
1229 sources[iu] = None
1230
1231 for iu in u.incoming:
1232 adjust_score(iu, -iu.outgoing.pop(u))
1233 if not iu.outgoing:
1234 sinks[iu] = None
1235
1236 # Now record the sequence in the 'order' field of each transfer,
1237 # and by rearranging self.transfers to be in the chosen sequence.
1238
1239 new_transfers = []
1240 for x in itertools.chain(s1, s2):
1241 x.order = len(new_transfers)
1242 new_transfers.append(x)
1243 del x.incoming
1244 del x.outgoing
1245
1246 self.transfers = new_transfers
1247
1248 def GenerateDigraph(self):
1249 print("Generating digraph...")
1250
1251 # Each item of source_ranges will be:
1252 # - None, if that block is not used as a source,
1253 # - an ordered set of transfers.
1254 source_ranges = []
1255 for b in self.transfers:
1256 for s, e in b.src_ranges:
1257 if e > len(source_ranges):
1258 source_ranges.extend([None] * (e-len(source_ranges)))
1259 for i in range(s, e):
1260 if source_ranges[i] is None:
1261 source_ranges[i] = OrderedDict.fromkeys([b])
1262 else:
1263 source_ranges[i][b] = None
1264
1265 for a in self.transfers:
1266 intersections = OrderedDict()
1267 for s, e in a.tgt_ranges:
1268 for i in range(s, e):
1269 if i >= len(source_ranges):
1270 break
1271 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1272 if source_ranges[i] is not None:
1273 for j in source_ranges[i]:
1274 intersections[j] = None
1275
1276 for b in intersections:
1277 if a is b:
1278 continue
1279
1280 # If the blocks written by A are read by B, then B needs to go before A.
1281 i = a.tgt_ranges.intersect(b.src_ranges)
1282 if i:
1283 if b.src_name == "__ZERO":
1284 # the cost of removing source blocks for the __ZERO domain
1285 # is (nearly) zero.
1286 size = 0
1287 else:
1288 size = i.size()
1289 b.goes_before[a] = size
1290 a.goes_after[b] = size
1291
1292 def FindTransfers(self):
1293 """Parse the file_map to generate all the transfers."""
1294
    def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                             src_ranges, style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for the resumable feature.
      However, with the growth of file sizes and the shrinking of the cache
      partition, source blocks can be too large to be stashed. If a file
      occupies too many blocks, we split it into smaller pieces by creating
      multiple Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead."""
1310
1311 pieces = 0
1312 while (tgt_ranges.size() > max_blocks_per_transfer and
1313 src_ranges.size() > max_blocks_per_transfer):
1314 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1315 src_split_name = "%s-%d" % (src_name, pieces)
1316 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1317 src_first = src_ranges.first(max_blocks_per_transfer)
1318
1319 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1320 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1321 style, by_id)
1322
1323 tgt_ranges = tgt_ranges.subtract(tgt_first)
1324 src_ranges = src_ranges.subtract(src_first)
1325 pieces += 1
1326
1327 # Handle remaining blocks.
1328 if tgt_ranges.size() or src_ranges.size():
1329 # Must be both non-empty.
1330 assert tgt_ranges.size() and src_ranges.size()
1331 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1332 src_split_name = "%s-%d" % (src_name, pieces)
1333 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1334 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1335 style, by_id)
1336
    def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
                          by_id):
      """Find all the zip files and split the others with a fixed chunk size.

      This function will construct a list of zip archives, which will later be
      split by imgdiff to reduce the final patch size. The other files will
      simply be split based on a fixed chunk size, at the cost of a potential
      patch size penalty.
      """
1346
1347 assert style == "diff"
1348
1349 # Change nothing for small files.
1350 if (tgt_ranges.size() <= max_blocks_per_transfer and
1351 src_ranges.size() <= max_blocks_per_transfer):
1352 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1353 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1354 style, by_id)
1355 return
1356
1357 # Split large APKs with imgdiff, if possible. We're intentionally checking
1358 # file types one more time (CanUseImgdiff() checks that as well), before
1359 # calling the costly RangeSha1()s.
1360 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1361 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
1362 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
1363 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1364 return
1365
1366 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1367 src_ranges, style, by_id)
1368
1369 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1370 split=False):
1371 """Wrapper function for adding a Transfer()."""
1372
1373 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1374 # otherwise add the Transfer() as is.
1375 if style != "diff" or not split:
1376 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1377 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1378 style, by_id)
1379 return
1380
      # Handle .odex files specially to analyze the block-wise difference. If
      # most of the blocks are identical with only a few changes (e.g. the
      # header), we will patch the changed blocks only. This avoids stashing
      # unchanged blocks while patching. We limit the analysis to files whose
      # sizes are unchanged, so as not to increase the OTA generation cost too
      # much.
1387 if (tgt_name.split(".")[-1].lower() == 'odex' and
1388 tgt_ranges.size() == src_ranges.size()):
1389
        # The 0.5 threshold can be further tuned. The tradeoff is: if only very
        # few blocks remain identical, we lose the opportunity to use imgdiff,
        # which may have a better compression ratio than bsdiff.
1393 crop_threshold = 0.5
1394
1395 tgt_skipped = RangeSet()
1396 src_skipped = RangeSet()
1397 tgt_size = tgt_ranges.size()
1398 tgt_changed = 0
1399 for src_block, tgt_block in zip(src_ranges.next_item(),
1400 tgt_ranges.next_item()):
1401 src_rs = RangeSet(str(src_block))
1402 tgt_rs = RangeSet(str(tgt_block))
1403 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1404 tgt_skipped = tgt_skipped.union(tgt_rs)
1405 src_skipped = src_skipped.union(src_rs)
1406 else:
1407 tgt_changed += tgt_rs.size()
1408
1409 # Terminate early if no clear sign of benefits.
1410 if tgt_changed > tgt_size * crop_threshold:
1411 break
1412
1413 if tgt_changed < tgt_size * crop_threshold:
1414 assert tgt_changed + tgt_skipped.size() == tgt_size
1415 print('%10d %10d (%6.2f%%) %s' % (
1416 tgt_skipped.size(), tgt_size,
1417 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
1418 AddSplitTransfers(
1419 "%s-skipped" % (tgt_name,),
1420 "%s-skipped" % (src_name,),
1421 tgt_skipped, src_skipped, style, by_id)
1422
1423 # Intentionally change the file extension to avoid being imgdiff'd as
1424 # the files are no longer in their original format.
1425 tgt_name = "%s-cropped" % (tgt_name,)
1426 src_name = "%s-cropped" % (src_name,)
1427 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1428 src_ranges = src_ranges.subtract(src_skipped)
1429
1430 # Possibly having no changed blocks.
1431 if not tgt_ranges:
1432 return
1433
1434 # Add the transfer(s).
1435 AddSplitTransfers(
1436 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1437
1438 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1439 split_info):
1440 """Parse the split_info and return a list of info tuples.
1441
1442 Args:
1443 patch_size: total size of the patch file.
1444 tgt_ranges: Ranges of the target file within the original image.
1445 src_ranges: Ranges of the source file within the original image.
1446 split_info format:
1447 imgdiff version#
1448 count of pieces
1449 <patch_size_1> <tgt_size_1> <src_ranges_1>
1450 ...
1451 <patch_size_n> <tgt_size_n> <src_ranges_n>
1452
      Returns:
        A list of (patch_start, patch_length, split_tgt_ranges,
        split_src_ranges) tuples.
      """
1456
1457 version = int(split_info[0])
1458 assert version == 2
1459 count = int(split_info[1])
1460 assert len(split_info) - 2 == count
1461
1462 split_info_list = []
1463 patch_start = 0
1464 tgt_remain = copy.deepcopy(tgt_ranges)
1465 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1466 for line in split_info[2:]:
1467 info = line.split()
1468 assert len(info) == 3
1469 patch_length = int(info[0])
1470
1471 split_tgt_size = int(info[1])
1472 assert split_tgt_size % 4096 == 0
1473 assert split_tgt_size / 4096 <= tgt_remain.size()
1474 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1475 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1476
1477 # Find the split_src_ranges within the image file from its relative
1478 # position in file.
1479 split_src_indices = RangeSet.parse_raw(info[2])
1480 split_src_ranges = RangeSet()
1481 for r in split_src_indices:
1482 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1483 assert not split_src_ranges.overlaps(curr_range)
1484 split_src_ranges = split_src_ranges.union(curr_range)
1485
1486 split_info_list.append((patch_start, patch_length,
1487 split_tgt_ranges, split_src_ranges))
1488 patch_start += patch_length
1489
1490 # Check that the sizes of all the split pieces add up to the final file
1491 # size for patch and target.
1492 assert tgt_remain.size() == 0
1493 assert patch_start == patch_size
1494 return split_info_list
1495
    def SplitLargeApks():
      """Split large APK files.

      Example: Chrome.apk will be split into
        src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
        src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
        ...

      After the split, the target pieces are continuous and block aligned, and
      the source pieces are mutually exclusive. During the split, we also
      generate and save the image patch between src-X & tgt-X. This patch will
      be valid because the block ranges of src-X & tgt-X will always stay the
      same afterwards; but there's a chance we don't use the patch if we
      convert the "diff" command into "new" or "move" later.
      """
1511
1512 while True:
1513 with transfer_lock:
1514 if not large_apks:
1515 return
1516 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1517
1518 src_file = common.MakeTempFile(prefix="src-")
1519 tgt_file = common.MakeTempFile(prefix="tgt-")
1520 with open(src_file, "wb") as src_fd:
1521 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1522 with open(tgt_file, "wb") as tgt_fd:
1523 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
1524
1525 patch_file = common.MakeTempFile(prefix="patch-")
1526 patch_info_file = common.MakeTempFile(prefix="split_info-")
1527 cmd = ["imgdiff", "-z",
1528 "--block-limit={}".format(max_blocks_per_transfer),
1529 "--split-info=" + patch_info_file,
1530 src_file, tgt_file, patch_file]
1531 p = common.Run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
1532 imgdiff_output, _ = p.communicate()
1533 assert p.returncode == 0, \
1534 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1535 src_name, tgt_name, imgdiff_output)
1536
1537 with open(patch_info_file) as patch_info:
1538 lines = patch_info.readlines()
1539
1540 patch_size_total = os.path.getsize(patch_file)
1541 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1542 tgt_ranges, src_ranges,
1543 lines)
1544 for index, (patch_start, patch_length, split_tgt_ranges,
1545 split_src_ranges) in enumerate(split_info_list):
1546 with open(patch_file) as f:
1547 f.seek(patch_start)
1548 patch_content = f.read(patch_length)
1549
1550 split_src_name = "{}-{}".format(src_name, index)
1551 split_tgt_name = "{}-{}".format(tgt_name, index)
1552 split_large_apks.append((split_tgt_name,
1553 split_src_name,
1554 split_tgt_ranges,
1555 split_src_ranges,
1556 patch_content))
1557
1558 print("Finding transfers...")
1559
1560 large_apks = []
1561 split_large_apks = []
1562 cache_size = common.OPTIONS.cache_size
1563 split_threshold = 0.125
1564 max_blocks_per_transfer = int(cache_size * split_threshold /
1565 self.tgt.blocksize)
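    # For example (illustrative numbers), a 128 MiB cache with the 0.125 split
    # threshold caps each piece at int(128 MiB * 0.125 / 4096) = 4096 blocks,
    # i.e. 16 MiB per transfer.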
1566 empty = RangeSet()
1567 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
1568 if tgt_fn == "__ZERO":
1569 # the special "__ZERO" domain is all the blocks not contained
1570 # in any file and that are filled with zeros. We have a
1571 # special transfer style for zero blocks.
1572 src_ranges = self.src.file_map.get("__ZERO", empty)
1573 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1574 "zero", self.transfers)
1575 continue
1576
1577 elif tgt_fn == "__COPY":
1578 # "__COPY" domain includes all the blocks not contained in any
1579 # file and that need to be copied unconditionally to the target.
1580 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
1581 continue
1582
1583 elif tgt_fn in self.src.file_map:
1584 # Look for an exact pathname match in the source.
1585 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1586 "diff", self.transfers, True)
1587 continue
1588
1589 b = os.path.basename(tgt_fn)
1590 if b in self.src_basenames:
1591 # Look for an exact basename match in the source.
1592 src_fn = self.src_basenames[b]
1593 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1594 "diff", self.transfers, True)
1595 continue
1596
1597 b = re.sub("[0-9]+", "#", b)
1598 if b in self.src_numpatterns:
1599 # Look for a 'number pattern' match (a basename match after
1600 # all runs of digits are replaced by "#"). (This is useful
1601 # for .so files that contain version numbers in the filename
1602 # that get bumped.)
1603 src_fn = self.src_numpatterns[b]
1604 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1605 "diff", self.transfers, True)
1606 continue
1607
1608 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
1609
1610 transfer_lock = threading.Lock()
1611 threads = [threading.Thread(target=SplitLargeApks)
1612 for _ in range(self.threads)]
1613 for th in threads:
1614 th.start()
1615 while threads:
1616 threads.pop().join()
1617
    # Sort the split transfers for large APKs to generate a deterministic
    # package.
1619 split_large_apks.sort()
1620 for (tgt_name, src_name, tgt_ranges, src_ranges,
1621 patch) in split_large_apks:
1622 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1623 self.tgt.RangeSha1(tgt_ranges),
1624 self.src.RangeSha1(src_ranges),
1625 "diff", self.transfers)
1626 transfer_split.patch = patch
1627
1628 def AbbreviateSourceNames(self):
1629 for k in self.src.file_map.keys():
1630 b = os.path.basename(k)
1631 self.src_basenames[b] = k
1632 b = re.sub("[0-9]+", "#", b)
1633 self.src_numpatterns[b] = k
1634
1635 @staticmethod
1636 def AssertPartition(total, seq):
1637 """Assert that all the RangeSets in 'seq' form a partition of the
1638 'total' RangeSet (ie, they are nonintersecting and their union
1639 equals 'total')."""
1640
1641 so_far = RangeSet()
1642 for i in seq:
1643 assert not so_far.overlaps(i)
1644 so_far = so_far.union(i)
1645 assert so_far == total