# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import array
import copy
import functools
import heapq
import itertools
import multiprocessing
import os
import os.path
import re
import subprocess
import sys
import threading
from collections import deque, OrderedDict
from hashlib import sha1

import common
from rangelib import RangeSet


__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]

def compute_patch(srcfile, tgtfile, imgdiff=False):
  patchfile = common.MakeTempFile(prefix='patch-')

  cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff']
  cmd.extend([srcfile, tgtfile, patchfile])

  # Don't dump the bsdiff/imgdiff commands, which are not useful for this
  # case, since they only contain temp filenames.
  p = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                 stderr=subprocess.STDOUT)
  output, _ = p.communicate()

  if p.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return f.read()

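# A minimal usage sketch for compute_patch() (hypothetical filenames; it
# assumes the bsdiff/imgdiff host tools are available on PATH, which is what
# common.Run() expects):
#
#   src_file = common.MakeTempFile(prefix="src-")
#   tgt_file = common.MakeTempFile(prefix="tgt-")
#   ...  # write the source/target block data into the two files
#   patch_data = compute_patch(src_file, tgt_file, imgdiff=False)
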
class Image(object):
  def RangeSha1(self, ranges):
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    raise NotImplementedError


class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.total_blocks = 0
    self.file_map = {}

  def RangeSha1(self, ranges):
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")


class DataImage(Image):
  """An image wrapped around a single string of data."""

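  # A minimal usage sketch (hypothetical data): wrapping two blocks' worth of
  # zero bytes yields an image whose blocks all fall into the "__ZERO" domain:
  #
  #   img = DataImage('\0' * 8192)
  #   img.total_blocks        # -> 2
  #   sorted(img.file_map)    # -> ['__ZERO']
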
102 def __init__(self, data, trim=False, pad=False):
103 self.data = data
104 self.blocksize = 4096
105
106 assert not (trim and pad)
107
108 partial = len(self.data) % self.blocksize
109 padded = False
110 if partial > 0:
111 if trim:
112 self.data = self.data[:-partial]
113 elif pad:
114 self.data += '\0' * (self.blocksize - partial)
115 padded = True
116 else:
117 raise ValueError(("data for DataImage must be multiple of %d bytes "
118 "unless trim or pad is specified") %
119 (self.blocksize,))
120
121 assert len(self.data) % self.blocksize == 0
122
123 self.total_blocks = len(self.data) / self.blocksize
124 self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block, even for
    # incremental OTAs. Otherwise the last block may be skipped if it is
    # unchanged in an incremental update, but it would then fail the
    # post-install verification if the padding bytes have non-zero contents.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = RangeSet(data=clobbered_blocks)
135 self.extended = RangeSet()
136
137 zero_blocks = []
138 nonzero_blocks = []
139 reference = '\0' * self.blocksize
140
141 for i in range(self.total_blocks-1 if padded else self.total_blocks):
142 d = self.data[i*self.blocksize : (i+1)*self.blocksize]
143 if d == reference:
144 zero_blocks.append(i)
145 zero_blocks.append(i+1)
146 else:
147 nonzero_blocks.append(i)
148 nonzero_blocks.append(i+1)
149
150 assert zero_blocks or nonzero_blocks or clobbered_blocks
151
152 self.file_map = dict()
153 if zero_blocks:
154 self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
155 if nonzero_blocks:
156 self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
157 if clobbered_blocks:
158 self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)
159
160 def _GetRangeData(self, ranges):
161 for s, e in ranges:
162 yield self.data[s*self.blocksize:e*self.blocksize]
163
164 def RangeSha1(self, ranges):
165 h = sha1()
166 for data in self._GetRangeData(ranges):
167 h.update(data)
168 return h.hexdigest()
169
170 def ReadRangeSet(self, ranges):
171 return [self._GetRangeData(ranges)]
172
173 def TotalSha1(self, include_clobbered_blocks=False):
174 if not include_clobbered_blocks:
175 return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
176 else:
177 return sha1(self.data).hexdigest()
178
179 def WriteRangeDataToFd(self, ranges, fd):
180 for data in self._GetRangeData(ranges):
181 fd.write(data)
182
183
184class Transfer(object):
185 def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
186 src_sha1, style, by_id):
187 self.tgt_name = tgt_name
188 self.src_name = src_name
189 self.tgt_ranges = tgt_ranges
190 self.src_ranges = src_ranges
191 self.tgt_sha1 = tgt_sha1
192 self.src_sha1 = src_sha1
193 self.style = style
194
195 # We use OrderedDict rather than dict so that the output is repeatable;
196 # otherwise it would depend on the hash values of the Transfer objects.
197 self.goes_before = OrderedDict()
198 self.goes_after = OrderedDict()
199
200 self.stash_before = []
201 self.use_stash = []
202
203 self.id = len(by_id)
204 by_id.append(self)
205
206 self._patch = None
207
208 @property
209 def patch(self):
210 return self._patch
211
212 @patch.setter
213 def patch(self, patch):
214 if patch:
215 assert self.style == "diff"
216 self._patch = patch
217
218 def NetStashChange(self):
219 return (sum(sr.size() for (_, sr) in self.stash_before) -
220 sum(sr.size() for (_, sr) in self.use_stash))
221
222 def ConvertToNew(self):
223 assert self.style != "new"
224 self.use_stash = []
225 self.style = "new"
226 self.src_ranges = RangeSet()
227 self.patch = None
228
229 def __str__(self):
230 return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
231 " to " + str(self.tgt_ranges) + ">")
232
233
234@functools.total_ordering
235class HeapItem(object):
236 def __init__(self, item):
237 self.item = item
238 # Negate the score since python's heap is a min-heap and we want the
239 # maximum score.
240 self.score = -item.score
241
242 def clear(self):
243 self.item = None
244
245 def __bool__(self):
246 return self.item is not None
247
248 # Python 2 uses __nonzero__, while Python 3 uses __bool__.
249 __nonzero__ = __bool__
250
  # The remaining comparison operations are generated by the
  # functools.total_ordering decorator.
252 def __eq__(self, other):
253 return self.score == other.score
254
255 def __le__(self, other):
256 return self.score <= other.score
257
258
class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files to which imgdiff will be applied while generating
  a BlockImageDiff. It also logs the files that cannot use imgdiff, along with
  the specific reasons. The stats are only meaningful when imgdiff is not
  disabled by the caller of BlockImageDiff. In addition, only files with
  supported types (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to
  be logged.
  """
268
269 USED_IMGDIFF = "APK files diff'd with imgdiff"
270 USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"
271
272 # Reasons for not applying imgdiff on APKs.
273 SKIPPED_TRIMMED = "Not used imgdiff due to trimmed RangeSet"
274 SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
275 SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
276 SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"
277
278 # The list of valid reasons, which will also be the dumped order in a report.
279 REASONS = (
280 USED_IMGDIFF,
281 USED_IMGDIFF_LARGE_APK,
282 SKIPPED_TRIMMED,
283 SKIPPED_NONMONOTONIC,
284 SKIPPED_SHARED_BLOCKS,
285 SKIPPED_INCOMPLETE,
286 )
287
288 def __init__(self):
289 self.stats = {}
290
291 def Log(self, filename, reason):
292 """Logs why imgdiff can or cannot be applied to the given filename.
293
294 Args:
295 filename: The filename string.
296 reason: One of the reason constants listed in REASONS.
297
298 Raises:
299 AssertionError: On unsupported filetypes or invalid reason.
300 """
301 assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
302 assert reason in self.REASONS
303
304 if reason not in self.stats:
305 self.stats[reason] = set()
306 self.stats[reason].add(filename)
307
308 def Report(self):
309 """Prints a report of the collected imgdiff stats."""
310
311 def print_header(header, separator):
312 print(header)
313 print(separator * len(header) + '\n')
314
315 print_header(' Imgdiff Stats Report ', '=')
316 for key in self.REASONS:
317 if key not in self.stats:
318 continue
319 values = self.stats[key]
320 section_header = ' {} (count: {}) '.format(key, len(values))
321 print_header(section_header, '-')
322 print(''.join([' {}\n'.format(name) for name in values]))
323
324
# BlockImageDiff works on two image objects. An image object is
# anything that provides the following attributes:
#
#    blocksize: the size in bytes of a block, currently must be 4096.
#
#    total_blocks: the total size of the partition/image, in blocks.
#
#    care_map: a RangeSet containing which blocks (in the range [0,
#      total_blocks) we actually care about; i.e. which blocks contain
#      data.
#
#    file_map: a dict that partitions the blocks contained in care_map
#      into smaller domains that are useful for doing diffs on.
#      (Typically a domain is a file, and the key in file_map is the
#      pathname.)
#
#    clobbered_blocks: a RangeSet containing which blocks contain data
#      but may be altered by the FS. They need to be excluded when
#      verifying the partition integrity.
#
#    ReadRangeSet(): a function that takes a RangeSet and returns the
#      data contained in the image blocks of that RangeSet. The data
#      is returned as a list or tuple of strings; concatenating the
#      elements together should produce the requested data.
#      Implementations are free to break up the data into list/tuple
#      elements in any way that is convenient.
#
#    RangeSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the specified range.
#
#    TotalSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the image (i.e. all the blocks in the
#      care_map minus clobbered_blocks, or including the clobbered
#      blocks if include_clobbered_blocks is True).
#
# When creating a BlockImageDiff, the src image may be None, in which
# case the list of transfers produced will never read from the
# original image.

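# A typical driver sketch (hedged; 'tgt_img' and 'src_img' stand for any
# objects providing the attributes above, e.g. sparse_img.SparseImage):
#
#   b = BlockImageDiff(tgt_img, src_img, version=4)
#   b.Compute("out/system")   # writes out/system.transfer.list,
#                             # out/system.new.dat and out/system.patch.dat
#
# Compute() also expects common.OPTIONS.cache_size (and stash_threshold) to
# have been set by the caller.
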
class BlockImageDiff(object):
  def __init__(self, tgt, src=None, threads=None, version=4,
               disable_imgdiff=False):
    if threads is None:
      threads = multiprocessing.cpu_count() // 2
      if threads == 0:
        threads = 1
    self.threads = threads
    self.version = version
    self.transfers = []
    self.src_basenames = {}
    self.src_numpatterns = {}
    self._max_stashed_size = 0
    self.touched_src_ranges = RangeSet()
    self.touched_src_sha1 = None
    self.disable_imgdiff = disable_imgdiff
    self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None

    assert version in (3, 4)

    self.tgt = tgt
    if src is None:
      src = EmptyImage()
    self.src = src

    # The updater code that installs the patch always uses 4k blocks.
    assert tgt.blocksize == 4096
    assert src.blocksize == 4096

    # The range sets in each filemap should comprise a partition of
    # the care map.
    self.AssertPartition(src.care_map, src.file_map.values())
    self.AssertPartition(tgt.care_map, tgt.file_map.values())

398 @property
399 def max_stashed_size(self):
400 return self._max_stashed_size
401
402 @staticmethod
403 def FileTypeSupportedByImgdiff(filename):
404 """Returns whether the file type is supported by imgdiff."""
405 return filename.lower().endswith(('.apk', '.jar', '.zip'))
406
  def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
    """Checks whether we can apply imgdiff for the given RangeSets.

    For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
    'imgdiff -z' if possible, because it usually produces significantly
    smaller patches than bsdiff.

    This is permissible if all of the following conditions hold.
      - imgdiff hasn't been disabled by the caller (e.g. for squashfs);
      - The file type is supported by imgdiff;
      - The source and target blocks are monotonic (i.e. the data is stored
        with blocks in increasing order);
      - Neither file contains shared blocks;
      - Both files have complete lists of blocks;
      - We haven't removed any blocks from the source set.

    If all these conditions are satisfied, concatenating all the blocks in the
    RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
    in the last block). imgdiff is fine with extra zeros at the end of the
    file.

    Args:
      name: The filename to be diff'd.
      tgt_ranges: The target RangeSet.
      src_ranges: The source RangeSet.
      large_apk: Whether this is to split a large APK.

    Returns:
      A boolean result.
    """
436 if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
437 return False
438
439 if not tgt_ranges.monotonic or not src_ranges.monotonic:
440 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
441 return False
442
443 if (tgt_ranges.extra.get('uses_shared_blocks') or
444 src_ranges.extra.get('uses_shared_blocks')):
445 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
446 return False
447
448 if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
449 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
450 return False
451
452 if tgt_ranges.extra.get('trimmed') or src_ranges.extra.get('trimmed'):
453 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_TRIMMED)
454 return False
455
456 reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
457 else ImgdiffStats.USED_IMGDIFF)
458 self.imgdiff_stats.Log(name, reason)
459 return True
460
461 def Compute(self, prefix):
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match, if available, otherwise
    #   2) an exact basename match, if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#", if available, otherwise
    #   4) we have no source for this target.
469 self.AbbreviateSourceNames()
470 self.FindTransfers()
471
472 # Find the ordering dependencies among transfers (this is O(n^2)
473 # in the number of transfers).
474 self.GenerateDigraph()
475 # Find a sequence of transfers that satisfies as many ordering
476 # dependencies as possible (heuristically).
477 self.FindVertexSequence()
478 # Fix up the ordering dependencies that the sequence didn't
479 # satisfy.
480 self.ReverseBackwardEdges()
481 self.ImproveVertexSequence()
482
483 # Ensure the runtime stash size is under the limit.
484 if common.OPTIONS.cache_size is not None:
485 self.ReviseStashSize()
486
487 # Double-check our work.
488 self.AssertSequenceGood()
489 self.AssertSha1Good()
490
491 self.ComputePatches(prefix)
492 self.WriteTransfers(prefix)
493
494 # Report the imgdiff stats.
495 if common.OPTIONS.verbose and not self.disable_imgdiff:
496 self.imgdiff_stats.Report()
497
498 def WriteTransfers(self, prefix):
499 def WriteSplitTransfers(out, style, target_blocks):
500 """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.
501
502 This prevents the target size of one command from being too large; and
503 might help to avoid fsync errors on some devices."""
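      # For example (illustrative), a 2500-block 'new' target would be emitted
      # as three commands covering 1024, 1024 and 452 blocks respectively.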
504
505 assert style == "new" or style == "zero"
506 blocks_limit = 1024
507 total = 0
508 while target_blocks:
509 blocks_to_write = target_blocks.first(blocks_limit)
510 out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
511 total += blocks_to_write.size()
512 target_blocks = target_blocks.subtract(blocks_to_write)
513 return total
514
515 out = []
516 total = 0
517
    # In BBOTA v3+, the hash of the stashed blocks is used as the stash slot
    # id. 'stashes' maps each hash to its reference count. The stash will be
    # freed only when the count drops to zero.
521 stashes = {}
522 stashed_blocks = 0
523 max_stashed_blocks = 0
524
525 for xf in self.transfers:
526
527 for _, sr in xf.stash_before:
528 sh = self.src.RangeSha1(sr)
529 if sh in stashes:
530 stashes[sh] += 1
531 else:
532 stashes[sh] = 1
533 stashed_blocks += sr.size()
534 self.touched_src_ranges = self.touched_src_ranges.union(sr)
535 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
536
537 if stashed_blocks > max_stashed_blocks:
538 max_stashed_blocks = stashed_blocks
539
540 free_string = []
541 free_size = 0
542
      # <# blocks> <src ranges>
      #    OR
      # <# blocks> <src ranges> <src locs> <stash refs...>
      #    OR
      # <# blocks> - <stash refs...>
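      # For example (illustrative values), "15 2,0,15" reads 15 blocks
      # directly from the source, while "15 - 9a3f...:2,0,15" takes all 15
      # blocks from a previously stashed range keyed by its SHA-1.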
548
549 size = xf.src_ranges.size()
550 src_str_buffer = [str(size)]
551
552 unstashed_src_ranges = xf.src_ranges
553 mapped_stashes = []
554 for _, sr in xf.use_stash:
555 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
556 sh = self.src.RangeSha1(sr)
557 sr = xf.src_ranges.map_within(sr)
558 mapped_stashes.append(sr)
559 assert sh in stashes
560 src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
561 stashes[sh] -= 1
562 if stashes[sh] == 0:
563 free_string.append("free %s\n" % (sh,))
564 free_size += sr.size()
565 stashes.pop(sh)
566
567 if unstashed_src_ranges:
568 src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
569 if xf.use_stash:
570 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
571 src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
572 mapped_stashes.append(mapped_unstashed)
573 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
574 else:
575 src_str_buffer.insert(1, "-")
576 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
577
578 src_str = " ".join(src_str_buffer)
579
580 # version 3+:
581 # zero <rangeset>
582 # new <rangeset>
583 # erase <rangeset>
584 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
585 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
586 # move hash <tgt rangeset> <src_str>
587
588 tgt_size = xf.tgt_ranges.size()
589
590 if xf.style == "new":
591 assert xf.tgt_ranges
592 assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
593 total += tgt_size
594 elif xf.style == "move":
595 assert xf.tgt_ranges
596 assert xf.src_ranges.size() == tgt_size
597 if xf.src_ranges != xf.tgt_ranges:
598 # take into account automatic stashing of overlapping blocks
599 if xf.src_ranges.overlaps(xf.tgt_ranges):
600 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
601 if temp_stash_usage > max_stashed_blocks:
602 max_stashed_blocks = temp_stash_usage
603
604 self.touched_src_ranges = self.touched_src_ranges.union(
605 xf.src_ranges)
606
607 out.append("%s %s %s %s\n" % (
608 xf.style,
609 xf.tgt_sha1,
610 xf.tgt_ranges.to_string_raw(), src_str))
611 total += tgt_size
612 elif xf.style in ("bsdiff", "imgdiff"):
613 assert xf.tgt_ranges
614 assert xf.src_ranges
615 # take into account automatic stashing of overlapping blocks
616 if xf.src_ranges.overlaps(xf.tgt_ranges):
617 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
618 if temp_stash_usage > max_stashed_blocks:
619 max_stashed_blocks = temp_stash_usage
620
621 self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)
622
623 out.append("%s %d %d %s %s %s %s\n" % (
624 xf.style,
625 xf.patch_start, xf.patch_len,
626 xf.src_sha1,
627 xf.tgt_sha1,
628 xf.tgt_ranges.to_string_raw(), src_str))
629 total += tgt_size
630 elif xf.style == "zero":
631 assert xf.tgt_ranges
632 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
633 assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
634 total += to_zero.size()
635 else:
636 raise ValueError("unknown transfer style '%s'\n" % xf.style)
637
638 if free_string:
639 out.append("".join(free_string))
640 stashed_blocks -= free_size
641
642 if common.OPTIONS.cache_size is not None:
      # Sanity check: abort if we're going to need more stash space than
      # the allowed size (cache_size * threshold). There are two purposes of
      # having a threshold here: a) part of the cache may have been occupied
      # by some recovery logs; b) it buys us some time to deal with the
      # oversize issue.
648 cache_size = common.OPTIONS.cache_size
649 stash_threshold = common.OPTIONS.stash_threshold
650 max_allowed = cache_size * stash_threshold
651 assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
652 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
653 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
654 self.tgt.blocksize, max_allowed, cache_size,
655 stash_threshold)
656
657 self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)
658
659 # Zero out extended blocks as a workaround for bug 20881595.
660 if self.tgt.extended:
661 assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
662 self.tgt.extended.size())
663 total += self.tgt.extended.size()
664
    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image and b) will not be touched by dm-verity. Out of
    # those blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest will be erased at the end. This is to
    # work around an eMMC issue observed on some devices, which may otherwise
    # become starved of clean blocks and thus fail the update. (b/28347095)
671 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
672 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
673 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
674
675 erase_first = new_dontcare.subtract(self.touched_src_ranges)
676 if erase_first:
677 out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))
678
679 erase_last = new_dontcare.subtract(erase_first)
680 if erase_last:
681 out.append("erase %s\n" % (erase_last.to_string_raw(),))
682
683 out.insert(0, "%d\n" % (self.version,)) # format version number
684 out.insert(1, "%d\n" % (total,))
685 # v3+: the number of stash slots is unused.
686 out.insert(2, "0\n")
687 out.insert(3, str(max_stashed_blocks) + "\n")
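    # The emitted transfer list therefore starts with four header lines, e.g.
    # (illustrative values):
    #   4         <- transfer list version
    #   123456    <- total blocks written
    #   0         <- number of stash slots (unused in v3+)
    #   512       <- maximum number of blocks stashed simultaneously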
688
689 with open(prefix + ".transfer.list", "wb") as f:
690 for i in out:
691 f.write(i)
692
693 self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
694 OPTIONS = common.OPTIONS
695 if OPTIONS.cache_size is not None:
696 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
697 print("max stashed blocks: %d (%d bytes), "
698 "limit: %d bytes (%.2f%%)\n" % (
699 max_stashed_blocks, self._max_stashed_size, max_allowed,
700 self._max_stashed_size * 100.0 / max_allowed))
701 else:
702 print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
703 max_stashed_blocks, self._max_stashed_size))
704
705 def ReviseStashSize(self):
706 print("Revising stash size...")
707 stash_map = {}
708
709 # Create the map between a stash and its def/use points. For example, for a
710 # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
711 for xf in self.transfers:
712 # Command xf defines (stores) all the stashes in stash_before.
713 for stash_raw_id, sr in xf.stash_before:
714 stash_map[stash_raw_id] = (sr, xf)
715
716 # Record all the stashes command xf uses.
717 for stash_raw_id, _ in xf.use_stash:
718 stash_map[stash_raw_id] += (xf,)
719
720 # Compute the maximum blocks available for stash based on /cache size and
721 # the threshold.
722 cache_size = common.OPTIONS.cache_size
723 stash_threshold = common.OPTIONS.stash_threshold
724 max_allowed = cache_size * stash_threshold / self.tgt.blocksize
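    # For example (illustrative numbers), a 128 MiB /cache partition with a
    # 0.8 stash threshold allows 0.8 * 128 MiB / 4096 = ~26214 blocks
    # (about 102 MiB) to be stashed at any point in time.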
725
726 # See the comments for 'stashes' in WriteTransfers().
727 stashes = {}
728 stashed_blocks = 0
729 new_blocks = 0
730
    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires more stash than is available, we delete the
    # stash by replacing the command that uses it with a "new" command
    # instead.
735 for xf in self.transfers:
736 replaced_cmds = []
737
738 # xf.stash_before generates explicit stash commands.
739 for stash_raw_id, sr in xf.stash_before:
740 # Check the post-command stashed_blocks.
741 stashed_blocks_after = stashed_blocks
742 sh = self.src.RangeSha1(sr)
743 if sh not in stashes:
744 stashed_blocks_after += sr.size()
745
746 if stashed_blocks_after > max_allowed:
747 # We cannot stash this one for a later command. Find out the command
748 # that will use this stash and replace the command with "new".
749 use_cmd = stash_map[stash_raw_id][2]
750 replaced_cmds.append(use_cmd)
751 print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
752 else:
753 # Update the stashes map.
754 if sh in stashes:
755 stashes[sh] += 1
756 else:
757 stashes[sh] = 1
758 stashed_blocks = stashed_blocks_after
759
760 # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
761 # ComputePatches(), they both have the style of "diff".
762 if xf.style == "diff":
763 assert xf.tgt_ranges and xf.src_ranges
764 if xf.src_ranges.overlaps(xf.tgt_ranges):
765 if stashed_blocks + xf.src_ranges.size() > max_allowed:
766 replaced_cmds.append(xf)
767 print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
768
769 # Replace the commands in replaced_cmds with "new"s.
770 for cmd in replaced_cmds:
771 # It no longer uses any commands in "use_stash". Remove the def points
772 # for all those stashes.
773 for stash_raw_id, sr in cmd.use_stash:
774 def_cmd = stash_map[stash_raw_id][1]
775 assert (stash_raw_id, sr) in def_cmd.stash_before
776 def_cmd.stash_before.remove((stash_raw_id, sr))
777
        # Add up the blocks that violate the space limit; the total is
        # printed to screen later.
780 new_blocks += cmd.tgt_ranges.size()
781 cmd.ConvertToNew()
782
783 # xf.use_stash may generate free commands.
784 for _, sr in xf.use_stash:
785 sh = self.src.RangeSha1(sr)
786 assert sh in stashes
787 stashes[sh] -= 1
788 if stashes[sh] == 0:
789 stashed_blocks -= sr.size()
790 stashes.pop(sh)
791
792 num_of_bytes = new_blocks * self.tgt.blocksize
793 print(" Total %d blocks (%d bytes) are packed as new blocks due to "
794 "insufficient cache size." % (new_blocks, num_of_bytes))
795 return new_blocks
796
797 def ComputePatches(self, prefix):
798 print("Reticulating splines...")
799 diff_queue = []
800 patch_num = 0
801 with open(prefix + ".new.dat", "wb") as new_f:
802 for index, xf in enumerate(self.transfers):
803 if xf.style == "zero":
804 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
805 print("%10d %10d (%6.2f%%) %7s %s %s" % (
806 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
807 str(xf.tgt_ranges)))
808
809 elif xf.style == "new":
810 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
811 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
812 print("%10d %10d (%6.2f%%) %7s %s %s" % (
813 tgt_size, tgt_size, 100.0, xf.style,
814 xf.tgt_name, str(xf.tgt_ranges)))
815
816 elif xf.style == "diff":
817 # We can't compare src and tgt directly because they may have
818 # the same content but be broken up into blocks differently, eg:
819 #
820 # ["he", "llo"] vs ["h", "ello"]
821 #
822 # We want those to compare equal, ideally without having to
823 # actually concatenate the strings (these may be tens of
824 # megabytes).
825 if xf.src_sha1 == xf.tgt_sha1:
826 # These are identical; we don't need to generate a patch,
827 # just issue copy commands on the device.
828 xf.style = "move"
829 xf.patch = None
830 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
831 if xf.src_ranges != xf.tgt_ranges:
832 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
833 tgt_size, tgt_size, 100.0, xf.style,
834 xf.tgt_name if xf.tgt_name == xf.src_name else (
835 xf.tgt_name + " (from " + xf.src_name + ")"),
836 str(xf.tgt_ranges), str(xf.src_ranges)))
837 else:
838 if xf.patch:
839 # We have already generated the patch with imgdiff. Check if the
840 # transfer is intact.
841 assert not self.disable_imgdiff
842 imgdiff = True
843 if (xf.src_ranges.extra.get('trimmed') or
844 xf.tgt_ranges.extra.get('trimmed')):
845 imgdiff = False
846 xf.patch = None
847 else:
848 imgdiff = self.CanUseImgdiff(
849 xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
850 xf.style = "imgdiff" if imgdiff else "bsdiff"
851 diff_queue.append((index, imgdiff, patch_num))
852 patch_num += 1
853
854 else:
855 assert False, "unknown style " + xf.style
856
857 if diff_queue:
858 if self.threads > 1:
859 print("Computing patches (using %d threads)..." % (self.threads,))
860 else:
861 print("Computing patches...")
862
863 diff_total = len(diff_queue)
864 patches = [None] * diff_total
865 error_messages = []
866
      # Using multiprocessing doesn't give additional benefits, due to the
      # pattern of the code. The diffing work is done by subprocess.call,
      # which already runs in a separate process (not affected much by the
      # GIL - Global Interpreter Lock). Using multiprocessing also requires
      # either a) writing the diff input files in the main process before
      # forking, or b) reopening the image file (SparseImage) in the worker
      # processes. Neither of those would improve the performance further.
874 lock = threading.Lock()
875 def diff_worker():
876 while True:
877 with lock:
878 if not diff_queue:
879 return
880 xf_index, imgdiff, patch_index = diff_queue.pop()
881 xf = self.transfers[xf_index]
882
883 if sys.stdout.isatty():
884 diff_left = len(diff_queue)
885 progress = (diff_total - diff_left) * 100 / diff_total
886 # '\033[K' is to clear to EOL.
887 print(' [%3d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
888 sys.stdout.flush()
889
890 patch = xf.patch
891 if not patch:
892 src_ranges = xf.src_ranges
893 tgt_ranges = xf.tgt_ranges
894
895 src_file = common.MakeTempFile(prefix="src-")
896 with open(src_file, "wb") as fd:
897 self.src.WriteRangeDataToFd(src_ranges, fd)
898
899 tgt_file = common.MakeTempFile(prefix="tgt-")
900 with open(tgt_file, "wb") as fd:
901 self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
902
903 message = []
904 try:
905 patch = compute_patch(src_file, tgt_file, imgdiff)
906 except ValueError as e:
907 message.append(
908 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
909 "imgdiff" if imgdiff else "bsdiff",
910 xf.tgt_name if xf.tgt_name == xf.src_name else
911 xf.tgt_name + " (from " + xf.src_name + ")",
912 xf.tgt_ranges, xf.src_ranges, e.message))
913 if message:
914 with lock:
915 error_messages.extend(message)
916
917 with lock:
918 patches[patch_index] = (xf_index, patch)
919
920 threads = [threading.Thread(target=diff_worker)
921 for _ in range(self.threads)]
922 for th in threads:
923 th.start()
924 while threads:
925 threads.pop().join()
926
927 if sys.stdout.isatty():
928 print('\n')
929
930 if error_messages:
931 print('ERROR:')
932 print('\n'.join(error_messages))
933 print('\n\n\n')
934 sys.exit(1)
935 else:
936 patches = []
937
938 offset = 0
939 with open(prefix + ".patch.dat", "wb") as patch_fd:
940 for index, patch in patches:
941 xf = self.transfers[index]
942 xf.patch_len = len(patch)
943 xf.patch_start = offset
944 offset += xf.patch_len
945 patch_fd.write(patch)
946
947 if common.OPTIONS.verbose:
948 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
949 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
950 xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
951 xf.style,
952 xf.tgt_name if xf.tgt_name == xf.src_name else (
953 xf.tgt_name + " (from " + xf.src_name + ")"),
954 xf.tgt_ranges, xf.src_ranges))
955
  def AssertSha1Good(self):
    """Checks the SHA-1 of the src & tgt blocks in the transfer list.

    Double-checks the SHA-1 values to avoid the issue in b/71908713, where
    SparseImage.RangeSha1() messed up the hash calculation in a multi-threaded
    environment. That specific problem has been fixed by protecting the
    underlying generator function 'SparseImage._GetRangeData()' with a lock.
    """
964 for xf in self.transfers:
965 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
966 assert xf.tgt_sha1 == tgt_sha1
967 if xf.style == "diff":
968 src_sha1 = self.src.RangeSha1(xf.src_ranges)
969 assert xf.src_sha1 == src_sha1
970
971 def AssertSequenceGood(self):
972 # Simulate the sequences of transfers we will output, and check that:
973 # - we never read a block after writing it, and
974 # - we write every block we care about exactly once.
975
976 # Start with no blocks having been touched yet.
977 touched = array.array("B", "\0" * self.tgt.total_blocks)
978
979 # Imagine processing the transfers in order.
980 for xf in self.transfers:
981 # Check that the input blocks for this transfer haven't yet been touched.
982
983 x = xf.src_ranges
984 for _, sr in xf.use_stash:
985 x = x.subtract(sr)
986
987 for s, e in x:
        # The source image could be larger. Don't check the blocks that are in
        # the source image only, since they are not in 'touched' and won't
        # ever be touched.
991 for i in range(s, min(e, self.tgt.total_blocks)):
992 assert touched[i] == 0
993
994 # Check that the output blocks for this transfer haven't yet
995 # been touched, and touch all the blocks written by this
996 # transfer.
997 for s, e in xf.tgt_ranges:
998 for i in range(s, e):
999 assert touched[i] == 0
1000 touched[i] = 1
1001
1002 # Check that we've written every target block.
1003 for s, e in self.tgt.care_map:
1004 for i in range(s, e):
1005 assert touched[i] == 1
1006
1007 def ImproveVertexSequence(self):
1008 print("Improving vertex order...")
1009
1010 # At this point our digraph is acyclic; we reversed any edges that
1011 # were backwards in the heuristically-generated sequence. The
1012 # previously-generated order is still acceptable, but we hope to
1013 # find a better order that needs less memory for stashed data.
1014 # Now we do a topological sort to generate a new vertex order,
1015 # using a greedy algorithm to choose which vertex goes next
1016 # whenever we have a choice.
1017
1018 # Make a copy of the edge set; this copy will get destroyed by the
1019 # algorithm.
1020 for xf in self.transfers:
1021 xf.incoming = xf.goes_after.copy()
1022 xf.outgoing = xf.goes_before.copy()
1023
1024 L = [] # the new vertex order
1025
1026 # S is the set of sources in the remaining graph; we always choose
1027 # the one that leaves the least amount of stashed data after it's
1028 # executed.
1029 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
1030 if not u.incoming]
1031 heapq.heapify(S)
1032
1033 while S:
1034 _, _, xf = heapq.heappop(S)
1035 L.append(xf)
1036 for u in xf.outgoing:
1037 del u.incoming[xf]
1038 if not u.incoming:
1039 heapq.heappush(S, (u.NetStashChange(), u.order, u))
1040
1041 # if this fails then our graph had a cycle.
1042 assert len(L) == len(self.transfers)
1043
1044 self.transfers = L
1045 for i, xf in enumerate(L):
1046 xf.order = i
1047
1048 def RemoveBackwardEdges(self):
1049 print("Removing backward edges...")
1050 in_order = 0
1051 out_of_order = 0
1052 lost_source = 0
1053
1054 for xf in self.transfers:
1055 lost = 0
1056 size = xf.src_ranges.size()
1057 for u in xf.goes_before:
1058 # xf should go before u
1059 if xf.order < u.order:
1060 # it does, hurray!
1061 in_order += 1
1062 else:
1063 # it doesn't, boo. trim the blocks that u writes from xf's
1064 # source, so that xf can go after u.
1065 out_of_order += 1
1066 assert xf.src_ranges.overlaps(u.tgt_ranges)
1067 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
1068 xf.src_ranges.extra['trimmed'] = True
1069
1070 if xf.style == "diff" and not xf.src_ranges:
1071 # nothing left to diff from; treat as new data
1072 xf.style = "new"
1073
1074 lost = size - xf.src_ranges.size()
1075 lost_source += lost
1076
1077 print((" %d/%d dependencies (%.2f%%) were violated; "
1078 "%d source blocks removed.") %
1079 (out_of_order, in_order + out_of_order,
1080 (out_of_order * 100.0 / (in_order + out_of_order))
1081 if (in_order + out_of_order) else 0.0,
1082 lost_source))
1083
  def ReverseBackwardEdges(self):
    """Reverses unsatisfied edges and computes pairs of stashed blocks.

    For each transfer, make sure it properly stashes the blocks it touches that
    will be used by later transfers. It uses pairs of (stash_raw_id, range) to
    record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
    identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
    it is possible to have multiple pairs with different 'stash_raw_id's. Each
    'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
    blocks will be written to the same stash slot in WriteTransfers().
    """
1095
1096 print("Reversing backward edges...")
1097 in_order = 0
1098 out_of_order = 0
1099 stash_raw_id = 0
1100 stash_size = 0
1101
1102 for xf in self.transfers:
1103 for u in xf.goes_before.copy():
1104 # xf should go before u
1105 if xf.order < u.order:
1106 # it does, hurray!
1107 in_order += 1
1108 else:
1109 # it doesn't, boo. modify u to stash the blocks that it
1110 # writes that xf wants to read, and then require u to go
1111 # before xf.
1112 out_of_order += 1
1113
1114 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1115 assert overlap
1116
1117 u.stash_before.append((stash_raw_id, overlap))
1118 xf.use_stash.append((stash_raw_id, overlap))
1119 stash_raw_id += 1
1120 stash_size += overlap.size()
1121
1122 # reverse the edge direction; now xf must go after u
1123 del xf.goes_before[u]
1124 del u.goes_after[xf]
1125 xf.goes_after[u] = None # value doesn't matter
1126 u.goes_before[xf] = None
1127
1128 print((" %d/%d dependencies (%.2f%%) were violated; "
1129 "%d source blocks stashed.") %
1130 (out_of_order, in_order + out_of_order,
1131 (out_of_order * 100.0 / (in_order + out_of_order))
1132 if (in_order + out_of_order) else 0.0,
1133 stash_size))
1134
1135 def FindVertexSequence(self):
1136 print("Finding vertex sequence...")
1137
1138 # This is based on "A Fast & Effective Heuristic for the Feedback
1139 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
1140 # it as starting with the digraph G and moving all the vertices to
1141 # be on a horizontal line in some order, trying to minimize the
1142 # number of edges that end up pointing to the left. Left-pointing
1143 # edges will get removed to turn the digraph into a DAG. In this
1144 # case each edge has a weight which is the number of source blocks
1145 # we'll lose if that edge is removed; we try to minimize the total
1146 # weight rather than just the number of edges.
1147
1148 # Make a copy of the edge set; this copy will get destroyed by the
1149 # algorithm.
1150 for xf in self.transfers:
1151 xf.incoming = xf.goes_after.copy()
1152 xf.outgoing = xf.goes_before.copy()
1153 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
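      # For example (illustrative sizes), a transfer whose outgoing edges total
      # 100 blocks and whose incoming edges total 30 blocks gets score +70,
      # making it a good candidate to place early in the sequence.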
1154
1155 # We use an OrderedDict instead of just a set so that the output
1156 # is repeatable; otherwise it would depend on the hash values of
1157 # the transfer objects.
1158 G = OrderedDict()
1159 for xf in self.transfers:
1160 G[xf] = None
1161 s1 = deque() # the left side of the sequence, built from left to right
1162 s2 = deque() # the right side of the sequence, built from right to left
1163
1164 heap = []
1165 for xf in self.transfers:
1166 xf.heap_item = HeapItem(xf)
1167 heap.append(xf.heap_item)
1168 heapq.heapify(heap)
1169
1170 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1171 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1172 # { key1: None, key2: None, ... }.
1173 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1174 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
1175
1176 def adjust_score(iu, delta):
1177 iu.score += delta
1178 iu.heap_item.clear()
1179 iu.heap_item = HeapItem(iu)
1180 heapq.heappush(heap, iu.heap_item)
1181
1182 while G:
1183 # Put all sinks at the end of the sequence.
1184 while sinks:
1185 new_sinks = OrderedDict()
1186 for u in sinks:
1187 if u not in G:
1188 continue
1189 s2.appendleft(u)
1190 del G[u]
1191 for iu in u.incoming:
1192 adjust_score(iu, -iu.outgoing.pop(u))
1193 if not iu.outgoing:
1194 new_sinks[iu] = None
1195 sinks = new_sinks
1196
1197 # Put all the sources at the beginning of the sequence.
1198 while sources:
1199 new_sources = OrderedDict()
1200 for u in sources:
1201 if u not in G:
1202 continue
1203 s1.append(u)
1204 del G[u]
1205 for iu in u.outgoing:
1206 adjust_score(iu, +iu.incoming.pop(u))
1207 if not iu.incoming:
1208 new_sources[iu] = None
1209 sources = new_sources
1210
1211 if not G:
1212 break
1213
1214 # Find the "best" vertex to put next. "Best" is the one that
1215 # maximizes the net difference in source blocks saved we get by
1216 # pretending it's a source rather than a sink.
1217
1218 while True:
1219 u = heapq.heappop(heap)
1220 if u and u.item in G:
1221 u = u.item
1222 break
1223
1224 s1.append(u)
1225 del G[u]
1226 for iu in u.outgoing:
1227 adjust_score(iu, +iu.incoming.pop(u))
1228 if not iu.incoming:
1229 sources[iu] = None
1230
1231 for iu in u.incoming:
1232 adjust_score(iu, -iu.outgoing.pop(u))
1233 if not iu.outgoing:
1234 sinks[iu] = None
1235
1236 # Now record the sequence in the 'order' field of each transfer,
1237 # and by rearranging self.transfers to be in the chosen sequence.
1238
1239 new_transfers = []
1240 for x in itertools.chain(s1, s2):
1241 x.order = len(new_transfers)
1242 new_transfers.append(x)
1243 del x.incoming
1244 del x.outgoing
1245
1246 self.transfers = new_transfers
1247
1248 def GenerateDigraph(self):
1249 print("Generating digraph...")
1250
1251 # Each item of source_ranges will be:
1252 # - None, if that block is not used as a source,
1253 # - an ordered set of transfers.
1254 source_ranges = []
1255 for b in self.transfers:
1256 for s, e in b.src_ranges:
1257 if e > len(source_ranges):
1258 source_ranges.extend([None] * (e-len(source_ranges)))
1259 for i in range(s, e):
1260 if source_ranges[i] is None:
1261 source_ranges[i] = OrderedDict.fromkeys([b])
1262 else:
1263 source_ranges[i][b] = None
1264
1265 for a in self.transfers:
1266 intersections = OrderedDict()
1267 for s, e in a.tgt_ranges:
1268 for i in range(s, e):
1269 if i >= len(source_ranges):
1270 break
1271 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1272 if source_ranges[i] is not None:
1273 for j in source_ranges[i]:
1274 intersections[j] = None
1275
1276 for b in intersections:
1277 if a is b:
1278 continue
1279
1280 # If the blocks written by A are read by B, then B needs to go before A.
1281 i = a.tgt_ranges.intersect(b.src_ranges)
1282 if i:
1283 if b.src_name == "__ZERO":
1284 # the cost of removing source blocks for the __ZERO domain
1285 # is (nearly) zero.
1286 size = 0
1287 else:
1288 size = i.size()
1289 b.goes_before[a] = size
1290 a.goes_after[b] = size
1291
1292 def FindTransfers(self):
1293 """Parse the file_map to generate all the transfers."""
1294
    def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                             src_ranges, style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for the resumable feature.
      However, with the growth of file sizes and the shrinking of the cache
      partition, source blocks can be too large to be stashed. If a file
      occupies too many blocks, we split it into smaller pieces by creating
      multiple Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead."""
1310
1311 pieces = 0
1312 while (tgt_ranges.size() > max_blocks_per_transfer and
1313 src_ranges.size() > max_blocks_per_transfer):
1314 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1315 src_split_name = "%s-%d" % (src_name, pieces)
1316 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1317 src_first = src_ranges.first(max_blocks_per_transfer)
1318
1319 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1320 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1321 style, by_id)
1322
1323 tgt_ranges = tgt_ranges.subtract(tgt_first)
1324 src_ranges = src_ranges.subtract(src_first)
1325 pieces += 1
1326
1327 # Handle remaining blocks.
1328 if tgt_ranges.size() or src_ranges.size():
1329 # Must be both non-empty.
1330 assert tgt_ranges.size() and src_ranges.size()
1331 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1332 src_split_name = "%s-%d" % (src_name, pieces)
1333 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1334 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1335 style, by_id)
1336
    def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
                          by_id):
      """Find all the zip files and split the others with a fixed chunk size.

      This function will construct a list of zip archives, which will later be
      split by imgdiff to reduce the final patch size. The other files will
      simply be split based on a fixed chunk size, at the cost of a potential
      patch size penalty.
      """
1346
1347 assert style == "diff"
1348
1349 # Change nothing for small files.
1350 if (tgt_ranges.size() <= max_blocks_per_transfer and
1351 src_ranges.size() <= max_blocks_per_transfer):
1352 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1353 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1354 style, by_id)
1355 return
1356
1357 # Split large APKs with imgdiff, if possible. We're intentionally checking
1358 # file types one more time (CanUseImgdiff() checks that as well), before
1359 # calling the costly RangeSha1()s.
1360 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1361 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
1362 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
1363 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1364 return
1365
1366 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1367 src_ranges, style, by_id)
1368
1369 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1370 split=False):
1371 """Wrapper function for adding a Transfer()."""
1372
1373 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1374 # otherwise add the Transfer() as is.
1375 if style != "diff" or not split:
1376 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1377 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1378 style, by_id)
1379 return
1380
      # Handle .odex files specially to analyze the block-wise difference. If
      # most of the blocks are identical with only a few changes (e.g. the
      # header), we will patch the changed blocks only. This avoids stashing
      # unchanged blocks while patching. We limit the analysis to files whose
      # sizes are unchanged, so as not to increase the OTA generation cost too
      # much.
1387 if (tgt_name.split(".")[-1].lower() == 'odex' and
1388 tgt_ranges.size() == src_ranges.size()):
1389
        # The 0.5 threshold can be further tuned. The tradeoff is: if only very
        # few blocks remain identical, we lose the opportunity to use imgdiff,
        # which may have a better compression ratio than bsdiff.
1393 crop_threshold = 0.5
1394
1395 tgt_skipped = RangeSet()
1396 src_skipped = RangeSet()
1397 tgt_size = tgt_ranges.size()
1398 tgt_changed = 0
1399 for src_block, tgt_block in zip(src_ranges.next_item(),
1400 tgt_ranges.next_item()):
1401 src_rs = RangeSet(str(src_block))
1402 tgt_rs = RangeSet(str(tgt_block))
1403 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1404 tgt_skipped = tgt_skipped.union(tgt_rs)
1405 src_skipped = src_skipped.union(src_rs)
1406 else:
1407 tgt_changed += tgt_rs.size()
1408
1409 # Terminate early if no clear sign of benefits.
1410 if tgt_changed > tgt_size * crop_threshold:
1411 break
1412
1413 if tgt_changed < tgt_size * crop_threshold:
1414 assert tgt_changed + tgt_skipped.size() == tgt_size
1415 print('%10d %10d (%6.2f%%) %s' % (
1416 tgt_skipped.size(), tgt_size,
1417 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
1418 AddSplitTransfers(
1419 "%s-skipped" % (tgt_name,),
1420 "%s-skipped" % (src_name,),
1421 tgt_skipped, src_skipped, style, by_id)
1422
1423 # Intentionally change the file extension to avoid being imgdiff'd as
1424 # the files are no longer in their original format.
1425 tgt_name = "%s-cropped" % (tgt_name,)
1426 src_name = "%s-cropped" % (src_name,)
1427 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1428 src_ranges = src_ranges.subtract(src_skipped)
1429
1430 # Possibly having no changed blocks.
1431 if not tgt_ranges:
1432 return
1433
1434 # Add the transfer(s).
1435 AddSplitTransfers(
1436 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1437
1438 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1439 split_info):
1440 """Parse the split_info and return a list of info tuples.
1441
1442 Args:
1443 patch_size: total size of the patch file.
1444 tgt_ranges: Ranges of the target file within the original image.
1445 src_ranges: Ranges of the source file within the original image.
1446 split_info format:
1447 imgdiff version#
1448 count of pieces
1449 <patch_size_1> <tgt_size_1> <src_ranges_1>
1450 ...
1451 <patch_size_n> <tgt_size_n> <src_ranges_n>
1452
      Returns:
        A list of (patch_start, patch_length, split_tgt_ranges,
        split_src_ranges) tuples.
      """
1456
1457 version = int(split_info[0])
1458 assert version == 2
1459 count = int(split_info[1])
1460 assert len(split_info) - 2 == count
1461
1462 split_info_list = []
1463 patch_start = 0
1464 tgt_remain = copy.deepcopy(tgt_ranges)
1465 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1466 for line in split_info[2:]:
1467 info = line.split()
1468 assert len(info) == 3
1469 patch_length = int(info[0])
1470
1471 split_tgt_size = int(info[1])
1472 assert split_tgt_size % 4096 == 0
1473 assert split_tgt_size / 4096 <= tgt_remain.size()
1474 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1475 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1476
1477 # Find the split_src_ranges within the image file from its relative
1478 # position in file.
1479 split_src_indices = RangeSet.parse_raw(info[2])
1480 split_src_ranges = RangeSet()
1481 for r in split_src_indices:
1482 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1483 assert not split_src_ranges.overlaps(curr_range)
1484 split_src_ranges = split_src_ranges.union(curr_range)
1485
1486 split_info_list.append((patch_start, patch_length,
1487 split_tgt_ranges, split_src_ranges))
1488 patch_start += patch_length
1489
1490 # Check that the sizes of all the split pieces add up to the final file
1491 # size for patch and target.
1492 assert tgt_remain.size() == 0
1493 assert patch_start == patch_size
1494 return split_info_list
1495
    def SplitLargeApks():
      """Split large APK files.

      Example: Chrome.apk will be split into
        src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
        src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
        ...

      After the split, the target pieces are continuous and block aligned, and
      the source pieces are mutually exclusive. During the split, we also
      generate and save the image patch between src-X & tgt-X. This patch will
      be valid because the block ranges of src-X & tgt-X will always stay the
      same afterwards; but there's a chance we don't use the patch if we
      convert the "diff" command into "new" or "move" later.
      """
1511
1512 while True:
1513 with transfer_lock:
1514 if not large_apks:
1515 return
1516 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1517
1518 src_file = common.MakeTempFile(prefix="src-")
1519 tgt_file = common.MakeTempFile(prefix="tgt-")
1520 with open(src_file, "wb") as src_fd:
1521 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1522 with open(tgt_file, "wb") as tgt_fd:
1523 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
1524
1525 patch_file = common.MakeTempFile(prefix="patch-")
1526 patch_info_file = common.MakeTempFile(prefix="split_info-")
1527 cmd = ["imgdiff", "-z",
1528 "--block-limit={}".format(max_blocks_per_transfer),
1529 "--split-info=" + patch_info_file,
1530 src_file, tgt_file, patch_file]
1531 p = common.Run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
1532 imgdiff_output, _ = p.communicate()
1533 assert p.returncode == 0, \
1534 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1535 src_name, tgt_name, imgdiff_output)
1536
1537 with open(patch_info_file) as patch_info:
1538 lines = patch_info.readlines()
1539
1540 patch_size_total = os.path.getsize(patch_file)
1541 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1542 tgt_ranges, src_ranges,
1543 lines)
1544 for index, (patch_start, patch_length, split_tgt_ranges,
1545 split_src_ranges) in enumerate(split_info_list):
1546 with open(patch_file) as f:
1547 f.seek(patch_start)
1548 patch_content = f.read(patch_length)
1549
1550 split_src_name = "{}-{}".format(src_name, index)
1551 split_tgt_name = "{}-{}".format(tgt_name, index)
1552 split_large_apks.append((split_tgt_name,
1553 split_src_name,
1554 split_tgt_ranges,
1555 split_src_ranges,
1556 patch_content))
1557
1558 print("Finding transfers...")
1559
1560 large_apks = []
1561 split_large_apks = []
1562 cache_size = common.OPTIONS.cache_size
1563 split_threshold = 0.125
1564 max_blocks_per_transfer = int(cache_size * split_threshold /
1565 self.tgt.blocksize)
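    # For example (illustrative numbers), a 128 MiB cache with the 0.125 split
    # threshold caps each piece at int(128 MiB * 0.125 / 4096) = 4096 blocks,
    # i.e. 16 MiB per transfer.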
1566 empty = RangeSet()
1567 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
1568 if tgt_fn == "__ZERO":
1569 # the special "__ZERO" domain is all the blocks not contained
1570 # in any file and that are filled with zeros. We have a
1571 # special transfer style for zero blocks.
1572 src_ranges = self.src.file_map.get("__ZERO", empty)
1573 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1574 "zero", self.transfers)
1575 continue
1576
1577 elif tgt_fn == "__COPY":
1578 # "__COPY" domain includes all the blocks not contained in any
1579 # file and that need to be copied unconditionally to the target.
1580 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
1581 continue
1582
1583 elif tgt_fn in self.src.file_map:
1584 # Look for an exact pathname match in the source.
1585 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1586 "diff", self.transfers, True)
1587 continue
1588
1589 b = os.path.basename(tgt_fn)
1590 if b in self.src_basenames:
1591 # Look for an exact basename match in the source.
1592 src_fn = self.src_basenames[b]
1593 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1594 "diff", self.transfers, True)
1595 continue
1596
1597 b = re.sub("[0-9]+", "#", b)
1598 if b in self.src_numpatterns:
1599 # Look for a 'number pattern' match (a basename match after
1600 # all runs of digits are replaced by "#"). (This is useful
1601 # for .so files that contain version numbers in the filename
1602 # that get bumped.)
1603 src_fn = self.src_numpatterns[b]
1604 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1605 "diff", self.transfers, True)
1606 continue
1607
1608 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
1609
1610 transfer_lock = threading.Lock()
1611 threads = [threading.Thread(target=SplitLargeApks)
1612 for _ in range(self.threads)]
1613 for th in threads:
1614 th.start()
1615 while threads:
1616 threads.pop().join()
1617
    # Sort the split transfers for large APKs to generate a deterministic
    # package.
1619 split_large_apks.sort()
1620 for (tgt_name, src_name, tgt_ranges, src_ranges,
1621 patch) in split_large_apks:
1622 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1623 self.tgt.RangeSha1(tgt_ranges),
1624 self.src.RangeSha1(src_ranges),
1625 "diff", self.transfers)
1626 transfer_split.patch = patch
1627
1628 def AbbreviateSourceNames(self):
1629 for k in self.src.file_map.keys():
1630 b = os.path.basename(k)
1631 self.src_basenames[b] = k
1632 b = re.sub("[0-9]+", "#", b)
1633 self.src_numpatterns[b] = k
1634
1635 @staticmethod
1636 def AssertPartition(total, seq):
1637 """Assert that all the RangeSets in 'seq' form a partition of the
1638 'total' RangeSet (ie, they are nonintersecting and their union
1639 equals 'total')."""
1640
1641 so_far = RangeSet()
1642 for i in seq:
1643 assert not so_far.overlaps(i)
1644 so_far = so_far.union(i)
1645 assert so_far == total