# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import bisect
import os
import struct
import threading
from hashlib import sha1

import rangelib


class SparseImage(object):
  """Wraps a sparse image file into an image object.

  Wraps a sparse image file (and optional file map and clobbered_blocks) into
  an image object suitable for passing to BlockImageDiff. file_map contains
  the mapping between files and their blocks. clobbered_blocks contains the
  set of blocks that should always be written to the target regardless of the
  old contents (i.e. copying instead of patching). clobbered_blocks should be
  in the form of a string like "0" or "0 1-5 8".
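
  Example (a minimal sketch; the image and map file names are hypothetical):

    simg = SparseImage("system.img", file_map_fn="system.map")
    print(simg.care_map.to_string_raw())  # blocks covered by raw/fill chunks
    print(simg.TotalSha1())               # SHA-1 over the care regions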
| 33 | """ |

  def __init__(self, simg_fn, file_map_fn=None, clobbered_blocks=None,
               mode="rb", build_map=True, allow_shared_blocks=False):
    self.simg_f = f = open(simg_fn, mode)

    header_bin = f.read(28)
    header = struct.unpack("<I4H4I", header_bin)
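
    # The 28-byte sparse image header is: magic, major_version, minor_version,
    # file_hdr_sz, chunk_hdr_sz, blk_sz, total_blks, total_chunks and an image
    # checksum (header[8], unused here).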
    magic = header[0]
    major_version = header[1]
    minor_version = header[2]
    file_hdr_sz = header[3]
    chunk_hdr_sz = header[4]
    self.blocksize = blk_sz = header[5]
    self.total_blocks = total_blks = header[6]
    self.total_chunks = total_chunks = header[7]

    if magic != 0xED26FF3A:
      raise ValueError("Magic should be 0xED26FF3A but is 0x%08X" % (magic,))
    if major_version != 1 or minor_version != 0:
      raise ValueError("I know about version 1.0, but this is version %u.%u" %
                       (major_version, minor_version))
    if file_hdr_sz != 28:
      raise ValueError("File header size was expected to be 28, but is %u." %
                       (file_hdr_sz,))
    if chunk_hdr_sz != 12:
      raise ValueError("Chunk header size was expected to be 12, but is %u." %
                       (chunk_hdr_sz,))

    print("Total of %u %u-byte output blocks in %u input chunks."
          % (total_blks, blk_sz, total_chunks))

    if not build_map:
      return

    pos = 0  # in blocks
    care_data = []
    self.offset_map = offset_map = []
    self.clobbered_blocks = rangelib.RangeSet(data=clobbered_blocks)

    for i in range(total_chunks):
      header_bin = f.read(12)
      header = struct.unpack("<2H2I", header_bin)
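      # Each 12-byte chunk header is: chunk_type, a reserved field, chunk_sz
      # (output size in blocks) and total_sz (input size in bytes, including
      # this header).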
      chunk_type = header[0]
      chunk_sz = header[2]
      total_sz = header[3]
      data_sz = total_sz - 12

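      # Chunk types defined by the sparse image format: 0xCAC1 raw, 0xCAC2
      # fill, 0xCAC3 don't care, 0xCAC4 CRC32.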
      if chunk_type == 0xCAC1:
        if data_sz != (chunk_sz * blk_sz):
          raise ValueError(
              "Raw chunk input size (%u) does not match output size (%u)" %
              (data_sz, chunk_sz * blk_sz))
        else:
          care_data.append(pos)
          care_data.append(pos + chunk_sz)
          offset_map.append((pos, chunk_sz, f.tell(), None))
          pos += chunk_sz
          f.seek(data_sz, os.SEEK_CUR)

      elif chunk_type == 0xCAC2:
        fill_data = f.read(4)
        care_data.append(pos)
        care_data.append(pos + chunk_sz)
        offset_map.append((pos, chunk_sz, None, fill_data))
        pos += chunk_sz

      elif chunk_type == 0xCAC3:
        if data_sz != 0:
          raise ValueError("Don't care chunk input size is non-zero (%u)" %
                           (data_sz))
        else:
          pos += chunk_sz

      elif chunk_type == 0xCAC4:
        raise ValueError("CRC32 chunks are not supported")

      else:
        raise ValueError("Unknown chunk type 0x%04X not supported" %
                         (chunk_type,))

    self.generator_lock = threading.Lock()

    print("care data %s." % (care_data,))
    self.care_map = rangelib.RangeSet(care_data)
    print("care map %s." % (self.care_map.to_string_raw(),))
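    # offset_index lists the start block of every chunk, in order, so that
    # bisect can map a block number back to the chunk that holds it in
    # offset_map.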
    self.offset_index = [i[0] for i in offset_map]

    # Bug: 20881595
    # Introduce extended blocks as a workaround for the bug. dm-verity may
    # touch blocks that are not in the care_map due to block device
    # read-ahead. It will fail if such blocks contain non-zeroes. We zero out
    # the extended blocks explicitly to avoid dm-verity failures. 512 blocks
    # are the maximum read-ahead we configure for dm-verity block devices.
    extended = self.care_map.extend(512)
    all_blocks = rangelib.RangeSet(data=(0, self.total_blocks))
    extended = extended.intersect(all_blocks).subtract(self.care_map)
    self.extended = extended

    if file_map_fn:
      self.LoadFileBlockMap(file_map_fn, self.clobbered_blocks,
                            allow_shared_blocks)
    else:
      self.file_map = {"__DATA": self.care_map}

  def AppendFillChunk(self, data, blocks):
    f = self.simg_f

    # Append a fill chunk
    f.seek(0, os.SEEK_END)
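    # A fill chunk is the 12-byte chunk header (type 0xCAC2, reserved 0,
    # chunk_sz in blocks, total_sz of 16 bytes) followed by the 4-byte fill
    # value.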
    f.write(struct.pack("<2H3I", 0xCAC2, 0, blocks, 16, data))

    # Update the sparse header
    self.total_blocks += blocks
    self.total_chunks += 1

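    # total_blks and total_chunks live at byte offsets 16 and 20 of the
    # 28-byte sparse header at the start of the image.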
    f.seek(16, os.SEEK_SET)
    f.write(struct.pack("<2I", self.total_blocks, self.total_chunks))

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return [d for d in self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 hash of all data in the 'care' regions.

    If include_clobbered_blocks is True, it returns the hash including the
    clobbered_blocks."""
    ranges = self.care_map
    if not include_clobbered_blocks:
      ranges = ranges.subtract(self.clobbered_blocks)
    return self.RangeSha1(ranges)

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):
      fd.write(data)

  def _GetRangeData(self, ranges):
    """Generator that produces all the image data in 'ranges'. The
    number of individual pieces returned is arbitrary (and in
    particular is not necessarily equal to the number of ranges in
    'ranges').

    Use a lock to protect the generator so that we will not run two
    instances of this generator on the same object simultaneously."""

    f = self.simg_f
    with self.generator_lock:
      for s, e in ranges:
        to_read = e-s
        idx = bisect.bisect_right(self.offset_index, s) - 1
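        # Each offset_map entry is (start block, length in blocks, file
        # offset of the raw data or None, 4-byte fill pattern or None).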
        chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]

        # for the first chunk we may be starting partway through it.
        remain = chunk_len - (s - chunk_start)
        this_read = min(remain, to_read)
        if filepos is not None:
          p = filepos + ((s - chunk_start) * self.blocksize)
          f.seek(p, os.SEEK_SET)
          yield f.read(this_read * self.blocksize)
        else:
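          # fill_data is a 4-byte pattern, so repeat it blocksize / 4 times
          # to synthesize each block.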
          yield fill_data * (this_read * (self.blocksize >> 2))
        to_read -= this_read

        while to_read > 0:
          # continue with following chunks if this range spans multiple chunks.
          idx += 1
          chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]
          this_read = min(chunk_len, to_read)
          if filepos is not None:
            f.seek(filepos, os.SEEK_SET)
            yield f.read(this_read * self.blocksize)
          else:
            yield fill_data * (this_read * (self.blocksize >> 2))
          to_read -= this_read

  def LoadFileBlockMap(self, fn, clobbered_blocks, allow_shared_blocks):
    """Loads the given block map file.

    Args:
      fn: The filename of the block map file.
      clobbered_blocks: A RangeSet instance for the clobbered blocks.
      allow_shared_blocks: Whether having shared blocks is allowed.
    """
    remaining = self.care_map
    self.file_map = out = {}

    with open(fn) as f:
      for line in f:
        fn, ranges = line.split(None, 1)
        ranges = rangelib.RangeSet.parse(ranges)

        if allow_shared_blocks:
          # Find the shared blocks that have been claimed by others.
          shared_blocks = ranges.subtract(remaining)
          if shared_blocks:
            ranges = ranges.subtract(shared_blocks)
            if not ranges:
              continue

            # Tag the entry so that we can skip applying imgdiff on this file.
            ranges.extra['uses_shared_blocks'] = True

        out[fn] = ranges
        assert ranges.size() == ranges.intersect(remaining).size()

        # Currently we assume that blocks in clobbered_blocks are not part of
        # any file.
        assert not clobbered_blocks.overlaps(ranges)
        remaining = remaining.subtract(ranges)

    remaining = remaining.subtract(clobbered_blocks)

    # For all the remaining blocks in the care_map (i.e., those that
    # aren't part of the data for any file nor part of the clobbered_blocks),
    # divide them into blocks that are all zero and blocks that aren't.
    # (Zero blocks are handled specially because (1) there are usually
    # a lot of them and (2) bsdiff handles files with long sequences of
    # repeated bytes especially poorly.)

    zero_blocks = []
    nonzero_blocks = []
    reference = b'\0' * self.blocksize  # bytes, to match data read from the image

    # Workaround for bug 23227672. For squashfs, we don't have a system.map. So
    # the whole system image will be treated as a single file. But for some
    # unknown bug, the updater will be killed due to OOM when writing back the
    # patched image to flash (observed on lenok-userdebug MEA49). Prior to
    # getting a real fix, we evenly divide the non-zero blocks into smaller
    # groups (currently 1024 blocks or 4MB per group).
    # Bug: 23227672
    MAX_BLOCKS_PER_GROUP = 1024
    nonzero_groups = []

    f = self.simg_f
    for s, e in remaining:
      for b in range(s, e):
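        # Locate the chunk that holds block b: raw chunks are read back from
        # the image file; for fill chunks only an all-zero fill counts as a
        # zero block.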
        idx = bisect.bisect_right(self.offset_index, b) - 1
        chunk_start, _, filepos, fill_data = self.offset_map[idx]
        if filepos is not None:
          filepos += (b-chunk_start) * self.blocksize
          f.seek(filepos, os.SEEK_SET)
          data = f.read(self.blocksize)
        else:
          if fill_data == reference[:4]:  # fill with all zeros
            data = reference
          else:
            data = None

        if data == reference:
          zero_blocks.append(b)
          zero_blocks.append(b+1)
        else:
          nonzero_blocks.append(b)
          nonzero_blocks.append(b+1)

          if len(nonzero_blocks) >= MAX_BLOCKS_PER_GROUP:
            nonzero_groups.append(nonzero_blocks)
            # Clear the list.
            nonzero_blocks = []

    if nonzero_blocks:
      nonzero_groups.append(nonzero_blocks)
      nonzero_blocks = []

    assert zero_blocks or nonzero_groups or clobbered_blocks

    if zero_blocks:
      out["__ZERO"] = rangelib.RangeSet(data=zero_blocks)
    if nonzero_groups:
      for i, blocks in enumerate(nonzero_groups):
        out["__NONZERO-%d" % i] = rangelib.RangeSet(data=blocks)
    if clobbered_blocks:
      out["__COPY"] = clobbered_blocks

  def ResetFileMap(self):
    """Throw away the file map and treat the entire image as
    undifferentiated data."""
    self.file_map = {"__DATA": self.care_map}