blob: 36becf419214f83a9fb7bcb882bd8e3b17cb4a2a [file] [log] [blame]
xjb04a4022021-11-25 15:01:52 +08001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from __future__ import print_function
16
17import heapq
18import itertools
19
20
21__all__ = ["RangeSet"]
22
23
24class RangeSet(object):
25 """A RangeSet represents a set of non-overlapping ranges on integers.
26
27 Attributes:
28 monotonic: Whether the input has all its integers in increasing order.
29 extra: A dict that can be used by the caller, e.g. to store info that's
30 only meaningful to caller.
31 """
32
33 def __init__(self, data=None):
34 self.monotonic = False
35 self._extra = {}
36 if isinstance(data, str):
37 self._parse_internal(data)
38 elif data:
39 assert len(data) % 2 == 0
40 self.data = tuple(self._remove_pairs(data))
41 self.monotonic = all(x < y for x, y in zip(self.data, self.data[1:]))
42 else:
43 self.data = ()
44
45 def __iter__(self):
46 for i in range(0, len(self.data), 2):
47 yield self.data[i:i+2]
48
49 def __eq__(self, other):
50 return self.data == other.data
51
52 def __ne__(self, other):
53 return self.data != other.data
54
55 def __nonzero__(self):
56 return bool(self.data)
57
58 def __str__(self):
59 if not self.data:
60 return "empty"
61 else:
62 return self.to_string()
63
64 def __repr__(self):
65 return '<RangeSet("' + self.to_string() + '")>'
66
67 @property
68 def extra(self):
69 return self._extra
70
71 @classmethod
72 def parse(cls, text):
73 """Parses a text string into a RangeSet.
74
75 The input text string consists of a space-separated list of blocks and
76 ranges, e.g. "10-20 30 35-40". Ranges are interpreted to include both their
77 ends (so the above example represents 18 individual blocks). Returns a
78 RangeSet object.
79
80 If the input has all its blocks in increasing order, then the 'monotonic'
81 attribute of the returned RangeSet will be set to True. For example the
82 input "10-20 30" is monotonic, but the input "15-20 30 10-14" is not, even
83 though they represent the same set of blocks (and the two RangeSets will
84 compare equal with ==).
85 """
86 return cls(text)
87
88 @classmethod
89 def parse_raw(cls, text):
90 """Parse a string generated by RangeSet.to_string_raw().
91
92 >>> RangeSet.parse_raw(RangeSet("0-9").to_string_raw())
93 <RangeSet("0-9")>
94 """
95
96 raw = [int(i) for i in text.split(',')]
97 assert raw[0] == len(raw[1:]), "Invalid raw string."
98
99 return cls(data=raw[1:])
100
101 def _parse_internal(self, text):
102 data = []
103 last = -1
104 monotonic = True
105 for p in text.split():
106 if "-" in p:
107 s, e = (int(x) for x in p.split("-"))
108 data.append(s)
109 data.append(e+1)
110 if last <= s <= e:
111 last = e
112 else:
113 monotonic = False
114 else:
115 s = int(p)
116 data.append(s)
117 data.append(s+1)
118 if last <= s:
119 last = s+1
120 else:
121 monotonic = False
122 data.sort()
123 self.data = tuple(self._remove_pairs(data))
124 self.monotonic = monotonic
125
126 @staticmethod
127 def _remove_pairs(source):
128 """Remove consecutive duplicate items to simplify the result.
129
130 [1, 2, 2, 5, 5, 10] will become [1, 10]."""
131 last = None
132 for i in source:
133 if i == last:
134 last = None
135 else:
136 if last is not None:
137 yield last
138 last = i
139 if last is not None:
140 yield last
141
142 def to_string(self):
143 out = []
144 for i in range(0, len(self.data), 2):
145 s, e = self.data[i:i+2]
146 if e == s+1:
147 out.append(str(s))
148 else:
149 out.append(str(s) + "-" + str(e-1))
150 return " ".join(out)
151
152 def to_string_raw(self):
153 assert self.data
154 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
155
156 def union(self, other):
157 """Return a new RangeSet representing the union of this RangeSet
158 with the argument.
159
160 >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
161 <RangeSet("10-34")>
162 >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
163 <RangeSet("10-19 22 30-34")>
164 """
165 out = []
166 z = 0
167 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
168 zip(other.data, itertools.cycle((+1, -1)))):
169 if (z == 0 and d == 1) or (z == 1 and d == -1):
170 out.append(p)
171 z += d
172 return RangeSet(data=out)
173
174 def intersect(self, other):
175 """Return a new RangeSet representing the intersection of this
176 RangeSet with the argument.
177
178 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
179 <RangeSet("18-19 30-32")>
180 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
181 <RangeSet("")>
182 """
183 out = []
184 z = 0
185 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
186 zip(other.data, itertools.cycle((+1, -1)))):
187 if (z == 1 and d == 1) or (z == 2 and d == -1):
188 out.append(p)
189 z += d
190 return RangeSet(data=out)
191
192 def subtract(self, other):
193 """Return a new RangeSet representing subtracting the argument
194 from this RangeSet.
195
196 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
197 <RangeSet("10-17 33-34")>
198 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
199 <RangeSet("10-19 30-34")>
200 """
201
202 out = []
203 z = 0
204 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
205 zip(other.data, itertools.cycle((-1, +1)))):
206 if (z == 0 and d == 1) or (z == 1 and d == -1):
207 out.append(p)
208 z += d
209 return RangeSet(data=out)
210
211 def overlaps(self, other):
212 """Returns true if the argument has a nonempty overlap with this
213 RangeSet.
214
215 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
216 True
217 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
218 False
219 """
220
221 # This is like intersect, but we can stop as soon as we discover the
222 # output is going to be nonempty.
223 z = 0
224 for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
225 zip(other.data, itertools.cycle((+1, -1)))):
226 if (z == 1 and d == 1) or (z == 2 and d == -1):
227 return True
228 z += d
229 return False
230
231 def size(self):
232 """Returns the total size of the RangeSet (ie, how many integers
233 are in the set).
234
235 >>> RangeSet("10-19 30-34").size()
236 15
237 """
238
239 total = 0
240 for i, p in enumerate(self.data):
241 if i % 2:
242 total += p
243 else:
244 total -= p
245 return total
246
247 def map_within(self, other):
248 """'other' should be a subset of 'self'. Returns a RangeSet
249 representing what 'other' would get translated to if the integers
250 of 'self' were translated down to be contiguous starting at zero.
251
252 >>> RangeSet("0-9").map_within(RangeSet("3-4"))
253 <RangeSet("3-4")>
254 >>> RangeSet("10-19").map_within(RangeSet("13-14"))
255 <RangeSet("3-4")>
256 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
257 <RangeSet("7-12")>
258 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
259 <RangeSet("2-3 7-12")>
260 """
261
262 out = []
263 offset = 0
264 start = None
265 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
266 zip(other.data, itertools.cycle((-1, +1)))):
267 if d == -5:
268 start = p
269 elif d == +5:
270 offset += p-start
271 start = None
272 else:
273 out.append(offset + p - start)
274 return RangeSet(data=out)
275
276 def extend(self, n):
277 """Extend the RangeSet by 'n' blocks.
278
279 The lower bound is guaranteed to be non-negative.
280
281 >>> RangeSet("0-9").extend(1)
282 <RangeSet("0-10")>
283 >>> RangeSet("10-19").extend(15)
284 <RangeSet("0-34")>
285 >>> RangeSet("10-19 30-39").extend(4)
286 <RangeSet("6-23 26-43")>
287 >>> RangeSet("10-19 30-39").extend(10)
288 <RangeSet("0-49")>
289 """
290 out = self
291 for i in range(0, len(self.data), 2):
292 s, e = self.data[i:i+2]
293 s1 = max(0, s - n)
294 e1 = e + n
295 out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
296 return out
297
298 def first(self, n):
299 """Return the RangeSet that contains at most the first 'n' integers.
300
301 >>> RangeSet("0-9").first(1)
302 <RangeSet("0")>
303 >>> RangeSet("10-19").first(5)
304 <RangeSet("10-14")>
305 >>> RangeSet("10-19").first(15)
306 <RangeSet("10-19")>
307 >>> RangeSet("10-19 30-39").first(3)
308 <RangeSet("10-12")>
309 >>> RangeSet("10-19 30-39").first(15)
310 <RangeSet("10-19 30-34")>
311 >>> RangeSet("10-19 30-39").first(30)
312 <RangeSet("10-19 30-39")>
313 >>> RangeSet("0-9").first(0)
314 <RangeSet("")>
315 """
316
317 if self.size() <= n:
318 return self
319
320 out = []
321 for s, e in self:
322 if e - s >= n:
323 out += (s, s+n)
324 break
325 else:
326 out += (s, e)
327 n -= e - s
328 return RangeSet(data=out)
329
330 def next_item(self):
331 """Return the next integer represented by the RangeSet.
332
333 >>> list(RangeSet("0-9").next_item())
334 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
335 >>> list(RangeSet("10-19 3-5").next_item())
336 [3, 4, 5, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
337 >>> list(RangeSet("10-19 3 5 7").next_item())
338 [3, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
339 """
340 for s, e in self:
341 for element in range(s, e):
342 yield element
343
344
345if __name__ == "__main__":
346 import doctest
347 doctest.testmod()