blob: 86251ccedfda99afaea5462577f90d938ff686d7 [file] [log] [blame]
xjb04a4022021-11-25 15:01:52 +08001/* LzFind.c -- Match finder for LZ algorithms
22009-04-22 : Igor Pavlov : Public domain */
3
4#include <string.h>
5
6#include "LzFind.h"
7#include "LzHash.h"
8
9#define kEmptyHashValue 0
10#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
11#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
12#define kNormalizeMask (~(kNormalizeStepMin - 1))
13#define kMaxHistorySize ((UInt32)3 << 30)
14
15#define kStartMaxLen 3
16
17#if 0
18#define DIRECT_INPUT p->directInput
19#else
20#define DIRECT_INPUT 1
21#endif
22
23static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc)
24{
25 if (!DIRECT_INPUT)
26 {
27 alloc->Free(alloc, p->bufferBase);
28 p->bufferBase = 0;
29 }
30}
31
32/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
33
34static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc)
35{
36 UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
37 if (DIRECT_INPUT)
38 {
39 p->blockSize = blockSize;
40 return 1;
41 }
42 if (p->bufferBase == 0 || p->blockSize != blockSize)
43 {
44 LzInWindow_Free(p, alloc);
45 p->blockSize = blockSize;
46 p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize);
47 }
48 return (p->bufferBase != 0);
49}
50
51static Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
52static Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; }
53
54static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
55
56static void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
57{
58 p->posLimit -= subValue;
59 p->pos -= subValue;
60 p->streamPos -= subValue;
61}
62
63static void MatchFinder_ReadBlock(CMatchFinder *p)
64{
65 if (p->streamEndWasReached || p->result != SZ_OK)
66 return;
67 if (DIRECT_INPUT)
68 {
69 UInt32 curSize = 0xFFFFFFFF - p->streamPos;
70 if (curSize > p->directInputRem)
71 curSize = (UInt32)p->directInputRem;
72 p->directInputRem -= curSize;
73 p->streamPos += curSize;
74 if (p->directInputRem == 0)
75 p->streamEndWasReached = 1;
76 return;
77 }
78 for (;;)
79 {
80 Byte *dest = p->buffer + (p->streamPos - p->pos);
81 size_t size = (p->bufferBase + p->blockSize - dest);
82 if (size == 0)
83 return;
84 p->result = p->stream->Read(p->stream, dest, &size);
85 if (p->result != SZ_OK)
86 return;
87 if (size == 0)
88 {
89 p->streamEndWasReached = 1;
90 return;
91 }
92 p->streamPos += (UInt32)size;
93 if (p->streamPos - p->pos > p->keepSizeAfter)
94 return;
95 }
96}
97
98static void MatchFinder_MoveBlock(CMatchFinder *p)
99{
100 memmove(p->bufferBase,
101 p->buffer - p->keepSizeBefore,
102 (size_t)(p->streamPos - p->pos + p->keepSizeBefore));
103 p->buffer = p->bufferBase + p->keepSizeBefore;
104}
105
106static int MatchFinder_NeedMove(CMatchFinder *p)
107{
108 if (DIRECT_INPUT)
109 return 0;
110 /* if (p->streamEndWasReached) return 0; */
111 return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
112}
113
114static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
115{
116 if (MatchFinder_NeedMove(p))
117 MatchFinder_MoveBlock(p);
118 MatchFinder_ReadBlock(p);
119}
120
121static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
122{
123 p->cutValue = 32;
124 p->btMode = 1;
125 p->numHashBytes = 4;
126 p->bigHash = 0;
127}
128
129#define kCrcPoly 0xEDB88320
130
131void MatchFinder_Construct(CMatchFinder *p)
132{
133 UInt32 i;
134 p->bufferBase = 0;
135 p->directInput = 0;
136 p->hash = 0;
137 MatchFinder_SetDefaultSettings(p);
138
139 for (i = 0; i < 256; i++)
140 {
141 UInt32 r = i;
142 int j;
143 for (j = 0; j < 8; j++)
144 r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1));
145 p->crc[i] = r;
146 }
147}
148
149static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc)
150{
151 alloc->Free(alloc, p->hash);
152 p->hash = 0;
153}
154
155void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc)
156{
157 MatchFinder_FreeThisClassMemory(p, alloc);
158 LzInWindow_Free(p, alloc);
159}
160
161static CLzRef* AllocRefs(UInt32 num, ISzAlloc *alloc)
162{
163 size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
164 if (sizeInBytes / sizeof(CLzRef) != num)
165 return 0;
166 return (CLzRef *)alloc->Alloc(alloc, sizeInBytes);
167}
168
169int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
170 UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
171 ISzAlloc *alloc)
172{
173 UInt32 sizeReserv;
174 if (historySize > kMaxHistorySize)
175 {
176 MatchFinder_Free(p, alloc);
177 return 0;
178 }
179 sizeReserv = historySize >> 1;
180 if (historySize > ((UInt32)2 << 30))
181 sizeReserv = historySize >> 2;
182 sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
183
184 p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
185 p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
186 /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
187 if (LzInWindow_Create(p, sizeReserv, alloc))
188 {
189 UInt32 newCyclicBufferSize = historySize + 1;
190 UInt32 hs;
191 p->matchMaxLen = matchMaxLen;
192 {
193 p->fixedHashSize = 0;
194 if (p->numHashBytes == 2)
195 hs = (1 << 16) - 1;
196 else
197 {
198 hs = historySize - 1;
199 hs |= (hs >> 1);
200 hs |= (hs >> 2);
201 hs |= (hs >> 4);
202 hs |= (hs >> 8);
203 hs >>= 1;
204 hs |= 0xFFFF; /* don't change it! It's required for Deflate */
205 if (hs > (1 << 24))
206 {
207 if (p->numHashBytes == 3)
208 hs = (1 << 24) - 1;
209 else
210 hs >>= 1;
211 }
212 }
213 p->hashMask = hs;
214 hs++;
215 if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
216 if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
217 if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
218 hs += p->fixedHashSize;
219 }
220
221 {
222 UInt32 prevSize = p->hashSizeSum + p->numSons;
223 UInt32 newSize;
224 p->historySize = historySize;
225 p->hashSizeSum = hs;
226 p->cyclicBufferSize = newCyclicBufferSize;
227 p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize);
228 newSize = p->hashSizeSum + p->numSons;
229 if (p->hash != 0 && prevSize == newSize)
230 return 1;
231 MatchFinder_FreeThisClassMemory(p, alloc);
232 p->hash = AllocRefs(newSize, alloc);
233 if (p->hash != 0)
234 {
235 p->son = p->hash + p->hashSizeSum;
236 return 1;
237 }
238 }
239 }
240 MatchFinder_Free(p, alloc);
241 return 0;
242}
243
244static void MatchFinder_SetLimits(CMatchFinder *p)
245{
246 UInt32 limit = kMaxValForNormalize - p->pos;
247 UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
248 if (limit2 < limit)
249 limit = limit2;
250 limit2 = p->streamPos - p->pos;
251 if (limit2 <= p->keepSizeAfter)
252 {
253 if (limit2 > 0)
254 limit2 = 1;
255 }
256 else
257 limit2 -= p->keepSizeAfter;
258 if (limit2 < limit)
259 limit = limit2;
260 {
261 UInt32 lenLimit = p->streamPos - p->pos;
262 if (lenLimit > p->matchMaxLen)
263 lenLimit = p->matchMaxLen;
264 p->lenLimit = lenLimit;
265 }
266 p->posLimit = p->pos + limit;
267}
268
269static void MatchFinder_Init(CMatchFinder *p)
270{
271 UInt32 i;
272 for (i = 0; i < p->hashSizeSum; i++)
273 p->hash[i] = kEmptyHashValue;
274 p->cyclicBufferPos = 0;
275 p->buffer = p->bufferBase;
276 p->pos = p->streamPos = p->cyclicBufferSize;
277 p->result = SZ_OK;
278 p->streamEndWasReached = 0;
279 MatchFinder_ReadBlock(p);
280 MatchFinder_SetLimits(p);
281}
282
283static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
284{
285 return (p->pos - p->historySize - 1) & kNormalizeMask;
286}
287
288static void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems)
289{
290 UInt32 i;
291 for (i = 0; i < numItems; i++)
292 {
293 UInt32 value = items[i];
294 if (value <= subValue)
295 value = kEmptyHashValue;
296 else
297 value -= subValue;
298 items[i] = value;
299 }
300}
301
302static void MatchFinder_Normalize(CMatchFinder *p)
303{
304 UInt32 subValue = MatchFinder_GetSubValue(p);
305 MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons);
306 MatchFinder_ReduceOffsets(p, subValue);
307}
308
309static void MatchFinder_CheckLimits(CMatchFinder *p)
310{
311 if (p->pos == kMaxValForNormalize)
312 MatchFinder_Normalize(p);
313 if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
314 MatchFinder_CheckAndMoveAndRead(p);
315 if (p->cyclicBufferPos == p->cyclicBufferSize)
316 p->cyclicBufferPos = 0;
317 MatchFinder_SetLimits(p);
318}
319
320static UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
321 UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
322 UInt32 *distances, UInt32 maxLen)
323{
324 CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
325 CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
326 UInt32 len0 = 0, len1 = 0;
327 for (;;)
328 {
329 UInt32 delta = pos - curMatch;
330 if (cutValue-- == 0 || delta >= _cyclicBufferSize)
331 {
332 *ptr0 = *ptr1 = kEmptyHashValue;
333 return distances;
334 }
335 {
336 CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
337 const Byte *pb = cur - delta;
338 UInt32 len = (len0 < len1 ? len0 : len1);
339 if (pb[len] == cur[len])
340 {
341 if (++len != lenLimit && pb[len] == cur[len])
342 while (++len != lenLimit)
343 if (pb[len] != cur[len])
344 break;
345 if (maxLen < len)
346 {
347 *distances++ = maxLen = len;
348 *distances++ = delta - 1;
349 if (len == lenLimit)
350 {
351 *ptr1 = pair[0];
352 *ptr0 = pair[1];
353 return distances;
354 }
355 }
356 }
357 if (pb[len] < cur[len])
358 {
359 *ptr1 = curMatch;
360 ptr1 = pair + 1;
361 curMatch = *ptr1;
362 len1 = len;
363 }
364 else
365 {
366 *ptr0 = curMatch;
367 ptr0 = pair;
368 curMatch = *ptr0;
369 len0 = len;
370 }
371 }
372 }
373}
374
375static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
376 UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
377{
378 CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
379 CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
380 UInt32 len0 = 0, len1 = 0;
381 for (;;)
382 {
383 UInt32 delta = pos - curMatch;
384 if (cutValue-- == 0 || delta >= _cyclicBufferSize)
385 {
386 *ptr0 = *ptr1 = kEmptyHashValue;
387 return;
388 }
389 {
390 CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
391 const Byte *pb = cur - delta;
392 UInt32 len = (len0 < len1 ? len0 : len1);
393 if (pb[len] == cur[len])
394 {
395 while (++len != lenLimit)
396 if (pb[len] != cur[len])
397 break;
398 {
399 if (len == lenLimit)
400 {
401 *ptr1 = pair[0];
402 *ptr0 = pair[1];
403 return;
404 }
405 }
406 }
407 if (pb[len] < cur[len])
408 {
409 *ptr1 = curMatch;
410 ptr1 = pair + 1;
411 curMatch = *ptr1;
412 len1 = len;
413 }
414 else
415 {
416 *ptr0 = curMatch;
417 ptr0 = pair;
418 curMatch = *ptr0;
419 len0 = len;
420 }
421 }
422 }
423}
424
425#define MOVE_POS \
426 ++p->cyclicBufferPos; \
427 p->buffer++; \
428 if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
429
430static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
431
432#define MOVE_POS_RET MatchFinder_MovePos(p); return offset;
433
434#define GET_MATCHES_HEADER2(minLen, ret_op) \
435 UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \
436 lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
437 cur = p->buffer;
438
439#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
440#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
441
442#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
443
444#define GET_MATCHES_FOOTER(offset, maxLen) \
445 offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
446 distances + offset, maxLen) - distances); MOVE_POS_RET;
447
448#define SKIP_FOOTER \
449 SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MatchFinder_MovePos(p);
450
451static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
452{
453 UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
454 GET_MATCHES_HEADER(4)
455
456 HASH4_CALC;
457
458 delta2 = p->pos - p->hash[ hash2Value];
459 delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
460 curMatch = p->hash[kFix4HashSize + hashValue];
461
462 p->hash[ hash2Value] =
463 p->hash[kFix3HashSize + hash3Value] =
464 p->hash[kFix4HashSize + hashValue] = p->pos;
465
466 maxLen = 1;
467 offset = 0;
468 if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
469 {
470 distances[0] = maxLen = 2;
471 distances[1] = delta2 - 1;
472 offset = 2;
473 }
474 if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
475 {
476 maxLen = 3;
477 distances[offset + 1] = delta3 - 1;
478 offset += 2;
479 delta2 = delta3;
480 }
481 if (offset != 0)
482 {
483 for (; maxLen != lenLimit; maxLen++)
484 if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
485 break;
486 distances[offset - 2] = maxLen;
487 if (maxLen == lenLimit)
488 {
489 SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
490 MOVE_POS_RET;
491 }
492 }
493 if (maxLen < 3)
494 maxLen = 3;
495 GET_MATCHES_FOOTER(offset, maxLen)
496}
497
498static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
499{
500 do
501 {
502 UInt32 hash2Value, hash3Value;
503 SKIP_HEADER(4)
504 HASH4_CALC;
505 curMatch = p->hash[kFix4HashSize + hashValue];
506 p->hash[ hash2Value] =
507 p->hash[kFix3HashSize + hash3Value] = p->pos;
508 p->hash[kFix4HashSize + hashValue] = p->pos;
509 SKIP_FOOTER
510 }
511 while (--num != 0);
512}
513
514void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
515{
516 vTable->Init = (Mf_Init_Func)MatchFinder_Init;
517 vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte;
518 vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
519 vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
520 vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
521 vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
522}