| /* | 
 |  * FSE : Finite State Entropy encoder | 
 |  * Copyright (C) 2013-2015, Yann Collet. | 
 |  * | 
 |  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | 
 |  * | 
 |  * Redistribution and use in source and binary forms, with or without | 
 |  * modification, are permitted provided that the following conditions are | 
 |  * met: | 
 |  * | 
 |  *   * Redistributions of source code must retain the above copyright | 
 |  * notice, this list of conditions and the following disclaimer. | 
 |  *   * Redistributions in binary form must reproduce the above | 
 |  * copyright notice, this list of conditions and the following disclaimer | 
 |  * in the documentation and/or other materials provided with the | 
 |  * distribution. | 
 |  * | 
 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 
 |  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
 |  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
 |  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
 |  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
 |  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
 |  * | 
 |  * This program is free software; you can redistribute it and/or modify it under | 
 |  * the terms of the GNU General Public License version 2 as published by the | 
 |  * Free Software Foundation. This program is dual-licensed; you may select | 
 |  * either version 2 of the GNU General Public License ("GPL") or BSD license | 
 |  * ("BSD"). | 
 |  * | 
 |  * You can contact the author at : | 
 |  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | 
 |  */ | 
 |  | 
 | /* ************************************************************** | 
 | *  Compiler specifics | 
 | ****************************************************************/ | 
 | #define FORCE_INLINE static __always_inline | 
 |  | 
 | /* ************************************************************** | 
 | *  Includes | 
 | ****************************************************************/ | 
 | #include "bitstream.h" | 
 | #include "fse.h" | 
 | #include <linux/compiler.h> | 
 | #include <linux/kernel.h> | 
 | #include <linux/math64.h> | 
 | #include <linux/string.h> /* memcpy, memset */ | 
 |  | 
 | /* ************************************************************** | 
 | *  Error Management | 
 | ****************************************************************/ | 
 | #define FSE_STATIC_ASSERT(c)                                   \ | 
 | 	{                                                      \ | 
 | 		enum { FSE_static_assert = 1 / (int)(!!(c)) }; \ | 
 | 	} /* use only *after* variable declarations */ | 
 |  | 
 | /* ************************************************************** | 
 | *  Templates | 
 | ****************************************************************/ | 
 | /* | 
 |   designed to be included | 
 |   for type-specific functions (template emulation in C) | 
 |   Objective is to write these functions only once, for improved maintenance | 
 | */ | 
 |  | 
 | /* safety checks */ | 
 | #ifndef FSE_FUNCTION_EXTENSION | 
 | #error "FSE_FUNCTION_EXTENSION must be defined" | 
 | #endif | 
 | #ifndef FSE_FUNCTION_TYPE | 
 | #error "FSE_FUNCTION_TYPE must be defined" | 
 | #endif | 
 |  | 
 | /* Function names */ | 
 | #define FSE_CAT(X, Y) X##Y | 
 | #define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y) | 
 | #define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y) | 
 |  | 
 | /* Function templates */ | 
 |  | 
 | /* FSE_buildCTable_wksp() : | 
 |  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). | 
 |  * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)` | 
 |  * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements | 
 |  */ | 
 | size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize) | 
 | { | 
 | 	U32 const tableSize = 1 << tableLog; | 
 | 	U32 const tableMask = tableSize - 1; | 
 | 	void *const ptr = ct; | 
 | 	U16 *const tableU16 = ((U16 *)ptr) + 2; | 
 | 	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1); | 
 | 	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT); | 
 | 	U32 const step = FSE_TABLESTEP(tableSize); | 
 | 	U32 highThreshold = tableSize - 1; | 
 |  | 
 | 	U32 *cumul; | 
 | 	FSE_FUNCTION_TYPE *tableSymbol; | 
 | 	size_t spaceUsed32 = 0; | 
 |  | 
 | 	cumul = (U32 *)workspace + spaceUsed32; | 
 | 	spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2; | 
 | 	tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32); | 
 | 	spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2; | 
 |  | 
 | 	if ((spaceUsed32 << 2) > workspaceSize) | 
 | 		return ERROR(tableLog_tooLarge); | 
 | 	workspace = (U32 *)workspace + spaceUsed32; | 
 | 	workspaceSize -= (spaceUsed32 << 2); | 
 |  | 
 | 	/* CTable header */ | 
 | 	tableU16[-2] = (U16)tableLog; | 
 | 	tableU16[-1] = (U16)maxSymbolValue; | 
 |  | 
 | 	/* For explanations on how to distribute symbol values over the table : | 
 | 	*  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ | 
 |  | 
 | 	/* symbol start positions */ | 
 | 	{ | 
 | 		U32 u; | 
 | 		cumul[0] = 0; | 
 | 		for (u = 1; u <= maxSymbolValue + 1; u++) { | 
 | 			if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */ | 
 | 				cumul[u] = cumul[u - 1] + 1; | 
 | 				tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1); | 
 | 			} else { | 
 | 				cumul[u] = cumul[u - 1] + normalizedCounter[u - 1]; | 
 | 			} | 
 | 		} | 
 | 		cumul[maxSymbolValue + 1] = tableSize + 1; | 
 | 	} | 
 |  | 
 | 	/* Spread symbols */ | 
 | 	{ | 
 | 		U32 position = 0; | 
 | 		U32 symbol; | 
 | 		for (symbol = 0; symbol <= maxSymbolValue; symbol++) { | 
 | 			int nbOccurences; | 
 | 			for (nbOccurences = 0; nbOccurences < normalizedCounter[symbol]; nbOccurences++) { | 
 | 				tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; | 
 | 				position = (position + step) & tableMask; | 
 | 				while (position > highThreshold) | 
 | 					position = (position + step) & tableMask; /* Low proba area */ | 
 | 			} | 
 | 		} | 
 |  | 
 | 		if (position != 0) | 
 | 			return ERROR(GENERIC); /* Must have gone through all positions */ | 
 | 	} | 
 |  | 
 | 	/* Build table */ | 
 | 	{ | 
 | 		U32 u; | 
 | 		for (u = 0; u < tableSize; u++) { | 
 | 			FSE_FUNCTION_TYPE s = tableSymbol[u];	/* note : static analyzer may not understand tableSymbol is properly initialized */ | 
 | 			tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */ | 
 | 		} | 
 | 	} | 
 |  | 
 | 	/* Build Symbol Transformation Table */ | 
 | 	{ | 
 | 		unsigned total = 0; | 
 | 		unsigned s; | 
 | 		for (s = 0; s <= maxSymbolValue; s++) { | 
 | 			switch (normalizedCounter[s]) { | 
 | 			case 0: break; | 
 |  | 
 | 			case -1: | 
 | 			case 1: | 
 | 				symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog); | 
 | 				symbolTT[s].deltaFindState = total - 1; | 
 | 				total++; | 
 | 				break; | 
 | 			default: { | 
 | 				U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1); | 
 | 				U32 const minStatePlus = normalizedCounter[s] << maxBitsOut; | 
 | 				symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; | 
 | 				symbolTT[s].deltaFindState = total - normalizedCounter[s]; | 
 | 				total += normalizedCounter[s]; | 
 | 			} | 
 | 			} | 
 | 		} | 
 | 	} | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | /*-************************************************************** | 
 | *  FSE NCount encoding-decoding | 
 | ****************************************************************/ | 
 | size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) | 
 | { | 
 | 	size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3; | 
 | 	return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ | 
 | } | 
 |  | 
 | static size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, | 
 | 				      unsigned writeIsSafe) | 
 | { | 
 | 	BYTE *const ostart = (BYTE *)header; | 
 | 	BYTE *out = ostart; | 
 | 	BYTE *const oend = ostart + headerBufferSize; | 
 | 	int nbBits; | 
 | 	const int tableSize = 1 << tableLog; | 
 | 	int remaining; | 
 | 	int threshold; | 
 | 	U32 bitStream; | 
 | 	int bitCount; | 
 | 	unsigned charnum = 0; | 
 | 	int previous0 = 0; | 
 |  | 
 | 	bitStream = 0; | 
 | 	bitCount = 0; | 
 | 	/* Table Size */ | 
 | 	bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount; | 
 | 	bitCount += 4; | 
 |  | 
 | 	/* Init */ | 
 | 	remaining = tableSize + 1; /* +1 for extra accuracy */ | 
 | 	threshold = tableSize; | 
 | 	nbBits = tableLog + 1; | 
 |  | 
 | 	while (remaining > 1) { /* stops at 1 */ | 
 | 		if (previous0) { | 
 | 			unsigned start = charnum; | 
 | 			while (!normalizedCounter[charnum]) | 
 | 				charnum++; | 
 | 			while (charnum >= start + 24) { | 
 | 				start += 24; | 
 | 				bitStream += 0xFFFFU << bitCount; | 
 | 				if ((!writeIsSafe) && (out > oend - 2)) | 
 | 					return ERROR(dstSize_tooSmall); /* Buffer overflow */ | 
 | 				out[0] = (BYTE)bitStream; | 
 | 				out[1] = (BYTE)(bitStream >> 8); | 
 | 				out += 2; | 
 | 				bitStream >>= 16; | 
 | 			} | 
 | 			while (charnum >= start + 3) { | 
 | 				start += 3; | 
 | 				bitStream += 3 << bitCount; | 
 | 				bitCount += 2; | 
 | 			} | 
 | 			bitStream += (charnum - start) << bitCount; | 
 | 			bitCount += 2; | 
 | 			if (bitCount > 16) { | 
 | 				if ((!writeIsSafe) && (out > oend - 2)) | 
 | 					return ERROR(dstSize_tooSmall); /* Buffer overflow */ | 
 | 				out[0] = (BYTE)bitStream; | 
 | 				out[1] = (BYTE)(bitStream >> 8); | 
 | 				out += 2; | 
 | 				bitStream >>= 16; | 
 | 				bitCount -= 16; | 
 | 			} | 
 | 		} | 
 | 		{ | 
 | 			int count = normalizedCounter[charnum++]; | 
 | 			int const max = (2 * threshold - 1) - remaining; | 
 | 			remaining -= count < 0 ? -count : count; | 
 | 			count++; /* +1 for extra accuracy */ | 
 | 			if (count >= threshold) | 
 | 				count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ | 
 | 			bitStream += count << bitCount; | 
 | 			bitCount += nbBits; | 
 | 			bitCount -= (count < max); | 
 | 			previous0 = (count == 1); | 
 | 			if (remaining < 1) | 
 | 				return ERROR(GENERIC); | 
 | 			while (remaining < threshold) | 
 | 				nbBits--, threshold >>= 1; | 
 | 		} | 
 | 		if (bitCount > 16) { | 
 | 			if ((!writeIsSafe) && (out > oend - 2)) | 
 | 				return ERROR(dstSize_tooSmall); /* Buffer overflow */ | 
 | 			out[0] = (BYTE)bitStream; | 
 | 			out[1] = (BYTE)(bitStream >> 8); | 
 | 			out += 2; | 
 | 			bitStream >>= 16; | 
 | 			bitCount -= 16; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	/* flush remaining bitStream */ | 
 | 	if ((!writeIsSafe) && (out > oend - 2)) | 
 | 		return ERROR(dstSize_tooSmall); /* Buffer overflow */ | 
 | 	out[0] = (BYTE)bitStream; | 
 | 	out[1] = (BYTE)(bitStream >> 8); | 
 | 	out += (bitCount + 7) / 8; | 
 |  | 
 | 	if (charnum > maxSymbolValue + 1) | 
 | 		return ERROR(GENERIC); | 
 |  | 
 | 	return (out - ostart); | 
 | } | 
 |  | 
 | size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) | 
 | { | 
 | 	if (tableLog > FSE_MAX_TABLELOG) | 
 | 		return ERROR(tableLog_tooLarge); /* Unsupported */ | 
 | 	if (tableLog < FSE_MIN_TABLELOG) | 
 | 		return ERROR(GENERIC); /* Unsupported */ | 
 |  | 
 | 	if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) | 
 | 		return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); | 
 |  | 
 | 	return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); | 
 | } | 
 |  | 
 | /*-************************************************************** | 
 | *  Counting histogram | 
 | ****************************************************************/ | 
 | /*! FSE_count_simple | 
 | 	This function counts byte values within `src`, and store the histogram into table `count`. | 
 | 	It doesn't use any additional memory. | 
 | 	But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. | 
 | 	For this reason, prefer using a table `count` with 256 elements. | 
 | 	@return : count of most numerous element | 
 | */ | 
 | size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize) | 
 | { | 
 | 	const BYTE *ip = (const BYTE *)src; | 
 | 	const BYTE *const end = ip + srcSize; | 
 | 	unsigned maxSymbolValue = *maxSymbolValuePtr; | 
 | 	unsigned max = 0; | 
 |  | 
 | 	memset(count, 0, (maxSymbolValue + 1) * sizeof(*count)); | 
 | 	if (srcSize == 0) { | 
 | 		*maxSymbolValuePtr = 0; | 
 | 		return 0; | 
 | 	} | 
 |  | 
 | 	while (ip < end) | 
 | 		count[*ip++]++; | 
 |  | 
 | 	while (!count[maxSymbolValue]) | 
 | 		maxSymbolValue--; | 
 | 	*maxSymbolValuePtr = maxSymbolValue; | 
 |  | 
 | 	{ | 
 | 		U32 s; | 
 | 		for (s = 0; s <= maxSymbolValue; s++) | 
 | 			if (count[s] > max) | 
 | 				max = count[s]; | 
 | 	} | 
 |  | 
 | 	return (size_t)max; | 
 | } | 
 |  | 
 | /* FSE_count_parallel_wksp() : | 
 |  * Same as FSE_count_parallel(), but using an externally provided scratch buffer. | 
 |  * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */ | 
 | static size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax, | 
 | 				      unsigned *const workSpace) | 
 | { | 
 | 	const BYTE *ip = (const BYTE *)source; | 
 | 	const BYTE *const iend = ip + sourceSize; | 
 | 	unsigned maxSymbolValue = *maxSymbolValuePtr; | 
 | 	unsigned max = 0; | 
 | 	U32 *const Counting1 = workSpace; | 
 | 	U32 *const Counting2 = Counting1 + 256; | 
 | 	U32 *const Counting3 = Counting2 + 256; | 
 | 	U32 *const Counting4 = Counting3 + 256; | 
 |  | 
 | 	memset(Counting1, 0, 4 * 256 * sizeof(unsigned)); | 
 |  | 
 | 	/* safety checks */ | 
 | 	if (!sourceSize) { | 
 | 		memset(count, 0, maxSymbolValue + 1); | 
 | 		*maxSymbolValuePtr = 0; | 
 | 		return 0; | 
 | 	} | 
 | 	if (!maxSymbolValue) | 
 | 		maxSymbolValue = 255; /* 0 == default */ | 
 |  | 
 | 	/* by stripes of 16 bytes */ | 
 | 	{ | 
 | 		U32 cached = ZSTD_read32(ip); | 
 | 		ip += 4; | 
 | 		while (ip < iend - 15) { | 
 | 			U32 c = cached; | 
 | 			cached = ZSTD_read32(ip); | 
 | 			ip += 4; | 
 | 			Counting1[(BYTE)c]++; | 
 | 			Counting2[(BYTE)(c >> 8)]++; | 
 | 			Counting3[(BYTE)(c >> 16)]++; | 
 | 			Counting4[c >> 24]++; | 
 | 			c = cached; | 
 | 			cached = ZSTD_read32(ip); | 
 | 			ip += 4; | 
 | 			Counting1[(BYTE)c]++; | 
 | 			Counting2[(BYTE)(c >> 8)]++; | 
 | 			Counting3[(BYTE)(c >> 16)]++; | 
 | 			Counting4[c >> 24]++; | 
 | 			c = cached; | 
 | 			cached = ZSTD_read32(ip); | 
 | 			ip += 4; | 
 | 			Counting1[(BYTE)c]++; | 
 | 			Counting2[(BYTE)(c >> 8)]++; | 
 | 			Counting3[(BYTE)(c >> 16)]++; | 
 | 			Counting4[c >> 24]++; | 
 | 			c = cached; | 
 | 			cached = ZSTD_read32(ip); | 
 | 			ip += 4; | 
 | 			Counting1[(BYTE)c]++; | 
 | 			Counting2[(BYTE)(c >> 8)]++; | 
 | 			Counting3[(BYTE)(c >> 16)]++; | 
 | 			Counting4[c >> 24]++; | 
 | 		} | 
 | 		ip -= 4; | 
 | 	} | 
 |  | 
 | 	/* finish last symbols */ | 
 | 	while (ip < iend) | 
 | 		Counting1[*ip++]++; | 
 |  | 
 | 	if (checkMax) { /* verify stats will fit into destination table */ | 
 | 		U32 s; | 
 | 		for (s = 255; s > maxSymbolValue; s--) { | 
 | 			Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; | 
 | 			if (Counting1[s]) | 
 | 				return ERROR(maxSymbolValue_tooSmall); | 
 | 		} | 
 | 	} | 
 |  | 
 | 	{ | 
 | 		U32 s; | 
 | 		for (s = 0; s <= maxSymbolValue; s++) { | 
 | 			count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; | 
 | 			if (count[s] > max) | 
 | 				max = count[s]; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	while (!count[maxSymbolValue]) | 
 | 		maxSymbolValue--; | 
 | 	*maxSymbolValuePtr = maxSymbolValue; | 
 | 	return (size_t)max; | 
 | } | 
 |  | 
 | /* FSE_countFast_wksp() : | 
 |  * Same as FSE_countFast(), but using an externally provided scratch buffer. | 
 |  * `workSpace` size must be table of >= `1024` unsigned */ | 
 | size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace) | 
 | { | 
 | 	if (sourceSize < 1500) | 
 | 		return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); | 
 | 	return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); | 
 | } | 
 |  | 
 | /* FSE_count_wksp() : | 
 |  * Same as FSE_count(), but using an externally provided scratch buffer. | 
 |  * `workSpace` size must be table of >= `1024` unsigned */ | 
 | size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace) | 
 | { | 
 | 	if (*maxSymbolValuePtr < 255) | 
 | 		return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); | 
 | 	*maxSymbolValuePtr = 255; | 
 | 	return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); | 
 | } | 
 |  | 
 | /*-************************************************************** | 
 | *  FSE Compression Code | 
 | ****************************************************************/ | 
 | /*! FSE_sizeof_CTable() : | 
 | 	FSE_CTable is a variable size structure which contains : | 
 | 	`U16 tableLog;` | 
 | 	`U16 maxSymbolValue;` | 
 | 	`U16 nextStateNumber[1 << tableLog];`                         // This size is variable | 
 | 	`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable | 
 | Allocation is manual (C standard does not support variable-size structures). | 
 | */ | 
 | size_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog) | 
 | { | 
 | 	if (tableLog > FSE_MAX_TABLELOG) | 
 | 		return ERROR(tableLog_tooLarge); | 
 | 	return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32); | 
 | } | 
 |  | 
 | /* provides the minimum logSize to safely represent a distribution */ | 
 | static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) | 
 | { | 
 | 	U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; | 
 | 	U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; | 
 | 	U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; | 
 | 	return minBits; | 
 | } | 
 |  | 
 | unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) | 
 | { | 
 | 	U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; | 
 | 	U32 tableLog = maxTableLog; | 
 | 	U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); | 
 | 	if (tableLog == 0) | 
 | 		tableLog = FSE_DEFAULT_TABLELOG; | 
 | 	if (maxBitsSrc < tableLog) | 
 | 		tableLog = maxBitsSrc; /* Accuracy can be reduced */ | 
 | 	if (minBits > tableLog) | 
 | 		tableLog = minBits; /* Need a minimum to safely represent all symbol values */ | 
 | 	if (tableLog < FSE_MIN_TABLELOG) | 
 | 		tableLog = FSE_MIN_TABLELOG; | 
 | 	if (tableLog > FSE_MAX_TABLELOG) | 
 | 		tableLog = FSE_MAX_TABLELOG; | 
 | 	return tableLog; | 
 | } | 
 |  | 
 | unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) | 
 | { | 
 | 	return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); | 
 | } | 
 |  | 
 | /* Secondary normalization method. | 
 |    To be used when primary method fails. */ | 
 |  | 
 | static size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue) | 
 | { | 
 | 	short const NOT_YET_ASSIGNED = -2; | 
 | 	U32 s; | 
 | 	U32 distributed = 0; | 
 | 	U32 ToDistribute; | 
 |  | 
 | 	/* Init */ | 
 | 	U32 const lowThreshold = (U32)(total >> tableLog); | 
 | 	U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); | 
 |  | 
 | 	for (s = 0; s <= maxSymbolValue; s++) { | 
 | 		if (count[s] == 0) { | 
 | 			norm[s] = 0; | 
 | 			continue; | 
 | 		} | 
 | 		if (count[s] <= lowThreshold) { | 
 | 			norm[s] = -1; | 
 | 			distributed++; | 
 | 			total -= count[s]; | 
 | 			continue; | 
 | 		} | 
 | 		if (count[s] <= lowOne) { | 
 | 			norm[s] = 1; | 
 | 			distributed++; | 
 | 			total -= count[s]; | 
 | 			continue; | 
 | 		} | 
 |  | 
 | 		norm[s] = NOT_YET_ASSIGNED; | 
 | 	} | 
 | 	ToDistribute = (1 << tableLog) - distributed; | 
 |  | 
 | 	if ((total / ToDistribute) > lowOne) { | 
 | 		/* risk of rounding to zero */ | 
 | 		lowOne = (U32)((total * 3) / (ToDistribute * 2)); | 
 | 		for (s = 0; s <= maxSymbolValue; s++) { | 
 | 			if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { | 
 | 				norm[s] = 1; | 
 | 				distributed++; | 
 | 				total -= count[s]; | 
 | 				continue; | 
 | 			} | 
 | 		} | 
 | 		ToDistribute = (1 << tableLog) - distributed; | 
 | 	} | 
 |  | 
 | 	if (distributed == maxSymbolValue + 1) { | 
 | 		/* all values are pretty poor; | 
 | 		   probably incompressible data (should have already been detected); | 
 | 		   find max, then give all remaining points to max */ | 
 | 		U32 maxV = 0, maxC = 0; | 
 | 		for (s = 0; s <= maxSymbolValue; s++) | 
 | 			if (count[s] > maxC) | 
 | 				maxV = s, maxC = count[s]; | 
 | 		norm[maxV] += (short)ToDistribute; | 
 | 		return 0; | 
 | 	} | 
 |  | 
 | 	if (total == 0) { | 
 | 		/* all of the symbols were low enough for the lowOne or lowThreshold */ | 
 | 		for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1)) | 
 | 			if (norm[s] > 0) | 
 | 				ToDistribute--, norm[s]++; | 
 | 		return 0; | 
 | 	} | 
 |  | 
 | 	{ | 
 | 		U64 const vStepLog = 62 - tableLog; | 
 | 		U64 const mid = (1ULL << (vStepLog - 1)) - 1; | 
 | 		U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */ | 
 | 		U64 tmpTotal = mid; | 
 | 		for (s = 0; s <= maxSymbolValue; s++) { | 
 | 			if (norm[s] == NOT_YET_ASSIGNED) { | 
 | 				U64 const end = tmpTotal + (count[s] * rStep); | 
 | 				U32 const sStart = (U32)(tmpTotal >> vStepLog); | 
 | 				U32 const sEnd = (U32)(end >> vStepLog); | 
 | 				U32 const weight = sEnd - sStart; | 
 | 				if (weight < 1) | 
 | 					return ERROR(GENERIC); | 
 | 				norm[s] = (short)weight; | 
 | 				tmpTotal = end; | 
 | 			} | 
 | 		} | 
 | 	} | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue) | 
 | { | 
 | 	/* Sanity checks */ | 
 | 	if (tableLog == 0) | 
 | 		tableLog = FSE_DEFAULT_TABLELOG; | 
 | 	if (tableLog < FSE_MIN_TABLELOG) | 
 | 		return ERROR(GENERIC); /* Unsupported size */ | 
 | 	if (tableLog > FSE_MAX_TABLELOG) | 
 | 		return ERROR(tableLog_tooLarge); /* Unsupported size */ | 
 | 	if (tableLog < FSE_minTableLog(total, maxSymbolValue)) | 
 | 		return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ | 
 |  | 
 | 	{ | 
 | 		U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}; | 
 | 		U64 const scale = 62 - tableLog; | 
 | 		U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! */ | 
 | 		U64 const vStep = 1ULL << (scale - 20); | 
 | 		int stillToDistribute = 1 << tableLog; | 
 | 		unsigned s; | 
 | 		unsigned largest = 0; | 
 | 		short largestP = 0; | 
 | 		U32 lowThreshold = (U32)(total >> tableLog); | 
 |  | 
 | 		for (s = 0; s <= maxSymbolValue; s++) { | 
 | 			if (count[s] == total) | 
 | 				return 0; /* rle special case */ | 
 | 			if (count[s] == 0) { | 
 | 				normalizedCounter[s] = 0; | 
 | 				continue; | 
 | 			} | 
 | 			if (count[s] <= lowThreshold) { | 
 | 				normalizedCounter[s] = -1; | 
 | 				stillToDistribute--; | 
 | 			} else { | 
 | 				short proba = (short)((count[s] * step) >> scale); | 
 | 				if (proba < 8) { | 
 | 					U64 restToBeat = vStep * rtbTable[proba]; | 
 | 					proba += (count[s] * step) - ((U64)proba << scale) > restToBeat; | 
 | 				} | 
 | 				if (proba > largestP) | 
 | 					largestP = proba, largest = s; | 
 | 				normalizedCounter[s] = proba; | 
 | 				stillToDistribute -= proba; | 
 | 			} | 
 | 		} | 
 | 		if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { | 
 | 			/* corner case, need another normalization method */ | 
 | 			size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); | 
 | 			if (FSE_isError(errorCode)) | 
 | 				return errorCode; | 
 | 		} else | 
 | 			normalizedCounter[largest] += (short)stillToDistribute; | 
 | 	} | 
 |  | 
 | 	return tableLog; | 
 | } | 
 |  | 
 | /* fake FSE_CTable, for raw (uncompressed) input */ | 
 | size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits) | 
 | { | 
 | 	const unsigned tableSize = 1 << nbBits; | 
 | 	const unsigned tableMask = tableSize - 1; | 
 | 	const unsigned maxSymbolValue = tableMask; | 
 | 	void *const ptr = ct; | 
 | 	U16 *const tableU16 = ((U16 *)ptr) + 2; | 
 | 	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */ | 
 | 	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT); | 
 | 	unsigned s; | 
 |  | 
 | 	/* Sanity checks */ | 
 | 	if (nbBits < 1) | 
 | 		return ERROR(GENERIC); /* min size */ | 
 |  | 
 | 	/* header */ | 
 | 	tableU16[-2] = (U16)nbBits; | 
 | 	tableU16[-1] = (U16)maxSymbolValue; | 
 |  | 
 | 	/* Build table */ | 
 | 	for (s = 0; s < tableSize; s++) | 
 | 		tableU16[s] = (U16)(tableSize + s); | 
 |  | 
 | 	/* Build Symbol Transformation Table */ | 
 | 	{ | 
 | 		const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); | 
 | 		for (s = 0; s <= maxSymbolValue; s++) { | 
 | 			symbolTT[s].deltaNbBits = deltaNbBits; | 
 | 			symbolTT[s].deltaFindState = s - 1; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* fake FSE_CTable, for rle input (always same symbol) */ | 
 | size_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue) | 
 | { | 
 | 	void *ptr = ct; | 
 | 	U16 *tableU16 = ((U16 *)ptr) + 2; | 
 | 	void *FSCTptr = (U32 *)ptr + 2; | 
 | 	FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr; | 
 |  | 
 | 	/* header */ | 
 | 	tableU16[-2] = (U16)0; | 
 | 	tableU16[-1] = (U16)symbolValue; | 
 |  | 
 | 	/* Build table */ | 
 | 	tableU16[0] = 0; | 
 | 	tableU16[1] = 0; /* just in case */ | 
 |  | 
 | 	/* Build Symbol Transformation Table */ | 
 | 	symbolTT[symbolValue].deltaNbBits = 0; | 
 | 	symbolTT[symbolValue].deltaFindState = 0; | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | static size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast) | 
 | { | 
 | 	const BYTE *const istart = (const BYTE *)src; | 
 | 	const BYTE *const iend = istart + srcSize; | 
 | 	const BYTE *ip = iend; | 
 |  | 
 | 	BIT_CStream_t bitC; | 
 | 	FSE_CState_t CState1, CState2; | 
 |  | 
 | 	/* init */ | 
 | 	if (srcSize <= 2) | 
 | 		return 0; | 
 | 	{ | 
 | 		size_t const initError = BIT_initCStream(&bitC, dst, dstSize); | 
 | 		if (FSE_isError(initError)) | 
 | 			return 0; /* not enough space available to write a bitstream */ | 
 | 	} | 
 |  | 
 | #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) | 
 |  | 
 | 	if (srcSize & 1) { | 
 | 		FSE_initCState2(&CState1, ct, *--ip); | 
 | 		FSE_initCState2(&CState2, ct, *--ip); | 
 | 		FSE_encodeSymbol(&bitC, &CState1, *--ip); | 
 | 		FSE_FLUSHBITS(&bitC); | 
 | 	} else { | 
 | 		FSE_initCState2(&CState2, ct, *--ip); | 
 | 		FSE_initCState2(&CState1, ct, *--ip); | 
 | 	} | 
 |  | 
 | 	/* join to mod 4 */ | 
 | 	srcSize -= 2; | 
 | 	if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */ | 
 | 		FSE_encodeSymbol(&bitC, &CState2, *--ip); | 
 | 		FSE_encodeSymbol(&bitC, &CState1, *--ip); | 
 | 		FSE_FLUSHBITS(&bitC); | 
 | 	} | 
 |  | 
 | 	/* 2 or 4 encoding per loop */ | 
 | 	while (ip > istart) { | 
 |  | 
 | 		FSE_encodeSymbol(&bitC, &CState2, *--ip); | 
 |  | 
 | 		if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */ | 
 | 			FSE_FLUSHBITS(&bitC); | 
 |  | 
 | 		FSE_encodeSymbol(&bitC, &CState1, *--ip); | 
 |  | 
 | 		if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */ | 
 | 			FSE_encodeSymbol(&bitC, &CState2, *--ip); | 
 | 			FSE_encodeSymbol(&bitC, &CState1, *--ip); | 
 | 		} | 
 |  | 
 | 		FSE_FLUSHBITS(&bitC); | 
 | 	} | 
 |  | 
 | 	FSE_flushCState(&bitC, &CState2); | 
 | 	FSE_flushCState(&bitC, &CState1); | 
 | 	return BIT_closeCStream(&bitC); | 
 | } | 
 |  | 
 | size_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct) | 
 | { | 
 | 	unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); | 
 |  | 
 | 	if (fast) | 
 | 		return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); | 
 | 	else | 
 | 		return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); | 
 | } | 
 |  | 
 | size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } |