| xj | b04a402 | 2021-11-25 15:01:52 +0800 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 */ | 
|  | 2 |  | 
|  | 3 | #ifndef __842_H__ | 
|  | 4 | #define __842_H__ | 
|  | 5 |  | 
|  | 6 | /* The 842 compressed format is made up of multiple blocks, each of | 
|  | 7 | * which has the format: | 
|  | 8 | * | 
|  | 9 | * <template>[arg1][arg2][arg3][arg4] | 
|  | 10 | * | 
|  | 11 | * where there are between 0 and 4 template args, depending on the specific | 
|  | 12 | * template operation.  For normal operations, each arg is either a specific | 
|  | 13 | * number of data bytes to add to the output buffer, or an index pointing | 
|  | 14 | * to a previously-written number of data bytes to copy to the output buffer. | 
|  | 15 | * | 
|  | 16 | * The template code is a 5-bit value.  This code indicates what to do with | 
|  | 17 | * the following data.  Template codes from 0 to 0x19 should use the template | 
|  | 18 | * table, the static "decomp_ops" table used in decompress.  For each template | 
|  | 19 | * (table row), there are between 1 and 4 actions; each action corresponds to | 
|  | 20 | * an arg following the template code bits.  Each action is either a "data" | 
|  | 21 | * type action, or an "index" type action, and each action results in 2, 4, or 8 | 
|  | 22 | * bytes being written to the output buffer.  Each template (i.e. all actions | 
|  | 23 | * in the table row) will add up to 8 bytes being written to the output buffer. | 
|  | 24 | * Any row with fewer than 4 actions is padded with noop actions, indicated by | 
|  | 25 | * N0 (for which there is no corresponding arg in the compressed data buffer). | 
|  | 26 | * | 
|  | 27 | * "Data" actions, indicated in the table by D2, D4, and D8, mean that the | 
|  | 28 | * corresponding arg is 2, 4, or 8 bytes, respectively, of data in the | 
|  | 29 | * compressed data buffer that should be copied directly to the output buffer. | 
|  | 30 | * | 
|  | 31 | * "Index" actions, indicated in the table by I2, I4, and I8, mean the | 
|  | 32 | * corresponding arg is an index parameter that points to, respectively, a 2, | 
|  | 33 | * 4, or 8 byte value already in the output buffer, that should be copied to | 
|  | 34 | * the end of the output buffer.  Essentially, the index points to a position | 
|  | 35 | * in a ring buffer that contains the last N bytes of output buffer data. | 
|  | 36 | * The number of bits for each index's arg is: 8 bits for I2, 9 bits for I4, | 
|  | 37 | * and 8 bits for I8.  Since each index points to a 2, 4, or 8 byte section, | 
|  | 38 | * this means that I2 can reference 512 bytes ((2^8 bits = 256) * 2 bytes), I4 | 
|  | 39 | * can reference 2048 bytes ((2^9 = 512) * 4 bytes), and I8 can reference 2048 | 
|  | 40 | * bytes ((2^8 = 256) * 8 bytes).  Think of it as a kind-of ring buffer for | 
|  | 41 | * each of I2, I4, and I8 that is updated for each byte written to the output | 
|  | 42 | * buffer.  In this implementation, the output buffer is directly used for each | 
|  | 43 | * index; there is no additional memory required.  Note that the index is into | 
|  | 44 | * a ring buffer, not a sliding window; for example, if there have been 520 | 
|  | 45 | * bytes written to the output buffer, an I2 index of 0 would index to byte 512 | 
|  | 46 | * in the output buffer, while an I2 index of 16 would index to byte 32 in the | 
|  | 47 | * output buffer. | 
|  | 48 | * | 
|  | 49 | * There are also 3 special template codes: 0x1b for "repeat", 0x1c for | 
|  | 50 | * "zeros", and 0x1e for "end".  The "repeat" operation is followed by a 6 bit | 
|  | 51 | * arg N indicating how many times to repeat.  The last 8 bytes written to the | 
|  | 52 | * output buffer are written again to the output buffer, N + 1 times.  The | 
|  | 53 | * "zeros" operation, which has no arg bits, writes 8 zeros to the output | 
|  | 54 | * buffer.  The "end" operation, which also has no arg bits, signals the end | 
|  | 55 | * of the compressed data.  There may be some number of padding (don't care, | 
|  | 56 | * but usually 0) bits after the "end" operation bits, to fill the buffer | 
|  | 57 | * length to a specific byte multiple (usually a multiple of 8, 16, or 32 | 
|  | 58 | * bytes). | 
|  | 59 | * | 
|  | 60 | * This software implementation also uses one of the undefined template values, | 
|  | 61 | * 0x1d as a special "short data" template code, to represent less than 8 bytes | 
|  | 62 | * of uncompressed data.  It is followed by a 3 bit arg N indicating how many | 
|  | 63 | * data bytes will follow, and then N bytes of data, which should be copied to | 
|  | 64 | * the output buffer.  This allows the software 842 compressor to accept input | 
|  | 65 | * buffers that are not an exact multiple of 8 bytes long.  However, those | 
|  | 66 | * compressed buffers containing this sw-only template will be rejected by | 
|  | 67 | * the 842 hardware decompressor, and must be decompressed with this software | 
|  | 68 | * library.  The 842 software compression module includes a parameter to | 
|  | 69 | * disable using this sw-only "short data" template, and instead simply | 
|  | 70 | * reject any input buffer that is not a multiple of 8 bytes long. | 
|  | 71 | * | 
|  | 72 | * After all actions for each operation code are processed, another template | 
|  | 73 | * code is in the next 5 bits.  The decompression ends once the "end" template | 
|  | 74 | * code is detected. | 
|  | 75 | */ | 
|  | 76 |  | 
|  | 77 | #include <linux/module.h> | 
|  | 78 | #include <linux/kernel.h> | 
|  | 79 | #include <linux/bitops.h> | 
|  | 80 | #include <linux/crc32.h> | 
|  | 81 | #include <asm/unaligned.h> | 
|  | 82 |  | 
|  | 83 | #include <linux/sw842.h> | 
|  | 84 |  | 
|  | 85 | /* special templates - op codes above the regular template range (0 to 0x19) */ | 
|  | 86 | #define OP_REPEAT	(0x1B)	/* 6-bit arg N: write last 8 output bytes again N + 1 times */ | 
|  | 87 | #define OP_ZEROS	(0x1C)	/* no arg: write 8 zero bytes to the output buffer */ | 
|  | 88 | #define OP_END		(0x1E)	/* no arg: end of the compressed data */ | 
|  | 89 |  | 
|  | 90 | /* sw only template - this is not in the hw design; it's used only by this | 
|  | 91 | * software compressor and decompressor, to allow input buffers whose length | 
|  | 92 | * isn't a multiple of 8 bytes; it takes a 3-bit arg N, then N data bytes. | 
|  | 93 | */ | 
|  | 94 | #define OP_SHORT_DATA	(0x1D) | 
|  | 95 |  | 
|  | 96 | /* number of bits used by the op code itself and by the arg of each op/action */ | 
|  | 97 | #define OP_BITS		(5)	/* template (op) code */ | 
|  | 98 | #define REPEAT_BITS	(6)	/* repeat count arg of OP_REPEAT */ | 
|  | 99 | #define SHORT_DATA_BITS	(3)	/* byte count arg of OP_SHORT_DATA */ | 
|  | 100 | #define I2_BITS		(8)	/* index arg of an I2 action */ | 
|  | 101 | #define I4_BITS		(9)	/* index arg of an I4 action */ | 
|  | 102 | #define I8_BITS		(8)	/* index arg of an I8 action */ | 
|  | 103 | #define CRC_BITS	(32)	/* NOTE(review): presumably the width of a CRC field; not used in this header - confirm */ | 
|  | 104 |  | 
|  | 105 | #define REPEAT_BITS_MAX		(0x3f)	/* largest value that fits in REPEAT_BITS bits */ | 
|  | 106 | #define SHORT_DATA_BITS_MAX	(0x7)	/* largest value that fits in SHORT_DATA_BITS bits */ | 
|  | 107 |  | 
|  | 108 | /* Arbitrary values used to indicate action: an action type flag OR'd with a byte amount */ | 
|  | 109 | #define OP_ACTION	(0x70)	/* the three action type flags OR'd together; presumably the type mask - confirm */ | 
|  | 110 | #define OP_ACTION_INDEX	(0x10)	/* "index" action: arg is an index to bytes already in the output */ | 
|  | 111 | #define OP_ACTION_DATA	(0x20)	/* "data" action: arg is data bytes to copy to the output */ | 
|  | 112 | #define OP_ACTION_NOOP	(0x40)	/* padding action: no arg in the compressed data */ | 
|  | 113 | #define OP_AMOUNT	(0x0f)	/* low 4 bits, which hold the byte amount; presumably the amount mask - confirm */ | 
|  | 114 | #define OP_AMOUNT_0	(0x00)	/* no bytes (used by the noop action) */ | 
|  | 115 | #define OP_AMOUNT_2	(0x02)	/* 2 bytes */ | 
|  | 116 | #define OP_AMOUNT_4	(0x04)	/* 4 bytes */ | 
|  | 117 | #define OP_AMOUNT_8	(0x08)	/* 8 bytes */ | 
|  | 118 |  | 
|  | 119 | #define D2		(OP_ACTION_DATA  | OP_AMOUNT_2)	/* copy 2 data bytes */ | 
|  | 120 | #define D4		(OP_ACTION_DATA  | OP_AMOUNT_4)	/* copy 4 data bytes */ | 
|  | 121 | #define D8		(OP_ACTION_DATA  | OP_AMOUNT_8)	/* copy 8 data bytes */ | 
|  | 122 | #define I2		(OP_ACTION_INDEX | OP_AMOUNT_2)	/* index to a 2 byte value */ | 
|  | 123 | #define I4		(OP_ACTION_INDEX | OP_AMOUNT_4)	/* index to a 4 byte value */ | 
|  | 124 | #define I8		(OP_ACTION_INDEX | OP_AMOUNT_8)	/* index to an 8 byte value */ | 
|  | 125 | #define N0		(OP_ACTION_NOOP  | OP_AMOUNT_0)	/* noop padding entry, 0 bytes */ | 
|  | 126 |  | 
|  | 127 | /* one more than the highest regular template code (0x19) - not including the special templates */ | 
|  | 128 | #define OPS_MAX		(0x1a) | 
|  | 129 |  | 
|  | 130 | #endif |