| xj | b04a402 | 2021-11-25 15:01:52 +0800 | [diff] [blame] | 1 | /* | 
|  | 2 | * 842 Software Compression | 
|  | 3 | * | 
|  | 4 | * Copyright (C) 2015 Dan Streetman, IBM Corp | 
|  | 5 | * | 
|  | 6 | * This program is free software; you can redistribute it and/or modify | 
|  | 7 | * it under the terms of the GNU General Public License as published by | 
|  | 8 | * the Free Software Foundation; either version 2 of the License, or | 
|  | 9 | * (at your option) any later version. | 
|  | 10 | * | 
|  | 11 | * This program is distributed in the hope that it will be useful, | 
|  | 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|  | 14 | * GNU General Public License for more details. | 
|  | 15 | * | 
|  | 16 | * See 842.h for details of the 842 compressed format. | 
|  | 17 | */ | 
|  | 18 |  | 
/* Prefix every pr_*() message emitted from this file with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
/* NOTE(review): not referenced in this file; presumably consumed by
 * 842_debugfs.h, which is included below - confirm.
 */
#define MODULE_NAME "842_compress"
|  | 21 |  | 
|  | 22 | #include <linux/hashtable.h> | 
|  | 23 |  | 
|  | 24 | #include "842.h" | 
|  | 25 | #include "842_debugfs.h" | 
|  | 26 |  | 
/* Size arguments handed to DECLARE_HASHTABLE() for the 8-, 4- and 2-byte
 * lookup tables embedded in struct sw842_param.
 */
#define SW842_HASHTABLE8_BITS	(10)
#define SW842_HASHTABLE4_BITS	(11)
#define SW842_HASHTABLE2_BITS	(10)
|  | 30 |  | 
/* By default, we allow compressing input buffers of any length, but we must
 * use the non-standard "short data" template so the decompressor can correctly
 * reproduce the uncompressed data buffer at the right length.  However, the
 * 842 hardware will not recognize the "short data" template, and will fail to
 * decompress any compressed buffer containing it (I have no idea why anyone
 * would want to use software to compress and hardware to decompress, but
 * that's beside the point).  This parameter forces the compression function
 * to simply reject any input buffer that isn't a multiple of 8 bytes long,
 * instead of using the "short data" template, so that all compressed buffers
 * produced by this function will be decompressible by the 842 hardware
 * decompressor.  Unless you have a specific need for that, leave this
 * disabled so that any length buffer can be compressed.
 *
 * Exposed as the "strict" module parameter.
 */
static bool sw842_strict;
module_param_named(strict, sw842_strict, bool, 0644);
|  | 46 |  | 
/* Template table used by the compressor.
 *
 * Each row holds four ops describing how the current 8-byte block is encoded,
 * followed by the template opcode that add_template() writes with OP_BITS.
 * The trailing comment on each row is the total size in bits of that
 * template's parameters; rows are listed in ascending size.  process_next()
 * walks the rows in order and uses the first one that check_template()
 * accepts, so cheaper encodings are preferred; the final row is the
 * unconditional fallback.
 *
 * NOTE(review): the I*, D* and N0 op encodings come from 842.h and are not
 * visible here.
 */
static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
	{ I8, N0, N0, N0, 0x19 }, /* 8 */
	{ I4, I4, N0, N0, 0x18 }, /* 18 */
	{ I4, I2, I2, N0, 0x17 }, /* 25 */
	{ I2, I2, I4, N0, 0x13 }, /* 25 */
	{ I2, I2, I2, I2, 0x12 }, /* 32 */
	{ I4, I2, D2, N0, 0x16 }, /* 33 */
	{ I4, D2, I2, N0, 0x15 }, /* 33 */
	{ I2, D2, I4, N0, 0x0e }, /* 33 */
	{ D2, I2, I4, N0, 0x09 }, /* 33 */
	{ I2, I2, I2, D2, 0x11 }, /* 40 */
	{ I2, I2, D2, I2, 0x10 }, /* 40 */
	{ I2, D2, I2, I2, 0x0d }, /* 40 */
	{ D2, I2, I2, I2, 0x08 }, /* 40 */
	{ I4, D4, N0, N0, 0x14 }, /* 41 */
	{ D4, I4, N0, N0, 0x04 }, /* 41 */
	{ I2, I2, D4, N0, 0x0f }, /* 48 */
	{ I2, D2, I2, D2, 0x0c }, /* 48 */
	{ I2, D4, I2, N0, 0x0b }, /* 48 */
	{ D2, I2, I2, D2, 0x07 }, /* 48 */
	{ D2, I2, D2, I2, 0x06 }, /* 48 */
	{ D4, I2, I2, N0, 0x03 }, /* 48 */
	{ I2, D2, D4, N0, 0x0a }, /* 56 */
	{ D2, I2, D4, N0, 0x05 }, /* 56 */
	{ D4, I2, D2, N0, 0x02 }, /* 56 */
	{ D4, D2, I2, N0, 0x01 }, /* 56 */
	{ D8, N0, N0, N0, 0x00 }, /* 64 */
};
|  | 75 |  | 
/* Hashtable entry remembering one previously seen 8-byte value. */
struct sw842_hlist_node8 {
	struct hlist_node node;	/* linkage into htable8 */
	u64 data;		/* the 8-byte value stored in this slot */
	u8 index;		/* slot number within node8[], fixed at init */
};
|  | 81 |  | 
/* Hashtable entry remembering one previously seen 4-byte value. */
struct sw842_hlist_node4 {
	struct hlist_node node;	/* linkage into htable4 */
	u32 data;		/* the 4-byte value stored in this slot */
	u16 index;		/* slot number within node4[], fixed at init */
};
|  | 87 |  | 
/* Hashtable entry remembering one previously seen 2-byte value. */
struct sw842_hlist_node2 {
	struct hlist_node node;	/* linkage into htable2 */
	u16 data;		/* the 2-byte value stored in this slot */
	u8 index;		/* slot number within node2[], fixed at init */
};
|  | 93 |  | 
/* Sentinel values for the indexN[] fields of struct sw842_param; any
 * non-negative value there is a real slot index.
 */
#define INDEX_NOT_FOUND		(-1)	/* lookup ran, no matching entry */
#define INDEX_NOT_CHECKED	(-2)	/* lookup not yet run for this block */
|  | 96 |  | 
/* Working state for one sw842_compress() call; lives in the caller-supplied
 * work memory.
 */
struct sw842_param {
	u8 *in;		/* current read position in the input */
	u8 *instart;	/* start of the input buffer */
	u64 ilen;	/* input bytes not yet consumed */
	u8 *out;	/* output byte currently being filled */
	u64 olen;	/* output bytes still available from @out */
	u8 bit;		/* bits already used in *out (kept below 8) */
	/* current 8-byte input block, read big-endian at each granularity */
	u64 data8[1];
	u32 data4[2];
	u16 data2[4];
	/* per-chunk lookup result: slot index, or an INDEX_NOT_* sentinel */
	int index8[1];
	int index4[2];
	int index2[4];
	/* lookup tables keyed by chunk value */
	DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
	DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
	DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
	/* backing storage for the table entries, one per encodable index */
	struct sw842_hlist_node8 node8[1 << I8_BITS];
	struct sw842_hlist_node4 node4[1 << I4_BITS];
	struct sw842_hlist_node2 node2[1 << I2_BITS];
};
|  | 117 |  | 
/* Read a @b-bit big-endian value from byte offset @o past the current input
 * position of @p and return it in CPU byte order; the read may be unaligned.
 */
#define get_input_data(p, o, b)						\
	be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
|  | 120 |  | 
/* Reset the @b-byte hashtable of @p and all of its backing nodes: every node
 * gets its own array position as index, zero data, and an unlinked hlist node.
 */
#define init_hashtable_nodes(p, b)	do {			\
	int _i;							\
	hash_init((p)->htable##b);				\
	for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) {	\
		(p)->node##b[_i].index = _i;			\
		(p)->node##b[_i].data = 0;			\
		INIT_HLIST_NODE(&(p)->node##b[_i].node);	\
	}							\
} while (0)
|  | 130 |  | 
/* Look up chunk @n of the current block in the @b-byte hashtable of @p.
 *
 * Stores the slot index of the first entry whose data equals the chunk into
 * (p)->index<b>[n], or INDEX_NOT_FOUND when there is none.  Evaluates to true
 * when a match was found.  @p is parenthesized on every use so that any
 * pointer expression may be passed, matching check_index()/replace_hash().
 */
#define find_index(p, b, n)	({					\
	struct sw842_hlist_node##b *_n;					\
	(p)->index##b[n] = INDEX_NOT_FOUND;				\
	hash_for_each_possible((p)->htable##b, _n, node,		\
			       (p)->data##b[n]) {			\
		if ((p)->data##b[n] == _n->data) {			\
			(p)->index##b[n] = _n->index;			\
			break;						\
		}							\
	}								\
	(p)->index##b[n] >= 0;						\
})
|  | 142 |  | 
/* True when chunk @n of width @b has a hashtable match.  The lookup is run
 * only if the cached result is still INDEX_NOT_CHECKED; otherwise the cached
 * index decides.
 */
#define check_index(p, b, n)			\
	((p)->index##b[n] == INDEX_NOT_CHECKED	\
	 ? find_index(p, b, n)			\
	 : (p)->index##b[n] >= 0)
|  | 147 |  | 
/* Recycle slot (@i + @d) of the @b-byte node array: unlink it from the
 * hashtable, store chunk @d of the current block in it, and re-insert it
 * keyed by the new data.
 */
#define replace_hash(p, b, i, d)	do {				\
	struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)];	\
	hash_del(&_n->node);						\
	_n->data = (p)->data##b[d];					\
	pr_debug("add hash index%x %x pos %x data %lx\n", b,		\
		 (unsigned int)_n->index,				\
		 (unsigned int)((p)->in - (p)->instart),		\
		 (unsigned long)_n->data);				\
	hash_add((p)->htable##b, &_n->node, _n->data);			\
} while (0)
|  | 158 |  | 
|  | 159 | static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe }; | 
|  | 160 |  | 
|  | 161 | static int add_bits(struct sw842_param *p, u64 d, u8 n); | 
|  | 162 |  | 
|  | 163 | static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s) | 
|  | 164 | { | 
|  | 165 | int ret; | 
|  | 166 |  | 
|  | 167 | if (n <= s) | 
|  | 168 | return -EINVAL; | 
|  | 169 |  | 
|  | 170 | ret = add_bits(p, d >> s, n - s); | 
|  | 171 | if (ret) | 
|  | 172 | return ret; | 
|  | 173 | return add_bits(p, d & GENMASK_ULL(s - 1, 0), s); | 
|  | 174 | } | 
|  | 175 |  | 
|  | 176 | static int add_bits(struct sw842_param *p, u64 d, u8 n) | 
|  | 177 | { | 
|  | 178 | int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits; | 
|  | 179 | u64 o; | 
|  | 180 | u8 *out = p->out; | 
|  | 181 |  | 
|  | 182 | pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d); | 
|  | 183 |  | 
|  | 184 | if (n > 64) | 
|  | 185 | return -EINVAL; | 
|  | 186 |  | 
|  | 187 | /* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0), | 
|  | 188 | * or if we're at the end of the output buffer and would write past end | 
|  | 189 | */ | 
|  | 190 | if (bits > 64) | 
|  | 191 | return __split_add_bits(p, d, n, 32); | 
|  | 192 | else if (p->olen < 8 && bits > 32 && bits <= 56) | 
|  | 193 | return __split_add_bits(p, d, n, 16); | 
|  | 194 | else if (p->olen < 4 && bits > 16 && bits <= 24) | 
|  | 195 | return __split_add_bits(p, d, n, 8); | 
|  | 196 |  | 
|  | 197 | if (DIV_ROUND_UP(bits, 8) > p->olen) | 
|  | 198 | return -ENOSPC; | 
|  | 199 |  | 
|  | 200 | o = *out & bmask[b]; | 
|  | 201 | d <<= s; | 
|  | 202 |  | 
|  | 203 | if (bits <= 8) | 
|  | 204 | *out = o | d; | 
|  | 205 | else if (bits <= 16) | 
|  | 206 | put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out); | 
|  | 207 | else if (bits <= 24) | 
|  | 208 | put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out); | 
|  | 209 | else if (bits <= 32) | 
|  | 210 | put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out); | 
|  | 211 | else if (bits <= 40) | 
|  | 212 | put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out); | 
|  | 213 | else if (bits <= 48) | 
|  | 214 | put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out); | 
|  | 215 | else if (bits <= 56) | 
|  | 216 | put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out); | 
|  | 217 | else | 
|  | 218 | put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out); | 
|  | 219 |  | 
|  | 220 | p->bit += n; | 
|  | 221 |  | 
|  | 222 | if (p->bit > 7) { | 
|  | 223 | p->out += p->bit / 8; | 
|  | 224 | p->olen -= p->bit / 8; | 
|  | 225 | p->bit %= 8; | 
|  | 226 | } | 
|  | 227 |  | 
|  | 228 | return 0; | 
|  | 229 | } | 
|  | 230 |  | 
|  | 231 | static int add_template(struct sw842_param *p, u8 c) | 
|  | 232 | { | 
|  | 233 | int ret, i, b = 0; | 
|  | 234 | u8 *t = comp_ops[c]; | 
|  | 235 | bool inv = false; | 
|  | 236 |  | 
|  | 237 | if (c >= OPS_MAX) | 
|  | 238 | return -EINVAL; | 
|  | 239 |  | 
|  | 240 | pr_debug("template %x\n", t[4]); | 
|  | 241 |  | 
|  | 242 | ret = add_bits(p, t[4], OP_BITS); | 
|  | 243 | if (ret) | 
|  | 244 | return ret; | 
|  | 245 |  | 
|  | 246 | for (i = 0; i < 4; i++) { | 
|  | 247 | pr_debug("op %x\n", t[i]); | 
|  | 248 |  | 
|  | 249 | switch (t[i] & OP_AMOUNT) { | 
|  | 250 | case OP_AMOUNT_8: | 
|  | 251 | if (b) | 
|  | 252 | inv = true; | 
|  | 253 | else if (t[i] & OP_ACTION_INDEX) | 
|  | 254 | ret = add_bits(p, p->index8[0], I8_BITS); | 
|  | 255 | else if (t[i] & OP_ACTION_DATA) | 
|  | 256 | ret = add_bits(p, p->data8[0], 64); | 
|  | 257 | else | 
|  | 258 | inv = true; | 
|  | 259 | break; | 
|  | 260 | case OP_AMOUNT_4: | 
|  | 261 | if (b == 2 && t[i] & OP_ACTION_DATA) | 
|  | 262 | ret = add_bits(p, get_input_data(p, 2, 32), 32); | 
|  | 263 | else if (b != 0 && b != 4) | 
|  | 264 | inv = true; | 
|  | 265 | else if (t[i] & OP_ACTION_INDEX) | 
|  | 266 | ret = add_bits(p, p->index4[b >> 2], I4_BITS); | 
|  | 267 | else if (t[i] & OP_ACTION_DATA) | 
|  | 268 | ret = add_bits(p, p->data4[b >> 2], 32); | 
|  | 269 | else | 
|  | 270 | inv = true; | 
|  | 271 | break; | 
|  | 272 | case OP_AMOUNT_2: | 
|  | 273 | if (b != 0 && b != 2 && b != 4 && b != 6) | 
|  | 274 | inv = true; | 
|  | 275 | if (t[i] & OP_ACTION_INDEX) | 
|  | 276 | ret = add_bits(p, p->index2[b >> 1], I2_BITS); | 
|  | 277 | else if (t[i] & OP_ACTION_DATA) | 
|  | 278 | ret = add_bits(p, p->data2[b >> 1], 16); | 
|  | 279 | else | 
|  | 280 | inv = true; | 
|  | 281 | break; | 
|  | 282 | case OP_AMOUNT_0: | 
|  | 283 | inv = (b != 8) || !(t[i] & OP_ACTION_NOOP); | 
|  | 284 | break; | 
|  | 285 | default: | 
|  | 286 | inv = true; | 
|  | 287 | break; | 
|  | 288 | } | 
|  | 289 |  | 
|  | 290 | if (ret) | 
|  | 291 | return ret; | 
|  | 292 |  | 
|  | 293 | if (inv) { | 
|  | 294 | pr_err("Invalid templ %x op %d : %x %x %x %x\n", | 
|  | 295 | c, i, t[0], t[1], t[2], t[3]); | 
|  | 296 | return -EINVAL; | 
|  | 297 | } | 
|  | 298 |  | 
|  | 299 | b += t[i] & OP_AMOUNT; | 
|  | 300 | } | 
|  | 301 |  | 
|  | 302 | if (b != 8) { | 
|  | 303 | pr_err("Invalid template %x len %x : %x %x %x %x\n", | 
|  | 304 | c, b, t[0], t[1], t[2], t[3]); | 
|  | 305 | return -EINVAL; | 
|  | 306 | } | 
|  | 307 |  | 
|  | 308 | if (sw842_template_counts) | 
|  | 309 | atomic_inc(&template_count[t[4]]); | 
|  | 310 |  | 
|  | 311 | return 0; | 
|  | 312 | } | 
|  | 313 |  | 
|  | 314 | static int add_repeat_template(struct sw842_param *p, u8 r) | 
|  | 315 | { | 
|  | 316 | int ret; | 
|  | 317 |  | 
|  | 318 | /* repeat param is 0-based */ | 
|  | 319 | if (!r || --r > REPEAT_BITS_MAX) | 
|  | 320 | return -EINVAL; | 
|  | 321 |  | 
|  | 322 | ret = add_bits(p, OP_REPEAT, OP_BITS); | 
|  | 323 | if (ret) | 
|  | 324 | return ret; | 
|  | 325 |  | 
|  | 326 | ret = add_bits(p, r, REPEAT_BITS); | 
|  | 327 | if (ret) | 
|  | 328 | return ret; | 
|  | 329 |  | 
|  | 330 | if (sw842_template_counts) | 
|  | 331 | atomic_inc(&template_repeat_count); | 
|  | 332 |  | 
|  | 333 | return 0; | 
|  | 334 | } | 
|  | 335 |  | 
|  | 336 | static int add_short_data_template(struct sw842_param *p, u8 b) | 
|  | 337 | { | 
|  | 338 | int ret, i; | 
|  | 339 |  | 
|  | 340 | if (!b || b > SHORT_DATA_BITS_MAX) | 
|  | 341 | return -EINVAL; | 
|  | 342 |  | 
|  | 343 | ret = add_bits(p, OP_SHORT_DATA, OP_BITS); | 
|  | 344 | if (ret) | 
|  | 345 | return ret; | 
|  | 346 |  | 
|  | 347 | ret = add_bits(p, b, SHORT_DATA_BITS); | 
|  | 348 | if (ret) | 
|  | 349 | return ret; | 
|  | 350 |  | 
|  | 351 | for (i = 0; i < b; i++) { | 
|  | 352 | ret = add_bits(p, p->in[i], 8); | 
|  | 353 | if (ret) | 
|  | 354 | return ret; | 
|  | 355 | } | 
|  | 356 |  | 
|  | 357 | if (sw842_template_counts) | 
|  | 358 | atomic_inc(&template_short_data_count); | 
|  | 359 |  | 
|  | 360 | return 0; | 
|  | 361 | } | 
|  | 362 |  | 
|  | 363 | static int add_zeros_template(struct sw842_param *p) | 
|  | 364 | { | 
|  | 365 | int ret = add_bits(p, OP_ZEROS, OP_BITS); | 
|  | 366 |  | 
|  | 367 | if (ret) | 
|  | 368 | return ret; | 
|  | 369 |  | 
|  | 370 | if (sw842_template_counts) | 
|  | 371 | atomic_inc(&template_zeros_count); | 
|  | 372 |  | 
|  | 373 | return 0; | 
|  | 374 | } | 
|  | 375 |  | 
|  | 376 | static int add_end_template(struct sw842_param *p) | 
|  | 377 | { | 
|  | 378 | int ret = add_bits(p, OP_END, OP_BITS); | 
|  | 379 |  | 
|  | 380 | if (ret) | 
|  | 381 | return ret; | 
|  | 382 |  | 
|  | 383 | if (sw842_template_counts) | 
|  | 384 | atomic_inc(&template_end_count); | 
|  | 385 |  | 
|  | 386 | return 0; | 
|  | 387 | } | 
|  | 388 |  | 
|  | 389 | static bool check_template(struct sw842_param *p, u8 c) | 
|  | 390 | { | 
|  | 391 | u8 *t = comp_ops[c]; | 
|  | 392 | int i, match, b = 0; | 
|  | 393 |  | 
|  | 394 | if (c >= OPS_MAX) | 
|  | 395 | return false; | 
|  | 396 |  | 
|  | 397 | for (i = 0; i < 4; i++) { | 
|  | 398 | if (t[i] & OP_ACTION_INDEX) { | 
|  | 399 | if (t[i] & OP_AMOUNT_2) | 
|  | 400 | match = check_index(p, 2, b >> 1); | 
|  | 401 | else if (t[i] & OP_AMOUNT_4) | 
|  | 402 | match = check_index(p, 4, b >> 2); | 
|  | 403 | else if (t[i] & OP_AMOUNT_8) | 
|  | 404 | match = check_index(p, 8, 0); | 
|  | 405 | else | 
|  | 406 | return false; | 
|  | 407 | if (!match) | 
|  | 408 | return false; | 
|  | 409 | } | 
|  | 410 |  | 
|  | 411 | b += t[i] & OP_AMOUNT; | 
|  | 412 | } | 
|  | 413 |  | 
|  | 414 | return true; | 
|  | 415 | } | 
|  | 416 |  | 
|  | 417 | static void get_next_data(struct sw842_param *p) | 
|  | 418 | { | 
|  | 419 | p->data8[0] = get_input_data(p, 0, 64); | 
|  | 420 | p->data4[0] = get_input_data(p, 0, 32); | 
|  | 421 | p->data4[1] = get_input_data(p, 4, 32); | 
|  | 422 | p->data2[0] = get_input_data(p, 0, 16); | 
|  | 423 | p->data2[1] = get_input_data(p, 2, 16); | 
|  | 424 | p->data2[2] = get_input_data(p, 4, 16); | 
|  | 425 | p->data2[3] = get_input_data(p, 6, 16); | 
|  | 426 | } | 
|  | 427 |  | 
|  | 428 | /* update the hashtable entries. | 
|  | 429 | * only call this after finding/adding the current template | 
|  | 430 | * the dataN fields for the current 8 byte block must be already updated | 
|  | 431 | */ | 
|  | 432 | static void update_hashtables(struct sw842_param *p) | 
|  | 433 | { | 
|  | 434 | u64 pos = p->in - p->instart; | 
|  | 435 | u64 n8 = (pos >> 3) % (1 << I8_BITS); | 
|  | 436 | u64 n4 = (pos >> 2) % (1 << I4_BITS); | 
|  | 437 | u64 n2 = (pos >> 1) % (1 << I2_BITS); | 
|  | 438 |  | 
|  | 439 | replace_hash(p, 8, n8, 0); | 
|  | 440 | replace_hash(p, 4, n4, 0); | 
|  | 441 | replace_hash(p, 4, n4, 1); | 
|  | 442 | replace_hash(p, 2, n2, 0); | 
|  | 443 | replace_hash(p, 2, n2, 1); | 
|  | 444 | replace_hash(p, 2, n2, 2); | 
|  | 445 | replace_hash(p, 2, n2, 3); | 
|  | 446 | } | 
|  | 447 |  | 
|  | 448 | /* find the next template to use, and add it | 
|  | 449 | * the p->dataN fields must already be set for the current 8 byte block | 
|  | 450 | */ | 
|  | 451 | static int process_next(struct sw842_param *p) | 
|  | 452 | { | 
|  | 453 | int ret, i; | 
|  | 454 |  | 
|  | 455 | p->index8[0] = INDEX_NOT_CHECKED; | 
|  | 456 | p->index4[0] = INDEX_NOT_CHECKED; | 
|  | 457 | p->index4[1] = INDEX_NOT_CHECKED; | 
|  | 458 | p->index2[0] = INDEX_NOT_CHECKED; | 
|  | 459 | p->index2[1] = INDEX_NOT_CHECKED; | 
|  | 460 | p->index2[2] = INDEX_NOT_CHECKED; | 
|  | 461 | p->index2[3] = INDEX_NOT_CHECKED; | 
|  | 462 |  | 
|  | 463 | /* check up to OPS_MAX - 1; last op is our fallback */ | 
|  | 464 | for (i = 0; i < OPS_MAX - 1; i++) { | 
|  | 465 | if (check_template(p, i)) | 
|  | 466 | break; | 
|  | 467 | } | 
|  | 468 |  | 
|  | 469 | ret = add_template(p, i); | 
|  | 470 | if (ret) | 
|  | 471 | return ret; | 
|  | 472 |  | 
|  | 473 | return 0; | 
|  | 474 | } | 
|  | 475 |  | 
|  | 476 | /** | 
|  | 477 | * sw842_compress | 
|  | 478 | * | 
|  | 479 | * Compress the uncompressed buffer of length @ilen at @in to the output buffer | 
|  | 480 | * @out, using no more than @olen bytes, using the 842 compression format. | 
|  | 481 | * | 
|  | 482 | * Returns: 0 on success, error on failure.  The @olen parameter | 
|  | 483 | * will contain the number of output bytes written on success, or | 
|  | 484 | * 0 on error. | 
|  | 485 | */ | 
|  | 486 | int sw842_compress(const u8 *in, unsigned int ilen, | 
|  | 487 | u8 *out, unsigned int *olen, void *wmem) | 
|  | 488 | { | 
|  | 489 | struct sw842_param *p = (struct sw842_param *)wmem; | 
|  | 490 | int ret; | 
|  | 491 | u64 last, next, pad, total; | 
|  | 492 | u8 repeat_count = 0; | 
|  | 493 | u32 crc; | 
|  | 494 |  | 
|  | 495 | BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS); | 
|  | 496 |  | 
|  | 497 | init_hashtable_nodes(p, 8); | 
|  | 498 | init_hashtable_nodes(p, 4); | 
|  | 499 | init_hashtable_nodes(p, 2); | 
|  | 500 |  | 
|  | 501 | p->in = (u8 *)in; | 
|  | 502 | p->instart = p->in; | 
|  | 503 | p->ilen = ilen; | 
|  | 504 | p->out = out; | 
|  | 505 | p->olen = *olen; | 
|  | 506 | p->bit = 0; | 
|  | 507 |  | 
|  | 508 | total = p->olen; | 
|  | 509 |  | 
|  | 510 | *olen = 0; | 
|  | 511 |  | 
|  | 512 | /* if using strict mode, we can only compress a multiple of 8 */ | 
|  | 513 | if (sw842_strict && (ilen % 8)) { | 
|  | 514 | pr_err("Using strict mode, can't compress len %d\n", ilen); | 
|  | 515 | return -EINVAL; | 
|  | 516 | } | 
|  | 517 |  | 
|  | 518 | /* let's compress at least 8 bytes, mkay? */ | 
|  | 519 | if (unlikely(ilen < 8)) | 
|  | 520 | goto skip_comp; | 
|  | 521 |  | 
|  | 522 | /* make initial 'last' different so we don't match the first time */ | 
|  | 523 | last = ~get_unaligned((u64 *)p->in); | 
|  | 524 |  | 
|  | 525 | while (p->ilen > 7) { | 
|  | 526 | next = get_unaligned((u64 *)p->in); | 
|  | 527 |  | 
|  | 528 | /* must get the next data, as we need to update the hashtable | 
|  | 529 | * entries with the new data every time | 
|  | 530 | */ | 
|  | 531 | get_next_data(p); | 
|  | 532 |  | 
|  | 533 | /* we don't care about endianness in last or next; | 
|  | 534 | * we're just comparing 8 bytes to another 8 bytes, | 
|  | 535 | * they're both the same endianness | 
|  | 536 | */ | 
|  | 537 | if (next == last) { | 
|  | 538 | /* repeat count bits are 0-based, so we stop at +1 */ | 
|  | 539 | if (++repeat_count <= REPEAT_BITS_MAX) | 
|  | 540 | goto repeat; | 
|  | 541 | } | 
|  | 542 | if (repeat_count) { | 
|  | 543 | ret = add_repeat_template(p, repeat_count); | 
|  | 544 | repeat_count = 0; | 
|  | 545 | if (next == last) /* reached max repeat bits */ | 
|  | 546 | goto repeat; | 
|  | 547 | } | 
|  | 548 |  | 
|  | 549 | if (next == 0) | 
|  | 550 | ret = add_zeros_template(p); | 
|  | 551 | else | 
|  | 552 | ret = process_next(p); | 
|  | 553 |  | 
|  | 554 | if (ret) | 
|  | 555 | return ret; | 
|  | 556 |  | 
|  | 557 | repeat: | 
|  | 558 | last = next; | 
|  | 559 | update_hashtables(p); | 
|  | 560 | p->in += 8; | 
|  | 561 | p->ilen -= 8; | 
|  | 562 | } | 
|  | 563 |  | 
|  | 564 | if (repeat_count) { | 
|  | 565 | ret = add_repeat_template(p, repeat_count); | 
|  | 566 | if (ret) | 
|  | 567 | return ret; | 
|  | 568 | } | 
|  | 569 |  | 
|  | 570 | skip_comp: | 
|  | 571 | if (p->ilen > 0) { | 
|  | 572 | ret = add_short_data_template(p, p->ilen); | 
|  | 573 | if (ret) | 
|  | 574 | return ret; | 
|  | 575 |  | 
|  | 576 | p->in += p->ilen; | 
|  | 577 | p->ilen = 0; | 
|  | 578 | } | 
|  | 579 |  | 
|  | 580 | ret = add_end_template(p); | 
|  | 581 | if (ret) | 
|  | 582 | return ret; | 
|  | 583 |  | 
|  | 584 | /* | 
|  | 585 | * crc(0:31) is appended to target data starting with the next | 
|  | 586 | * bit after End of stream template. | 
|  | 587 | * nx842 calculates CRC for data in big-endian format. So doing | 
|  | 588 | * same here so that sw842 decompression can be used for both | 
|  | 589 | * compressed data. | 
|  | 590 | */ | 
|  | 591 | crc = crc32_be(0, in, ilen); | 
|  | 592 | ret = add_bits(p, crc, CRC_BITS); | 
|  | 593 | if (ret) | 
|  | 594 | return ret; | 
|  | 595 |  | 
|  | 596 | if (p->bit) { | 
|  | 597 | p->out++; | 
|  | 598 | p->olen--; | 
|  | 599 | p->bit = 0; | 
|  | 600 | } | 
|  | 601 |  | 
|  | 602 | /* pad compressed length to multiple of 8 */ | 
|  | 603 | pad = (8 - ((total - p->olen) % 8)) % 8; | 
|  | 604 | if (pad) { | 
|  | 605 | if (pad > p->olen) /* we were so close! */ | 
|  | 606 | return -ENOSPC; | 
|  | 607 | memset(p->out, 0, pad); | 
|  | 608 | p->out += pad; | 
|  | 609 | p->olen -= pad; | 
|  | 610 | } | 
|  | 611 |  | 
|  | 612 | if (unlikely((total - p->olen) > UINT_MAX)) | 
|  | 613 | return -ENOSPC; | 
|  | 614 |  | 
|  | 615 | *olen = total - p->olen; | 
|  | 616 |  | 
|  | 617 | return 0; | 
|  | 618 | } | 
|  | 619 | EXPORT_SYMBOL_GPL(sw842_compress); | 
|  | 620 |  | 
|  | 621 | static int __init sw842_init(void) | 
|  | 622 | { | 
|  | 623 | if (sw842_template_counts) | 
|  | 624 | sw842_debugfs_create(); | 
|  | 625 |  | 
|  | 626 | return 0; | 
|  | 627 | } | 
|  | 628 | module_init(sw842_init); | 
|  | 629 |  | 
|  | 630 | static void __exit sw842_exit(void) | 
|  | 631 | { | 
|  | 632 | if (sw842_template_counts) | 
|  | 633 | sw842_debugfs_remove(); | 
|  | 634 | } | 
|  | 635 | module_exit(sw842_exit); | 
|  | 636 |  | 
|  | 637 | MODULE_LICENSE("GPL"); | 
|  | 638 | MODULE_DESCRIPTION("Software 842 Compressor"); | 
|  | 639 | MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); |