root/lib/842/842_compress.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. __split_add_bits
  2. add_bits
  3. add_template
  4. add_repeat_template
  5. add_short_data_template
  6. add_zeros_template
  7. add_end_template
  8. check_template
  9. get_next_data
  10. update_hashtables
  11. process_next
  12. sw842_compress
  13. sw842_init
  14. sw842_exit

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * 842 Software Compression
   4  *
   5  * Copyright (C) 2015 Dan Streetman, IBM Corp
   6  *
   7  * See 842.h for details of the 842 compressed format.
   8  */
   9 
  10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  11 #define MODULE_NAME "842_compress"
  12 
  13 #include <linux/hashtable.h>
  14 
  15 #include "842.h"
  16 #include "842_debugfs.h"
  17 
  18 #define SW842_HASHTABLE8_BITS   (10)
  19 #define SW842_HASHTABLE4_BITS   (11)
  20 #define SW842_HASHTABLE2_BITS   (10)
  21 
  22 /* By default, we allow compressing input buffers of any length, but we must
  23  * use the non-standard "short data" template so the decompressor can correctly
  24  * reproduce the uncompressed data buffer at the right length.  However the
  25  * hardware 842 compressor will not recognize the "short data" template, and
  26  * will fail to decompress any compressed buffer containing it (I have no idea
  27  * why anyone would want to use software to compress and hardware to decompress
  28  * but that's beside the point).  This parameter forces the compression
  29  * function to simply reject any input buffer that isn't a multiple of 8 bytes
  30  * long, instead of using the "short data" template, so that all compressed
  31  * buffers produced by this function will be decompressable by the 842 hardware
  32  * decompressor.  Unless you have a specific need for that, leave this disabled
  33  * so that any length buffer can be compressed.
  34  */
  35 static bool sw842_strict;
  36 module_param_named(strict, sw842_strict, bool, 0644);
  37 
  38 static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
  39         { I8, N0, N0, N0, 0x19 }, /* 8 */
  40         { I4, I4, N0, N0, 0x18 }, /* 18 */
  41         { I4, I2, I2, N0, 0x17 }, /* 25 */
  42         { I2, I2, I4, N0, 0x13 }, /* 25 */
  43         { I2, I2, I2, I2, 0x12 }, /* 32 */
  44         { I4, I2, D2, N0, 0x16 }, /* 33 */
  45         { I4, D2, I2, N0, 0x15 }, /* 33 */
  46         { I2, D2, I4, N0, 0x0e }, /* 33 */
  47         { D2, I2, I4, N0, 0x09 }, /* 33 */
  48         { I2, I2, I2, D2, 0x11 }, /* 40 */
  49         { I2, I2, D2, I2, 0x10 }, /* 40 */
  50         { I2, D2, I2, I2, 0x0d }, /* 40 */
  51         { D2, I2, I2, I2, 0x08 }, /* 40 */
  52         { I4, D4, N0, N0, 0x14 }, /* 41 */
  53         { D4, I4, N0, N0, 0x04 }, /* 41 */
  54         { I2, I2, D4, N0, 0x0f }, /* 48 */
  55         { I2, D2, I2, D2, 0x0c }, /* 48 */
  56         { I2, D4, I2, N0, 0x0b }, /* 48 */
  57         { D2, I2, I2, D2, 0x07 }, /* 48 */
  58         { D2, I2, D2, I2, 0x06 }, /* 48 */
  59         { D4, I2, I2, N0, 0x03 }, /* 48 */
  60         { I2, D2, D4, N0, 0x0a }, /* 56 */
  61         { D2, I2, D4, N0, 0x05 }, /* 56 */
  62         { D4, I2, D2, N0, 0x02 }, /* 56 */
  63         { D4, D2, I2, N0, 0x01 }, /* 56 */
  64         { D8, N0, N0, N0, 0x00 }, /* 64 */
  65 };
  66 
  67 struct sw842_hlist_node8 {
  68         struct hlist_node node;
  69         u64 data;
  70         u8 index;
  71 };
  72 
  73 struct sw842_hlist_node4 {
  74         struct hlist_node node;
  75         u32 data;
  76         u16 index;
  77 };
  78 
  79 struct sw842_hlist_node2 {
  80         struct hlist_node node;
  81         u16 data;
  82         u8 index;
  83 };
  84 
  85 #define INDEX_NOT_FOUND         (-1)
  86 #define INDEX_NOT_CHECKED       (-2)
  87 
  88 struct sw842_param {
  89         u8 *in;
  90         u8 *instart;
  91         u64 ilen;
  92         u8 *out;
  93         u64 olen;
  94         u8 bit;
  95         u64 data8[1];
  96         u32 data4[2];
  97         u16 data2[4];
  98         int index8[1];
  99         int index4[2];
 100         int index2[4];
 101         DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
 102         DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
 103         DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
 104         struct sw842_hlist_node8 node8[1 << I8_BITS];
 105         struct sw842_hlist_node4 node4[1 << I4_BITS];
 106         struct sw842_hlist_node2 node2[1 << I2_BITS];
 107 };
 108 
 109 #define get_input_data(p, o, b)                                         \
 110         be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
 111 
 112 #define init_hashtable_nodes(p, b)      do {                    \
 113         int _i;                                                 \
 114         hash_init((p)->htable##b);                              \
 115         for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) {     \
 116                 (p)->node##b[_i].index = _i;                    \
 117                 (p)->node##b[_i].data = 0;                      \
 118                 INIT_HLIST_NODE(&(p)->node##b[_i].node);        \
 119         }                                                       \
 120 } while (0)
 121 
 122 #define find_index(p, b, n)     ({                                      \
 123         struct sw842_hlist_node##b *_n;                                 \
 124         p->index##b[n] = INDEX_NOT_FOUND;                               \
 125         hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) { \
 126                 if (p->data##b[n] == _n->data) {                        \
 127                         p->index##b[n] = _n->index;                     \
 128                         break;                                          \
 129                 }                                                       \
 130         }                                                               \
 131         p->index##b[n] >= 0;                                            \
 132 })
 133 
 134 #define check_index(p, b, n)                    \
 135         ((p)->index##b[n] == INDEX_NOT_CHECKED  \
 136          ? find_index(p, b, n)                  \
 137          : (p)->index##b[n] >= 0)
 138 
 139 #define replace_hash(p, b, i, d)        do {                            \
 140         struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)];        \
 141         hash_del(&_n->node);                                            \
 142         _n->data = (p)->data##b[d];                                     \
 143         pr_debug("add hash index%x %x pos %x data %lx\n", b,            \
 144                  (unsigned int)_n->index,                               \
 145                  (unsigned int)((p)->in - (p)->instart),                \
 146                  (unsigned long)_n->data);                              \
 147         hash_add((p)->htable##b, &_n->node, _n->data);                  \
 148 } while (0)
 149 
 150 static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
 151 
 152 static int add_bits(struct sw842_param *p, u64 d, u8 n);
 153 
 154 static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
 155 {
 156         int ret;
 157 
 158         if (n <= s)
 159                 return -EINVAL;
 160 
 161         ret = add_bits(p, d >> s, n - s);
 162         if (ret)
 163                 return ret;
 164         return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
 165 }
 166 
 167 static int add_bits(struct sw842_param *p, u64 d, u8 n)
 168 {
 169         int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
 170         u64 o;
 171         u8 *out = p->out;
 172 
 173         pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
 174 
 175         if (n > 64)
 176                 return -EINVAL;
 177 
 178         /* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
 179          * or if we're at the end of the output buffer and would write past end
 180          */
 181         if (bits > 64)
 182                 return __split_add_bits(p, d, n, 32);
 183         else if (p->olen < 8 && bits > 32 && bits <= 56)
 184                 return __split_add_bits(p, d, n, 16);
 185         else if (p->olen < 4 && bits > 16 && bits <= 24)
 186                 return __split_add_bits(p, d, n, 8);
 187 
 188         if (DIV_ROUND_UP(bits, 8) > p->olen)
 189                 return -ENOSPC;
 190 
 191         o = *out & bmask[b];
 192         d <<= s;
 193 
 194         if (bits <= 8)
 195                 *out = o | d;
 196         else if (bits <= 16)
 197                 put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
 198         else if (bits <= 24)
 199                 put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
 200         else if (bits <= 32)
 201                 put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
 202         else if (bits <= 40)
 203                 put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
 204         else if (bits <= 48)
 205                 put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
 206         else if (bits <= 56)
 207                 put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
 208         else
 209                 put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
 210 
 211         p->bit += n;
 212 
 213         if (p->bit > 7) {
 214                 p->out += p->bit / 8;
 215                 p->olen -= p->bit / 8;
 216                 p->bit %= 8;
 217         }
 218 
 219         return 0;
 220 }
 221 
 222 static int add_template(struct sw842_param *p, u8 c)
 223 {
 224         int ret, i, b = 0;
 225         u8 *t = comp_ops[c];
 226         bool inv = false;
 227 
 228         if (c >= OPS_MAX)
 229                 return -EINVAL;
 230 
 231         pr_debug("template %x\n", t[4]);
 232 
 233         ret = add_bits(p, t[4], OP_BITS);
 234         if (ret)
 235                 return ret;
 236 
 237         for (i = 0; i < 4; i++) {
 238                 pr_debug("op %x\n", t[i]);
 239 
 240                 switch (t[i] & OP_AMOUNT) {
 241                 case OP_AMOUNT_8:
 242                         if (b)
 243                                 inv = true;
 244                         else if (t[i] & OP_ACTION_INDEX)
 245                                 ret = add_bits(p, p->index8[0], I8_BITS);
 246                         else if (t[i] & OP_ACTION_DATA)
 247                                 ret = add_bits(p, p->data8[0], 64);
 248                         else
 249                                 inv = true;
 250                         break;
 251                 case OP_AMOUNT_4:
 252                         if (b == 2 && t[i] & OP_ACTION_DATA)
 253                                 ret = add_bits(p, get_input_data(p, 2, 32), 32);
 254                         else if (b != 0 && b != 4)
 255                                 inv = true;
 256                         else if (t[i] & OP_ACTION_INDEX)
 257                                 ret = add_bits(p, p->index4[b >> 2], I4_BITS);
 258                         else if (t[i] & OP_ACTION_DATA)
 259                                 ret = add_bits(p, p->data4[b >> 2], 32);
 260                         else
 261                                 inv = true;
 262                         break;
 263                 case OP_AMOUNT_2:
 264                         if (b != 0 && b != 2 && b != 4 && b != 6)
 265                                 inv = true;
 266                         if (t[i] & OP_ACTION_INDEX)
 267                                 ret = add_bits(p, p->index2[b >> 1], I2_BITS);
 268                         else if (t[i] & OP_ACTION_DATA)
 269                                 ret = add_bits(p, p->data2[b >> 1], 16);
 270                         else
 271                                 inv = true;
 272                         break;
 273                 case OP_AMOUNT_0:
 274                         inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
 275                         break;
 276                 default:
 277                         inv = true;
 278                         break;
 279                 }
 280 
 281                 if (ret)
 282                         return ret;
 283 
 284                 if (inv) {
 285                         pr_err("Invalid templ %x op %d : %x %x %x %x\n",
 286                                c, i, t[0], t[1], t[2], t[3]);
 287                         return -EINVAL;
 288                 }
 289 
 290                 b += t[i] & OP_AMOUNT;
 291         }
 292 
 293         if (b != 8) {
 294                 pr_err("Invalid template %x len %x : %x %x %x %x\n",
 295                        c, b, t[0], t[1], t[2], t[3]);
 296                 return -EINVAL;
 297         }
 298 
 299         if (sw842_template_counts)
 300                 atomic_inc(&template_count[t[4]]);
 301 
 302         return 0;
 303 }
 304 
 305 static int add_repeat_template(struct sw842_param *p, u8 r)
 306 {
 307         int ret;
 308 
 309         /* repeat param is 0-based */
 310         if (!r || --r > REPEAT_BITS_MAX)
 311                 return -EINVAL;
 312 
 313         ret = add_bits(p, OP_REPEAT, OP_BITS);
 314         if (ret)
 315                 return ret;
 316 
 317         ret = add_bits(p, r, REPEAT_BITS);
 318         if (ret)
 319                 return ret;
 320 
 321         if (sw842_template_counts)
 322                 atomic_inc(&template_repeat_count);
 323 
 324         return 0;
 325 }
 326 
 327 static int add_short_data_template(struct sw842_param *p, u8 b)
 328 {
 329         int ret, i;
 330 
 331         if (!b || b > SHORT_DATA_BITS_MAX)
 332                 return -EINVAL;
 333 
 334         ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
 335         if (ret)
 336                 return ret;
 337 
 338         ret = add_bits(p, b, SHORT_DATA_BITS);
 339         if (ret)
 340                 return ret;
 341 
 342         for (i = 0; i < b; i++) {
 343                 ret = add_bits(p, p->in[i], 8);
 344                 if (ret)
 345                         return ret;
 346         }
 347 
 348         if (sw842_template_counts)
 349                 atomic_inc(&template_short_data_count);
 350 
 351         return 0;
 352 }
 353 
 354 static int add_zeros_template(struct sw842_param *p)
 355 {
 356         int ret = add_bits(p, OP_ZEROS, OP_BITS);
 357 
 358         if (ret)
 359                 return ret;
 360 
 361         if (sw842_template_counts)
 362                 atomic_inc(&template_zeros_count);
 363 
 364         return 0;
 365 }
 366 
 367 static int add_end_template(struct sw842_param *p)
 368 {
 369         int ret = add_bits(p, OP_END, OP_BITS);
 370 
 371         if (ret)
 372                 return ret;
 373 
 374         if (sw842_template_counts)
 375                 atomic_inc(&template_end_count);
 376 
 377         return 0;
 378 }
 379 
 380 static bool check_template(struct sw842_param *p, u8 c)
 381 {
 382         u8 *t = comp_ops[c];
 383         int i, match, b = 0;
 384 
 385         if (c >= OPS_MAX)
 386                 return false;
 387 
 388         for (i = 0; i < 4; i++) {
 389                 if (t[i] & OP_ACTION_INDEX) {
 390                         if (t[i] & OP_AMOUNT_2)
 391                                 match = check_index(p, 2, b >> 1);
 392                         else if (t[i] & OP_AMOUNT_4)
 393                                 match = check_index(p, 4, b >> 2);
 394                         else if (t[i] & OP_AMOUNT_8)
 395                                 match = check_index(p, 8, 0);
 396                         else
 397                                 return false;
 398                         if (!match)
 399                                 return false;
 400                 }
 401 
 402                 b += t[i] & OP_AMOUNT;
 403         }
 404 
 405         return true;
 406 }
 407 
 408 static void get_next_data(struct sw842_param *p)
 409 {
 410         p->data8[0] = get_input_data(p, 0, 64);
 411         p->data4[0] = get_input_data(p, 0, 32);
 412         p->data4[1] = get_input_data(p, 4, 32);
 413         p->data2[0] = get_input_data(p, 0, 16);
 414         p->data2[1] = get_input_data(p, 2, 16);
 415         p->data2[2] = get_input_data(p, 4, 16);
 416         p->data2[3] = get_input_data(p, 6, 16);
 417 }
 418 
 419 /* update the hashtable entries.
 420  * only call this after finding/adding the current template
 421  * the dataN fields for the current 8 byte block must be already updated
 422  */
 423 static void update_hashtables(struct sw842_param *p)
 424 {
 425         u64 pos = p->in - p->instart;
 426         u64 n8 = (pos >> 3) % (1 << I8_BITS);
 427         u64 n4 = (pos >> 2) % (1 << I4_BITS);
 428         u64 n2 = (pos >> 1) % (1 << I2_BITS);
 429 
 430         replace_hash(p, 8, n8, 0);
 431         replace_hash(p, 4, n4, 0);
 432         replace_hash(p, 4, n4, 1);
 433         replace_hash(p, 2, n2, 0);
 434         replace_hash(p, 2, n2, 1);
 435         replace_hash(p, 2, n2, 2);
 436         replace_hash(p, 2, n2, 3);
 437 }
 438 
 439 /* find the next template to use, and add it
 440  * the p->dataN fields must already be set for the current 8 byte block
 441  */
 442 static int process_next(struct sw842_param *p)
 443 {
 444         int ret, i;
 445 
 446         p->index8[0] = INDEX_NOT_CHECKED;
 447         p->index4[0] = INDEX_NOT_CHECKED;
 448         p->index4[1] = INDEX_NOT_CHECKED;
 449         p->index2[0] = INDEX_NOT_CHECKED;
 450         p->index2[1] = INDEX_NOT_CHECKED;
 451         p->index2[2] = INDEX_NOT_CHECKED;
 452         p->index2[3] = INDEX_NOT_CHECKED;
 453 
 454         /* check up to OPS_MAX - 1; last op is our fallback */
 455         for (i = 0; i < OPS_MAX - 1; i++) {
 456                 if (check_template(p, i))
 457                         break;
 458         }
 459 
 460         ret = add_template(p, i);
 461         if (ret)
 462                 return ret;
 463 
 464         return 0;
 465 }
 466 
 467 /**
 468  * sw842_compress
 469  *
 470  * Compress the uncompressed buffer of length @ilen at @in to the output buffer
 471  * @out, using no more than @olen bytes, using the 842 compression format.
 472  *
 473  * Returns: 0 on success, error on failure.  The @olen parameter
 474  * will contain the number of output bytes written on success, or
 475  * 0 on error.
 476  */
 477 int sw842_compress(const u8 *in, unsigned int ilen,
 478                    u8 *out, unsigned int *olen, void *wmem)
 479 {
 480         struct sw842_param *p = (struct sw842_param *)wmem;
 481         int ret;
 482         u64 last, next, pad, total;
 483         u8 repeat_count = 0;
 484         u32 crc;
 485 
 486         BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
 487 
 488         init_hashtable_nodes(p, 8);
 489         init_hashtable_nodes(p, 4);
 490         init_hashtable_nodes(p, 2);
 491 
 492         p->in = (u8 *)in;
 493         p->instart = p->in;
 494         p->ilen = ilen;
 495         p->out = out;
 496         p->olen = *olen;
 497         p->bit = 0;
 498 
 499         total = p->olen;
 500 
 501         *olen = 0;
 502 
 503         /* if using strict mode, we can only compress a multiple of 8 */
 504         if (sw842_strict && (ilen % 8)) {
 505                 pr_err("Using strict mode, can't compress len %d\n", ilen);
 506                 return -EINVAL;
 507         }
 508 
 509         /* let's compress at least 8 bytes, mkay? */
 510         if (unlikely(ilen < 8))
 511                 goto skip_comp;
 512 
 513         /* make initial 'last' different so we don't match the first time */
 514         last = ~get_unaligned((u64 *)p->in);
 515 
 516         while (p->ilen > 7) {
 517                 next = get_unaligned((u64 *)p->in);
 518 
 519                 /* must get the next data, as we need to update the hashtable
 520                  * entries with the new data every time
 521                  */
 522                 get_next_data(p);
 523 
 524                 /* we don't care about endianness in last or next;
 525                  * we're just comparing 8 bytes to another 8 bytes,
 526                  * they're both the same endianness
 527                  */
 528                 if (next == last) {
 529                         /* repeat count bits are 0-based, so we stop at +1 */
 530                         if (++repeat_count <= REPEAT_BITS_MAX)
 531                                 goto repeat;
 532                 }
 533                 if (repeat_count) {
 534                         ret = add_repeat_template(p, repeat_count);
 535                         repeat_count = 0;
 536                         if (next == last) /* reached max repeat bits */
 537                                 goto repeat;
 538                 }
 539 
 540                 if (next == 0)
 541                         ret = add_zeros_template(p);
 542                 else
 543                         ret = process_next(p);
 544 
 545                 if (ret)
 546                         return ret;
 547 
 548 repeat:
 549                 last = next;
 550                 update_hashtables(p);
 551                 p->in += 8;
 552                 p->ilen -= 8;
 553         }
 554 
 555         if (repeat_count) {
 556                 ret = add_repeat_template(p, repeat_count);
 557                 if (ret)
 558                         return ret;
 559         }
 560 
 561 skip_comp:
 562         if (p->ilen > 0) {
 563                 ret = add_short_data_template(p, p->ilen);
 564                 if (ret)
 565                         return ret;
 566 
 567                 p->in += p->ilen;
 568                 p->ilen = 0;
 569         }
 570 
 571         ret = add_end_template(p);
 572         if (ret)
 573                 return ret;
 574 
 575         /*
 576          * crc(0:31) is appended to target data starting with the next
 577          * bit after End of stream template.
 578          * nx842 calculates CRC for data in big-endian format. So doing
 579          * same here so that sw842 decompression can be used for both
 580          * compressed data.
 581          */
 582         crc = crc32_be(0, in, ilen);
 583         ret = add_bits(p, crc, CRC_BITS);
 584         if (ret)
 585                 return ret;
 586 
 587         if (p->bit) {
 588                 p->out++;
 589                 p->olen--;
 590                 p->bit = 0;
 591         }
 592 
 593         /* pad compressed length to multiple of 8 */
 594         pad = (8 - ((total - p->olen) % 8)) % 8;
 595         if (pad) {
 596                 if (pad > p->olen) /* we were so close! */
 597                         return -ENOSPC;
 598                 memset(p->out, 0, pad);
 599                 p->out += pad;
 600                 p->olen -= pad;
 601         }
 602 
 603         if (unlikely((total - p->olen) > UINT_MAX))
 604                 return -ENOSPC;
 605 
 606         *olen = total - p->olen;
 607 
 608         return 0;
 609 }
 610 EXPORT_SYMBOL_GPL(sw842_compress);
 611 
 612 static int __init sw842_init(void)
 613 {
 614         if (sw842_template_counts)
 615                 sw842_debugfs_create();
 616 
 617         return 0;
 618 }
 619 module_init(sw842_init);
 620 
 621 static void __exit sw842_exit(void)
 622 {
 623         if (sw842_template_counts)
 624                 sw842_debugfs_remove();
 625 }
 626 module_exit(sw842_exit);
 627 
 628 MODULE_LICENSE("GPL");
 629 MODULE_DESCRIPTION("Software 842 Compressor");
 630 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");

/* [<][>][^][v][top][bottom][index][help] */