1/* 2 * LZ4 HC - High Compression Mode of LZ4 3 * Copyright (C) 2011-2012, Yann Collet. 4 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are 8 * met: 9 * 10 * * Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * * Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following disclaimer 14 * in the documentation and/or other materials provided with the 15 * distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 * 29 * You can contact the author at : 30 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 31 * - LZ4 source repository : http://code.google.com/p/lz4/ 32 * 33 * Changed for kernel use by: 34 * Chanho Min <chanho.min@lge.com> 35 */ 36 37#include <linux/module.h> 38#include <linux/kernel.h> 39#include <linux/lz4.h> 40#include <asm/unaligned.h> 41#include "lz4defs.h" 42 43struct lz4hc_data { 44 const u8 *base; 45 HTYPE hashtable[HASHTABLESIZE]; 46 u16 chaintable[MAXD]; 47 const u8 *nexttoupdate; 48} __attribute__((__packed__)); 49 50static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base) 51{ 52 memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable)); 53 memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable)); 54 55#if LZ4_ARCH64 56 hc4->nexttoupdate = base + 1; 57#else 58 hc4->nexttoupdate = base; 59#endif 60 hc4->base = base; 61 return 1; 62} 63 64/* Update chains up to ip (excluded) */ 65static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip) 66{ 67 u16 *chaintable = hc4->chaintable; 68 HTYPE *hashtable = hc4->hashtable; 69#if LZ4_ARCH64 70 const BYTE * const base = hc4->base; 71#else 72 const int base = 0; 73#endif 74 75 while (hc4->nexttoupdate < ip) { 76 const u8 *p = hc4->nexttoupdate; 77 size_t delta = p - (hashtable[HASH_VALUE(p)] + base); 78 if (delta > MAX_DISTANCE) 79 delta = MAX_DISTANCE; 80 chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta; 81 hashtable[HASH_VALUE(p)] = (p) - base; 82 hc4->nexttoupdate++; 83 } 84} 85 86static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2, 87 const u8 *const matchlimit) 88{ 89 const u8 *p1t = p1; 90 91 while (p1t < matchlimit - (STEPSIZE - 1)) { 92#if LZ4_ARCH64 93 u64 diff = A64(p2) ^ A64(p1t); 94#else 95 u32 diff = A32(p2) ^ A32(p1t); 96#endif 97 if (!diff) { 98 p1t += STEPSIZE; 99 p2 += STEPSIZE; 100 continue; 101 } 102 p1t += LZ4_NBCOMMONBYTES(diff); 103 return p1t - p1; 104 } 105#if LZ4_ARCH64 106 if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) { 107 p1t += 4; 108 p2 += 4; 109 } 110#endif 111 112 if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) { 113 p1t += 2; 114 p2 += 2; 115 } 116 if ((p1t < matchlimit) && (*p2 == *p1t)) 117 p1t++; 118 return p1t - p1; 119} 120 121static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, 122 const u8 *ip, const u8 *const matchlimit, const u8 **matchpos) 123{ 124 u16 *const chaintable = hc4->chaintable; 125 HTYPE *const hashtable = hc4->hashtable; 126 const u8 *ref; 127#if LZ4_ARCH64 128 const BYTE * const base = hc4->base; 129#else 130 const int base = 0; 131#endif 132 int nbattempts = MAX_NB_ATTEMPTS; 133 size_t repl = 0, ml = 0; 134 u16 delta; 135 136 /* HC4 match finder */ 137 lz4hc_insert(hc4, ip); 138 ref = hashtable[HASH_VALUE(ip)] + base; 139 140 /* potential repetition */ 141 if (ref >= ip-4) { 142 /* confirmed */ 143 if (A32(ref) == A32(ip)) { 144 delta = (u16)(ip-ref); 145 repl = ml = lz4hc_commonlength(ip + MINMATCH, 146 ref + MINMATCH, matchlimit) + MINMATCH; 147 *matchpos = ref; 148 } 149 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; 150 } 151 152 while ((ref >= ip - MAX_DISTANCE) && nbattempts) { 153 nbattempts--; 154 if (*(ref + ml) == *(ip + ml)) { 155 if (A32(ref) == A32(ip)) { 156 size_t mlt = 157 lz4hc_commonlength(ip + MINMATCH, 158 ref + MINMATCH, matchlimit) + MINMATCH; 159 if (mlt > ml) { 160 ml = mlt; 161 *matchpos = ref; 162 } 163 } 164 } 165 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; 166 } 167 168 /* Complete table */ 169 if (repl) { 170 const BYTE *ptr = ip; 171 const BYTE *end; 172 end = ip + repl - (MINMATCH-1); 173 /* Pre-Load */ 174 while (ptr < end - delta) { 175 chaintable[(size_t)(ptr) & MAXD_MASK] = delta; 176 ptr++; 177 } 178 do { 179 chaintable[(size_t)(ptr) & MAXD_MASK] = delta; 180 /* Head of chain */ 181 hashtable[HASH_VALUE(ptr)] = (ptr) - base; 182 ptr++; 183 } while (ptr < end); 184 hc4->nexttoupdate = end; 185 } 186 187 return (int)ml; 188} 189 190static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4, 191 const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest, 192 const u8 **matchpos, const u8 **startpos) 193{ 194 u16 *const chaintable = hc4->chaintable; 195 HTYPE *const hashtable = hc4->hashtable; 196#if LZ4_ARCH64 197 const BYTE * const base = hc4->base; 198#else 199 const int base = 0; 200#endif 201 const u8 *ref; 202 int nbattempts = MAX_NB_ATTEMPTS; 203 int delta = (int)(ip - startlimit); 204 205 /* First Match */ 206 lz4hc_insert(hc4, ip); 207 ref = hashtable[HASH_VALUE(ip)] + base; 208 209 while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base) 210 && (nbattempts)) { 211 nbattempts--; 212 if (*(startlimit + longest) == *(ref - delta + longest)) { 213 if (A32(ref) == A32(ip)) { 214 const u8 *reft = ref + MINMATCH; 215 const u8 *ipt = ip + MINMATCH; 216 const u8 *startt = ip; 217 218 while (ipt < matchlimit-(STEPSIZE - 1)) { 219 #if LZ4_ARCH64 220 u64 diff = A64(reft) ^ A64(ipt); 221 #else 222 u32 diff = A32(reft) ^ A32(ipt); 223 #endif 224 225 if (!diff) { 226 ipt += STEPSIZE; 227 reft += STEPSIZE; 228 continue; 229 } 230 ipt += LZ4_NBCOMMONBYTES(diff); 231 goto _endcount; 232 } 233 #if LZ4_ARCH64 234 if ((ipt < (matchlimit - 3)) 235 && (A32(reft) == A32(ipt))) { 236 ipt += 4; 237 reft += 4; 238 } 239 ipt += 2; 240 #endif 241 if ((ipt < (matchlimit - 1)) 242 && (A16(reft) == A16(ipt))) { 243 reft += 2; 244 } 245 if ((ipt < matchlimit) && (*reft == *ipt)) 246 ipt++; 247_endcount: 248 reft = ref; 249 250 while ((startt > startlimit) 251 && (reft > hc4->base) 252 && (startt[-1] == reft[-1])) { 253 startt--; 254 reft--; 255 } 256 257 if ((ipt - startt) > longest) { 258 longest = (int)(ipt - startt); 259 *matchpos = reft; 260 *startpos = startt; 261 } 262 } 263 } 264 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; 265 } 266 return longest; 267} 268 269static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor, 270 int ml, const u8 *ref) 271{ 272 int length, len; 273 u8 *token; 274 275 /* Encode Literal length */ 276 length = (int)(*ip - *anchor); 277 token = (*op)++; 278 if (length >= (int)RUN_MASK) { 279 *token = (RUN_MASK << ML_BITS); 280 len = length - RUN_MASK; 281 for (; len > 254 ; len -= 255) 282 *(*op)++ = 255; 283 *(*op)++ = (u8)len; 284 } else 285 *token = (length << ML_BITS); 286 287 /* Copy Literals */ 288 LZ4_BLINDCOPY(*anchor, *op, length); 289 290 /* Encode Offset */ 291 LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref)); 292 293 /* Encode MatchLength */ 294 len = (int)(ml - MINMATCH); 295 if (len >= (int)ML_MASK) { 296 *token += ML_MASK; 297 len -= ML_MASK; 298 for (; len > 509 ; len -= 510) { 299 *(*op)++ = 255; 300 *(*op)++ = 255; 301 } 302 if (len > 254) { 303 len -= 255; 304 *(*op)++ = 255; 305 } 306 *(*op)++ = (u8)len; 307 } else 308 *token += len; 309 310 /* Prepare next loop */ 311 *ip += ml; 312 *anchor = *ip; 313 314 return 0; 315} 316 317static int lz4_compresshcctx(struct lz4hc_data *ctx, 318 const char *source, 319 char *dest, 320 int isize) 321{ 322 const u8 *ip = (const u8 *)source; 323 const u8 *anchor = ip; 324 const u8 *const iend = ip + isize; 325 const u8 *const mflimit = iend - MFLIMIT; 326 const u8 *const matchlimit = (iend - LASTLITERALS); 327 328 u8 *op = (u8 *)dest; 329 330 int ml, ml2, ml3, ml0; 331 const u8 *ref = NULL; 332 const u8 *start2 = NULL; 333 const u8 *ref2 = NULL; 334 const u8 *start3 = NULL; 335 const u8 *ref3 = NULL; 336 const u8 *start0; 337 const u8 *ref0; 338 int lastrun; 339 340 ip++; 341 342 /* Main Loop */ 343 while (ip < mflimit) { 344 ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref)); 345 if (!ml) { 346 ip++; 347 continue; 348 } 349 350 /* saved, in case we would skip too much */ 351 start0 = ip; 352 ref0 = ref; 353 ml0 = ml; 354_search2: 355 if (ip+ml < mflimit) 356 ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2, 357 ip + 1, matchlimit, ml, &ref2, &start2); 358 else 359 ml2 = ml; 360 /* No better match */ 361 if (ml2 == ml) { 362 lz4_encodesequence(&ip, &op, &anchor, ml, ref); 363 continue; 364 } 365 366 if (start0 < ip) { 367 /* empirical */ 368 if (start2 < ip + ml0) { 369 ip = start0; 370 ref = ref0; 371 ml = ml0; 372 } 373 } 374 /* 375 * Here, start0==ip 376 * First Match too small : removed 377 */ 378 if ((start2 - ip) < 3) { 379 ml = ml2; 380 ip = start2; 381 ref = ref2; 382 goto _search2; 383 } 384 385_search3: 386 /* 387 * Currently we have : 388 * ml2 > ml1, and 389 * ip1+3 <= ip2 (usually < ip1+ml1) 390 */ 391 if ((start2 - ip) < OPTIMAL_ML) { 392 int correction; 393 int new_ml = ml; 394 if (new_ml > OPTIMAL_ML) 395 new_ml = OPTIMAL_ML; 396 if (ip + new_ml > start2 + ml2 - MINMATCH) 397 new_ml = (int)(start2 - ip) + ml2 - MINMATCH; 398 correction = new_ml - (int)(start2 - ip); 399 if (correction > 0) { 400 start2 += correction; 401 ref2 += correction; 402 ml2 -= correction; 403 } 404 } 405 /* 406 * Now, we have start2 = ip+new_ml, 407 * with new_ml=min(ml, OPTIMAL_ML=18) 408 */ 409 if (start2 + ml2 < mflimit) 410 ml3 = lz4hc_insertandgetwidermatch(ctx, 411 start2 + ml2 - 3, start2, matchlimit, 412 ml2, &ref3, &start3); 413 else 414 ml3 = ml2; 415 416 /* No better match : 2 sequences to encode */ 417 if (ml3 == ml2) { 418 /* ip & ref are known; Now for ml */ 419 if (start2 < ip+ml) 420 ml = (int)(start2 - ip); 421 422 /* Now, encode 2 sequences */ 423 lz4_encodesequence(&ip, &op, &anchor, ml, ref); 424 ip = start2; 425 lz4_encodesequence(&ip, &op, &anchor, ml2, ref2); 426 continue; 427 } 428 429 /* Not enough space for match 2 : remove it */ 430 if (start3 < ip + ml + 3) { 431 /* 432 * can write Seq1 immediately ==> Seq2 is removed, 433 * so Seq3 becomes Seq1 434 */ 435 if (start3 >= (ip + ml)) { 436 if (start2 < ip + ml) { 437 int correction = 438 (int)(ip + ml - start2); 439 start2 += correction; 440 ref2 += correction; 441 ml2 -= correction; 442 if (ml2 < MINMATCH) { 443 start2 = start3; 444 ref2 = ref3; 445 ml2 = ml3; 446 } 447 } 448 449 lz4_encodesequence(&ip, &op, &anchor, ml, ref); 450 ip = start3; 451 ref = ref3; 452 ml = ml3; 453 454 start0 = start2; 455 ref0 = ref2; 456 ml0 = ml2; 457 goto _search2; 458 } 459 460 start2 = start3; 461 ref2 = ref3; 462 ml2 = ml3; 463 goto _search3; 464 } 465 466 /* 467 * OK, now we have 3 ascending matches; let's write at least 468 * the first one ip & ref are known; Now for ml 469 */ 470 if (start2 < ip + ml) { 471 if ((start2 - ip) < (int)ML_MASK) { 472 int correction; 473 if (ml > OPTIMAL_ML) 474 ml = OPTIMAL_ML; 475 if (ip + ml > start2 + ml2 - MINMATCH) 476 ml = (int)(start2 - ip) + ml2 477 - MINMATCH; 478 correction = ml - (int)(start2 - ip); 479 if (correction > 0) { 480 start2 += correction; 481 ref2 += correction; 482 ml2 -= correction; 483 } 484 } else 485 ml = (int)(start2 - ip); 486 } 487 lz4_encodesequence(&ip, &op, &anchor, ml, ref); 488 489 ip = start2; 490 ref = ref2; 491 ml = ml2; 492 493 start2 = start3; 494 ref2 = ref3; 495 ml2 = ml3; 496 497 goto _search3; 498 } 499 500 /* Encode Last Literals */ 501 lastrun = (int)(iend - anchor); 502 if (lastrun >= (int)RUN_MASK) { 503 *op++ = (RUN_MASK << ML_BITS); 504 lastrun -= RUN_MASK; 505 for (; lastrun > 254 ; lastrun -= 255) 506 *op++ = 255; 507 *op++ = (u8) lastrun; 508 } else 509 *op++ = (lastrun << ML_BITS); 510 memcpy(op, anchor, iend - anchor); 511 op += iend - anchor; 512 /* End */ 513 return (int) (((char *)op) - dest); 514} 515 516int lz4hc_compress(const unsigned char *src, size_t src_len, 517 unsigned char *dst, size_t *dst_len, void *wrkmem) 518{ 519 int ret = -1; 520 int out_len = 0; 521 522 struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem; 523 lz4hc_init(hc4, (const u8 *)src); 524 out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src, 525 (char *)dst, (int)src_len); 526 527 if (out_len < 0) 528 goto exit; 529 530 *dst_len = out_len; 531 return 0; 532 533exit: 534 return ret; 535} 536EXPORT_SYMBOL(lz4hc_compress); 537 538MODULE_LICENSE("Dual BSD/GPL"); 539MODULE_DESCRIPTION("LZ4HC compressor"); 540