root/drivers/media/platform/vicodec/codec-fwht.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rlc
  2. derlc
  3. quantize_intra
  4. dequantize_intra
  5. quantize_inter
  6. dequantize_inter
  7. fwht
  8. fwht16
  9. ifwht
  10. fill_encoder_block
  11. var_intra
  12. var_inter
  13. decide_blocktype
  14. fill_decoder_block
  15. add_deltas
  16. encode_plane
  17. fwht_encode_frame
  18. decode_plane
  19. fwht_decode_frame

   1 // SPDX-License-Identifier: LGPL-2.1+
   2 /*
   3  * Copyright 2016 Tom aan de Wiel
   4  * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
   5  *
   6  * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
   7  *
   8  * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
   9  * R.D. Brown, 1977
  10  */
  11 
  12 #include <linux/string.h>
  13 #include <linux/kernel.h>
  14 #include "codec-fwht.h"
  15 
  16 #define OVERFLOW_BIT BIT(14)
  17 
  18 /*
  19  * Note: bit 0 of the header must always be 0. Otherwise it cannot
  20  * be guaranteed that the magic 8 byte sequence (see below) can
  21  * never occur in the rlc output.
  22  */
  23 #define PFRAME_BIT BIT(15)
  24 #define DUPS_MASK 0x1ffe
  25 
  26 #define PBLOCK 0
  27 #define IBLOCK 1
  28 
  29 #define ALL_ZEROS 15
  30 
  31 static const uint8_t zigzag[64] = {
  32         0,
  33         1,  8,
  34         2,  9, 16,
  35         3, 10, 17, 24,
  36         4, 11, 18, 25, 32,
  37         5, 12, 19, 26, 33, 40,
  38         6, 13, 20, 27, 34, 41, 48,
  39         7, 14, 21, 28, 35, 42, 49, 56,
  40         15, 22, 29, 36, 43, 50, 57,
  41         23, 30, 37, 44, 51, 58,
  42         31, 38, 45, 52, 59,
  43         39, 46, 53, 60,
  44         47, 54, 61,
  45         55, 62,
  46         63,
  47 };
  48 
  49 /*
  50  * noinline_for_stack to work around
  51  * https://bugs.llvm.org/show_bug.cgi?id=38809
  52  */
  53 static int noinline_for_stack
  54 rlc(const s16 *in, __be16 *output, int blocktype)
  55 {
  56         s16 block[8 * 8];
  57         s16 *wp = block;
  58         int i = 0;
  59         int x, y;
  60         int ret = 0;
  61 
  62         /* read in block from framebuffer */
  63         int lastzero_run = 0;
  64         int to_encode;
  65 
  66         for (y = 0; y < 8; y++) {
  67                 for (x = 0; x < 8; x++) {
  68                         *wp = in[x + y * 8];
  69                         wp++;
  70                 }
  71         }
  72 
  73         /* keep track of amount of trailing zeros */
  74         for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
  75                 lastzero_run++;
  76 
  77         *output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
  78         ret++;
  79 
  80         to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);
  81 
  82         i = 0;
  83         while (i < to_encode) {
  84                 int cnt = 0;
  85                 int tmp;
  86 
  87                 /* count leading zeros */
  88                 while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
  89                         cnt++;
  90                         i++;
  91                         if (i == to_encode) {
  92                                 cnt--;
  93                                 break;
  94                         }
  95                 }
  96                 /* 4 bits for run, 12 for coefficient (quantization by 4) */
  97                 *output++ = htons((cnt | tmp << 4));
  98                 i++;
  99                 ret++;
 100         }
 101         if (lastzero_run > 14) {
 102                 *output = htons(ALL_ZEROS | 0);
 103                 ret++;
 104         }
 105 
 106         return ret;
 107 }
 108 
 109 /*
 110  * This function will worst-case increase rlc_in by 65*2 bytes:
 111  * one s16 value for the header and 8 * 8 coefficients of type s16.
 112  */
 113 static noinline_for_stack u16
 114 derlc(const __be16 **rlc_in, s16 *dwht_out, const __be16 *end_of_input)
 115 {
 116         /* header */
 117         const __be16 *input = *rlc_in;
 118         u16 stat;
 119         int dec_count = 0;
 120         s16 block[8 * 8 + 16];
 121         s16 *wp = block;
 122         int i;
 123 
 124         if (input > end_of_input)
 125                 return OVERFLOW_BIT;
 126         stat = ntohs(*input++);
 127 
 128         /*
 129          * Now de-compress, it expands one byte to up to 15 bytes
 130          * (or fills the remainder of the 64 bytes with zeroes if it
 131          * is the last byte to expand).
 132          *
 133          * So block has to be 8 * 8 + 16 bytes, the '+ 16' is to
 134          * allow for overflow if the incoming data was malformed.
 135          */
 136         while (dec_count < 8 * 8) {
 137                 s16 in;
 138                 int length;
 139                 int coeff;
 140 
 141                 if (input > end_of_input)
 142                         return OVERFLOW_BIT;
 143                 in = ntohs(*input++);
 144                 length = in & 0xf;
 145                 coeff = in >> 4;
 146 
 147                 /* fill remainder with zeros */
 148                 if (length == 15) {
 149                         for (i = 0; i < 64 - dec_count; i++)
 150                                 *wp++ = 0;
 151                         break;
 152                 }
 153 
 154                 for (i = 0; i < length; i++)
 155                         *wp++ = 0;
 156                 *wp++ = coeff;
 157                 dec_count += length + 1;
 158         }
 159 
 160         wp = block;
 161 
 162         for (i = 0; i < 64; i++) {
 163                 int pos = zigzag[i];
 164                 int y = pos / 8;
 165                 int x = pos % 8;
 166 
 167                 dwht_out[x + y * 8] = *wp++;
 168         }
 169         *rlc_in = input;
 170         return stat;
 171 }
 172 
 173 static const int quant_table[] = {
 174         2, 2, 2, 2, 2, 2,  2,  2,
 175         2, 2, 2, 2, 2, 2,  2,  2,
 176         2, 2, 2, 2, 2, 2,  2,  3,
 177         2, 2, 2, 2, 2, 2,  3,  6,
 178         2, 2, 2, 2, 2, 3,  6,  6,
 179         2, 2, 2, 2, 3, 6,  6,  6,
 180         2, 2, 2, 3, 6, 6,  6,  6,
 181         2, 2, 3, 6, 6, 6,  6,  8,
 182 };
 183 
 184 static const int quant_table_p[] = {
 185         3, 3, 3, 3, 3, 3,  3,  3,
 186         3, 3, 3, 3, 3, 3,  3,  3,
 187         3, 3, 3, 3, 3, 3,  3,  3,
 188         3, 3, 3, 3, 3, 3,  3,  6,
 189         3, 3, 3, 3, 3, 3,  6,  6,
 190         3, 3, 3, 3, 3, 6,  6,  9,
 191         3, 3, 3, 3, 6, 6,  9,  9,
 192         3, 3, 3, 6, 6, 9,  9,  10,
 193 };
 194 
 195 static void quantize_intra(s16 *coeff, s16 *de_coeff, u16 qp)
 196 {
 197         const int *quant = quant_table;
 198         int i, j;
 199 
 200         for (j = 0; j < 8; j++) {
 201                 for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
 202                         *coeff >>= *quant;
 203                         if (*coeff >= -qp && *coeff <= qp)
 204                                 *coeff = *de_coeff = 0;
 205                         else
 206                                 *de_coeff = *coeff << *quant;
 207                 }
 208         }
 209 }
 210 
 211 static void dequantize_intra(s16 *coeff)
 212 {
 213         const int *quant = quant_table;
 214         int i, j;
 215 
 216         for (j = 0; j < 8; j++)
 217                 for (i = 0; i < 8; i++, quant++, coeff++)
 218                         *coeff <<= *quant;
 219 }
 220 
 221 static void quantize_inter(s16 *coeff, s16 *de_coeff, u16 qp)
 222 {
 223         const int *quant = quant_table_p;
 224         int i, j;
 225 
 226         for (j = 0; j < 8; j++) {
 227                 for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
 228                         *coeff >>= *quant;
 229                         if (*coeff >= -qp && *coeff <= qp)
 230                                 *coeff = *de_coeff = 0;
 231                         else
 232                                 *de_coeff = *coeff << *quant;
 233                 }
 234         }
 235 }
 236 
 237 static void dequantize_inter(s16 *coeff)
 238 {
 239         const int *quant = quant_table_p;
 240         int i, j;
 241 
 242         for (j = 0; j < 8; j++)
 243                 for (i = 0; i < 8; i++, quant++, coeff++)
 244                         *coeff <<= *quant;
 245 }
 246 
 247 static void noinline_for_stack fwht(const u8 *block, s16 *output_block,
 248                                     unsigned int stride,
 249                                     unsigned int input_step, bool intra)
 250 {
 251         /* we'll need more than 8 bits for the transformed coefficients */
 252         s32 workspace1[8], workspace2[8];
 253         const u8 *tmp = block;
 254         s16 *out = output_block;
 255         int add = intra ? 256 : 0;
 256         unsigned int i;
 257 
 258         /* stage 1 */
 259         for (i = 0; i < 8; i++, tmp += stride, out += 8) {
 260                 switch (input_step) {
 261                 case 1:
 262                         workspace1[0]  = tmp[0] + tmp[1] - add;
 263                         workspace1[1]  = tmp[0] - tmp[1];
 264 
 265                         workspace1[2]  = tmp[2] + tmp[3] - add;
 266                         workspace1[3]  = tmp[2] - tmp[3];
 267 
 268                         workspace1[4]  = tmp[4] + tmp[5] - add;
 269                         workspace1[5]  = tmp[4] - tmp[5];
 270 
 271                         workspace1[6]  = tmp[6] + tmp[7] - add;
 272                         workspace1[7]  = tmp[6] - tmp[7];
 273                         break;
 274                 case 2:
 275                         workspace1[0]  = tmp[0] + tmp[2] - add;
 276                         workspace1[1]  = tmp[0] - tmp[2];
 277 
 278                         workspace1[2]  = tmp[4] + tmp[6] - add;
 279                         workspace1[3]  = tmp[4] - tmp[6];
 280 
 281                         workspace1[4]  = tmp[8] + tmp[10] - add;
 282                         workspace1[5]  = tmp[8] - tmp[10];
 283 
 284                         workspace1[6]  = tmp[12] + tmp[14] - add;
 285                         workspace1[7]  = tmp[12] - tmp[14];
 286                         break;
 287                 case 3:
 288                         workspace1[0]  = tmp[0] + tmp[3] - add;
 289                         workspace1[1]  = tmp[0] - tmp[3];
 290 
 291                         workspace1[2]  = tmp[6] + tmp[9] - add;
 292                         workspace1[3]  = tmp[6] - tmp[9];
 293 
 294                         workspace1[4]  = tmp[12] + tmp[15] - add;
 295                         workspace1[5]  = tmp[12] - tmp[15];
 296 
 297                         workspace1[6]  = tmp[18] + tmp[21] - add;
 298                         workspace1[7]  = tmp[18] - tmp[21];
 299                         break;
 300                 default:
 301                         workspace1[0]  = tmp[0] + tmp[4] - add;
 302                         workspace1[1]  = tmp[0] - tmp[4];
 303 
 304                         workspace1[2]  = tmp[8] + tmp[12] - add;
 305                         workspace1[3]  = tmp[8] - tmp[12];
 306 
 307                         workspace1[4]  = tmp[16] + tmp[20] - add;
 308                         workspace1[5]  = tmp[16] - tmp[20];
 309 
 310                         workspace1[6]  = tmp[24] + tmp[28] - add;
 311                         workspace1[7]  = tmp[24] - tmp[28];
 312                         break;
 313                 }
 314 
 315                 /* stage 2 */
 316                 workspace2[0] = workspace1[0] + workspace1[2];
 317                 workspace2[1] = workspace1[0] - workspace1[2];
 318                 workspace2[2] = workspace1[1] - workspace1[3];
 319                 workspace2[3] = workspace1[1] + workspace1[3];
 320 
 321                 workspace2[4] = workspace1[4] + workspace1[6];
 322                 workspace2[5] = workspace1[4] - workspace1[6];
 323                 workspace2[6] = workspace1[5] - workspace1[7];
 324                 workspace2[7] = workspace1[5] + workspace1[7];
 325 
 326                 /* stage 3 */
 327                 out[0] = workspace2[0] + workspace2[4];
 328                 out[1] = workspace2[0] - workspace2[4];
 329                 out[2] = workspace2[1] - workspace2[5];
 330                 out[3] = workspace2[1] + workspace2[5];
 331                 out[4] = workspace2[2] + workspace2[6];
 332                 out[5] = workspace2[2] - workspace2[6];
 333                 out[6] = workspace2[3] - workspace2[7];
 334                 out[7] = workspace2[3] + workspace2[7];
 335         }
 336 
 337         out = output_block;
 338 
 339         for (i = 0; i < 8; i++, out++) {
 340                 /* stage 1 */
 341                 workspace1[0]  = out[0] + out[1 * 8];
 342                 workspace1[1]  = out[0] - out[1 * 8];
 343 
 344                 workspace1[2]  = out[2 * 8] + out[3 * 8];
 345                 workspace1[3]  = out[2 * 8] - out[3 * 8];
 346 
 347                 workspace1[4]  = out[4 * 8] + out[5 * 8];
 348                 workspace1[5]  = out[4 * 8] - out[5 * 8];
 349 
 350                 workspace1[6]  = out[6 * 8] + out[7 * 8];
 351                 workspace1[7]  = out[6 * 8] - out[7 * 8];
 352 
 353                 /* stage 2 */
 354                 workspace2[0] = workspace1[0] + workspace1[2];
 355                 workspace2[1] = workspace1[0] - workspace1[2];
 356                 workspace2[2] = workspace1[1] - workspace1[3];
 357                 workspace2[3] = workspace1[1] + workspace1[3];
 358 
 359                 workspace2[4] = workspace1[4] + workspace1[6];
 360                 workspace2[5] = workspace1[4] - workspace1[6];
 361                 workspace2[6] = workspace1[5] - workspace1[7];
 362                 workspace2[7] = workspace1[5] + workspace1[7];
 363                 /* stage 3 */
 364                 out[0 * 8] = workspace2[0] + workspace2[4];
 365                 out[1 * 8] = workspace2[0] - workspace2[4];
 366                 out[2 * 8] = workspace2[1] - workspace2[5];
 367                 out[3 * 8] = workspace2[1] + workspace2[5];
 368                 out[4 * 8] = workspace2[2] + workspace2[6];
 369                 out[5 * 8] = workspace2[2] - workspace2[6];
 370                 out[6 * 8] = workspace2[3] - workspace2[7];
 371                 out[7 * 8] = workspace2[3] + workspace2[7];
 372         }
 373 }
 374 
 375 /*
 376  * Not the nicest way of doing it, but P-blocks get twice the range of
 377  * that of the I-blocks. Therefore we need a type bigger than 8 bits.
 378  * Furthermore values can be negative... This is just a version that
 379  * works with 16 signed data
 380  */
 381 static void noinline_for_stack
 382 fwht16(const s16 *block, s16 *output_block, int stride, int intra)
 383 {
 384         /* we'll need more than 8 bits for the transformed coefficients */
 385         s32 workspace1[8], workspace2[8];
 386         const s16 *tmp = block;
 387         s16 *out = output_block;
 388         int i;
 389 
 390         for (i = 0; i < 8; i++, tmp += stride, out += 8) {
 391                 /* stage 1 */
 392                 workspace1[0]  = tmp[0] + tmp[1];
 393                 workspace1[1]  = tmp[0] - tmp[1];
 394 
 395                 workspace1[2]  = tmp[2] + tmp[3];
 396                 workspace1[3]  = tmp[2] - tmp[3];
 397 
 398                 workspace1[4]  = tmp[4] + tmp[5];
 399                 workspace1[5]  = tmp[4] - tmp[5];
 400 
 401                 workspace1[6]  = tmp[6] + tmp[7];
 402                 workspace1[7]  = tmp[6] - tmp[7];
 403 
 404                 /* stage 2 */
 405                 workspace2[0] = workspace1[0] + workspace1[2];
 406                 workspace2[1] = workspace1[0] - workspace1[2];
 407                 workspace2[2] = workspace1[1] - workspace1[3];
 408                 workspace2[3] = workspace1[1] + workspace1[3];
 409 
 410                 workspace2[4] = workspace1[4] + workspace1[6];
 411                 workspace2[5] = workspace1[4] - workspace1[6];
 412                 workspace2[6] = workspace1[5] - workspace1[7];
 413                 workspace2[7] = workspace1[5] + workspace1[7];
 414 
 415                 /* stage 3 */
 416                 out[0] = workspace2[0] + workspace2[4];
 417                 out[1] = workspace2[0] - workspace2[4];
 418                 out[2] = workspace2[1] - workspace2[5];
 419                 out[3] = workspace2[1] + workspace2[5];
 420                 out[4] = workspace2[2] + workspace2[6];
 421                 out[5] = workspace2[2] - workspace2[6];
 422                 out[6] = workspace2[3] - workspace2[7];
 423                 out[7] = workspace2[3] + workspace2[7];
 424         }
 425 
 426         out = output_block;
 427 
 428         for (i = 0; i < 8; i++, out++) {
 429                 /* stage 1 */
 430                 workspace1[0]  = out[0] + out[1*8];
 431                 workspace1[1]  = out[0] - out[1*8];
 432 
 433                 workspace1[2]  = out[2*8] + out[3*8];
 434                 workspace1[3]  = out[2*8] - out[3*8];
 435 
 436                 workspace1[4]  = out[4*8] + out[5*8];
 437                 workspace1[5]  = out[4*8] - out[5*8];
 438 
 439                 workspace1[6]  = out[6*8] + out[7*8];
 440                 workspace1[7]  = out[6*8] - out[7*8];
 441 
 442                 /* stage 2 */
 443                 workspace2[0] = workspace1[0] + workspace1[2];
 444                 workspace2[1] = workspace1[0] - workspace1[2];
 445                 workspace2[2] = workspace1[1] - workspace1[3];
 446                 workspace2[3] = workspace1[1] + workspace1[3];
 447 
 448                 workspace2[4] = workspace1[4] + workspace1[6];
 449                 workspace2[5] = workspace1[4] - workspace1[6];
 450                 workspace2[6] = workspace1[5] - workspace1[7];
 451                 workspace2[7] = workspace1[5] + workspace1[7];
 452 
 453                 /* stage 3 */
 454                 out[0*8] = workspace2[0] + workspace2[4];
 455                 out[1*8] = workspace2[0] - workspace2[4];
 456                 out[2*8] = workspace2[1] - workspace2[5];
 457                 out[3*8] = workspace2[1] + workspace2[5];
 458                 out[4*8] = workspace2[2] + workspace2[6];
 459                 out[5*8] = workspace2[2] - workspace2[6];
 460                 out[6*8] = workspace2[3] - workspace2[7];
 461                 out[7*8] = workspace2[3] + workspace2[7];
 462         }
 463 }
 464 
 465 static noinline_for_stack void
 466 ifwht(const s16 *block, s16 *output_block, int intra)
 467 {
 468         /*
 469          * we'll need more than 8 bits for the transformed coefficients
 470          * use native unit of cpu
 471          */
 472         int workspace1[8], workspace2[8];
 473         int inter = intra ? 0 : 1;
 474         const s16 *tmp = block;
 475         s16 *out = output_block;
 476         int i;
 477 
 478         for (i = 0; i < 8; i++, tmp += 8, out += 8) {
 479                 /* stage 1 */
 480                 workspace1[0]  = tmp[0] + tmp[1];
 481                 workspace1[1]  = tmp[0] - tmp[1];
 482 
 483                 workspace1[2]  = tmp[2] + tmp[3];
 484                 workspace1[3]  = tmp[2] - tmp[3];
 485 
 486                 workspace1[4]  = tmp[4] + tmp[5];
 487                 workspace1[5]  = tmp[4] - tmp[5];
 488 
 489                 workspace1[6]  = tmp[6] + tmp[7];
 490                 workspace1[7]  = tmp[6] - tmp[7];
 491 
 492                 /* stage 2 */
 493                 workspace2[0] = workspace1[0] + workspace1[2];
 494                 workspace2[1] = workspace1[0] - workspace1[2];
 495                 workspace2[2] = workspace1[1] - workspace1[3];
 496                 workspace2[3] = workspace1[1] + workspace1[3];
 497 
 498                 workspace2[4] = workspace1[4] + workspace1[6];
 499                 workspace2[5] = workspace1[4] - workspace1[6];
 500                 workspace2[6] = workspace1[5] - workspace1[7];
 501                 workspace2[7] = workspace1[5] + workspace1[7];
 502 
 503                 /* stage 3 */
 504                 out[0] = workspace2[0] + workspace2[4];
 505                 out[1] = workspace2[0] - workspace2[4];
 506                 out[2] = workspace2[1] - workspace2[5];
 507                 out[3] = workspace2[1] + workspace2[5];
 508                 out[4] = workspace2[2] + workspace2[6];
 509                 out[5] = workspace2[2] - workspace2[6];
 510                 out[6] = workspace2[3] - workspace2[7];
 511                 out[7] = workspace2[3] + workspace2[7];
 512         }
 513 
 514         out = output_block;
 515 
 516         for (i = 0; i < 8; i++, out++) {
 517                 /* stage 1 */
 518                 workspace1[0]  = out[0] + out[1 * 8];
 519                 workspace1[1]  = out[0] - out[1 * 8];
 520 
 521                 workspace1[2]  = out[2 * 8] + out[3 * 8];
 522                 workspace1[3]  = out[2 * 8] - out[3 * 8];
 523 
 524                 workspace1[4]  = out[4 * 8] + out[5 * 8];
 525                 workspace1[5]  = out[4 * 8] - out[5 * 8];
 526 
 527                 workspace1[6]  = out[6 * 8] + out[7 * 8];
 528                 workspace1[7]  = out[6 * 8] - out[7 * 8];
 529 
 530                 /* stage 2 */
 531                 workspace2[0] = workspace1[0] + workspace1[2];
 532                 workspace2[1] = workspace1[0] - workspace1[2];
 533                 workspace2[2] = workspace1[1] - workspace1[3];
 534                 workspace2[3] = workspace1[1] + workspace1[3];
 535 
 536                 workspace2[4] = workspace1[4] + workspace1[6];
 537                 workspace2[5] = workspace1[4] - workspace1[6];
 538                 workspace2[6] = workspace1[5] - workspace1[7];
 539                 workspace2[7] = workspace1[5] + workspace1[7];
 540 
 541                 /* stage 3 */
 542                 if (inter) {
 543                         int d;
 544 
 545                         out[0 * 8] = workspace2[0] + workspace2[4];
 546                         out[1 * 8] = workspace2[0] - workspace2[4];
 547                         out[2 * 8] = workspace2[1] - workspace2[5];
 548                         out[3 * 8] = workspace2[1] + workspace2[5];
 549                         out[4 * 8] = workspace2[2] + workspace2[6];
 550                         out[5 * 8] = workspace2[2] - workspace2[6];
 551                         out[6 * 8] = workspace2[3] - workspace2[7];
 552                         out[7 * 8] = workspace2[3] + workspace2[7];
 553 
 554                         for (d = 0; d < 8; d++)
 555                                 out[8 * d] >>= 6;
 556                 } else {
 557                         int d;
 558 
 559                         out[0 * 8] = workspace2[0] + workspace2[4];
 560                         out[1 * 8] = workspace2[0] - workspace2[4];
 561                         out[2 * 8] = workspace2[1] - workspace2[5];
 562                         out[3 * 8] = workspace2[1] + workspace2[5];
 563                         out[4 * 8] = workspace2[2] + workspace2[6];
 564                         out[5 * 8] = workspace2[2] - workspace2[6];
 565                         out[6 * 8] = workspace2[3] - workspace2[7];
 566                         out[7 * 8] = workspace2[3] + workspace2[7];
 567 
 568                         for (d = 0; d < 8; d++) {
 569                                 out[8 * d] >>= 6;
 570                                 out[8 * d] += 128;
 571                         }
 572                 }
 573         }
 574 }
 575 
 576 static void fill_encoder_block(const u8 *input, s16 *dst,
 577                                unsigned int stride, unsigned int input_step)
 578 {
 579         int i, j;
 580 
 581         for (i = 0; i < 8; i++) {
 582                 for (j = 0; j < 8; j++, input += input_step)
 583                         *dst++ = *input;
 584                 input += stride - 8 * input_step;
 585         }
 586 }
 587 
 588 static int var_intra(const s16 *input)
 589 {
 590         int32_t mean = 0;
 591         int32_t ret = 0;
 592         const s16 *tmp = input;
 593         int i;
 594 
 595         for (i = 0; i < 8 * 8; i++, tmp++)
 596                 mean += *tmp;
 597         mean /= 64;
 598         tmp = input;
 599         for (i = 0; i < 8 * 8; i++, tmp++)
 600                 ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean);
 601         return ret;
 602 }
 603 
 604 static int var_inter(const s16 *old, const s16 *new)
 605 {
 606         int32_t ret = 0;
 607         int i;
 608 
 609         for (i = 0; i < 8 * 8; i++, old++, new++)
 610                 ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new);
 611         return ret;
 612 }
 613 
 614 static noinline_for_stack int
 615 decide_blocktype(const u8 *cur, const u8 *reference, s16 *deltablock,
 616                  unsigned int stride, unsigned int input_step)
 617 {
 618         s16 tmp[64];
 619         s16 old[64];
 620         s16 *work = tmp;
 621         unsigned int k, l;
 622         int vari;
 623         int vard;
 624 
 625         fill_encoder_block(cur, tmp, stride, input_step);
 626         fill_encoder_block(reference, old, 8, 1);
 627         vari = var_intra(tmp);
 628 
 629         for (k = 0; k < 8; k++) {
 630                 for (l = 0; l < 8; l++) {
 631                         *deltablock = *work - *reference;
 632                         deltablock++;
 633                         work++;
 634                         reference++;
 635                 }
 636         }
 637         deltablock -= 64;
 638         vard = var_inter(old, tmp);
 639         return vari <= vard ? IBLOCK : PBLOCK;
 640 }
 641 
 642 static void fill_decoder_block(u8 *dst, const s16 *input, int stride,
 643                                unsigned int dst_step)
 644 {
 645         int i, j;
 646 
 647         for (i = 0; i < 8; i++) {
 648                 for (j = 0; j < 8; j++, input++, dst += dst_step) {
 649                         if (*input < 0)
 650                                 *dst = 0;
 651                         else if (*input > 255)
 652                                 *dst = 255;
 653                         else
 654                                 *dst = *input;
 655                 }
 656                 dst += stride - (8 * dst_step);
 657         }
 658 }
 659 
 660 static void add_deltas(s16 *deltas, const u8 *ref, int stride,
 661                        unsigned int ref_step)
 662 {
 663         int k, l;
 664 
 665         for (k = 0; k < 8; k++) {
 666                 for (l = 0; l < 8; l++) {
 667                         *deltas += *ref;
 668                         ref += ref_step;
 669                         /*
 670                          * Due to quantizing, it might possible that the
 671                          * decoded coefficients are slightly out of range
 672                          */
 673                         if (*deltas < 0)
 674                                 *deltas = 0;
 675                         else if (*deltas > 255)
 676                                 *deltas = 255;
 677                         deltas++;
 678                 }
 679                 ref += stride - (8 * ref_step);
 680         }
 681 }
 682 
 683 static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
 684                         struct fwht_cframe *cf, u32 height, u32 width,
 685                         u32 stride, unsigned int input_step,
 686                         bool is_intra, bool next_is_intra)
 687 {
 688         u8 *input_start = input;
 689         __be16 *rlco_start = *rlco;
 690         s16 deltablock[64];
 691         __be16 pframe_bit = htons(PFRAME_BIT);
 692         u32 encoding = 0;
 693         unsigned int last_size = 0;
 694         unsigned int i, j;
 695 
 696         width = round_up(width, 8);
 697         height = round_up(height, 8);
 698 
 699         for (j = 0; j < height / 8; j++) {
 700                 input = input_start + j * 8 * stride;
 701                 for (i = 0; i < width / 8; i++) {
 702                         /* intra code, first frame is always intra coded. */
 703                         int blocktype = IBLOCK;
 704                         unsigned int size;
 705 
 706                         if (!is_intra)
 707                                 blocktype = decide_blocktype(input, refp,
 708                                         deltablock, stride, input_step);
 709                         if (blocktype == IBLOCK) {
 710                                 fwht(input, cf->coeffs, stride, input_step, 1);
 711                                 quantize_intra(cf->coeffs, cf->de_coeffs,
 712                                                cf->i_frame_qp);
 713                         } else {
 714                                 /* inter code */
 715                                 encoding |= FWHT_FRAME_PCODED;
 716                                 fwht16(deltablock, cf->coeffs, 8, 0);
 717                                 quantize_inter(cf->coeffs, cf->de_coeffs,
 718                                                cf->p_frame_qp);
 719                         }
 720                         if (!next_is_intra) {
 721                                 ifwht(cf->de_coeffs, cf->de_fwht, blocktype);
 722 
 723                                 if (blocktype == PBLOCK)
 724                                         add_deltas(cf->de_fwht, refp, 8, 1);
 725                                 fill_decoder_block(refp, cf->de_fwht, 8, 1);
 726                         }
 727 
 728                         input += 8 * input_step;
 729                         refp += 8 * 8;
 730 
 731                         size = rlc(cf->coeffs, *rlco, blocktype);
 732                         if (last_size == size &&
 733                             !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) {
 734                                 __be16 *last_rlco = *rlco - size;
 735                                 s16 hdr = ntohs(*last_rlco);
 736 
 737                                 if (!((*last_rlco ^ **rlco) & pframe_bit) &&
 738                                     (hdr & DUPS_MASK) < DUPS_MASK)
 739                                         *last_rlco = htons(hdr + 2);
 740                                 else
 741                                         *rlco += size;
 742                         } else {
 743                                 *rlco += size;
 744                         }
 745                         if (*rlco >= rlco_max) {
 746                                 encoding |= FWHT_FRAME_UNENCODED;
 747                                 goto exit_loop;
 748                         }
 749                         last_size = size;
 750                 }
 751         }
 752 
 753 exit_loop:
 754         if (encoding & FWHT_FRAME_UNENCODED) {
 755                 u8 *out = (u8 *)rlco_start;
 756                 u8 *p;
 757 
 758                 input = input_start;
 759                 /*
 760                  * The compressed stream should never contain the magic
 761                  * header, so when we copy the YUV data we replace 0xff
 762                  * by 0xfe. Since YUV is limited range such values
 763                  * shouldn't appear anyway.
 764                  */
 765                 for (j = 0; j < height; j++) {
 766                         for (i = 0, p = input; i < width; i++, p += input_step)
 767                                 *out++ = (*p == 0xff) ? 0xfe : *p;
 768                         input += stride;
 769                 }
 770                 *rlco = (__be16 *)out;
 771                 encoding &= ~FWHT_FRAME_PCODED;
 772         }
 773         return encoding;
 774 }
 775 
 776 u32 fwht_encode_frame(struct fwht_raw_frame *frm,
 777                       struct fwht_raw_frame *ref_frm,
 778                       struct fwht_cframe *cf,
 779                       bool is_intra, bool next_is_intra,
 780                       unsigned int width, unsigned int height,
 781                       unsigned int stride, unsigned int chroma_stride)
 782 {
 783         unsigned int size = height * width;
 784         __be16 *rlco = cf->rlc_data;
 785         __be16 *rlco_max;
 786         u32 encoding;
 787 
 788         rlco_max = rlco + size / 2 - 256;
 789         encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf,
 790                                 height, width, stride,
 791                                 frm->luma_alpha_step, is_intra, next_is_intra);
 792         if (encoding & FWHT_FRAME_UNENCODED)
 793                 encoding |= FWHT_LUMA_UNENCODED;
 794         encoding &= ~FWHT_FRAME_UNENCODED;
 795 
 796         if (frm->components_num >= 3) {
 797                 u32 chroma_h = height / frm->height_div;
 798                 u32 chroma_w = width / frm->width_div;
 799                 unsigned int chroma_size = chroma_h * chroma_w;
 800 
 801                 rlco_max = rlco + chroma_size / 2 - 256;
 802                 encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max,
 803                                          cf, chroma_h, chroma_w,
 804                                          chroma_stride, frm->chroma_step,
 805                                          is_intra, next_is_intra);
 806                 if (encoding & FWHT_FRAME_UNENCODED)
 807                         encoding |= FWHT_CB_UNENCODED;
 808                 encoding &= ~FWHT_FRAME_UNENCODED;
 809                 rlco_max = rlco + chroma_size / 2 - 256;
 810                 encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max,
 811                                          cf, chroma_h, chroma_w,
 812                                          chroma_stride, frm->chroma_step,
 813                                          is_intra, next_is_intra);
 814                 if (encoding & FWHT_FRAME_UNENCODED)
 815                         encoding |= FWHT_CR_UNENCODED;
 816                 encoding &= ~FWHT_FRAME_UNENCODED;
 817         }
 818 
 819         if (frm->components_num == 4) {
 820                 rlco_max = rlco + size / 2 - 256;
 821                 encoding |= encode_plane(frm->alpha, ref_frm->alpha, &rlco,
 822                                          rlco_max, cf, height, width,
 823                                          stride, frm->luma_alpha_step,
 824                                          is_intra, next_is_intra);
 825                 if (encoding & FWHT_FRAME_UNENCODED)
 826                         encoding |= FWHT_ALPHA_UNENCODED;
 827                 encoding &= ~FWHT_FRAME_UNENCODED;
 828         }
 829 
 830         cf->size = (rlco - cf->rlc_data) * sizeof(*rlco);
 831         return encoding;
 832 }
 833 
 834 static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco,
 835                          u32 height, u32 width, const u8 *ref, u32 ref_stride,
 836                          unsigned int ref_step, u8 *dst,
 837                          unsigned int dst_stride, unsigned int dst_step,
 838                          bool uncompressed, const __be16 *end_of_rlco_buf)
 839 {
 840         unsigned int copies = 0;
 841         s16 copy[8 * 8];
 842         u16 stat;
 843         unsigned int i, j;
 844         bool is_intra = !ref;
 845 
 846         width = round_up(width, 8);
 847         height = round_up(height, 8);
 848 
 849         if (uncompressed) {
 850                 int i;
 851 
 852                 if (end_of_rlco_buf + 1 < *rlco + width * height / 2)
 853                         return false;
 854                 for (i = 0; i < height; i++) {
 855                         memcpy(dst, *rlco, width);
 856                         dst += dst_stride;
 857                         *rlco += width / 2;
 858                 }
 859                 return true;
 860         }
 861 
 862         /*
 863          * When decoding each macroblock the rlco pointer will be increased
 864          * by 65 * 2 bytes worst-case.
 865          * To avoid overflow the buffer has to be 65/64th of the actual raw
 866          * image size, just in case someone feeds it malicious data.
 867          */
 868         for (j = 0; j < height / 8; j++) {
 869                 for (i = 0; i < width / 8; i++) {
 870                         const u8 *refp = ref + j * 8 * ref_stride +
 871                                 i * 8 * ref_step;
 872                         u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step;
 873 
 874                         if (copies) {
 875                                 memcpy(cf->de_fwht, copy, sizeof(copy));
 876                                 if ((stat & PFRAME_BIT) && !is_intra)
 877                                         add_deltas(cf->de_fwht, refp,
 878                                                    ref_stride, ref_step);
 879                                 fill_decoder_block(dstp, cf->de_fwht,
 880                                                    dst_stride, dst_step);
 881                                 copies--;
 882                                 continue;
 883                         }
 884 
 885                         stat = derlc(rlco, cf->coeffs, end_of_rlco_buf);
 886                         if (stat & OVERFLOW_BIT)
 887                                 return false;
 888                         if ((stat & PFRAME_BIT) && !is_intra)
 889                                 dequantize_inter(cf->coeffs);
 890                         else
 891                                 dequantize_intra(cf->coeffs);
 892 
 893                         ifwht(cf->coeffs, cf->de_fwht,
 894                               ((stat & PFRAME_BIT) && !is_intra) ? 0 : 1);
 895 
 896                         copies = (stat & DUPS_MASK) >> 1;
 897                         if (copies)
 898                                 memcpy(copy, cf->de_fwht, sizeof(copy));
 899                         if ((stat & PFRAME_BIT) && !is_intra)
 900                                 add_deltas(cf->de_fwht, refp,
 901                                            ref_stride, ref_step);
 902                         fill_decoder_block(dstp, cf->de_fwht, dst_stride,
 903                                            dst_step);
 904                 }
 905         }
 906         return true;
 907 }
 908 
 909 bool fwht_decode_frame(struct fwht_cframe *cf, u32 hdr_flags,
 910                        unsigned int components_num, unsigned int width,
 911                        unsigned int height, const struct fwht_raw_frame *ref,
 912                        unsigned int ref_stride, unsigned int ref_chroma_stride,
 913                        struct fwht_raw_frame *dst, unsigned int dst_stride,
 914                        unsigned int dst_chroma_stride)
 915 {
 916         const __be16 *rlco = cf->rlc_data;
 917         const __be16 *end_of_rlco_buf = cf->rlc_data +
 918                         (cf->size / sizeof(*rlco)) - 1;
 919 
 920         if (!decode_plane(cf, &rlco, height, width, ref->luma, ref_stride,
 921                           ref->luma_alpha_step, dst->luma, dst_stride,
 922                           dst->luma_alpha_step,
 923                           hdr_flags & FWHT_FL_LUMA_IS_UNCOMPRESSED,
 924                           end_of_rlco_buf))
 925                 return false;
 926 
 927         if (components_num >= 3) {
 928                 u32 h = height;
 929                 u32 w = width;
 930 
 931                 if (!(hdr_flags & FWHT_FL_CHROMA_FULL_HEIGHT))
 932                         h /= 2;
 933                 if (!(hdr_flags & FWHT_FL_CHROMA_FULL_WIDTH))
 934                         w /= 2;
 935 
 936                 if (!decode_plane(cf, &rlco, h, w, ref->cb, ref_chroma_stride,
 937                                   ref->chroma_step, dst->cb, dst_chroma_stride,
 938                                   dst->chroma_step,
 939                                   hdr_flags & FWHT_FL_CB_IS_UNCOMPRESSED,
 940                                   end_of_rlco_buf))
 941                         return false;
 942                 if (!decode_plane(cf, &rlco, h, w, ref->cr, ref_chroma_stride,
 943                                   ref->chroma_step, dst->cr, dst_chroma_stride,
 944                                   dst->chroma_step,
 945                                   hdr_flags & FWHT_FL_CR_IS_UNCOMPRESSED,
 946                                   end_of_rlco_buf))
 947                         return false;
 948         }
 949 
 950         if (components_num == 4)
 951                 if (!decode_plane(cf, &rlco, height, width, ref->alpha, ref_stride,
 952                                   ref->luma_alpha_step, dst->alpha, dst_stride,
 953                                   dst->luma_alpha_step,
 954                                   hdr_flags & FWHT_FL_ALPHA_IS_UNCOMPRESSED,
 955                                   end_of_rlco_buf))
 956                         return false;
 957         return true;
 958 }

/* [<][>][^][v][top][bottom][index][help] */