backward_references.c

// Copyright 2012 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Author: Jyrki Alakuijala (jyrki@google.com)
//

#include <assert.h>
#include <math.h>

#include "./backward_references.h"
#include "./histogram.h"
#include "../dsp/lossless.h"
#include "../utils/color_cache.h"
#include "../utils/utils.h"

#define VALUES_IN_BYTE 256

#define HASH_MULTIPLIER (0xc6a4a7935bd1e995ULL)

#define MIN_BLOCK_SIZE 256  // minimum block size for backward references

#define MAX_ENTROPY (1e30f)

// 1M window (4M bytes) minus 120 special codes for short distances.
#define WINDOW_SIZE ((1 << 20) - 120)

// Bounds for the match length.
#define MIN_LENGTH 2
#define MAX_LENGTH 4096

// -----------------------------------------------------------------------------

static const uint8_t plane_to_code_lut[128] = {
  96,  73,  55,  39,  23,  13,   5,   1, 255, 255, 255, 255, 255, 255, 255, 255,
 101,  78,  58,  42,  26,  16,   8,   2,   0,   3,   9,  17,  27,  43,  59,  79,
 102,  86,  62,  46,  32,  20,  10,   6,   4,   7,  11,  21,  33,  47,  63,  87,
 105,  90,  70,  52,  37,  28,  18,  14,  12,  15,  19,  29,  38,  53,  71,  91,
 110,  99,  82,  66,  48,  35,  30,  24,  22,  25,  31,  36,  49,  67,  83, 100,
 115, 108,  94,  76,  64,  50,  44,  40,  34,  41,  45,  51,  65,  77,  95, 109,
 118, 113, 103,  92,  80,  68,  60,  56,  54,  57,  61,  69,  81,  93, 104, 114,
 119, 116, 111, 106,  97,  88,  84,  74,  72,  75,  85,  89,  98, 107, 112, 117
};
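
// Maps a backward-reference distance to a 'plane code': distances that point
// to a pixel within a small 2-D neighborhood of the current one get the short
// codes 1..120 from the LUT above; all other distances are shifted by 120.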
static int DistanceToPlaneCode(int xsize, int dist) {
  const int yoffset = dist / xsize;
  const int xoffset = dist - yoffset * xsize;
  if (xoffset <= 8 && yoffset < 8) {
    return plane_to_code_lut[yoffset * 16 + 8 - xoffset] + 1;
  } else if (xoffset > xsize - 8 && yoffset < 7) {
    return plane_to_code_lut[(yoffset + 1) * 16 + 8 + (xsize - xoffset)] + 1;
  }
  return dist + 120;
}
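
// Returns how many of the first 'max_limit' pixels of 'array1' and 'array2'
// are identical.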
static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
                                       const uint32_t* const array2,
                                       const int max_limit) {
  int match_len = 0;
  while (match_len < max_limit && array1[match_len] == array2[match_len]) {
    ++match_len;
  }
  return match_len;
}

// -----------------------------------------------------------------------------
// VP8LBackwardRefs

struct PixOrCopyBlock {
  PixOrCopyBlock* next_;   // next block (or NULL)
  PixOrCopy* start_;       // data start
  int size_;               // currently used size
};

static void ClearBackwardRefs(VP8LBackwardRefs* const refs) {
  assert(refs != NULL);
  if (refs->tail_ != NULL) {
    *refs->tail_ = refs->free_blocks_;  // recycle all blocks at once
  }
  refs->free_blocks_ = refs->refs_;
  refs->tail_ = &refs->refs_;
  refs->last_block_ = NULL;
  refs->refs_ = NULL;
}

void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
  assert(refs != NULL);
  ClearBackwardRefs(refs);
  while (refs->free_blocks_ != NULL) {
    PixOrCopyBlock* const next = refs->free_blocks_->next_;
    WebPSafeFree(refs->free_blocks_);
    refs->free_blocks_ = next;
  }
}

void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) {
  assert(refs != NULL);
  memset(refs, 0, sizeof(*refs));
  refs->tail_ = &refs->refs_;
  refs->block_size_ =
      (block_size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : block_size;
}

VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) {
  VP8LRefsCursor c;
  c.cur_block_ = refs->refs_;
  if (refs->refs_ != NULL) {
    c.cur_pos = c.cur_block_->start_;
    c.last_pos_ = c.cur_pos + c.cur_block_->size_;
  } else {
    c.cur_pos = NULL;
    c.last_pos_ = NULL;
  }
  return c;
}

void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) {
  PixOrCopyBlock* const b = c->cur_block_->next_;
  c->cur_pos = (b == NULL) ? NULL : b->start_;
  c->last_pos_ = (b == NULL) ? NULL : b->start_ + b->size_;
  c->cur_block_ = b;
}

// Create a new block, either from the free list or allocated
static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) {
  PixOrCopyBlock* b = refs->free_blocks_;
  if (b == NULL) {   // allocate new memory chunk
    const size_t total_size =
        sizeof(*b) + refs->block_size_ * sizeof(*b->start_);
    b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size);
    if (b == NULL) {
      refs->error_ |= 1;
      return NULL;
    }
    b->start_ = (PixOrCopy*)((uint8_t*)b + sizeof(*b));  // not always aligned
  } else {  // recycle from free-list
    refs->free_blocks_ = b->next_;
  }
  *refs->tail_ = b;
  refs->tail_ = &b->next_;
  refs->last_block_ = b;
  b->next_ = NULL;
  b->size_ = 0;
  return b;
}

static WEBP_INLINE void BackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
                                              const PixOrCopy v) {
  PixOrCopyBlock* b = refs->last_block_;
  if (b == NULL || b->size_ == refs->block_size_) {
    b = BackwardRefsNewBlock(refs);
    if (b == NULL) return;   // refs->error_ is set
  }
  b->start_[b->size_++] = v;
}

int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src,
                         VP8LBackwardRefs* const dst) {
  const PixOrCopyBlock* b = src->refs_;
  ClearBackwardRefs(dst);
  assert(src->block_size_ == dst->block_size_);
  while (b != NULL) {
    PixOrCopyBlock* const new_b = BackwardRefsNewBlock(dst);
    if (new_b == NULL) return 0;   // dst->error_ is set
    memcpy(new_b->start_, b->start_, b->size_ * sizeof(*b->start_));
    new_b->size_ = b->size_;
    b = b->next_;
  }
  return 1;
}

// -----------------------------------------------------------------------------
// Hash chains

// initialize as empty
static void HashChainInit(VP8LHashChain* const p) {
  int i;
  assert(p != NULL);
  for (i = 0; i < p->size_; ++i) {
    p->chain_[i] = -1;
  }
  for (i = 0; i < HASH_SIZE; ++i) {
    p->hash_to_first_index_[i] = -1;
  }
}

int VP8LHashChainInit(VP8LHashChain* const p, int size) {
  assert(p->size_ == 0);
  assert(p->chain_ == NULL);
  assert(size > 0);
  p->chain_ = (int*)WebPSafeMalloc(size, sizeof(*p->chain_));
  if (p->chain_ == NULL) return 0;
  p->size_ = size;
  HashChainInit(p);
  return 1;
}

void VP8LHashChainClear(VP8LHashChain* const p) {
  assert(p != NULL);
  WebPSafeFree(p->chain_);
  p->size_ = 0;
  p->chain_ = NULL;
}

// -----------------------------------------------------------------------------
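
// Computes a hash of the pixel pair argb[0], argb[1].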
static WEBP_INLINE uint64_t GetPixPairHash64(const uint32_t* const argb) {
  uint64_t key = ((uint64_t)argb[1] << 32) | argb[0];
  key = (key * HASH_MULTIPLIER) >> (64 - HASH_BITS);
  return key;
}

// Insertion of two pixels at a time.
static void HashChainInsert(VP8LHashChain* const p,
                            const uint32_t* const argb, int pos) {
  const uint64_t hash_code = GetPixPairHash64(argb);
  p->chain_[pos] = p->hash_to_first_index_[hash_code];
  p->hash_to_first_index_[hash_code] = pos;
}
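
// Derives the search window size and the iteration budget used by
// HashChainFindCopy from the compression quality.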
static void GetParamsForHashChainFindCopy(int quality, int xsize,
                                          int cache_bits, int* window_size,
                                          int* iter_pos, int* iter_limit) {
  const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4);
  const int iter_neg = -iter_mult * (quality >> 1);
  // Limit the backward-ref window size for lower qualities.
  const int max_window_size = (quality > 50) ? WINDOW_SIZE
                            : (quality > 25) ? (xsize << 8)
                            : (xsize << 4);
  assert(xsize > 0);
  *window_size = (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE
               : max_window_size;
  *iter_pos = 8 + (quality >> 3);
  // For lower entropy images, the rigorous search loop in HashChainFindCopy
  // can be relaxed.
  *iter_limit = (cache_bits > 0) ? iter_neg : iter_neg / 2;
}
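
// Walks the hash chain to find the best match (longest run, with a bonus for
// close 2-D offsets) for the pixels starting at 'base_position'. Returns 1 if
// a match of at least MIN_LENGTH pixels was found.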
static int HashChainFindCopy(const VP8LHashChain* const p,
                             int base_position, int xsize_signed,
                             const uint32_t* const argb, int max_len,
                             int window_size, int iter_pos, int iter_limit,
                             int* const distance_ptr,
                             int* const length_ptr) {
  const uint32_t* const argb_start = argb + base_position;
  uint64_t best_val = 0;
  uint32_t best_length = 1;
  uint32_t best_distance = 0;
  const uint32_t xsize = (uint32_t)xsize_signed;
  const int min_pos =
      (base_position > window_size) ? base_position - window_size : 0;
  int pos;
  assert(xsize > 0);
  if (max_len > MAX_LENGTH) {
    max_len = MAX_LENGTH;
  }
  for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)];
       pos >= min_pos;
       pos = p->chain_[pos]) {
    uint64_t val;
    uint32_t curr_length;
    uint32_t distance;
    const uint32_t* const ptr1 = (argb + pos + best_length - 1);
    const uint32_t* const ptr2 = (argb_start + best_length - 1);
    if (iter_pos < 0) {
      if (iter_pos < iter_limit || best_val >= 0xff0000) {
        break;
      }
    }
    --iter_pos;
    // Before 'expensive' linear match, check if the two arrays match at the
    // current best length index and also for the succeeding elements.
    if (ptr1[0] != ptr2[0] || ptr1[1] != ptr2[1]) continue;
    curr_length = FindMatchLength(argb + pos, argb_start, max_len);
    if (curr_length < best_length) continue;
    distance = (uint32_t)(base_position - pos);
    val = curr_length << 16;
    // Favoring 2d locality here gives savings for certain images.
    if (distance < 9 * xsize) {
      const uint32_t y = distance / xsize;
      uint32_t x = distance % xsize;
      if (x > (xsize >> 1)) {
        x = xsize - x;
      }
      if (x <= 7) {
        val += 9 * 9 + 9 * 9;
        val -= y * y + x * x;
      }
    }
    if (best_val < val) {
      best_val = val;
      best_length = curr_length;
      best_distance = distance;
      if (curr_length >= (uint32_t)max_len) {
        break;
      }
      if ((best_distance == 1 || distance == xsize) &&
          best_length >= 128) {
        break;
      }
    }
  }
  *distance_ptr = (int)best_distance;
  *length_ptr = best_length;
  return (best_length >= MIN_LENGTH);
}

static WEBP_INLINE void PushBackCopy(VP8LBackwardRefs* const refs, int length) {
  while (length >= MAX_LENGTH) {
    BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, MAX_LENGTH));
    length -= MAX_LENGTH;
  }
  if (length > 0) {
    BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, length));
  }
}
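
// Produces backward references using run-length encoding only: every run of
// identical pixels is emitted as a distance-1 copy.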
static int BackwardReferencesRle(int xsize, int ysize,
                                 const uint32_t* const argb,
                                 VP8LBackwardRefs* const refs) {
  const int pix_count = xsize * ysize;
  int match_len = 0;
  int i;
  ClearBackwardRefs(refs);
  PushBackCopy(refs, match_len);    // i=0 case
  BackwardRefsCursorAdd(refs, PixOrCopyCreateLiteral(argb[0]));
  for (i = 1; i < pix_count; ++i) {
    if (argb[i] == argb[i - 1]) {
      ++match_len;
    } else {
      PushBackCopy(refs, match_len);
      match_len = 0;
      BackwardRefsCursorAdd(refs, PixOrCopyCreateLiteral(argb[i]));
    }
  }
  PushBackCopy(refs, match_len);
  return !refs->error_;
}
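
// Computes LZ77-style backward references with a greedy hash-chain search and
// a one-pixel lookahead, emitting copies, color cache indices or literals.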
static int BackwardReferencesHashChain(int xsize, int ysize,
                                       const uint32_t* const argb,
                                       int cache_bits, int quality,
                                       VP8LHashChain* const hash_chain,
                                       VP8LBackwardRefs* const refs) {
  int i;
  int ok = 0;
  int cc_init = 0;
  const int use_color_cache = (cache_bits > 0);
  const int pix_count = xsize * ysize;
  VP8LColorCache hashers;
  int window_size = WINDOW_SIZE;
  int iter_pos = 1;
  int iter_limit = -1;

  if (use_color_cache) {
    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    if (!cc_init) goto Error;
  }

  ClearBackwardRefs(refs);
  GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
                                &window_size, &iter_pos, &iter_limit);
  HashChainInit(hash_chain);
  for (i = 0; i < pix_count; ) {
    // Alternative#1: Code the pixels starting at 'i' using backward reference.
    int offset = 0;
    int len = 0;
    if (i < pix_count - 1) {  // FindCopy(i,..) reads pixels at [i] and [i + 1].
      int max_len = pix_count - i;
      HashChainFindCopy(hash_chain, i, xsize, argb, max_len,
                        window_size, iter_pos, iter_limit,
                        &offset, &len);
    }
    if (len >= MIN_LENGTH) {
      // Alternative#2: Insert the pixel at 'i' as literal, and code the
      // pixels starting at 'i + 1' using backward reference.
      int offset2 = 0;
      int len2 = 0;
      int k;
      HashChainInsert(hash_chain, &argb[i], i);
      if (i < pix_count - 2) {  // FindCopy(i+1,..) reads [i + 1] and [i + 2].
        int max_len = pix_count - (i + 1);
        HashChainFindCopy(hash_chain, i + 1, xsize, argb, max_len,
                          window_size, iter_pos, iter_limit,
                          &offset2, &len2);
        if (len2 > len + 1) {
          const uint32_t pixel = argb[i];
          // Alternative#2 is a better match. So push pixel at 'i' as literal.
          PixOrCopy v;
          if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
            const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
            v = PixOrCopyCreateCacheIdx(ix);
          } else {
            if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
            v = PixOrCopyCreateLiteral(pixel);
          }
          BackwardRefsCursorAdd(refs, v);
          i++;  // Backward reference to be done for next pixel.
          len = len2;
          offset = offset2;
        }
      }
      if (len >= MAX_LENGTH) {
        len = MAX_LENGTH - 1;
      }
      BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
      if (use_color_cache) {
        for (k = 0; k < len; ++k) {
          VP8LColorCacheInsert(&hashers, argb[i + k]);
        }
      }
      // Add to the hash_chain (but cannot add the last pixel).
      {
        const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
        for (k = 1; k < last; ++k) {
          HashChainInsert(hash_chain, &argb[i + k], i + k);
        }
      }
      i += len;
    } else {
      const uint32_t pixel = argb[i];
      PixOrCopy v;
      if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
        // push pixel as a PixOrCopyCreateCacheIdx pixel
        const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
        v = PixOrCopyCreateCacheIdx(ix);
      } else {
        if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
        v = PixOrCopyCreateLiteral(pixel);
      }
      BackwardRefsCursorAdd(refs, v);
      if (i + 1 < pix_count) {
        HashChainInsert(hash_chain, &argb[i], i);
      }
      ++i;
    }
  }
  ok = !refs->error_;
 Error:
  if (cc_init) VP8LColorCacheClear(&hashers);
  return ok;
}

// -----------------------------------------------------------------------------

typedef struct {
  double alpha_[VALUES_IN_BYTE];
  double red_[VALUES_IN_BYTE];
  double literal_[PIX_OR_COPY_CODES_MAX];
  double blue_[VALUES_IN_BYTE];
  double distance_[NUM_DISTANCE_CODES];
} CostModel;

static int BackwardReferencesTraceBackwards(
    int xsize, int ysize, int recursive_cost_model,
    const uint32_t* const argb, int quality, int cache_bits,
    VP8LHashChain* const hash_chain,
    VP8LBackwardRefs* const refs);
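
// Converts a table of symbol population counts to per-symbol bit-cost
// estimates: output[i] = log2(sum) - log2(count[i]), i.e. -log2(P(symbol i)).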
static void ConvertPopulationCountTableToBitEstimates(
    int num_symbols, const uint32_t population_counts[], double output[]) {
  uint32_t sum = 0;
  int nonzeros = 0;
  int i;
  for (i = 0; i < num_symbols; ++i) {
    sum += population_counts[i];
    if (population_counts[i] > 0) {
      ++nonzeros;
    }
  }
  if (nonzeros <= 1) {
    memset(output, 0, num_symbols * sizeof(*output));
  } else {
    const double logsum = VP8LFastLog2(sum);
    for (i = 0; i < num_symbols; ++i) {
      output[i] = logsum - VP8LFastLog2(population_counts[i]);
    }
  }
}

static int CostModelBuild(CostModel* const m, int xsize, int ysize,
                          int recursion_level, const uint32_t* const argb,
                          int quality, int cache_bits,
                          VP8LHashChain* const hash_chain,
                          VP8LBackwardRefs* const refs) {
  int ok = 0;
  VP8LHistogram* histo = NULL;

  ClearBackwardRefs(refs);
  if (recursion_level > 0) {
    if (!BackwardReferencesTraceBackwards(xsize, ysize, recursion_level - 1,
                                          argb, quality, cache_bits, hash_chain,
                                          refs)) {
      goto Error;
    }
  } else {
    if (!BackwardReferencesHashChain(xsize, ysize, argb, cache_bits, quality,
                                     hash_chain, refs)) {
      goto Error;
    }
  }
  histo = VP8LAllocateHistogram(cache_bits);
  if (histo == NULL) goto Error;

  VP8LHistogramCreate(histo, refs, cache_bits);

  ConvertPopulationCountTableToBitEstimates(
      VP8LHistogramNumCodes(histo->palette_code_bits_),
      histo->literal_, m->literal_);
  ConvertPopulationCountTableToBitEstimates(
      VALUES_IN_BYTE, histo->red_, m->red_);
  ConvertPopulationCountTableToBitEstimates(
      VALUES_IN_BYTE, histo->blue_, m->blue_);
  ConvertPopulationCountTableToBitEstimates(
      VALUES_IN_BYTE, histo->alpha_, m->alpha_);
  ConvertPopulationCountTableToBitEstimates(
      NUM_DISTANCE_CODES, histo->distance_, m->distance_);
  ok = 1;

 Error:
  VP8LFreeHistogram(histo);
  return ok;
}

static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) {
  return m->alpha_[v >> 24] +
         m->red_[(v >> 16) & 0xff] +
         m->literal_[(v >> 8) & 0xff] +
         m->blue_[v & 0xff];
}

static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
  const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
  return m->literal_[literal_idx];
}

static WEBP_INLINE double GetLengthCost(const CostModel* const m,
                                        uint32_t length) {
  int code, extra_bits;
  VP8LPrefixEncodeBits(length, &code, &extra_bits);
  return m->literal_[VALUES_IN_BYTE + code] + extra_bits;
}

static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
                                          uint32_t distance) {
  int code, extra_bits;
  VP8LPrefixEncodeBits(distance, &code, &extra_bits);
  return m->distance_[code] + extra_bits;
}
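
// Cost-model pass of the trace-backwards search: for every pixel, evaluates
// the cheapest way of reaching it (literal, cache index, or a copy found by
// HashChainFindCopy) and stores the length of the winning step in
// 'dist_array'. TraceBackwards() later follows these lengths to recover the
// chosen path.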
static int BackwardReferencesHashChainDistanceOnly(
    int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb,
    int quality, int cache_bits, VP8LHashChain* const hash_chain,
    VP8LBackwardRefs* const refs, uint32_t* const dist_array) {
  int i;
  int ok = 0;
  int cc_init = 0;
  const int pix_count = xsize * ysize;
  const int use_color_cache = (cache_bits > 0);
  float* const cost =
      (float*)WebPSafeMalloc(pix_count, sizeof(*cost));
  CostModel* cost_model = (CostModel*)WebPSafeMalloc(1ULL, sizeof(*cost_model));
  VP8LColorCache hashers;
  const double mul0 = (recursive_cost_model != 0) ? 1.0 : 0.68;
  const double mul1 = (recursive_cost_model != 0) ? 1.0 : 0.82;
  const int min_distance_code = 2;  // TODO(vikasa): tune as function of quality
  int window_size = WINDOW_SIZE;
  int iter_pos = 1;
  int iter_limit = -1;

  if (cost == NULL || cost_model == NULL) goto Error;

  if (use_color_cache) {
    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    if (!cc_init) goto Error;
  }

  if (!CostModelBuild(cost_model, xsize, ysize, recursive_cost_model, argb,
                      quality, cache_bits, hash_chain, refs)) {
    goto Error;
  }

  for (i = 0; i < pix_count; ++i) cost[i] = 1e38f;

  // We loop one pixel at a time, but store all currently best points to
  // non-processed locations from this point.
  dist_array[0] = 0;
  GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
                                &window_size, &iter_pos, &iter_limit);
  HashChainInit(hash_chain);
  for (i = 0; i < pix_count; ++i) {
    double prev_cost = 0.0;
    int shortmax;
    if (i > 0) {
      prev_cost = cost[i - 1];
    }
    for (shortmax = 0; shortmax < 2; ++shortmax) {
      int offset = 0;
      int len = 0;
      if (i < pix_count - 1) {  // FindCopy reads pixels at [i] and [i + 1].
        int max_len = shortmax ? 2 : pix_count - i;
        HashChainFindCopy(hash_chain, i, xsize, argb, max_len,
                          window_size, iter_pos, iter_limit,
                          &offset, &len);
      }
      if (len >= MIN_LENGTH) {
        const int code = DistanceToPlaneCode(xsize, offset);
        const double distance_cost =
            prev_cost + GetDistanceCost(cost_model, code);
        int k;
        for (k = 1; k < len; ++k) {
          const double cost_val = distance_cost + GetLengthCost(cost_model, k);
          if (cost[i + k] > cost_val) {
            cost[i + k] = (float)cost_val;
            dist_array[i + k] = k + 1;
          }
        }
        // This if is for speedup only. It roughly doubles the speed, and
        // makes compression worse by .1 %.
        if (len >= 128 && code <= min_distance_code) {
          // Long copy for short distances, let's skip the middle
          // lookups for better copies.
          // 1) insert the hashes.
          if (use_color_cache) {
            for (k = 0; k < len; ++k) {
              VP8LColorCacheInsert(&hashers, argb[i + k]);
            }
          }
          // 2) Add to the hash_chain (but cannot add the last pixel)
          {
            const int last = (len + i < pix_count - 1) ? len + i
                                                       : pix_count - 1;
            for (k = i; k < last; ++k) {
              HashChainInsert(hash_chain, &argb[k], k);
            }
          }
          // 3) jump.
          i += len - 1;  // for loop does ++i, thus -1 here.
          goto next_symbol;
        }
      }
    }
    if (i < pix_count - 1) {
      HashChainInsert(hash_chain, &argb[i], i);
    }
    {
      // inserting a literal pixel
      double cost_val = prev_cost;
      if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
        const int ix = VP8LColorCacheGetIndex(&hashers, argb[i]);
        cost_val += GetCacheCost(cost_model, ix) * mul0;
      } else {
        if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
        cost_val += GetLiteralCost(cost_model, argb[i]) * mul1;
      }
      if (cost[i] > cost_val) {
        cost[i] = (float)cost_val;
        dist_array[i] = 1;  // only one is inserted.
      }
    }
 next_symbol: ;
  }
  // Last pixel still to do, it can only be a single step if not reached
  // through cheaper means already.
  ok = !refs->error_;
 Error:
  if (cc_init) VP8LColorCacheClear(&hashers);
  WebPSafeFree(cost_model);
  WebPSafeFree(cost);
  return ok;
}

// We pack the path at the end of *dist_array and return
// a pointer to this part of the array. Example:
// dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232]
static void TraceBackwards(uint32_t* const dist_array,
                           int dist_array_size,
                           uint32_t** const chosen_path,
                           int* const chosen_path_size) {
  uint32_t* path = dist_array + dist_array_size;
  uint32_t* cur = dist_array + dist_array_size - 1;
  while (cur >= dist_array) {
    const int k = *cur;
    --path;
    *path = k;
    cur -= k;
  }
  *chosen_path = path;
  *chosen_path_size = (int)(dist_array + dist_array_size - path);
}
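
// Emits the final backward references by re-running the hash-chain search,
// this time constrained to the step lengths selected in 'chosen_path'.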
static int BackwardReferencesHashChainFollowChosenPath(
    int xsize, int ysize, const uint32_t* const argb,
    int quality, int cache_bits,
    const uint32_t* const chosen_path, int chosen_path_size,
    VP8LHashChain* const hash_chain,
    VP8LBackwardRefs* const refs) {
  const int pix_count = xsize * ysize;
  const int use_color_cache = (cache_bits > 0);
  int size = 0;
  int i = 0;
  int k;
  int ix;
  int ok = 0;
  int cc_init = 0;
  int window_size = WINDOW_SIZE;
  int iter_pos = 1;
  int iter_limit = -1;
  VP8LColorCache hashers;

  if (use_color_cache) {
    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    if (!cc_init) goto Error;
  }

  ClearBackwardRefs(refs);
  GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
                                &window_size, &iter_pos, &iter_limit);
  HashChainInit(hash_chain);
  for (ix = 0; ix < chosen_path_size; ++ix, ++size) {
    int offset = 0;
    int len = 0;
    int max_len = chosen_path[ix];
    if (max_len != 1) {
      HashChainFindCopy(hash_chain, i, xsize, argb, max_len,
                        window_size, iter_pos, iter_limit,
                        &offset, &len);
      assert(len == max_len);
      BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
      if (use_color_cache) {
        for (k = 0; k < len; ++k) {
          VP8LColorCacheInsert(&hashers, argb[i + k]);
        }
      }
      {
        const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
        for (k = 0; k < last; ++k) {
          HashChainInsert(hash_chain, &argb[i + k], i + k);
        }
      }
      i += len;
    } else {
      PixOrCopy v;
      if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
        // push pixel as a color cache index
        const int idx = VP8LColorCacheGetIndex(&hashers, argb[i]);
        v = PixOrCopyCreateCacheIdx(idx);
      } else {
        if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
        v = PixOrCopyCreateLiteral(argb[i]);
      }
      BackwardRefsCursorAdd(refs, v);
      if (i + 1 < pix_count) {
        HashChainInsert(hash_chain, &argb[i], i);
      }
      ++i;
    }
  }
  ok = !refs->error_;
 Error:
  if (cc_init) VP8LColorCacheClear(&hashers);
  return ok;
}

// Returns 1 on success.
static int BackwardReferencesTraceBackwards(int xsize, int ysize,
                                            int recursive_cost_model,
                                            const uint32_t* const argb,
                                            int quality, int cache_bits,
                                            VP8LHashChain* const hash_chain,
                                            VP8LBackwardRefs* const refs) {
  int ok = 0;
  const int dist_array_size = xsize * ysize;
  uint32_t* chosen_path = NULL;
  int chosen_path_size = 0;
  uint32_t* dist_array =
      (uint32_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array));

  if (dist_array == NULL) goto Error;

  if (!BackwardReferencesHashChainDistanceOnly(
      xsize, ysize, recursive_cost_model, argb, quality, cache_bits, hash_chain,
      refs, dist_array)) {
    goto Error;
  }
  TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size);
  if (!BackwardReferencesHashChainFollowChosenPath(
      xsize, ysize, argb, quality, cache_bits, chosen_path, chosen_path_size,
      hash_chain, refs)) {
    goto Error;
  }
  ok = 1;
 Error:
  WebPSafeFree(dist_array);
  return ok;
}
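
// Rewrites the raw distances stored in 'refs' as plane codes, so that copies
// with small 2-D offsets get the cheaper codes.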
static void BackwardReferences2DLocality(int xsize,
                                         const VP8LBackwardRefs* const refs) {
  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
  while (VP8LRefsCursorOk(&c)) {
    if (PixOrCopyIsCopy(c.cur_pos)) {
      const int dist = c.cur_pos->argb_or_distance;
      const int transformed_dist = DistanceToPlaneCode(xsize, dist);
      c.cur_pos->argb_or_distance = transformed_dist;
    }
    VP8LRefsCursorNext(&c);
  }
}
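
// Main entry point: builds both LZ77 and RLE candidate references, keeps the
// one with the lower estimated bit cost, optionally refines the LZ77 result
// with the trace-backwards search, and finally converts distances to plane
// codes when 2-D locality is requested.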
VP8LBackwardRefs* VP8LGetBackwardReferences(
    int width, int height, const uint32_t* const argb, int quality,
    int cache_bits, int use_2d_locality, VP8LHashChain* const hash_chain,
    VP8LBackwardRefs refs_array[2]) {
  int lz77_is_useful;
  const int num_pix = width * height;
  VP8LBackwardRefs* best = NULL;
  VP8LBackwardRefs* const refs_lz77 = &refs_array[0];
  VP8LBackwardRefs* const refs_rle = &refs_array[1];

  if (!BackwardReferencesHashChain(width, height, argb, cache_bits, quality,
                                   hash_chain, refs_lz77)) {
    return NULL;
  }
  if (!BackwardReferencesRle(width, height, argb, refs_rle)) {
    return NULL;
  }

  {
    double bit_cost_lz77, bit_cost_rle;
    VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits);
    if (histo == NULL) return NULL;
    // Evaluate LZ77 coding.
    VP8LHistogramCreate(histo, refs_lz77, cache_bits);
    bit_cost_lz77 = VP8LHistogramEstimateBits(histo);
    // Evaluate RLE coding.
    VP8LHistogramCreate(histo, refs_rle, cache_bits);
    bit_cost_rle = VP8LHistogramEstimateBits(histo);
    // Decide if LZ77 is useful.
    lz77_is_useful = (bit_cost_lz77 < bit_cost_rle);
    VP8LFreeHistogram(histo);
  }

  // Choose appropriate backward reference.
  if (lz77_is_useful) {
    // TraceBackwards is costly. Don't execute it at lower quality.
    const int try_lz77_trace_backwards = (quality >= 25);
    best = refs_lz77;   // default guess: lz77 is better
    if (try_lz77_trace_backwards) {
      // Set recursion level for large images using a color cache.
      const int recursion_level =
          (num_pix < 320 * 200) && (cache_bits > 0) ? 1 : 0;
      VP8LBackwardRefs* const refs_trace = &refs_array[1];
      ClearBackwardRefs(refs_trace);
      if (BackwardReferencesTraceBackwards(width, height, recursion_level, argb,
                                           quality, cache_bits, hash_chain,
                                           refs_trace)) {
        best = refs_trace;
      }
    }
  } else {
    best = refs_rle;
  }

  if (use_2d_locality) BackwardReferences2DLocality(width, best);

  return best;
}

// Returns entropy for the given cache bits.
static double ComputeCacheEntropy(const uint32_t* const argb,
                                  int xsize, int ysize,
                                  const VP8LBackwardRefs* const refs,
                                  int cache_bits) {
  int pixel_index = 0;
  uint32_t k;
  const int use_color_cache = (cache_bits > 0);
  int cc_init = 0;
  double entropy = MAX_ENTROPY;
  const double kSmallPenaltyForLargeCache = 4.0;
  VP8LColorCache hashers;
  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
  VP8LHistogram* histo = VP8LAllocateHistogram(cache_bits);
  if (histo == NULL) goto Error;

  if (use_color_cache) {
    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    if (!cc_init) goto Error;
  }

  while (VP8LRefsCursorOk(&c)) {
    const PixOrCopy* const v = c.cur_pos;
    if (PixOrCopyIsLiteral(v)) {
      if (use_color_cache &&
          VP8LColorCacheContains(&hashers, argb[pixel_index])) {
        // push pixel as a cache index
        const int ix = VP8LColorCacheGetIndex(&hashers, argb[pixel_index]);
        const PixOrCopy token = PixOrCopyCreateCacheIdx(ix);
        VP8LHistogramAddSinglePixOrCopy(histo, &token);
      } else {
        VP8LHistogramAddSinglePixOrCopy(histo, v);
      }
    } else {
      VP8LHistogramAddSinglePixOrCopy(histo, v);
    }
    if (use_color_cache) {
      for (k = 0; k < PixOrCopyLength(v); ++k) {
        VP8LColorCacheInsert(&hashers, argb[pixel_index + k]);
      }
    }
    pixel_index += PixOrCopyLength(v);
    VP8LRefsCursorNext(&c);
  }
  assert(pixel_index == xsize * ysize);
  (void)xsize;  // xsize is not used in non-debug compilations otherwise.
  (void)ysize;  // ysize is not used in non-debug compilations otherwise.
  entropy = VP8LHistogramEstimateBits(histo) +
      kSmallPenaltyForLargeCache * cache_bits;
 Error:
  if (cc_init) VP8LColorCacheClear(&hashers);
  VP8LFreeHistogram(histo);
  return entropy;
}

// *best_cache_bits will contain how many bits are to be used for a color cache.
// Returns 0 in case of memory error.
int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb,
                                      int xsize, int ysize, int quality,
                                      VP8LHashChain* const hash_chain,
                                      VP8LBackwardRefs* const refs,
                                      int* const best_cache_bits) {
  int eval_low = 1;
  int eval_high = 1;
  double entropy_low = MAX_ENTROPY;
  double entropy_high = MAX_ENTROPY;
  int cache_bits_low = 0;
  int cache_bits_high = MAX_COLOR_CACHE_BITS;

  if (!BackwardReferencesHashChain(xsize, ysize, argb, 0, quality, hash_chain,
                                   refs)) {
    return 0;
  }
  // Do a binary search to find the optimal entropy for cache_bits.
  while (cache_bits_high - cache_bits_low > 1) {
    if (eval_low) {
      entropy_low =
          ComputeCacheEntropy(argb, xsize, ysize, refs, cache_bits_low);
      eval_low = 0;
    }
    if (eval_high) {
      entropy_high =
          ComputeCacheEntropy(argb, xsize, ysize, refs, cache_bits_high);
      eval_high = 0;
    }
    if (entropy_high < entropy_low) {
      *best_cache_bits = cache_bits_high;
      cache_bits_low = (cache_bits_low + cache_bits_high) / 2;
      eval_low = 1;
    } else {
      *best_cache_bits = cache_bits_low;
      cache_bits_high = (cache_bits_low + cache_bits_high) / 2;
      eval_high = 1;
    }
  }
  return 1;
}