jchuff.c 34 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096
  1. /*
  2. * jchuff.c
  3. *
  4. * This file was part of the Independent JPEG Group's software:
  5. * Copyright (C) 1991-1997, Thomas G. Lane.
  6. * libjpeg-turbo Modifications:
  7. * Copyright (C) 2009-2011, 2014-2016, 2018-2019, D. R. Commander.
  8. * Copyright (C) 2015, Matthieu Darbois.
  9. * For conditions of distribution and use, see the accompanying README.ijg
  10. * file.
  11. *
  12. * This file contains Huffman entropy encoding routines.
  13. *
  14. * Much of the complexity here has to do with supporting output suspension.
  15. * If the data destination module demands suspension, we want to be able to
  16. * back up to the start of the current MCU. To do this, we copy state
  17. * variables into local working storage, and update them back to the
  18. * permanent JPEG objects only upon successful completion of an MCU.
  19. *
  20. * NOTE: All referenced figures are from
  21. * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.
  22. */
  23. #define JPEG_INTERNALS
  24. #include "jinclude.h"
  25. #include "jpeglib.h"
  26. #include "jsimd.h"
  27. #include "jconfigint.h"
  28. #include <limits.h>
  29. /*
  30. * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
  31. * used for bit counting rather than the lookup table. This will reduce the
  32. * memory footprint by 64k, which is important for some mobile applications
  33. * that create many isolated instances of libjpeg-turbo (web browsers, for
  34. * instance.) This may improve performance on some mobile platforms as well.
  35. * This feature is enabled by default only on ARM processors, because some x86
  36. * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
  37. * shown to have a significant performance impact even on the x86 chips that
  38. * have a fast implementation of it. When building for ARMv6, you can
  39. * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
  40. * flags (this defines __thumb__).
  41. */
  42. /* NOTE: Both GCC and Clang define __GNUC__ */
  43. #if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
  44. #if !defined(__thumb__) || defined(__thumb2__)
  45. #define USE_CLZ_INTRINSIC
  46. #endif
  47. #endif
  48. #ifdef USE_CLZ_INTRINSIC
  49. #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
  50. #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
  51. #else
  52. #include "jpeg_nbits_table.h"
  53. #define JPEG_NBITS(x) (jpeg_nbits_table[x])
  54. #define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x)
  55. #endif
  56. /* Expanded entropy encoder object for Huffman encoding.
  57. *
  58. * The savable_state subrecord contains fields that change within an MCU,
  59. * but must not be updated permanently until we complete the MCU.
  60. */
  61. typedef struct {
  62. size_t put_buffer; /* current bit-accumulation buffer */
  63. int put_bits; /* # of bits now in it */
  64. int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
  65. } savable_state;
  66. /* This macro is to work around compilers with missing or broken
  67. * structure assignment. You'll need to fix this code if you have
  68. * such a compiler and you change MAX_COMPS_IN_SCAN.
  69. */
  70. #ifndef NO_STRUCT_ASSIGN
  71. #define ASSIGN_STATE(dest, src) ((dest) = (src))
  72. #else
  73. #if MAX_COMPS_IN_SCAN == 4
  74. #define ASSIGN_STATE(dest, src) \
  75. ((dest).put_buffer = (src).put_buffer, \
  76. (dest).put_bits = (src).put_bits, \
  77. (dest).last_dc_val[0] = (src).last_dc_val[0], \
  78. (dest).last_dc_val[1] = (src).last_dc_val[1], \
  79. (dest).last_dc_val[2] = (src).last_dc_val[2], \
  80. (dest).last_dc_val[3] = (src).last_dc_val[3])
  81. #endif
  82. #endif
  83. typedef struct {
  84. struct jpeg_entropy_encoder pub; /* public fields */
  85. savable_state saved; /* Bit buffer & DC state at start of MCU */
  86. /* These fields are NOT loaded into local working state. */
  87. unsigned int restarts_to_go; /* MCUs left in this restart interval */
  88. int next_restart_num; /* next restart number to write (0-7) */
  89. /* Pointers to derived tables (these workspaces have image lifespan) */
  90. c_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS];
  91. c_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS];
  92. #ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */
  93. long *dc_count_ptrs[NUM_HUFF_TBLS];
  94. long *ac_count_ptrs[NUM_HUFF_TBLS];
  95. #endif
  96. int simd;
  97. } huff_entropy_encoder;
  98. typedef huff_entropy_encoder *huff_entropy_ptr;
  99. /* Working state while writing an MCU.
  100. * This struct contains all the fields that are needed by subroutines.
  101. */
  102. typedef struct {
  103. JOCTET *next_output_byte; /* => next byte to write in buffer */
  104. size_t free_in_buffer; /* # of byte spaces remaining in buffer */
  105. savable_state cur; /* Current bit buffer & DC state */
  106. j_compress_ptr cinfo; /* dump_buffer needs access to this */
  107. } working_state;
  108. /* Forward declarations */
  109. METHODDEF(boolean) encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data);
  110. METHODDEF(void) finish_pass_huff(j_compress_ptr cinfo);
  111. #ifdef ENTROPY_OPT_SUPPORTED
  112. METHODDEF(boolean) encode_mcu_gather(j_compress_ptr cinfo,
  113. JBLOCKROW *MCU_data);
  114. METHODDEF(void) finish_pass_gather(j_compress_ptr cinfo);
  115. #endif
  116. /*
  117. * Initialize for a Huffman-compressed scan.
  118. * If gather_statistics is TRUE, we do not output anything during the scan,
  119. * just count the Huffman symbols used and generate Huffman code tables.
  120. */
  121. METHODDEF(void)
  122. start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics)
  123. {
  124. huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
  125. int ci, dctbl, actbl;
  126. jpeg_component_info *compptr;
  127. if (gather_statistics) {
  128. #ifdef ENTROPY_OPT_SUPPORTED
  129. entropy->pub.encode_mcu = encode_mcu_gather;
  130. entropy->pub.finish_pass = finish_pass_gather;
  131. #else
  132. ERREXIT(cinfo, JERR_NOT_COMPILED);
  133. #endif
  134. } else {
  135. entropy->pub.encode_mcu = encode_mcu_huff;
  136. entropy->pub.finish_pass = finish_pass_huff;
  137. }
  138. entropy->simd = jsimd_can_huff_encode_one_block();
  139. for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
  140. compptr = cinfo->cur_comp_info[ci];
  141. dctbl = compptr->dc_tbl_no;
  142. actbl = compptr->ac_tbl_no;
  143. if (gather_statistics) {
  144. #ifdef ENTROPY_OPT_SUPPORTED
  145. /* Check for invalid table indexes */
  146. /* (make_c_derived_tbl does this in the other path) */
  147. if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS)
  148. ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);
  149. if (actbl < 0 || actbl >= NUM_HUFF_TBLS)
  150. ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl);
  151. /* Allocate and zero the statistics tables */
  152. /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
  153. if (entropy->dc_count_ptrs[dctbl] == NULL)
  154. entropy->dc_count_ptrs[dctbl] = (long *)
  155. (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
  156. 257 * sizeof(long));
  157. MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * sizeof(long));
  158. if (entropy->ac_count_ptrs[actbl] == NULL)
  159. entropy->ac_count_ptrs[actbl] = (long *)
  160. (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
  161. 257 * sizeof(long));
  162. MEMZERO(entropy->ac_count_ptrs[actbl], 257 * sizeof(long));
  163. #endif
  164. } else {
  165. /* Compute derived values for Huffman tables */
  166. /* We may do this more than once for a table, but it's not expensive */
  167. jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl,
  168. &entropy->dc_derived_tbls[dctbl]);
  169. jpeg_make_c_derived_tbl(cinfo, FALSE, actbl,
  170. &entropy->ac_derived_tbls[actbl]);
  171. }
  172. /* Initialize DC predictions to 0 */
  173. entropy->saved.last_dc_val[ci] = 0;
  174. }
  175. /* Initialize bit buffer to empty */
  176. entropy->saved.put_buffer = 0;
  177. entropy->saved.put_bits = 0;
  178. /* Initialize restart stuff */
  179. entropy->restarts_to_go = cinfo->restart_interval;
  180. entropy->next_restart_num = 0;
  181. }
  182. /*
  183. * Compute the derived values for a Huffman table.
  184. * This routine also performs some validation checks on the table.
  185. *
  186. * Note this is also used by jcphuff.c.
  187. */
  188. GLOBAL(void)
  189. jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno,
  190. c_derived_tbl **pdtbl)
  191. {
  192. JHUFF_TBL *htbl;
  193. c_derived_tbl *dtbl;
  194. int p, i, l, lastp, si, maxsymbol;
  195. char huffsize[257];
  196. unsigned int huffcode[257];
  197. unsigned int code;
  198. /* Note that huffsize[] and huffcode[] are filled in code-length order,
  199. * paralleling the order of the symbols themselves in htbl->huffval[].
  200. */
  201. /* Find the input Huffman table */
  202. if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
  203. ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
  204. htbl =
  205. isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
  206. if (htbl == NULL)
  207. ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
  208. /* Allocate a workspace if we haven't already done so. */
  209. if (*pdtbl == NULL)
  210. *pdtbl = (c_derived_tbl *)
  211. (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
  212. sizeof(c_derived_tbl));
  213. dtbl = *pdtbl;
  214. /* Figure C.1: make table of Huffman code length for each symbol */
  215. p = 0;
  216. for (l = 1; l <= 16; l++) {
  217. i = (int)htbl->bits[l];
  218. if (i < 0 || p + i > 256) /* protect against table overrun */
  219. ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
  220. while (i--)
  221. huffsize[p++] = (char)l;
  222. }
  223. huffsize[p] = 0;
  224. lastp = p;
  225. /* Figure C.2: generate the codes themselves */
  226. /* We also validate that the counts represent a legal Huffman code tree. */
  227. code = 0;
  228. si = huffsize[0];
  229. p = 0;
  230. while (huffsize[p]) {
  231. while (((int)huffsize[p]) == si) {
  232. huffcode[p++] = code;
  233. code++;
  234. }
  235. /* code is now 1 more than the last code used for codelength si; but
  236. * it must still fit in si bits, since no code is allowed to be all ones.
  237. */
  238. if (((JLONG)code) >= (((JLONG)1) << si))
  239. ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
  240. code <<= 1;
  241. si++;
  242. }
  243. /* Figure C.3: generate encoding tables */
  244. /* These are code and size indexed by symbol value */
  245. /* Set all codeless symbols to have code length 0;
  246. * this lets us detect duplicate VAL entries here, and later
  247. * allows emit_bits to detect any attempt to emit such symbols.
  248. */
  249. MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi));
  250. /* This is also a convenient place to check for out-of-range
  251. * and duplicated VAL entries. We allow 0..255 for AC symbols
  252. * but only 0..15 for DC. (We could constrain them further
  253. * based on data depth and mode, but this seems enough.)
  254. */
  255. maxsymbol = isDC ? 15 : 255;
  256. for (p = 0; p < lastp; p++) {
  257. i = htbl->huffval[p];
  258. if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
  259. ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
  260. dtbl->ehufco[i] = huffcode[p];
  261. dtbl->ehufsi[i] = huffsize[p];
  262. }
  263. }
  264. /* Outputting bytes to the file */
  265. /* Emit a byte, taking 'action' if must suspend. */
  266. #define emit_byte(state, val, action) { \
  267. *(state)->next_output_byte++ = (JOCTET)(val); \
  268. if (--(state)->free_in_buffer == 0) \
  269. if (!dump_buffer(state)) \
  270. { action; } \
  271. }
  272. LOCAL(boolean)
  273. dump_buffer(working_state *state)
  274. /* Empty the output buffer; return TRUE if successful, FALSE if must suspend */
  275. {
  276. struct jpeg_destination_mgr *dest = state->cinfo->dest;
  277. if (!(*dest->empty_output_buffer) (state->cinfo))
  278. return FALSE;
  279. /* After a successful buffer dump, must reset buffer pointers */
  280. state->next_output_byte = dest->next_output_byte;
  281. state->free_in_buffer = dest->free_in_buffer;
  282. return TRUE;
  283. }
  284. /* Outputting bits to the file */
  285. /* These macros perform the same task as the emit_bits() function in the
  286. * original libjpeg code. In addition to reducing overhead by explicitly
  287. * inlining the code, additional performance is achieved by taking into
  288. * account the size of the bit buffer and waiting until it is almost full
  289. * before emptying it. This mostly benefits 64-bit platforms, since 6
  290. * bytes can be stored in a 64-bit bit buffer before it has to be emptied.
  291. */
  292. #define EMIT_BYTE() { \
  293. JOCTET c; \
  294. put_bits -= 8; \
  295. c = (JOCTET)GETJOCTET(put_buffer >> put_bits); \
  296. *buffer++ = c; \
  297. if (c == 0xFF) /* need to stuff a zero byte? */ \
  298. *buffer++ = 0; \
  299. }
  300. #define PUT_BITS(code, size) { \
  301. put_bits += size; \
  302. put_buffer = (put_buffer << size) | code; \
  303. }
  304. #if SIZEOF_SIZE_T != 8 && !defined(_WIN64)
  305. #define CHECKBUF15() { \
  306. if (put_bits > 15) { \
  307. EMIT_BYTE() \
  308. EMIT_BYTE() \
  309. } \
  310. }
  311. #endif
  312. #define CHECKBUF31() { \
  313. if (put_bits > 31) { \
  314. EMIT_BYTE() \
  315. EMIT_BYTE() \
  316. EMIT_BYTE() \
  317. EMIT_BYTE() \
  318. } \
  319. }
  320. #define CHECKBUF47() { \
  321. if (put_bits > 47) { \
  322. EMIT_BYTE() \
  323. EMIT_BYTE() \
  324. EMIT_BYTE() \
  325. EMIT_BYTE() \
  326. EMIT_BYTE() \
  327. EMIT_BYTE() \
  328. } \
  329. }
  330. #if !defined(_WIN32) && !defined(SIZEOF_SIZE_T)
  331. #error Cannot determine word size
  332. #endif
  333. #if SIZEOF_SIZE_T == 8 || defined(_WIN64)
  334. #define EMIT_BITS(code, size) { \
  335. CHECKBUF47() \
  336. PUT_BITS(code, size) \
  337. }
  338. #define EMIT_CODE(code, size) { \
  339. temp2 &= (((JLONG)1) << nbits) - 1; \
  340. CHECKBUF31() \
  341. PUT_BITS(code, size) \
  342. PUT_BITS(temp2, nbits) \
  343. }
  344. #else
  345. #define EMIT_BITS(code, size) { \
  346. PUT_BITS(code, size) \
  347. CHECKBUF15() \
  348. }
  349. #define EMIT_CODE(code, size) { \
  350. temp2 &= (((JLONG)1) << nbits) - 1; \
  351. PUT_BITS(code, size) \
  352. CHECKBUF15() \
  353. PUT_BITS(temp2, nbits) \
  354. CHECKBUF15() \
  355. }
  356. #endif
  357. /* Although it is exceedingly rare, it is possible for a Huffman-encoded
  358. * coefficient block to be larger than the 128-byte unencoded block. For each
  359. * of the 64 coefficients, PUT_BITS is invoked twice, and each invocation can
  360. * theoretically store 16 bits (for a maximum of 2048 bits or 256 bytes per
  361. * encoded block.) If, for instance, one artificially sets the AC
  362. * coefficients to alternating values of 32767 and -32768 (using the JPEG
  363. * scanning order-- 1, 8, 16, etc.), then this will produce an encoded block
  364. * larger than 200 bytes.
  365. */
  366. #define BUFSIZE (DCTSIZE2 * 8)
  367. #define LOAD_BUFFER() { \
  368. if (state->free_in_buffer < BUFSIZE) { \
  369. localbuf = 1; \
  370. buffer = _buffer; \
  371. } else \
  372. buffer = state->next_output_byte; \
  373. }
  374. #define STORE_BUFFER() { \
  375. if (localbuf) { \
  376. bytes = buffer - _buffer; \
  377. buffer = _buffer; \
  378. while (bytes > 0) { \
  379. bytestocopy = MIN(bytes, state->free_in_buffer); \
  380. MEMCOPY(state->next_output_byte, buffer, bytestocopy); \
  381. state->next_output_byte += bytestocopy; \
  382. buffer += bytestocopy; \
  383. state->free_in_buffer -= bytestocopy; \
  384. if (state->free_in_buffer == 0) \
  385. if (!dump_buffer(state)) return FALSE; \
  386. bytes -= bytestocopy; \
  387. } \
  388. } else { \
  389. state->free_in_buffer -= (buffer - state->next_output_byte); \
  390. state->next_output_byte = buffer; \
  391. } \
  392. }
  393. LOCAL(boolean)
  394. flush_bits(working_state *state)
  395. {
  396. JOCTET _buffer[BUFSIZE], *buffer;
  397. size_t put_buffer; int put_bits;
  398. size_t bytes, bytestocopy; int localbuf = 0;
  399. put_buffer = state->cur.put_buffer;
  400. put_bits = state->cur.put_bits;
  401. LOAD_BUFFER()
  402. /* fill any partial byte with ones */
  403. PUT_BITS(0x7F, 7)
  404. while (put_bits >= 8) EMIT_BYTE()
  405. state->cur.put_buffer = 0; /* and reset bit-buffer to empty */
  406. state->cur.put_bits = 0;
  407. STORE_BUFFER()
  408. return TRUE;
  409. }
  410. /* Encode a single block's worth of coefficients */
  411. LOCAL(boolean)
  412. encode_one_block_simd(working_state *state, JCOEFPTR block, int last_dc_val,
  413. c_derived_tbl *dctbl, c_derived_tbl *actbl)
  414. {
  415. JOCTET _buffer[BUFSIZE], *buffer;
  416. size_t bytes, bytestocopy; int localbuf = 0;
  417. LOAD_BUFFER()
  418. buffer = jsimd_huff_encode_one_block(state, buffer, block, last_dc_val,
  419. dctbl, actbl);
  420. STORE_BUFFER()
  421. return TRUE;
  422. }
  423. LOCAL(boolean)
  424. encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
  425. c_derived_tbl *dctbl, c_derived_tbl *actbl)
  426. {
  427. int temp, temp2, temp3;
  428. int nbits;
  429. int r, code, size;
  430. JOCTET _buffer[BUFSIZE], *buffer;
  431. size_t put_buffer; int put_bits;
  432. int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0];
  433. size_t bytes, bytestocopy; int localbuf = 0;
  434. put_buffer = state->cur.put_buffer;
  435. put_bits = state->cur.put_bits;
  436. LOAD_BUFFER()
  437. /* Encode the DC coefficient difference per section F.1.2.1 */
  438. temp = temp2 = block[0] - last_dc_val;
  439. /* This is a well-known technique for obtaining the absolute value without a
  440. * branch. It is derived from an assembly language technique presented in
  441. * "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by
  442. * Agner Fog.
  443. */
  444. temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
  445. temp ^= temp3;
  446. temp -= temp3;
  447. /* For a negative input, want temp2 = bitwise complement of abs(input) */
  448. /* This code assumes we are on a two's complement machine */
  449. temp2 += temp3;
  450. /* Find the number of bits needed for the magnitude of the coefficient */
  451. nbits = JPEG_NBITS(temp);
  452. /* Emit the Huffman-coded symbol for the number of bits */
  453. code = dctbl->ehufco[nbits];
  454. size = dctbl->ehufsi[nbits];
  455. EMIT_BITS(code, size)
  456. /* Mask off any extra bits in code */
  457. temp2 &= (((JLONG)1) << nbits) - 1;
  458. /* Emit that number of bits of the value, if positive, */
  459. /* or the complement of its magnitude, if negative. */
  460. EMIT_BITS(temp2, nbits)
  461. /* Encode the AC coefficients per section F.1.2.2 */
  462. r = 0; /* r = run length of zeros */
  463. /* Manually unroll the k loop to eliminate the counter variable. This
  464. * improves performance greatly on systems with a limited number of
  465. * registers (such as x86.)
  466. */
  467. #define kloop(jpeg_natural_order_of_k) { \
  468. if ((temp = block[jpeg_natural_order_of_k]) == 0) { \
  469. r++; \
  470. } else { \
  471. temp2 = temp; \
  472. /* Branch-less absolute value, bitwise complement, etc., same as above */ \
  473. temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); \
  474. temp ^= temp3; \
  475. temp -= temp3; \
  476. temp2 += temp3; \
  477. nbits = JPEG_NBITS_NONZERO(temp); \
  478. /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
  479. while (r > 15) { \
  480. EMIT_BITS(code_0xf0, size_0xf0) \
  481. r -= 16; \
  482. } \
  483. /* Emit Huffman symbol for run length / number of bits */ \
  484. temp3 = (r << 4) + nbits; \
  485. code = actbl->ehufco[temp3]; \
  486. size = actbl->ehufsi[temp3]; \
  487. EMIT_CODE(code, size) \
  488. r = 0; \
  489. } \
  490. }
  491. /* One iteration for each value in jpeg_natural_order[] */
  492. kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3);
  493. kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18);
  494. kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26);
  495. kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27);
  496. kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21);
  497. kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57);
  498. kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15);
  499. kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58);
  500. kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39);
  501. kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47);
  502. kloop(55); kloop(62); kloop(63);
  503. /* If the last coef(s) were zero, emit an end-of-block code */
  504. if (r > 0) {
  505. code = actbl->ehufco[0];
  506. size = actbl->ehufsi[0];
  507. EMIT_BITS(code, size)
  508. }
  509. state->cur.put_buffer = put_buffer;
  510. state->cur.put_bits = put_bits;
  511. STORE_BUFFER()
  512. return TRUE;
  513. }
  514. /*
  515. * Emit a restart marker & resynchronize predictions.
  516. */
  517. LOCAL(boolean)
  518. emit_restart(working_state *state, int restart_num)
  519. {
  520. int ci;
  521. if (!flush_bits(state))
  522. return FALSE;
  523. emit_byte(state, 0xFF, return FALSE);
  524. emit_byte(state, JPEG_RST0 + restart_num, return FALSE);
  525. /* Re-initialize DC predictions to 0 */
  526. for (ci = 0; ci < state->cinfo->comps_in_scan; ci++)
  527. state->cur.last_dc_val[ci] = 0;
  528. /* The restart counter is not updated until we successfully write the MCU. */
  529. return TRUE;
  530. }
  531. /*
  532. * Encode and output one MCU's worth of Huffman-compressed coefficients.
  533. */
  534. METHODDEF(boolean)
  535. encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
  536. {
  537. huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
  538. working_state state;
  539. int blkn, ci;
  540. jpeg_component_info *compptr;
  541. /* Load up working state */
  542. state.next_output_byte = cinfo->dest->next_output_byte;
  543. state.free_in_buffer = cinfo->dest->free_in_buffer;
  544. ASSIGN_STATE(state.cur, entropy->saved);
  545. state.cinfo = cinfo;
  546. /* Emit restart marker if needed */
  547. if (cinfo->restart_interval) {
  548. if (entropy->restarts_to_go == 0)
  549. if (!emit_restart(&state, entropy->next_restart_num))
  550. return FALSE;
  551. }
  552. /* Encode the MCU data blocks */
  553. if (entropy->simd) {
  554. for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
  555. ci = cinfo->MCU_membership[blkn];
  556. compptr = cinfo->cur_comp_info[ci];
  557. if (!encode_one_block_simd(&state,
  558. MCU_data[blkn][0], state.cur.last_dc_val[ci],
  559. entropy->dc_derived_tbls[compptr->dc_tbl_no],
  560. entropy->ac_derived_tbls[compptr->ac_tbl_no]))
  561. return FALSE;
  562. /* Update last_dc_val */
  563. state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
  564. }
  565. } else {
  566. for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
  567. ci = cinfo->MCU_membership[blkn];
  568. compptr = cinfo->cur_comp_info[ci];
  569. if (!encode_one_block(&state,
  570. MCU_data[blkn][0], state.cur.last_dc_val[ci],
  571. entropy->dc_derived_tbls[compptr->dc_tbl_no],
  572. entropy->ac_derived_tbls[compptr->ac_tbl_no]))
  573. return FALSE;
  574. /* Update last_dc_val */
  575. state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
  576. }
  577. }
  578. /* Completed MCU, so update state */
  579. cinfo->dest->next_output_byte = state.next_output_byte;
  580. cinfo->dest->free_in_buffer = state.free_in_buffer;
  581. ASSIGN_STATE(entropy->saved, state.cur);
  582. /* Update restart-interval state too */
  583. if (cinfo->restart_interval) {
  584. if (entropy->restarts_to_go == 0) {
  585. entropy->restarts_to_go = cinfo->restart_interval;
  586. entropy->next_restart_num++;
  587. entropy->next_restart_num &= 7;
  588. }
  589. entropy->restarts_to_go--;
  590. }
  591. return TRUE;
  592. }
  593. /*
  594. * Finish up at the end of a Huffman-compressed scan.
  595. */
  596. METHODDEF(void)
  597. finish_pass_huff(j_compress_ptr cinfo)
  598. {
  599. huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
  600. working_state state;
  601. /* Load up working state ... flush_bits needs it */
  602. state.next_output_byte = cinfo->dest->next_output_byte;
  603. state.free_in_buffer = cinfo->dest->free_in_buffer;
  604. ASSIGN_STATE(state.cur, entropy->saved);
  605. state.cinfo = cinfo;
  606. /* Flush out the last data */
  607. if (!flush_bits(&state))
  608. ERREXIT(cinfo, JERR_CANT_SUSPEND);
  609. /* Update state */
  610. cinfo->dest->next_output_byte = state.next_output_byte;
  611. cinfo->dest->free_in_buffer = state.free_in_buffer;
  612. ASSIGN_STATE(entropy->saved, state.cur);
  613. }
  614. /*
  615. * Huffman coding optimization.
  616. *
  617. * We first scan the supplied data and count the number of uses of each symbol
  618. * that is to be Huffman-coded. (This process MUST agree with the code above.)
  619. * Then we build a Huffman coding tree for the observed counts.
  620. * Symbols which are not needed at all for the particular image are not
  621. * assigned any code, which saves space in the DHT marker as well as in
  622. * the compressed data.
  623. */
  624. #ifdef ENTROPY_OPT_SUPPORTED
  625. /* Process a single block's worth of coefficients */
  626. LOCAL(void)
  627. htest_one_block(j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
  628. long dc_counts[], long ac_counts[])
  629. {
  630. register int temp;
  631. register int nbits;
  632. register int k, r;
  633. /* Encode the DC coefficient difference per section F.1.2.1 */
  634. temp = block[0] - last_dc_val;
  635. if (temp < 0)
  636. temp = -temp;
  637. /* Find the number of bits needed for the magnitude of the coefficient */
  638. nbits = 0;
  639. while (temp) {
  640. nbits++;
  641. temp >>= 1;
  642. }
  643. /* Check for out-of-range coefficient values.
  644. * Since we're encoding a difference, the range limit is twice as much.
  645. */
  646. if (nbits > MAX_COEF_BITS + 1)
  647. ERREXIT(cinfo, JERR_BAD_DCT_COEF);
  648. /* Count the Huffman symbol for the number of bits */
  649. dc_counts[nbits]++;
  650. /* Encode the AC coefficients per section F.1.2.2 */
  651. r = 0; /* r = run length of zeros */
  652. for (k = 1; k < DCTSIZE2; k++) {
  653. if ((temp = block[jpeg_natural_order[k]]) == 0) {
  654. r++;
  655. } else {
  656. /* if run length > 15, must emit special run-length-16 codes (0xF0) */
  657. while (r > 15) {
  658. ac_counts[0xF0]++;
  659. r -= 16;
  660. }
  661. /* Find the number of bits needed for the magnitude of the coefficient */
  662. if (temp < 0)
  663. temp = -temp;
  664. /* Find the number of bits needed for the magnitude of the coefficient */
  665. nbits = 1; /* there must be at least one 1 bit */
  666. while ((temp >>= 1))
  667. nbits++;
  668. /* Check for out-of-range coefficient values */
  669. if (nbits > MAX_COEF_BITS)
  670. ERREXIT(cinfo, JERR_BAD_DCT_COEF);
  671. /* Count Huffman symbol for run length / number of bits */
  672. ac_counts[(r << 4) + nbits]++;
  673. r = 0;
  674. }
  675. }
  676. /* If the last coef(s) were zero, emit an end-of-block code */
  677. if (r > 0)
  678. ac_counts[0]++;
  679. }
  680. /*
  681. * Trial-encode one MCU's worth of Huffman-compressed coefficients.
  682. * No data is actually output, so no suspension return is possible.
  683. */
  684. METHODDEF(boolean)
  685. encode_mcu_gather(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
  686. {
  687. huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
  688. int blkn, ci;
  689. jpeg_component_info *compptr;
  690. /* Take care of restart intervals if needed */
  691. if (cinfo->restart_interval) {
  692. if (entropy->restarts_to_go == 0) {
  693. /* Re-initialize DC predictions to 0 */
  694. for (ci = 0; ci < cinfo->comps_in_scan; ci++)
  695. entropy->saved.last_dc_val[ci] = 0;
  696. /* Update restart state */
  697. entropy->restarts_to_go = cinfo->restart_interval;
  698. }
  699. entropy->restarts_to_go--;
  700. }
  701. for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
  702. ci = cinfo->MCU_membership[blkn];
  703. compptr = cinfo->cur_comp_info[ci];
  704. htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],
  705. entropy->dc_count_ptrs[compptr->dc_tbl_no],
  706. entropy->ac_count_ptrs[compptr->ac_tbl_no]);
  707. entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];
  708. }
  709. return TRUE;
  710. }
  711. /*
  712. * Generate the best Huffman code table for the given counts, fill htbl.
  713. * Note this is also used by jcphuff.c.
  714. *
  715. * The JPEG standard requires that no symbol be assigned a codeword of all
  716. * one bits (so that padding bits added at the end of a compressed segment
  717. * can't look like a valid code). Because of the canonical ordering of
  718. * codewords, this just means that there must be an unused slot in the
  719. * longest codeword length category. Annex K (Clause K.2) of
  720. * Rec. ITU-T T.81 (1992) | ISO/IEC 10918-1:1994 suggests reserving such a slot
  721. * by pretending that symbol 256 is a valid symbol with count 1. In theory
  722. * that's not optimal; giving it count zero but including it in the symbol set
  723. * anyway should give a better Huffman code. But the theoretically better code
  724. * actually seems to come out worse in practice, because it produces more
  725. * all-ones bytes (which incur stuffed zero bytes in the final file). In any
  726. * case the difference is tiny.
  727. *
  728. * The JPEG standard requires Huffman codes to be no more than 16 bits long.
  729. * If some symbols have a very small but nonzero probability, the Huffman tree
  730. * must be adjusted to meet the code length restriction. We currently use
  731. * the adjustment method suggested in JPEG section K.2. This method is *not*
  732. * optimal; it may not choose the best possible limited-length code. But
  733. * typically only very-low-frequency symbols will be given less-than-optimal
  734. * lengths, so the code is almost optimal. Experimental comparisons against
  735. * an optimal limited-length-code algorithm indicate that the difference is
  736. * microscopic --- usually less than a hundredth of a percent of total size.
  737. * So the extra complexity of an optimal algorithm doesn't seem worthwhile.
  738. */
  739. GLOBAL(void)
  740. jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[])
  741. {
  742. #define MAX_CLEN 32 /* assumed maximum initial code length */
  743. UINT8 bits[MAX_CLEN + 1]; /* bits[k] = # of symbols with code length k */
  744. int codesize[257]; /* codesize[k] = code length of symbol k */
  745. int others[257]; /* next symbol in current branch of tree */
  746. int c1, c2;
  747. int p, i, j;
  748. long v;
  749. /* This algorithm is explained in section K.2 of the JPEG standard */
  750. MEMZERO(bits, sizeof(bits));
  751. MEMZERO(codesize, sizeof(codesize));
  752. for (i = 0; i < 257; i++)
  753. others[i] = -1; /* init links to empty */
  754. freq[256] = 1; /* make sure 256 has a nonzero count */
  755. /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
  756. * that no real symbol is given code-value of all ones, because 256
  757. * will be placed last in the largest codeword category.
  758. */
  759. /* Huffman's basic algorithm to assign optimal code lengths to symbols */
  760. for (;;) {
  761. /* Find the smallest nonzero frequency, set c1 = its symbol */
  762. /* In case of ties, take the larger symbol number */
  763. c1 = -1;
  764. v = 1000000000L;
  765. for (i = 0; i <= 256; i++) {
  766. if (freq[i] && freq[i] <= v) {
  767. v = freq[i];
  768. c1 = i;
  769. }
  770. }
  771. /* Find the next smallest nonzero frequency, set c2 = its symbol */
  772. /* In case of ties, take the larger symbol number */
  773. c2 = -1;
  774. v = 1000000000L;
  775. for (i = 0; i <= 256; i++) {
  776. if (freq[i] && freq[i] <= v && i != c1) {
  777. v = freq[i];
  778. c2 = i;
  779. }
  780. }
  781. /* Done if we've merged everything into one frequency */
  782. if (c2 < 0)
  783. break;
  784. /* Else merge the two counts/trees */
  785. freq[c1] += freq[c2];
  786. freq[c2] = 0;
  787. /* Increment the codesize of everything in c1's tree branch */
  788. codesize[c1]++;
  789. while (others[c1] >= 0) {
  790. c1 = others[c1];
  791. codesize[c1]++;
  792. }
  793. others[c1] = c2; /* chain c2 onto c1's tree branch */
  794. /* Increment the codesize of everything in c2's tree branch */
  795. codesize[c2]++;
  796. while (others[c2] >= 0) {
  797. c2 = others[c2];
  798. codesize[c2]++;
  799. }
  800. }
  801. /* Now count the number of symbols of each code length */
  802. for (i = 0; i <= 256; i++) {
  803. if (codesize[i]) {
  804. /* The JPEG standard seems to think that this can't happen, */
  805. /* but I'm paranoid... */
  806. if (codesize[i] > MAX_CLEN)
  807. ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
  808. bits[codesize[i]]++;
  809. }
  810. }
  811. /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
  812. * Huffman procedure assigned any such lengths, we must adjust the coding.
  813. * Here is what Rec. ITU-T T.81 | ISO/IEC 10918-1 says about how this next
  814. * bit works: Since symbols are paired for the longest Huffman code, the
  815. * symbols are removed from this length category two at a time. The prefix
  816. * for the pair (which is one bit shorter) is allocated to one of the pair;
  817. * then, skipping the BITS entry for that prefix length, a code word from the
  818. * next shortest nonzero BITS entry is converted into a prefix for two code
  819. * words one bit longer.
  820. */
  821. for (i = MAX_CLEN; i > 16; i--) {
  822. while (bits[i] > 0) {
  823. j = i - 2; /* find length of new prefix to be used */
  824. while (bits[j] == 0)
  825. j--;
  826. bits[i] -= 2; /* remove two symbols */
  827. bits[i - 1]++; /* one goes in this length */
  828. bits[j + 1] += 2; /* two new symbols in this length */
  829. bits[j]--; /* symbol of this length is now a prefix */
  830. }
  831. }
  832. /* Remove the count for the pseudo-symbol 256 from the largest codelength */
  833. while (bits[i] == 0) /* find largest codelength still in use */
  834. i--;
  835. bits[i]--;
  836. /* Return final symbol counts (only for lengths 0..16) */
  837. MEMCOPY(htbl->bits, bits, sizeof(htbl->bits));
  838. /* Return a list of the symbols sorted by code length */
  839. /* It's not real clear to me why we don't need to consider the codelength
  840. * changes made above, but Rec. ITU-T T.81 | ISO/IEC 10918-1 seems to think
  841. * this works.
  842. */
  843. p = 0;
  844. for (i = 1; i <= MAX_CLEN; i++) {
  845. for (j = 0; j <= 255; j++) {
  846. if (codesize[j] == i) {
  847. htbl->huffval[p] = (UINT8)j;
  848. p++;
  849. }
  850. }
  851. }
  852. /* Set sent_table FALSE so updated table will be written to JPEG file. */
  853. htbl->sent_table = FALSE;
  854. }
  855. /*
  856. * Finish up a statistics-gathering pass and create the new Huffman tables.
  857. */
  858. METHODDEF(void)
  859. finish_pass_gather(j_compress_ptr cinfo)
  860. {
  861. huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
  862. int ci, dctbl, actbl;
  863. jpeg_component_info *compptr;
  864. JHUFF_TBL **htblptr;
  865. boolean did_dc[NUM_HUFF_TBLS];
  866. boolean did_ac[NUM_HUFF_TBLS];
  867. /* It's important not to apply jpeg_gen_optimal_table more than once
  868. * per table, because it clobbers the input frequency counts!
  869. */
  870. MEMZERO(did_dc, sizeof(did_dc));
  871. MEMZERO(did_ac, sizeof(did_ac));
  872. for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
  873. compptr = cinfo->cur_comp_info[ci];
  874. dctbl = compptr->dc_tbl_no;
  875. actbl = compptr->ac_tbl_no;
  876. if (!did_dc[dctbl]) {
  877. htblptr = &cinfo->dc_huff_tbl_ptrs[dctbl];
  878. if (*htblptr == NULL)
  879. *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
  880. jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]);
  881. did_dc[dctbl] = TRUE;
  882. }
  883. if (!did_ac[actbl]) {
  884. htblptr = &cinfo->ac_huff_tbl_ptrs[actbl];
  885. if (*htblptr == NULL)
  886. *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
  887. jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]);
  888. did_ac[actbl] = TRUE;
  889. }
  890. }
  891. }
  892. #endif /* ENTROPY_OPT_SUPPORTED */
  893. /*
  894. * Module initialization routine for Huffman entropy encoding.
  895. */
  896. GLOBAL(void)
  897. jinit_huff_encoder(j_compress_ptr cinfo)
  898. {
  899. huff_entropy_ptr entropy;
  900. int i;
  901. entropy = (huff_entropy_ptr)
  902. (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
  903. sizeof(huff_entropy_encoder));
  904. cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;
  905. entropy->pub.start_pass = start_pass_huff;
  906. /* Mark tables unallocated */
  907. for (i = 0; i < NUM_HUFF_TBLS; i++) {
  908. entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
  909. #ifdef ENTROPY_OPT_SUPPORTED
  910. entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL;
  911. #endif
  912. }
  913. }