jsimd.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076
  1. /*
  2. * jsimd_x86_64.c
  3. *
  4. * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  5. * Copyright (C) 2009-2011, 2014, 2016, 2018, D. R. Commander.
  6. * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
  7. *
  8. * Based on the x86 SIMD extension for IJG JPEG library,
  9. * Copyright (C) 1999-2006, MIYASAKA Masaru.
  10. * For conditions of distribution and use, see copyright notice in jsimdext.inc
  11. *
  12. * This file contains the interface between the "normal" portions
  13. * of the library and the SIMD implementations when running on a
  14. * 64-bit x86 architecture.
  15. */
  16. #define JPEG_INTERNALS
  17. #include "../../jinclude.h"
  18. #include "../../jpeglib.h"
  19. #include "../../jsimd.h"
  20. #include "../../jdct.h"
  21. #include "../../jsimddct.h"
  22. #include "../jsimd.h"
  23. #include "jconfigint.h"
  24. /*
  25. * In the PIC cases, we have no guarantee that constants will keep
  26. * their alignment. This macro allows us to verify it at runtime.
  27. */
  28. #define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0)
  29. #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
  30. #define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
  31. static unsigned int simd_support = (unsigned int)(~0);
  32. static unsigned int simd_huffman = 1;
  33. /*
  34. * Check what SIMD accelerations are supported.
  35. *
  36. * FIXME: This code is racy under a multi-threaded environment.
  37. */
  38. LOCAL(void)
  39. init_simd(void)
  40. {
  41. #ifndef NO_GETENV
  42. char *env = NULL;
  43. #endif
  44. if (simd_support != ~0U)
  45. return;
  46. simd_support = jpeg_simd_cpu_support();
  47. #ifndef NO_GETENV
  48. /* Force different settings through environment variables */
  49. env = getenv("JSIMD_FORCESSE2");
  50. if ((env != NULL) && (strcmp(env, "1") == 0))
  51. simd_support &= JSIMD_SSE2;
  52. env = getenv("JSIMD_FORCEAVX2");
  53. if ((env != NULL) && (strcmp(env, "1") == 0))
  54. simd_support &= JSIMD_AVX2;
  55. env = getenv("JSIMD_FORCENONE");
  56. if ((env != NULL) && (strcmp(env, "1") == 0))
  57. simd_support = 0;
  58. env = getenv("JSIMD_NOHUFFENC");
  59. if ((env != NULL) && (strcmp(env, "1") == 0))
  60. simd_huffman = 0;
  61. #endif
  62. }
  63. GLOBAL(int)
  64. jsimd_can_rgb_ycc(void)
  65. {
  66. init_simd();
  67. /* The code is optimised for these values only */
  68. if (BITS_IN_JSAMPLE != 8)
  69. return 0;
  70. if (sizeof(JDIMENSION) != 4)
  71. return 0;
  72. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  73. return 0;
  74. if ((simd_support & JSIMD_AVX2) &&
  75. IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
  76. return 1;
  77. if ((simd_support & JSIMD_SSE2) &&
  78. IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
  79. return 1;
  80. return 0;
  81. }
  82. GLOBAL(int)
  83. jsimd_can_rgb_gray(void)
  84. {
  85. init_simd();
  86. /* The code is optimised for these values only */
  87. if (BITS_IN_JSAMPLE != 8)
  88. return 0;
  89. if (sizeof(JDIMENSION) != 4)
  90. return 0;
  91. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  92. return 0;
  93. if ((simd_support & JSIMD_AVX2) &&
  94. IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
  95. return 1;
  96. if ((simd_support & JSIMD_SSE2) &&
  97. IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
  98. return 1;
  99. return 0;
  100. }
  101. GLOBAL(int)
  102. jsimd_can_ycc_rgb(void)
  103. {
  104. init_simd();
  105. /* The code is optimised for these values only */
  106. if (BITS_IN_JSAMPLE != 8)
  107. return 0;
  108. if (sizeof(JDIMENSION) != 4)
  109. return 0;
  110. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  111. return 0;
  112. if ((simd_support & JSIMD_AVX2) &&
  113. IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
  114. return 1;
  115. if ((simd_support & JSIMD_SSE2) &&
  116. IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
  117. return 1;
  118. return 0;
  119. }
  120. GLOBAL(int)
  121. jsimd_can_ycc_rgb565(void)
  122. {
  123. return 0;
  124. }
  125. GLOBAL(void)
  126. jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  127. JSAMPIMAGE output_buf, JDIMENSION output_row,
  128. int num_rows)
  129. {
  130. void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  131. void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  132. switch (cinfo->in_color_space) {
  133. case JCS_EXT_RGB:
  134. avx2fct = jsimd_extrgb_ycc_convert_avx2;
  135. sse2fct = jsimd_extrgb_ycc_convert_sse2;
  136. break;
  137. case JCS_EXT_RGBX:
  138. case JCS_EXT_RGBA:
  139. avx2fct = jsimd_extrgbx_ycc_convert_avx2;
  140. sse2fct = jsimd_extrgbx_ycc_convert_sse2;
  141. break;
  142. case JCS_EXT_BGR:
  143. avx2fct = jsimd_extbgr_ycc_convert_avx2;
  144. sse2fct = jsimd_extbgr_ycc_convert_sse2;
  145. break;
  146. case JCS_EXT_BGRX:
  147. case JCS_EXT_BGRA:
  148. avx2fct = jsimd_extbgrx_ycc_convert_avx2;
  149. sse2fct = jsimd_extbgrx_ycc_convert_sse2;
  150. break;
  151. case JCS_EXT_XBGR:
  152. case JCS_EXT_ABGR:
  153. avx2fct = jsimd_extxbgr_ycc_convert_avx2;
  154. sse2fct = jsimd_extxbgr_ycc_convert_sse2;
  155. break;
  156. case JCS_EXT_XRGB:
  157. case JCS_EXT_ARGB:
  158. avx2fct = jsimd_extxrgb_ycc_convert_avx2;
  159. sse2fct = jsimd_extxrgb_ycc_convert_sse2;
  160. break;
  161. default:
  162. avx2fct = jsimd_rgb_ycc_convert_avx2;
  163. sse2fct = jsimd_rgb_ycc_convert_sse2;
  164. break;
  165. }
  166. if (simd_support & JSIMD_AVX2)
  167. avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  168. else
  169. sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  170. }
  171. GLOBAL(void)
  172. jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  173. JSAMPIMAGE output_buf, JDIMENSION output_row,
  174. int num_rows)
  175. {
  176. void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  177. void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  178. switch (cinfo->in_color_space) {
  179. case JCS_EXT_RGB:
  180. avx2fct = jsimd_extrgb_gray_convert_avx2;
  181. sse2fct = jsimd_extrgb_gray_convert_sse2;
  182. break;
  183. case JCS_EXT_RGBX:
  184. case JCS_EXT_RGBA:
  185. avx2fct = jsimd_extrgbx_gray_convert_avx2;
  186. sse2fct = jsimd_extrgbx_gray_convert_sse2;
  187. break;
  188. case JCS_EXT_BGR:
  189. avx2fct = jsimd_extbgr_gray_convert_avx2;
  190. sse2fct = jsimd_extbgr_gray_convert_sse2;
  191. break;
  192. case JCS_EXT_BGRX:
  193. case JCS_EXT_BGRA:
  194. avx2fct = jsimd_extbgrx_gray_convert_avx2;
  195. sse2fct = jsimd_extbgrx_gray_convert_sse2;
  196. break;
  197. case JCS_EXT_XBGR:
  198. case JCS_EXT_ABGR:
  199. avx2fct = jsimd_extxbgr_gray_convert_avx2;
  200. sse2fct = jsimd_extxbgr_gray_convert_sse2;
  201. break;
  202. case JCS_EXT_XRGB:
  203. case JCS_EXT_ARGB:
  204. avx2fct = jsimd_extxrgb_gray_convert_avx2;
  205. sse2fct = jsimd_extxrgb_gray_convert_sse2;
  206. break;
  207. default:
  208. avx2fct = jsimd_rgb_gray_convert_avx2;
  209. sse2fct = jsimd_rgb_gray_convert_sse2;
  210. break;
  211. }
  212. if (simd_support & JSIMD_AVX2)
  213. avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  214. else
  215. sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  216. }
  217. GLOBAL(void)
  218. jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  219. JDIMENSION input_row, JSAMPARRAY output_buf,
  220. int num_rows)
  221. {
  222. void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
  223. void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
  224. switch (cinfo->out_color_space) {
  225. case JCS_EXT_RGB:
  226. avx2fct = jsimd_ycc_extrgb_convert_avx2;
  227. sse2fct = jsimd_ycc_extrgb_convert_sse2;
  228. break;
  229. case JCS_EXT_RGBX:
  230. case JCS_EXT_RGBA:
  231. avx2fct = jsimd_ycc_extrgbx_convert_avx2;
  232. sse2fct = jsimd_ycc_extrgbx_convert_sse2;
  233. break;
  234. case JCS_EXT_BGR:
  235. avx2fct = jsimd_ycc_extbgr_convert_avx2;
  236. sse2fct = jsimd_ycc_extbgr_convert_sse2;
  237. break;
  238. case JCS_EXT_BGRX:
  239. case JCS_EXT_BGRA:
  240. avx2fct = jsimd_ycc_extbgrx_convert_avx2;
  241. sse2fct = jsimd_ycc_extbgrx_convert_sse2;
  242. break;
  243. case JCS_EXT_XBGR:
  244. case JCS_EXT_ABGR:
  245. avx2fct = jsimd_ycc_extxbgr_convert_avx2;
  246. sse2fct = jsimd_ycc_extxbgr_convert_sse2;
  247. break;
  248. case JCS_EXT_XRGB:
  249. case JCS_EXT_ARGB:
  250. avx2fct = jsimd_ycc_extxrgb_convert_avx2;
  251. sse2fct = jsimd_ycc_extxrgb_convert_sse2;
  252. break;
  253. default:
  254. avx2fct = jsimd_ycc_rgb_convert_avx2;
  255. sse2fct = jsimd_ycc_rgb_convert_sse2;
  256. break;
  257. }
  258. if (simd_support & JSIMD_AVX2)
  259. avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
  260. else
  261. sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
  262. }
  263. GLOBAL(void)
  264. jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  265. JDIMENSION input_row, JSAMPARRAY output_buf,
  266. int num_rows)
  267. {
  268. }
  269. GLOBAL(int)
  270. jsimd_can_h2v2_downsample(void)
  271. {
  272. init_simd();
  273. /* The code is optimised for these values only */
  274. if (BITS_IN_JSAMPLE != 8)
  275. return 0;
  276. if (sizeof(JDIMENSION) != 4)
  277. return 0;
  278. if (simd_support & JSIMD_AVX2)
  279. return 1;
  280. if (simd_support & JSIMD_SSE2)
  281. return 1;
  282. return 0;
  283. }
  284. GLOBAL(int)
  285. jsimd_can_h2v1_downsample(void)
  286. {
  287. init_simd();
  288. /* The code is optimised for these values only */
  289. if (BITS_IN_JSAMPLE != 8)
  290. return 0;
  291. if (sizeof(JDIMENSION) != 4)
  292. return 0;
  293. if (simd_support & JSIMD_AVX2)
  294. return 1;
  295. if (simd_support & JSIMD_SSE2)
  296. return 1;
  297. return 0;
  298. }
  299. GLOBAL(void)
  300. jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
  301. JSAMPARRAY input_data, JSAMPARRAY output_data)
  302. {
  303. if (simd_support & JSIMD_AVX2)
  304. jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
  305. compptr->v_samp_factor,
  306. compptr->width_in_blocks, input_data,
  307. output_data);
  308. else
  309. jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
  310. compptr->v_samp_factor,
  311. compptr->width_in_blocks, input_data,
  312. output_data);
  313. }
  314. GLOBAL(void)
  315. jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
  316. JSAMPARRAY input_data, JSAMPARRAY output_data)
  317. {
  318. if (simd_support & JSIMD_AVX2)
  319. jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
  320. compptr->v_samp_factor,
  321. compptr->width_in_blocks, input_data,
  322. output_data);
  323. else
  324. jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
  325. compptr->v_samp_factor,
  326. compptr->width_in_blocks, input_data,
  327. output_data);
  328. }
  329. GLOBAL(int)
  330. jsimd_can_h2v2_upsample(void)
  331. {
  332. init_simd();
  333. /* The code is optimised for these values only */
  334. if (BITS_IN_JSAMPLE != 8)
  335. return 0;
  336. if (sizeof(JDIMENSION) != 4)
  337. return 0;
  338. if (simd_support & JSIMD_AVX2)
  339. return 1;
  340. if (simd_support & JSIMD_SSE2)
  341. return 1;
  342. return 0;
  343. }
  344. GLOBAL(int)
  345. jsimd_can_h2v1_upsample(void)
  346. {
  347. init_simd();
  348. /* The code is optimised for these values only */
  349. if (BITS_IN_JSAMPLE != 8)
  350. return 0;
  351. if (sizeof(JDIMENSION) != 4)
  352. return 0;
  353. if (simd_support & JSIMD_AVX2)
  354. return 1;
  355. if (simd_support & JSIMD_SSE2)
  356. return 1;
  357. return 0;
  358. }
  359. GLOBAL(void)
  360. jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  361. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  362. {
  363. if (simd_support & JSIMD_AVX2)
  364. jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
  365. input_data, output_data_ptr);
  366. else
  367. jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
  368. input_data, output_data_ptr);
  369. }
  370. GLOBAL(void)
  371. jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  372. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  373. {
  374. if (simd_support & JSIMD_AVX2)
  375. jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
  376. input_data, output_data_ptr);
  377. else
  378. jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
  379. input_data, output_data_ptr);
  380. }
  381. GLOBAL(int)
  382. jsimd_can_h2v2_fancy_upsample(void)
  383. {
  384. init_simd();
  385. /* The code is optimised for these values only */
  386. if (BITS_IN_JSAMPLE != 8)
  387. return 0;
  388. if (sizeof(JDIMENSION) != 4)
  389. return 0;
  390. if ((simd_support & JSIMD_AVX2) &&
  391. IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
  392. return 1;
  393. if ((simd_support & JSIMD_SSE2) &&
  394. IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
  395. return 1;
  396. return 0;
  397. }
  398. GLOBAL(int)
  399. jsimd_can_h2v1_fancy_upsample(void)
  400. {
  401. init_simd();
  402. /* The code is optimised for these values only */
  403. if (BITS_IN_JSAMPLE != 8)
  404. return 0;
  405. if (sizeof(JDIMENSION) != 4)
  406. return 0;
  407. if ((simd_support & JSIMD_AVX2) &&
  408. IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
  409. return 1;
  410. if ((simd_support & JSIMD_SSE2) &&
  411. IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
  412. return 1;
  413. return 0;
  414. }
  415. GLOBAL(void)
  416. jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  417. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  418. {
  419. if (simd_support & JSIMD_AVX2)
  420. jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
  421. compptr->downsampled_width, input_data,
  422. output_data_ptr);
  423. else
  424. jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
  425. compptr->downsampled_width, input_data,
  426. output_data_ptr);
  427. }
  428. GLOBAL(void)
  429. jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  430. JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
  431. {
  432. if (simd_support & JSIMD_AVX2)
  433. jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
  434. compptr->downsampled_width, input_data,
  435. output_data_ptr);
  436. else
  437. jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
  438. compptr->downsampled_width, input_data,
  439. output_data_ptr);
  440. }
  441. GLOBAL(int)
  442. jsimd_can_h2v2_merged_upsample(void)
  443. {
  444. init_simd();
  445. /* The code is optimised for these values only */
  446. if (BITS_IN_JSAMPLE != 8)
  447. return 0;
  448. if (sizeof(JDIMENSION) != 4)
  449. return 0;
  450. if ((simd_support & JSIMD_AVX2) &&
  451. IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
  452. return 1;
  453. if ((simd_support & JSIMD_SSE2) &&
  454. IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
  455. return 1;
  456. return 0;
  457. }
  458. GLOBAL(int)
  459. jsimd_can_h2v1_merged_upsample(void)
  460. {
  461. init_simd();
  462. /* The code is optimised for these values only */
  463. if (BITS_IN_JSAMPLE != 8)
  464. return 0;
  465. if (sizeof(JDIMENSION) != 4)
  466. return 0;
  467. if ((simd_support & JSIMD_AVX2) &&
  468. IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
  469. return 1;
  470. if ((simd_support & JSIMD_SSE2) &&
  471. IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
  472. return 1;
  473. return 0;
  474. }
  475. GLOBAL(void)
  476. jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  477. JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
  478. {
  479. void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  480. void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  481. switch (cinfo->out_color_space) {
  482. case JCS_EXT_RGB:
  483. avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
  484. sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
  485. break;
  486. case JCS_EXT_RGBX:
  487. case JCS_EXT_RGBA:
  488. avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
  489. sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
  490. break;
  491. case JCS_EXT_BGR:
  492. avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
  493. sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
  494. break;
  495. case JCS_EXT_BGRX:
  496. case JCS_EXT_BGRA:
  497. avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
  498. sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
  499. break;
  500. case JCS_EXT_XBGR:
  501. case JCS_EXT_ABGR:
  502. avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
  503. sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
  504. break;
  505. case JCS_EXT_XRGB:
  506. case JCS_EXT_ARGB:
  507. avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
  508. sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
  509. break;
  510. default:
  511. avx2fct = jsimd_h2v2_merged_upsample_avx2;
  512. sse2fct = jsimd_h2v2_merged_upsample_sse2;
  513. break;
  514. }
  515. if (simd_support & JSIMD_AVX2)
  516. avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  517. else
  518. sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  519. }
  520. GLOBAL(void)
  521. jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  522. JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
  523. {
  524. void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  525. void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  526. switch (cinfo->out_color_space) {
  527. case JCS_EXT_RGB:
  528. avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
  529. sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
  530. break;
  531. case JCS_EXT_RGBX:
  532. case JCS_EXT_RGBA:
  533. avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
  534. sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
  535. break;
  536. case JCS_EXT_BGR:
  537. avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
  538. sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
  539. break;
  540. case JCS_EXT_BGRX:
  541. case JCS_EXT_BGRA:
  542. avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
  543. sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
  544. break;
  545. case JCS_EXT_XBGR:
  546. case JCS_EXT_ABGR:
  547. avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
  548. sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
  549. break;
  550. case JCS_EXT_XRGB:
  551. case JCS_EXT_ARGB:
  552. avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
  553. sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
  554. break;
  555. default:
  556. avx2fct = jsimd_h2v1_merged_upsample_avx2;
  557. sse2fct = jsimd_h2v1_merged_upsample_sse2;
  558. break;
  559. }
  560. if (simd_support & JSIMD_AVX2)
  561. avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  562. else
  563. sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  564. }
  565. GLOBAL(int)
  566. jsimd_can_convsamp(void)
  567. {
  568. init_simd();
  569. /* The code is optimised for these values only */
  570. if (DCTSIZE != 8)
  571. return 0;
  572. if (BITS_IN_JSAMPLE != 8)
  573. return 0;
  574. if (sizeof(JDIMENSION) != 4)
  575. return 0;
  576. if (sizeof(DCTELEM) != 2)
  577. return 0;
  578. if (simd_support & JSIMD_AVX2)
  579. return 1;
  580. if (simd_support & JSIMD_SSE2)
  581. return 1;
  582. return 0;
  583. }
  584. GLOBAL(int)
  585. jsimd_can_convsamp_float(void)
  586. {
  587. init_simd();
  588. /* The code is optimised for these values only */
  589. if (DCTSIZE != 8)
  590. return 0;
  591. if (BITS_IN_JSAMPLE != 8)
  592. return 0;
  593. if (sizeof(JDIMENSION) != 4)
  594. return 0;
  595. if (sizeof(FAST_FLOAT) != 4)
  596. return 0;
  597. if (simd_support & JSIMD_SSE2)
  598. return 1;
  599. return 0;
  600. }
  601. GLOBAL(void)
  602. jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
  603. DCTELEM *workspace)
  604. {
  605. if (simd_support & JSIMD_AVX2)
  606. jsimd_convsamp_avx2(sample_data, start_col, workspace);
  607. else
  608. jsimd_convsamp_sse2(sample_data, start_col, workspace);
  609. }
  610. GLOBAL(void)
  611. jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
  612. FAST_FLOAT *workspace)
  613. {
  614. jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
  615. }
  616. GLOBAL(int)
  617. jsimd_can_fdct_islow(void)
  618. {
  619. init_simd();
  620. /* The code is optimised for these values only */
  621. if (DCTSIZE != 8)
  622. return 0;
  623. if (sizeof(DCTELEM) != 2)
  624. return 0;
  625. if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
  626. return 1;
  627. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
  628. return 1;
  629. return 0;
  630. }
  631. GLOBAL(int)
  632. jsimd_can_fdct_ifast(void)
  633. {
  634. init_simd();
  635. /* The code is optimised for these values only */
  636. if (DCTSIZE != 8)
  637. return 0;
  638. if (sizeof(DCTELEM) != 2)
  639. return 0;
  640. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
  641. return 1;
  642. return 0;
  643. }
  644. GLOBAL(int)
  645. jsimd_can_fdct_float(void)
  646. {
  647. init_simd();
  648. /* The code is optimised for these values only */
  649. if (DCTSIZE != 8)
  650. return 0;
  651. if (sizeof(FAST_FLOAT) != 4)
  652. return 0;
  653. if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
  654. return 1;
  655. return 0;
  656. }
  657. GLOBAL(void)
  658. jsimd_fdct_islow(DCTELEM *data)
  659. {
  660. if (simd_support & JSIMD_AVX2)
  661. jsimd_fdct_islow_avx2(data);
  662. else
  663. jsimd_fdct_islow_sse2(data);
  664. }
  665. GLOBAL(void)
  666. jsimd_fdct_ifast(DCTELEM *data)
  667. {
  668. jsimd_fdct_ifast_sse2(data);
  669. }
  670. GLOBAL(void)
  671. jsimd_fdct_float(FAST_FLOAT *data)
  672. {
  673. jsimd_fdct_float_sse(data);
  674. }
  675. GLOBAL(int)
  676. jsimd_can_quantize(void)
  677. {
  678. init_simd();
  679. /* The code is optimised for these values only */
  680. if (DCTSIZE != 8)
  681. return 0;
  682. if (sizeof(JCOEF) != 2)
  683. return 0;
  684. if (sizeof(DCTELEM) != 2)
  685. return 0;
  686. if (simd_support & JSIMD_AVX2)
  687. return 1;
  688. if (simd_support & JSIMD_SSE2)
  689. return 1;
  690. return 0;
  691. }
  692. GLOBAL(int)
  693. jsimd_can_quantize_float(void)
  694. {
  695. init_simd();
  696. /* The code is optimised for these values only */
  697. if (DCTSIZE != 8)
  698. return 0;
  699. if (sizeof(JCOEF) != 2)
  700. return 0;
  701. if (sizeof(FAST_FLOAT) != 4)
  702. return 0;
  703. if (simd_support & JSIMD_SSE2)
  704. return 1;
  705. return 0;
  706. }
  707. GLOBAL(void)
  708. jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
  709. {
  710. if (simd_support & JSIMD_AVX2)
  711. jsimd_quantize_avx2(coef_block, divisors, workspace);
  712. else
  713. jsimd_quantize_sse2(coef_block, divisors, workspace);
  714. }
  715. GLOBAL(void)
  716. jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
  717. FAST_FLOAT *workspace)
  718. {
  719. jsimd_quantize_float_sse2(coef_block, divisors, workspace);
  720. }
  721. GLOBAL(int)
  722. jsimd_can_idct_2x2(void)
  723. {
  724. init_simd();
  725. /* The code is optimised for these values only */
  726. if (DCTSIZE != 8)
  727. return 0;
  728. if (sizeof(JCOEF) != 2)
  729. return 0;
  730. if (BITS_IN_JSAMPLE != 8)
  731. return 0;
  732. if (sizeof(JDIMENSION) != 4)
  733. return 0;
  734. if (sizeof(ISLOW_MULT_TYPE) != 2)
  735. return 0;
  736. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
  737. return 1;
  738. return 0;
  739. }
  740. GLOBAL(int)
  741. jsimd_can_idct_4x4(void)
  742. {
  743. init_simd();
  744. /* The code is optimised for these values only */
  745. if (DCTSIZE != 8)
  746. return 0;
  747. if (sizeof(JCOEF) != 2)
  748. return 0;
  749. if (BITS_IN_JSAMPLE != 8)
  750. return 0;
  751. if (sizeof(JDIMENSION) != 4)
  752. return 0;
  753. if (sizeof(ISLOW_MULT_TYPE) != 2)
  754. return 0;
  755. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
  756. return 1;
  757. return 0;
  758. }
  759. GLOBAL(void)
  760. jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  761. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  762. JDIMENSION output_col)
  763. {
  764. jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
  765. }
  766. GLOBAL(void)
  767. jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  768. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  769. JDIMENSION output_col)
  770. {
  771. jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
  772. }
  773. GLOBAL(int)
  774. jsimd_can_idct_islow(void)
  775. {
  776. init_simd();
  777. /* The code is optimised for these values only */
  778. if (DCTSIZE != 8)
  779. return 0;
  780. if (sizeof(JCOEF) != 2)
  781. return 0;
  782. if (BITS_IN_JSAMPLE != 8)
  783. return 0;
  784. if (sizeof(JDIMENSION) != 4)
  785. return 0;
  786. if (sizeof(ISLOW_MULT_TYPE) != 2)
  787. return 0;
  788. if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
  789. return 1;
  790. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
  791. return 1;
  792. return 0;
  793. }
  794. GLOBAL(int)
  795. jsimd_can_idct_ifast(void)
  796. {
  797. init_simd();
  798. /* The code is optimised for these values only */
  799. if (DCTSIZE != 8)
  800. return 0;
  801. if (sizeof(JCOEF) != 2)
  802. return 0;
  803. if (BITS_IN_JSAMPLE != 8)
  804. return 0;
  805. if (sizeof(JDIMENSION) != 4)
  806. return 0;
  807. if (sizeof(IFAST_MULT_TYPE) != 2)
  808. return 0;
  809. if (IFAST_SCALE_BITS != 2)
  810. return 0;
  811. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
  812. return 1;
  813. return 0;
  814. }
  815. GLOBAL(int)
  816. jsimd_can_idct_float(void)
  817. {
  818. init_simd();
  819. if (DCTSIZE != 8)
  820. return 0;
  821. if (sizeof(JCOEF) != 2)
  822. return 0;
  823. if (BITS_IN_JSAMPLE != 8)
  824. return 0;
  825. if (sizeof(JDIMENSION) != 4)
  826. return 0;
  827. if (sizeof(FAST_FLOAT) != 4)
  828. return 0;
  829. if (sizeof(FLOAT_MULT_TYPE) != 4)
  830. return 0;
  831. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
  832. return 1;
  833. return 0;
  834. }
  835. GLOBAL(void)
  836. jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  837. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  838. JDIMENSION output_col)
  839. {
  840. if (simd_support & JSIMD_AVX2)
  841. jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
  842. output_col);
  843. else
  844. jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
  845. output_col);
  846. }
  847. GLOBAL(void)
  848. jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  849. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  850. JDIMENSION output_col)
  851. {
  852. jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
  853. output_col);
  854. }
  855. GLOBAL(void)
  856. jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  857. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  858. JDIMENSION output_col)
  859. {
  860. jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
  861. output_col);
  862. }
  863. GLOBAL(int)
  864. jsimd_can_huff_encode_one_block(void)
  865. {
  866. init_simd();
  867. if (DCTSIZE != 8)
  868. return 0;
  869. if (sizeof(JCOEF) != 2)
  870. return 0;
  871. if ((simd_support & JSIMD_SSE2) && simd_huffman &&
  872. IS_ALIGNED_SSE(jconst_huff_encode_one_block))
  873. return 1;
  874. return 0;
  875. }
  876. GLOBAL(JOCTET *)
  877. jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
  878. int last_dc_val, c_derived_tbl *dctbl,
  879. c_derived_tbl *actbl)
  880. {
  881. return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
  882. dctbl, actbl);
  883. }
  884. GLOBAL(int)
  885. jsimd_can_encode_mcu_AC_first_prepare(void)
  886. {
  887. init_simd();
  888. if (DCTSIZE != 8)
  889. return 0;
  890. if (sizeof(JCOEF) != 2)
  891. return 0;
  892. if (SIZEOF_SIZE_T != 8)
  893. return 0;
  894. if (simd_support & JSIMD_SSE2)
  895. return 1;
  896. return 0;
  897. }
  898. GLOBAL(void)
  899. jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
  900. const int *jpeg_natural_order_start, int Sl,
  901. int Al, JCOEF *values, size_t *zerobits)
  902. {
  903. jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
  904. Sl, Al, values, zerobits);
  905. }
  906. GLOBAL(int)
  907. jsimd_can_encode_mcu_AC_refine_prepare(void)
  908. {
  909. init_simd();
  910. if (DCTSIZE != 8)
  911. return 0;
  912. if (sizeof(JCOEF) != 2)
  913. return 0;
  914. if (SIZEOF_SIZE_T != 8)
  915. return 0;
  916. if (simd_support & JSIMD_SSE2)
  917. return 1;
  918. return 0;
  919. }
  920. GLOBAL(int)
  921. jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
  922. const int *jpeg_natural_order_start, int Sl,
  923. int Al, JCOEF *absvalues, size_t *bits)
  924. {
  925. return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
  926. jpeg_natural_order_start,
  927. Sl, Al, absvalues, bits);
  928. }