jsimdcpu.asm 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. ;
  2. ; jsimdcpu.asm - SIMD instruction support check
  3. ;
  4. ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  5. ; Copyright (C) 2016, D. R. Commander.
  6. ;
  7. ; Based on the x86 SIMD extension for IJG JPEG library
  8. ; Copyright (C) 1999-2006, MIYASAKA Masaru.
  9. ; For conditions of distribution and use, see copyright notice in jsimdext.inc
  10. ;
  11. ; This file should be assembled with NASM (Netwide Assembler); it
  12. ; can *not* be assembled with Microsoft's MASM or any compatible
  13. ; assembler (including Borland's Turbo Assembler).
  14. ; NASM is available from http://nasm.sourceforge.net/ or
  15. ; http://sourceforge.net/project/showfiles.php?group_id=6208
  16. %include "jsimdext.inc"
  17. ; --------------------------------------------------------------------------
  18. SECTION SEG_TEXT
  19. BITS 32
  ;
  ; Check if the CPU supports SIMD instructions
  ;
  ; GLOBAL(unsigned int)
  ; jpeg_simd_cpu_support(void)
  ;
  ; Target: 32-bit x86 (BITS 32), NASM syntax.
  ; In:     nothing.
  ; Out:    eax = bitwise OR of the JSIMD_* flags for every feature detected
  ;               (JSIMD_MMX, JSIMD_SSE, JSIMD_SSE2, JSIMD_3DNOW, JSIMD_AVX2);
  ;               0 if the CPUID instruction is unavailable.
  ; Saved:  ebx (overwritten by CPUID) and edi (flag accumulator) are pushed
  ;         on entry and restored before returning.
  ; Clobb:  ecx, edx, flags (ecx/edx need not be preserved; esi is unused).
  ; NOTE(review): EXTN, GLOBAL_FUNCTION and the JSIMD_* constants are not
  ;         defined in this file; presumably they come from jsimdext.inc.
  ;
          align   32
  GLOBAL_FUNCTION(jpeg_simd_cpu_support)

  EXTN(jpeg_simd_cpu_support):
          push    ebx
          push    edi

          xor     edi, edi                ; edi = SIMD support flags (none yet)

          ; CPUID exists only if software can toggle the ID bit (bit 21)
          ; of EFLAGS.
          pushfd
          pop     eax
          mov     edx, eax                ; edx = original EFLAGS
          xor     eax, 1<<21              ; flip ID bit in EFLAGS
          push    eax
          popfd
          pushfd
          pop     eax
          xor     eax, edx                ; eax = bits that actually changed
          jz      near .return            ; CPUID is not supported

          ; Check whether CPUID leaf 07H is supported
          ; (leaf 07H is used to check for AVX2 instruction support)
          xor     eax, eax
          cpuid                           ; eax = maximum basic leaf
          test    eax, eax
          jz      near .return
          cmp     eax, 7
          jb      short .no_avx2          ; Maximum leaf < 07H (leaf numbers
                                          ; are unsigned, hence jb, not jl)

          ; Check for AVX2 instruction support
          mov     eax, 7
          xor     ecx, ecx                ; subleaf 0
          cpuid
          test    ebx, 1<<5               ; bit5:AVX2
          jz      short .no_avx2

          ; Check for AVX2 O/S support
          mov     eax, 1                  ; leaf 01H takes no subleaf in ecx
          cpuid
          test    ecx, 1<<27              ; bit27:OSXSAVE
          jz      short .no_avx2          ; O/S has not enabled XSAVE/XGETBV
          test    ecx, 1<<28              ; bit28:AVX
          jz      short .no_avx2          ; CPU does not support AVX

          xor     ecx, ecx                ; select XCR0
          xgetbv
          and     eax, 6                  ; XCR0 bit1:XMM state, bit2:YMM state
          cmp     eax, 6
          jne     short .no_avx2          ; O/S does not manage XMM/YMM state
                                          ; using XSAVE

          or      edi, JSIMD_AVX2

  .no_avx2:
          ; Check CPUID leaf 01H for MMX, SSE, and SSE2 support
          ; (queried again here because not every path to this label has
          ; executed leaf 01H)
          mov     eax, 1
          cpuid                           ; edx = Standard feature flags

          ; Check for MMX instruction support
          test    edx, 1<<23              ; bit23:MMX
          jz      short .no_mmx
          or      edi, byte JSIMD_MMX
  .no_mmx:
          test    edx, 1<<25              ; bit25:SSE
          jz      short .no_sse
          or      edi, byte JSIMD_SSE
  .no_sse:
          test    edx, 1<<26              ; bit26:SSE2
          jz      short .no_sse2
          or      edi, byte JSIMD_SSE2
  .no_sse2:

          ; Check for 3DNow! instruction support
          mov     eax, 0x80000000
          cpuid                           ; eax = maximum extended leaf
          cmp     eax, 0x80000000
          jbe     short .return           ; leaf 80000001H is not available

          mov     eax, 0x80000001
          cpuid                           ; edx = Extended feature flags
          test    edx, 1<<31              ; bit31:3DNow!(vendor independent)
          jz      short .return
          or      edi, byte JSIMD_3DNOW

  .return:
          mov     eax, edi                ; return the accumulated flags
          pop     edi
          pop     ebx
          ret

  ; For some reason, the OS X linker does not honor the request to align the
  ; segment unless we do this.
          align   32