jsimdcpu.asm 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  ;
  ; jsimdcpu.asm - SIMD instruction support check
  ;
  ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  ; Copyright (C) 2016, D. R. Commander.
  ;
  ; Based on
  ; x86 SIMD extension for IJG JPEG library
  ; Copyright (C) 1999-2006, MIYASAKA Masaru.
  ; For conditions of distribution and use, see copyright notice in jsimdext.inc
  ;
  ; This file should be assembled with NASM (Netwide Assembler); it
  ; can *not* be assembled with Microsoft's MASM or any compatible
  ; assembler (including Borland's Turbo Assembler).
  ; NASM is available from http://nasm.sourceforge.net/ or
  ; http://sourceforge.net/project/showfiles.php?group_id=6208
  %include "jsimdext.inc"

  ; --------------------------------------------------------------------------
      SECTION     SEG_TEXT
      BITS        64

  ;
  ; Check if the CPU supports SIMD instructions
  ;
  ; GLOBAL(unsigned int)
  ; jpeg_simd_cpu_support(void)
  ;
  ; Out:    eax = bitwise OR of JSIMD_* flags.  JSIMD_SSE and JSIMD_SSE2 are
  ;         always set.  JSIMD_AVX2 is added only when CPUID reports AVX2,
  ;         OSXSAVE and AVX, and XCR0 shows that the O/S has enabled both XMM
  ;         and YMM state.
  ; Saved:  rbx (overwritten by CPUID) and rdi (flag accumulator; callee-saved
  ;         under the Microsoft x64 convention) are pushed on entry and
  ;         restored before returning.
  ; Clobb:  rcx, rdx, flags
  ;
  ; Every instruction below that writes a 32-bit register zero-extends into
  ; the full 64-bit register, so the upper halves never hold stale data.
  ;
      align       32
      GLOBAL_FUNCTION(jpeg_simd_cpu_support)

  EXTN(jpeg_simd_cpu_support):
      push        rbx
      push        rdi

      ; edi = SIMD support flags.
      ; Assume that all x86-64 processors support SSE & SSE2 instructions.
      mov         edi, JSIMD_SSE | JSIMD_SSE2

      ; Check whether CPUID leaf 07H is supported
      ; (leaf 07H is used to check for AVX2 instruction support)
      xor         eax, eax                ; leaf 00H: eax = maximum basic leaf
      cpuid
      cmp         eax, 7
      jb          short .return           ; Maximum leaf < 07H (unsigned)

      ; Check for AVX2 instruction support
      mov         eax, 7
      xor         ecx, ecx                ; sub-leaf 0
      cpuid
      test        ebx, 1<<5               ; ebx = extended feature flags,
      jz          short .return           ; bit5:AVX2

      ; Check for AVX2 O/S support
      mov         eax, 1
      cpuid
      test        ecx, 1<<27              ; bit27:OSXSAVE
      jz          short .return           ; O/S has not enabled XSAVE/XGETBV
      test        ecx, 1<<28              ; bit28:AVX
      jz          short .return           ; CPU does not support AVX

      xor         ecx, ecx                ; XCR0
      xgetbv
      and         eax, 6                  ; bit1:XMM state, bit2:YMM state
      cmp         eax, 6
      jne         short .return           ; O/S does not manage XMM/YMM state
                                          ; using XSAVE

      or          edi, JSIMD_AVX2

  .return:
      mov         eax, edi

      pop         rdi
      pop         rbx
      ret

  ; For some reason, the OS X linker does not honor the request to align the
  ; segment unless we do this.
      align       32