3rdparty/ffmpeg-4.4.4/x264/common/x86/bitstream-a.asm

;*****************************************************************************
;* bitstream-a.asm: x86 bitstream functions
;*****************************************************************************
;* Copyright (C) 2010-2024 x264 project
;*
;* Authors: Fiona Glaser <fiona@x264.com>
;*          Henrik Gramner <henrik@gramner.com>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
;*
;* This program is also available under a commercial proprietary license.
;* For more information, contact us at licensing@x264.com.
;*****************************************************************************

%include "x86inc.asm"
%include "x86util.asm"

SECTION .text

;-----------------------------------------------------------------------------
; uint8_t *x264_nal_escape( uint8_t *dst, uint8_t *src, uint8_t *end )
;-----------------------------------------------------------------------------
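;
; H.264 emulation prevention: inside a NAL unit, any of the byte sequences
; 00 00 00, 00 00 01, 00 00 02, 00 00 03 must have a 0x03 byte inserted after
; the two zeros so the payload cannot mimic a start code. For reference, a
; scalar C sketch of the same transform (x264's real C fallback, nal_escape_c
; in common/bitstream.c, behaves like this; the name below is illustrative):
;
;     uint8_t *nal_escape_ref( uint8_t *dst, uint8_t *src, uint8_t *end )
;     {
;         if( src < end ) *dst++ = *src++;   // the first two bytes can never
;         if( src < end ) *dst++ = *src++;   // complete a 00 00 0X pattern
;         while( src < end )
;         {
;             if( src[0] <= 0x03 && !dst[-2] && !dst[-1] )
;                 *dst++ = 0x03;             // emulation prevention byte
;             *dst++ = *src++;
;         }
;         return dst;
;     }
;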
%macro NAL_LOOP 2
%%escape:
    ; Detect false positive to avoid unnecessary escape loop
    xor      r3d, r3d
    cmp byte [r0+r1-1], 0
    setnz    r3b             ; r3 = 1 if the byte preceding this chunk is nonzero
    xor       k3, k4         ; flip bit 0 of the pair mask when it's spurious
    jnz .escape              ; any bits left: a real escape candidate remains
    jmp %%continue           ; lone false positive: resume the vector loop
ALIGN 16
%1:
    mova [r0+r1+mmsize], m1  ; copy the current two chunks to dst
    pcmpeqb   m1, m0         ; 0xff in every byte lane that held 0x00
    mova [r0+r1], m2
    pcmpeqb   m2, m0
    pmovmskb r3d, m1         ; bitmask of zero bytes, high chunk
    %2        m1, [r1+r2+3*mmsize] ; preload the next iteration's chunks
    pmovmskb r4d, m2         ; bitmask of zero bytes, low chunk
    %2        m2, [r1+r2+2*mmsize]
    shl       k3, mmsize
    or        k3, k4         ; k3 = zero-byte mask over all 2*mmsize bytes
    lea       k4, [2*r3+1]   ; k4 = (k3<<1)|1: bit i = "byte i-1 was zero"
    and       k4, k3         ; pair mask: this byte and the previous are both zero
    jnz %%escape             ; detect false positive or real escape
%%continue:
    add       r1, 2*mmsize
    jl %1                    ; r1 counts up toward 0 = end of src
%endmacro
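
; How the vector loop finds escape candidates, as a C-like sketch (names are
; illustrative): each pcmpeqb/pmovmskb pair turns 2*mmsize source bytes into
; a bitmask `zeros` with bit i set iff byte i is 0x00; then
;
;     pairs = zeros & ((zeros << 1) | 1);
;
; has bit i set iff bytes i and i-1 are both zero (the |1 assumes the byte
; before the chunk was zero; %%escape above re-checks the byte just before
; the chunk, already written to dst, to filter out that false positive).
; Whether the byte *after* a zero pair is actually <= 3 is only verified by
; the scalar .escape_loop, so a nonzero `pairs` is a candidate, not a
; confirmed escape.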
  
%macro NAL_ESCAPE 0
%if mmsize == 32
    %xdefine k3 r3  ; 2*mmsize = 64 mask bits need full 64-bit registers
    %xdefine k4 r4
%else
    %xdefine k3 r3d
    %xdefine k4 r4d
%endif

cglobal nal_escape, 3,5
    movzx    r3d, byte [r1]
    sub       r1, r2 ; r1 = offset of current src pointer from end of src
    pxor      m0, m0
    mov     [r0], r3b ; copy the first byte; it can never need escaping
    sub       r0, r1 ; r0 = projected end of dst, assuming no more escapes
    or       r3d, 0xffffff00 ; ignore data before src

    ; Start off by jumping into the escape loop in case there's an escape at the start,
    ; and do a few more bytes in scalar until dst is aligned.
    jmp .escape_loop
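
; Addressing scheme, as a C-like sketch (names are illustrative): r1 = src-end
; is negative and counts up to 0, so one counter walks both buffers at once:
;
;     for( i = src - end; i < 0; i++ )  // i is r1
;         end_dst[i] = end_src[i];      // end_dst = r0, end_src = r2
;
; When a 0x03 is inserted, r0 (the projected end of dst) is incremented, which
; shifts all subsequent writes forward by one byte at no extra cost.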
  
%if mmsize == 16
    NAL_LOOP .loop_aligned, mova
    jmp .ret
%endif
    NAL_LOOP .loop_unaligned, movu
.ret:
    movifnidn rax, r0 ; return a pointer one past the last byte written to dst
    RET
  
.escape:
    ; Skip bytes that are known to be valid
    and       k4, k3  ; drop the false-positive bit 0 flagged at %%escape
    tzcnt     k4, k4  ; index of the first zero pair; runs as bsf on pre-BMI CPUs
    xor      r3d, r3d ; the last two bytes are known to be zero
    add       r1, r4
.escape_loop:
    inc       r1
    jge .ret
    movzx    r4d, byte [r1+r2]
    shl      r3d, 8
    or       r3d, r4d ; low 3 bytes of r3d = previous two bytes and the current one
    test     r3d, 0xfffffc ; 0xfffffc covers bits 2-23: ZF iff the previous two bytes are 0 and the current byte is <= 3
    jz .add_escape_byte
.escaped:
    lea      r4d, [r0+r1]
    mov  [r0+r1], r3b
    test     r4d, mmsize-1 ; Do SIMD when dst is aligned
    jnz .escape_loop
    movu      m1, [r1+r2+mmsize] ; preload two chunks and re-enter the vector loop
    movu      m2, [r1+r2]
%if mmsize == 16
    lea      r4d, [r1+r2]
    test     r4d, mmsize-1
    jz .loop_aligned ; use aligned loads when src is aligned too
%endif
    jmp .loop_unaligned
  
.add_escape_byte:
    mov byte [r0+r1], 3  ; emit the 0x03 emulation prevention byte
    inc       r0         ; dst grows by one byte relative to src
    or       r3d, 0x0300 ; the inserted 0x03 becomes the new "previous" byte
    jmp .escaped
%endmacro
  
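; Instantiate the routine once per instruction set: INIT_* sets mmsize
; (8/16/32) and the vector register class, and cglobal appends the cpu name,
; yielding nal_escape_mmx2 / _sse2 / _avx2.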
INIT_MMX mmx2
NAL_ESCAPE
INIT_XMM sse2
NAL_ESCAPE
%if ARCH_X86_64
INIT_YMM avx2
NAL_ESCAPE
%endif
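
; Usage sketch from the C side (x264 actually selects among these variants via
; CPU detection in common/bitstream.c; the direct call below is illustrative).
; Note the SIMD variants read and write past the nominal buffer ends, so both
; buffers need padding, as x264's NAL buffers provide:
;
;     uint8_t in[64]  = { 0x00, 0x00, 0x01 };  // would mimic a start code
;     uint8_t out[64];
;     uint8_t *p = x264_nal_escape_sse2( out, in, in+3 );
;     // out = 00 00 03 01, p - out == 4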