Blame view

ffmpeg-4.2.2/libavcodec/arm/vp8_armv6.S 8.44 KB
aac5773f   hucm   功能基本完成,接口待打磨
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
  /*
   * Copyright (C) 2010 Mans Rullgard
   *
   * This file is part of FFmpeg.
   *
   * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
   * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
   * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
  #include "libavutil/arm/asm.S"
  
  @ rac_get_prob  h, bs, buf, cw, pr, t0, t1
  @
  @ Decodes one range-coder bit using the probability in pr and leaves the
  @ result in the condition flags.
  @
  @   h    in:  current range value
  @        out: the split value when the result is lt, or the shifted range
  @             minus the split when the result is ge
  @   bs   bit counter; the shift count is added to it, and 16 is subtracted
  @        again whenever that addition produces a carry
  @   buf  input pointer; advanced by 2 when a halfword is fetched
  @   cw   code word; shifted left, optionally refilled, and reduced by
  @        split << 16 when the result is ge
  @   pr   probability operand (smlabb uses only its bottom halfword)
  @   t0   in:  shift count (the call sites in this file load it from
  @             ff_vp56_norm_shift indexed by h)
  @        out: h shifted left by that count
  @   t1   scratch; receives the refill halfword
  @
  @ The flags set by the final cmp are still live on exit; the call sites
  @ branch or predicate on ge / lt.
  @ NOTE(review): the T and A line prefixes are defined in the included asm.S,
  @ presumably selecting the Thumb-only and ARM-only form of a step - confirm.
  .macro rac_get_prob     h, bs, buf, cw, pr, t0, t1
          adds            \bs, \bs, \t0                   @ carry set here drives every cs step below
          lsl             \cw, \cw, \t0
          lsl             \t0, \h,  \t0                   @ t0 = h << shift
          rsb             \h,  \pr, #256                  @ h = 256 - pr
          it              cs
          ldrhcs          \t1, [\buf], #2                 @ refill: fetch 16 bits, post-increment buf
          smlabb          \h,  \t0, \pr, \h               @ h = t0 * pr + (256 - pr), bottom halfwords
  @ Refill tail, still predicated on the carry from adds: swap the two bytes
  @ of the fetched halfword, OR it into cw shifted left by bs, then bs -= 16.
  T       itttt           cs
          rev16cs         \t1, \t1
  A       orrcs           \cw, \cw, \t1, lsl \bs
  T       lslcs           \t1, \t1, \bs
  T       orrcs           \cw, \cw, \t1
          subcs           \bs, \bs, #16
          lsr             \h,  \h,  #8                    @ h = split
          cmp             \cw, \h,  lsl #16               @ result: ge or lt
          itt             ge
          subge           \cw, \cw, \h,  lsl #16
          subge           \h,  \t0, \h                    @ ge: remaining range above the split
  .endm
  
  @ rac_get_128  h, bs, buf, cw, t0, t1
  @
  @ Decodes one range-coder bit without a probability register: the split is
  @ computed as (128 + ((h << shift) << 7)) >> 8.  The shift, refill and
  @ compare steps follow the same pattern as rac_get_prob in this file.
  @
  @   h    in:  current range value
  @        out: the split when the result is lt, or the shifted range minus
  @             the split when the result is ge
  @   bs   bit counter; shift count added, 16 subtracted when that add carries
  @   buf  input pointer; advanced by 2 when a halfword is fetched
  @   cw   code word; shifted, optionally refilled, reduced by split << 16 on ge
  @   t0   in: shift count; out: h shifted left by that count
  @   t1   scratch; receives the refill halfword
  @
  @ The flags set by the final cmp are still live on exit.
  @ NOTE(review): the T and A line prefixes are defined in the included asm.S,
  @ presumably selecting the Thumb-only and ARM-only form of a step - confirm.
  .macro rac_get_128      h, bs, buf, cw, t0, t1
          adds            \bs, \bs, \t0                   @ carry set here drives every cs step below
          lsl             \cw, \cw, \t0
          lsl             \t0, \h,  \t0                   @ t0 = h << shift
          it              cs
          ldrhcs          \t1, [\buf], #2                 @ refill: fetch 16 bits, post-increment buf
          mov             \h,  #128
          it              cs
          rev16cs         \t1, \t1                        @ swap the two bytes of the fetched halfword
          add             \h,  \h,  \t0, lsl #7           @ h = 128 + t0 * 128
  @ Rest of the refill, still predicated on the carry from adds: OR the
  @ halfword into cw shifted left by bs, then bs -= 16.
  A       orrcs           \cw, \cw, \t1, lsl \bs
  T       ittt            cs
  T       lslcs           \t1, \t1, \bs
  T       orrcs           \cw, \cw, \t1
          subcs           \bs, \bs, #16
          lsr             \h,  \h,  #8                    @ h = split
          cmp             \cw, \h,  lsl #16               @ result: ge or lt
          itt             ge
          subge           \cw, \cw, \h,  lsl #16
          subge           \h,  \t0, \h                    @ ge: remaining range above the split
  .endm
  
  @ ff_decode_block_coeffs_armv6
  @
  @ Decodes a run of coefficient tokens with the range coder.  Each non-zero
  @ value gets its sign applied, is scaled by a 16-bit factor taken from qmul
  @ and is stored as a halfword at r1 + zigzag_scan[index - 1].  The final
  @ index is returned in r0.
  @
  @ Inputs as they are used below:
  @   r0           pointer to the range coder state; fields touched here are
  @                +0 high, +4 bits, +8 buf, +12 a limit that buf is clamped
  @                to on exit, +16 code_word
  @   r1           base address for the coefficient stores
  @   r2           base of the probability data; the pointer used after
  @                index i is r2 + 33 * i plus 0, 11 or 22 depending on the
  @                token just decoded
  @   r3           starting index
  @   entry sp+0   token_prob: initial probability pointer
  @   entry sp+4   qmul: pointer to the word holding two 16-bit factors
  @ NOTE(review): the C prototype is not visible in this file; the list above
  @ is read off the loads and stores - confirm against the caller.
  @
  @ Register roles inside the loop:
  @   r3   index                      r4   probability pointer
  @   r5   high                       r6   bits
  @   r7   buf                        r8   code_word
  @   r9   shift count / scratch      r10  scratch
  @   r11  packed factors; the bottom halfword is the active multiplier
  @   r12  coefficient value          lr   address of ff_vp56_norm_shift
  @   r0   probability operand for rac_get_prob once the state is loaded
  function ff_decode_block_coeffs_armv6, export=1
          push            {r0,r1,r4-r11,lr}               @ 11 words; saved r0 and r1 sit at sp+0 and sp+4
          movrelx         lr,  X(ff_vp56_norm_shift)
          ldrd            r4,  r5,  [sp, #44]             @ token_prob, qmul
          cmp             r3,  #0
          ldr             r11, [r5]                       @ both 16-bit factors in one word
          ldm             r0,  {r5-r7}                    @ high, bits, buf
          it              ne
          pkhtbne         r11, r11, r11, asr #16          @ start index != 0: top halfword copied to the bottom
          ldr             r8,  [r0, #16]                  @ code_word
  @ Label 0: advance the index and test the probability at r4 + 1.
  @ lt means nothing is stored for this index (handled at the first label 2).
  0:
          ldrb            r9,  [lr, r5]                   @ shift = ff_vp56_norm_shift[high]
          add             r3,  r3,  #1
          ldrb            r0,  [r4, #1]
          rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
          blt             2f
  
          @ Probability at r4 + 2: ge means the magnitude is above 1.
          ldrb            r9,  [lr, r5]
          ldrb            r0,  [r4, #2]
          rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
          ldrb            r9,  [lr, r5]
          bge             3f
  
          @ Magnitude 1.  The instructions below are the rac_get_128 sequence
          @ written out on r5-r10, interleaved with the pointer update
          @ (r4 = r2 + 33 * r3 + 11) and the zigzag_scan lookup.
          add             r4,  r3,  r3,  lsl #5
          sxth            r12, r11                        @ 1 * factor = bottom halfword of r11
          add             r4,  r4,  r2
          adds            r6,  r6,  r9
          add             r4,  r4,  #11
          lsl             r8,  r8,  r9
          it              cs
          ldrhcs          r10, [r7], #2
          lsl             r9,  r5,  r9
          mov             r5,  #128
          it              cs
          rev16cs         r10, r10
          add             r5,  r5,  r9,  lsl #7
  T       ittt            cs
  T       lslcs           r10, r10, r6
  T       orrcs           r8,  r8,  r10
  A       orrcs           r8,  r8,  r10, lsl r6
          subcs           r6,  r6,  #16
          lsr             r5,  r5,  #8
          cmp             r8,  r5,  lsl #16               @ ge on this compare selects the negated value
          movrel          r10, zigzag_scan-1
          itt             ge
          subge           r8,  r8,  r5,  lsl #16
          subge           r5,  r9,  r5
          ldrb            r10, [r10, r3]                  @ store offset = zigzag_scan[r3 - 1]
          it              ge
          rsbge           r12, r12, #0
          cmp             r3,  #16
          strh            r12, [r1, r10]
          bge             6f                              @ index reached 16: finish
  @ Label 5: after a stored coefficient, test the probability at r4 + 0.
  @ ge loops back to label 0; lt falls through to the exit at label 6.
  5:
          ldrb            r9,  [lr, r5]
          ldrb            r0,  [r4]
          rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
          pkhtb           r11, r11, r11, asr #16          @ from now on the top factor is the active one
          bge             0b
  
  @ Label 6: exit.  Write the coder state back through the saved r0, with buf
  @ limited to the value held at offset 12, and return the index.
  6:
          ldr             r0,  [sp]
          ldr             r9,  [r0, #12]
          cmp             r7,  r9
          it              hi
          movhi           r7,  r9
          stm             r0,  {r5-r7}                    @ high, bits, buf
          str             r8,  [r0, #16]                  @ code_word
  
          add             sp,  sp,  #8                    @ drop the saved r0 and r1
          mov             r0,  r3
          pop             {r4-r11,pc}
  @ Label 2 (first): nothing stored for this index.  r4 = r2 + 33 * r3, then
  @ loop to label 0 unless the index is exactly 16.
  2:
          add             r4,  r3,  r3,  lsl #5
          cmp             r3,  #16
          add             r4,  r4,  r2
          pkhtb           r11, r11, r11, asr #16
          bne             0b
          b               6b
  @ Label 3 (first): magnitude above 1.  Probability at r4 + 3: ge goes on to
  @ the larger values at label 1, lt selects 2, 3 or 4 via r4 + 4 and r4 + 5.
  3:
          ldrb            r0,  [r4, #3]
          rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
          ldrb            r9,  [lr, r5]
          bge             1f
  
          mov             r12, #2
          ldrb            r0,  [r4, #4]
          rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
          it              ge
          addge           r12, #1
          ldrb            r9,  [lr, r5]
          blt             4f                              @ value 2
          ldrb            r0,  [r4, #5]
          rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
          it              ge
          addge           r12, #1                         @ value 3 or 4
          ldrb            r9,  [lr, r5]
          b               4f
  @ Label 1 (first): probability at r4 + 6: ge goes to the table-driven path
  @ at the second label 3.  Otherwise r4 + 7 picks between 5..6 and 7..10.
  1:
          ldrb            r0,  [r4, #6]
          rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
          ldrb            r9,  [lr, r5]
          bge             3f
  
          ldrb            r0,  [r4, #7]
          rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
          ldrb            r9,  [lr, r5]
          bge             2f
  
          @ Value = 5 + one extra bit decoded with the constant probability 159.
          mov             r12, #5
          mov             r0,  #159
          rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
          it              ge
          addge           r12, r12, #1
          ldrb            r9,  [lr, r5]
          b               4f
  @ Label 2 (second): value = 7 + 2 * bit(probability 165) + bit(probability 145).
  2:
          mov             r12, #7
          mov             r0,  #165
          rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
          it              ge
          addge           r12, r12, #2
          ldrb            r9,  [lr, r5]
          mov             r0,  #145
          rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
          it              ge
          addge           r12, r12, #1
          ldrb            r9,  [lr, r5]
          b               4f
  @ Label 3 (second): table-driven values.  Two bits form a selector 0..3 in
  @ r12: the first uses the probability at r4 + 8, the second the one at
  @ r4 + 9, or r4 + 10 when the first bit was ge (r4 is bumped by one).
  @ The selector picks a pointer from ff_vp8_dct_cat_prob and the base value
  @ (8 << selector) + 3.
  3:
          ldrb            r0,  [r4, #8]
          rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
          it              ge
          addge           r4,  r4,  #1
          ldrb            r9,  [lr, r5]
          ite             ge
          movge           r12, #2
          movlt           r12, #0
          ldrb            r0,  [r4, #9]
          rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
          mov             r9,  #8
          it              ge
          addge           r12, r12, #1
          @ NOTE(review): r1 is handed to movrelx as a third operand,
          @ presumably as a scratch register; it is reloaded from sp+4 below.
          movrelx         r4,  X(ff_vp8_dct_cat_prob), r1
          lsl             r9,  r9,  r12
          ldr             r4,  [r4, r12, lsl #2]          @ r4 = selected probability list
          add             r12, r9,  #3
          mov             r1,  #0                         @ r1 collects the extra bits
          ldrb            r0,  [r4], #1
  @ Label 1 (second): one extra bit per list entry, most significant first;
  @ the loop ends when the next list byte is zero.
  1:
          ldrb            r9,  [lr, r5]
          lsl             r1,  r1,  #1
          rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
          ldrb            r0,  [r4], #1
          it              ge
          addge           r1,  r1,  #1
          cmp             r0,  #0
          bne             1b
          ldrb            r9,  [lr, r5]
          add             r12, r12, r1                    @ value = base + extra bits
          ldr             r1,  [sp, #4]                   @ restore the store base saved by push
  @ Label 4: shared tail for magnitudes above 1.  r4 = r2 + 33 * r3 + 22,
  @ rac_get_128 decides the sign (ge negates), then the value is multiplied
  @ by the bottom halfword of r11 and stored.
  4:
          add             r4,  r3,  r3,  lsl #5
          add             r4,  r4,  r2
          add             r4,  r4,  #22
          rac_get_128     r5,  r6,  r7,  r8,  r9,  r10
          it              ge
          rsbge           r12, r12, #0
          smulbb          r12, r12, r11
          movrel          r9,  zigzag_scan-1
          ldrb            r9,  [r9, r3]                   @ store offset = zigzag_scan[r3 - 1]
          cmp             r3,  #16
          strh            r12, [r1, r9]
          bge             6b                              @ index reached 16: finish
          b               5b
  endfunc
  
  @ Store offsets used by ff_decode_block_coeffs_armv6.  That function reads
  @ the byte at zigzag_scan - 1 + index (the index has already been
  @ incremented, so index 1 selects the first entry) and uses it as the byte
  @ offset of a halfword store.  The 16 entries are all even and cover each
  @ offset from 0 to 30 exactly once.
  const zigzag_scan
          .byte            0,  2,  8, 16
          .byte           10,  4,  6, 12
          .byte           18, 24, 26, 20
          .byte           14, 22, 28, 30
  endconst