Blame view

ffmpeg-4.2.2/libavcodec/arm/startcode_armv6.S 7.37 KB
aac5773f   hucm   功能基本完成,接口待打磨
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
  /*
   * Copyright (c) 2013 RISC OS Open Ltd
   * Author: Ben Avison <bavison@riscosopen.org>
   *
   * This file is part of FFmpeg.
   *
   * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
   * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
   * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
  #include "libavutil/arm/asm.S"
  
  /*
   * Symbolic names for the registers used by this file.
   * RESULT and BUF deliberately name the same register: the buffer pointer
   * is copied to PTR on entry, and the return value is later formed in a1
   * as PTR - BUF.
   */
  RESULT  .req    a1      @ return value (offset from start of buffer)
  BUF     .req    a1      @ argument 1: start of buffer
  SIZE    .req    a2      @ argument 2: byte count, used as a running counter
  PATTERN .req    a3      @ mask constant, loaded with 0x80008000
  PTR     .req    a4      @ current read position
  DAT0    .req    v1      @ DAT0-DAT3: data loaded from the buffer
  DAT1    .req    v2
  DAT2    .req    v3
  DAT3    .req    v4
  TMP0    .req    v5      @ TMP0-TMP3: per-word test results
  TMP1    .req    v6
  TMP2    .req    ip
  TMP3    .req    lr      @ lr is saved by the push at function entry
  
  /*
   * Preload look-ahead, counted in 32-byte units (the unit the comments in
   * this file call a cacheline). It sets the pld offset in innerloop16 and
   * scales the size thresholds in ff_startcode_find_candidate_armv6.
   */
  #define PRELOAD_DISTANCE 4
  
  /*
   * Test one 32-bit word.
   *
   * Loads the word at PTR into DAT0 (PTR is post-incremented by 4) and
   * subtracts 4 from SIZE. TMP0 receives
   *     (DAT0 - (PATTERN >> 14)) & ~DAT0 & PATTERN
   * so with PATTERN = 0x80008000 it is non-zero when a 16-bit half of the
   * loaded word has a value below 2.
   *
   * Flags on exit: Z clear (NE) when TMP0 is non-zero; C is the carry
   * produced by the SIZE subtraction.
   */
  .macro innerloop4
          ldr     DAT0, [PTR], #4
          subs    SIZE, SIZE, #4 @ C flag survives rest of macro
          sub     TMP0, DAT0, PATTERN, lsr #14
          bic     TMP0, TMP0, DAT0
          ands    TMP0, TMP0, PATTERN
  .endm
  
  /*
   * Test four consecutive 32-bit words (16 bytes).
   *
   * Arguments (both optional):
   *   decrement   - if non-empty, this amount is subtracted from SIZE with
   *                 the flags updated; C then survives to the end of the
   *                 macro.
   *   do_preload  - if non-empty, a pld is issued PRELOAD_DISTANCE*32 bytes
   *                 beyond the already post-incremented PTR.
   *
   * Loads DAT0-DAT3 from PTR (PTR advances by 16) and applies the same test
   * as innerloop4 to each word, with the four computations interleaved.
   * The final masking with PATTERN is chained through the Z flag: each
   * conditional ANDS executes only while every earlier word gave zero.
   *
   * On exit Z is clear (NE) if any of the four words matched. TMP registers
   * after the first non-zero one still hold unmasked intermediate values,
   * so callers must examine TMP0, TMP1, TMP2 in that order.
   */
  .macro innerloop16  decrement, do_preload
          ldmia   PTR!, {DAT0,DAT1,DAT2,DAT3}
   .ifnc "\do_preload",""
          pld     [PTR, #PRELOAD_DISTANCE*32]
   .endif
   .ifnc "\decrement",""
          subs    SIZE, SIZE, #\decrement @ C flag survives rest of macro
   .endif
          sub     TMP0, DAT0, PATTERN, lsr #14
          sub     TMP1, DAT1, PATTERN, lsr #14
          bic     TMP0, TMP0, DAT0
          bic     TMP1, TMP1, DAT1
          sub     TMP2, DAT2, PATTERN, lsr #14
          sub     TMP3, DAT3, PATTERN, lsr #14
          ands    TMP0, TMP0, PATTERN
          bic     TMP2, TMP2, DAT2
          it      eq
          andseq  TMP1, TMP1, PATTERN
          bic     TMP3, TMP3, DAT3
          itt     eq
          andseq  TMP2, TMP2, PATTERN
          andseq  TMP3, TMP3, PATTERN
  .endm
  
  /*
   * int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size)
   *
   * Returns the byte offset from buf of the first start-code candidate, or
   * size when no candidate is found (0 for an empty buffer).
   *
   * In:    a1 = buf, a2 = size (compared as an unsigned value)
   * Out:   a1 = offset
   * Saved: v1-v6 and lr are pushed on entry and restored on return;
   *        a2-a4 and ip are used as scratch.
   *
   * Buffers shorter than (PRELOAD_DISTANCE+3)*32 - 1 bytes are scanned one
   * byte at a time and the first zero byte is reported. Longer buffers are
   * brought to word, then 4-word, then 32-byte alignment, after which data
   * is examined 16 bytes at a time with preloading, followed by a tail
   * handled in 16-byte, 4-byte and 1-byte steps.
   *
   * Word loads are done with the data endianness switched to big-endian
   * (setend be). The word test fires on an aligned 16-bit unit whose
   * big-endian value is 0 or 1; the reported offset is that of the unit,
   * or one less when the byte immediately before it is zero. Every exit
   * path taken after the switch restores little-endian (setend le).
   */
  function ff_startcode_find_candidate_armv6, export=1
          push    {v1-v6,lr}
          mov     PTR, BUF
          @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
          @ before using code that does preloads
          cmp     SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
          blo     60f
  
          @ Get to word-alignment, 1 byte at a time
          tst     PTR, #3
          beq     2f
  1:      ldrb    DAT0, [PTR], #1
          sub     SIZE, SIZE, #1
          teq     DAT0, #0
          beq     90f
          tst     PTR, #3
          bne     1b
  2:      @ Get to 4-word alignment, 1 word at a time
          ldr     PATTERN, =0x80008000
          setend  be @ word loads are big-endian from here until setend le
          tst     PTR, #12
          beq     4f
  3:      innerloop4
          bne     91f
          tst     PTR, #12
          bne     3b
  4:      @ Get to cacheline (8-word) alignment
          tst     PTR, #16
          beq     5f
          innerloop16  16
          bne     93f
  5:      @ Check complete cachelines, with preloading
          @ We need to stop when there are still (PRELOAD_DISTANCE+1)
          @ complete cachelines to go
          sub     SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 @ bias SIZE so the loop ends on borrow
  6:      innerloop16  , do_preload
          bne     93f
          innerloop16  32
          bne     93f
          bcs     6b
          @ Preload trailing part-cacheline, if any
          tst     SIZE, #31
          beq     7f
          pld     [PTR, #(PRELOAD_DISTANCE+1)*32]
          @ Check remaining data without doing any more preloads. First
          @ do in chunks of 4 words:
  7:      adds    SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16 @ undo the bias; SIZE = remaining - 16
          bmi     9f
  8:      innerloop16  16
          bne     93f
          bcs     8b
          @ Then in words:
  9:      adds    SIZE, SIZE, #16 - 4 @ SIZE = remaining - 4
          bmi     11f
  10:     innerloop4
          bne     91f
          bcs     10b
  11:     setend  le
          @ Check second byte of final halfword
          @ (the word test does not fire on a zero in the second byte alone)
          ldrb    DAT0, [PTR, #-1]
          teq     DAT0, #0
          beq     90f
          @ Check any remaining bytes
          @ (the low two bits of SIZE hold the number of bytes left)
          tst     SIZE, #3
          beq     13f
  12:     ldrb    DAT0, [PTR], #1
          sub     SIZE, SIZE, #1
          teq     DAT0, #0
          beq     90f
          tst     SIZE, #3
          bne     12b
          @ No candidate found
  13:     sub     RESULT, PTR, BUF
          b       99f
  
  60:     @ Small buffer - simply check by looping over bytes
          subs    SIZE, SIZE, #1
          @ Empty buffer: leave through the no-candidate exit so that RESULT
          @ is written (PTR == BUF gives 0). Going straight to 99f would
          @ return the buffer pointer, because RESULT and BUF are both a1.
          bcc     62f
  61:     ldrb    DAT0, [PTR], #1
          subs    SIZE, SIZE, #1 @ C clear once the last byte has been read
          teq     DAT0, #0 @ C from the subs has to reach the bcs below
          beq     90f
          bcs     61b
          @ No candidate found
  62:     sub     RESULT, PTR, BUF
          b       99f
  
  90:     @ Found a candidate at the preceding byte
          sub     RESULT, PTR, BUF
          sub     RESULT, RESULT, #1
          b       99f
  
  91:     @ Found a candidate somewhere in the preceding 4 bytes
          sub     RESULT, PTR, BUF
          sub     RESULT, RESULT, #4
          @ N set (mi): the hit is in the first halfword of the word;
          @ N clear (pl): it is in the second halfword
          sub     TMP0, DAT0, #0x20000
          bics    TMP0, TMP0, DAT0
          itt     pl
          ldrbpl  DAT0, [PTR, #-3] @ byte just before the second halfword
          addpl   RESULT, RESULT, #2
          bpl     92f
          teq     RESULT, #0
          beq     98f @ don't look back a byte if found at first byte in buffer
          ldrb    DAT0, [PTR, #-5] @ byte just before this word
  92:     teq     DAT0, #0
          it      eq
          subeq   RESULT, RESULT, #1 @ preceding byte is zero: report it instead
          b       98f
  
  93:     @ Found a candidate somewhere in the preceding 16 bytes
          @ TMP0..TMP2 are examined in order; each one is only valid when
          @ the earlier ones were zero
          sub     RESULT, PTR, BUF
          sub     RESULT, RESULT, #16
          teq     TMP0, #0
          beq     95f @ not in first 4 bytes
          sub     TMP0, DAT0, #0x20000
          bics    TMP0, TMP0, DAT0
          itt     pl
          ldrbpl  DAT0, [PTR, #-15]
          addpl   RESULT, RESULT, #2
          bpl     94f
          teq     RESULT, #0
          beq     98f @ don't look back a byte if found at first byte in buffer
          ldrb    DAT0, [PTR, #-17]
  94:     teq     DAT0, #0
          it      eq
          subeq   RESULT, RESULT, #1
          b       98f
  95:     add     RESULT, RESULT, #4
          teq     TMP1, #0
          beq     96f @ not in next 4 bytes
          sub     TMP1, DAT1, #0x20000
          bics    TMP1, TMP1, DAT1
          itee    mi
          ldrbmi  DAT0, [PTR, #-13]
          ldrbpl  DAT0, [PTR, #-11]
          addpl   RESULT, RESULT, #2
          teq     DAT0, #0
          it      eq
          subeq   RESULT, RESULT, #1
          b       98f
  96:     add     RESULT, RESULT, #4
          teq     TMP2, #0
          beq     97f @ not in next 4 bytes
          sub     TMP2, DAT2, #0x20000
          bics    TMP2, TMP2, DAT2
          itee    mi
          ldrbmi  DAT0, [PTR, #-9]
          ldrbpl  DAT0, [PTR, #-7]
          addpl   RESULT, RESULT, #2
          teq     DAT0, #0
          it      eq
          subeq   RESULT, RESULT, #1
          b       98f
  97:     @ Only the last word is left, so it is the one that matched
          add     RESULT, RESULT, #4
          sub     TMP3, DAT3, #0x20000
          bics    TMP3, TMP3, DAT3
          itee    mi
          ldrbmi  DAT0, [PTR, #-5]
          ldrbpl  DAT0, [PTR, #-3]
          addpl   RESULT, RESULT, #2
          teq     DAT0, #0
          it      eq
          subeq   RESULT, RESULT, #1
          @ drop through to 98f
  98:     setend  le @ exits taken while big-endian loads were still enabled
  99:     pop     {v1-v6,pc}
  endfunc
  
          @ Remove the register aliases created with .req at the top of the file
          .unreq  RESULT
          .unreq  BUF
          .unreq  SIZE
          .unreq  PATTERN
          .unreq  PTR
          .unreq  DAT0
          .unreq  DAT1
          .unreq  DAT2
          .unreq  DAT3
          .unreq  TMP0
          .unreq  TMP1
          .unreq  TMP2
          .unreq  TMP3