Blame view

ffmpeg-4.2.2/libswscale/arm/rgb2yuv_neon_common.S 6.76 KB
aac5773f   hucm   功能基本完成,接口待打磨
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
  /*
   * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
   *
   * This file is part of FFmpeg.
   *
   * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
   * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
   * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
  #include "libavutil/arm/asm.S"
  
  /*
   * alias name, tgt, set=1
   *
   * With set non-zero (the default) bind the symbolic register name to tgt
   * using .req. With set equal to 0 remove an existing binding using .unreq;
   * tgt is not used in that case.
   */
  .macro alias name, tgt, set=1
  .if \set == 0
      .unreq  \name
  .else
      \name   .req    \tgt
  .endif
  .endm
  
  /*
   * Define q<N>_l and q<N>_h as names for the low and high D-register halves
   * of every Q register: q<N>_l = d<2N>, q<N>_h = d<2N+1>, for N = 0..15.
   *
   * alias_dw_all binds one pair and then invokes itself with qw + 1 and both
   * D indices + 2, stopping once qw reaches 15. The single invocation below
   * (0, 0, 1) seeds the recursion.
   *
   * .altmacro is switched on only around this block because the recursion
   * relies on its expression-evaluating argument syntax; it is switched off
   * again right after so the remaining macros use the normal syntax.
   */
  .altmacro
  
  .macro alias_dw_all qw, dw_l, dw_h
      alias   q\qw\()_l, d\dw_l
      alias   q\qw\()_h, d\dw_h
      .if \qw < 15
          alias_dw_all  %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
      .endif
  .endm
  
  alias_dw_all    0, 0, 1
  
  .noaltmacro
  
  /*
   * alias_qw name, qw, set=1
   *
   * Bind (or, with set equal to 0, unbind) three names at once:
   *   name    -> the Q register qw
   *   name_l  -> qw_l, the low D half of qw
   *   name_h  -> qw_h, the high D half of qw
   * qw_l and qw_h are the names created by alias_dw_all, so qw must be
   * given as a plain q0..q15 register name.
   */
  .macro alias_qw     name, qw, set=1
      alias   \name\(), \qw, \set
      alias   \name\()_l, \qw\()_l, \set
      alias   \name\()_h, \qw\()_h, \set
  .endm
  
  /*
   * Function entry: save r4-r12 and lr (10 words = 40 bytes), then q4-q7
   * (4 x 16 = 64 bytes). load_arg hard-codes this 104-byte frame size to
   * locate stack arguments, so the two must be changed together.
   */
  .macro prologue
      push            {r4-r12, lr}
      vpush           {q4-q7}
  .endm
  
  /*
   * Function exit: restore the registers saved by prologue in reverse order.
   * The saved lr value is popped directly into pc, which performs the return.
   */
  .macro epilogue
      vpop            {q4-q7}
      pop             {r4-r12, pc}
  .endm
  
  /*
   * load_arg reg, ix
   *
   * Load the stack-passed argument with zero-based index ix into reg.
   * The offset skips the frame pushed by prologue (10 * 4 bytes of core
   * registers plus 4 * 16 bytes of q registers); index 4 is the first word
   * above that frame. Only valid while sp still has the value left by
   * prologue.
   * NOTE(review): assumes arguments 0..3 arrive in r0-r3 and the rest on the
   * stack, as the register map in alias_loop_420sp suggests.
   */
  .macro  load_arg    reg, ix
      ldr     \reg,   [sp, #((10 * 4 + 4 * 16) + (\ix - 4) * 4)]
  .endm
  
  
  /* ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma,
   *                  int width, int height,
   *                  int y_stride, int c_stride, int src_stride,
   *                  int32_t coeff_table[9]);
   */
  /*
   * alias_loop_420sp set=1
   *
   * Bind (set non-zero) or unbind (set equal to 0) the symbolic register
   * names used by loop_420sp. Several names deliberately share a register:
   *   src0 = src (r0), y0 = y (r1), header = width (r3),
   *   c_padding = c_stride (r6), y1 = coeff_table (r12).
   * Writing one name of such a pair destroys the other, so the order of
   * uses in loop_420sp matters.
   */
  .macro  alias_loop_420sp set=1
      /* r0-r3: the first four function arguments */
      alias   src,        r0, \set
      alias   src0,       src, \set
      alias   y,          r1, \set
      alias   y0,         y, \set
      alias   chroma,     r2, \set
      alias   width,      r3, \set
      alias   header,     width, \set
  
      /* r4-r7: filled from the stack with load_arg */
      alias   height,     r4, \set
      alias   y_stride,   r5, \set
      alias   c_stride,   r6, \set
      alias   c_padding,  c_stride, \set
      alias   src_stride, r7, \set
  
      /* end address of the current y0 row */
      alias   y0_end,     r8, \set
  
      /* per-row pointer adjustments computed once in loop_420sp */
      alias   src_padding,r9, \set
      alias   y_padding,  r10, \set
  
      /* pointers for the second row of each row pair */
      alias   src1,       r11, \set
      alias   y1,         r12, \set
  
      /* shares r12 with y1: only usable until y1 is first written */
      alias   coeff_table,r12, \set
  .endm
  
  
  /*
   * loop_420sp s_fmt, d_fmt, init, kernel, precision
   *
   * Emit the exported function <s_fmt>_to_<d_fmt>_neon_<precision>, whose C
   * prototype is given in the comment above alias_loop_420sp.
   *
   *   s_fmt, d_fmt  format names; used for the symbol name and passed on to
   *                 the kernel macro
   *   init          macro invoked once with the coefficient table pointer
   *   kernel        macro that converts one 16-pixel-wide, 2-row tile; called
   *                 as: kernel s_fmt, d_fmt, src0, src1, y0, y1, chroma [, count]
   *   precision     suffix of the symbol name
   *
   * Rows are processed in pairs (height is decremented by 2 per outer
   * iteration). Within a pair, an optional leading call with count = width
   * mod 16 is followed by calls without count until y0 reaches y0_end.
   * With kernel_420_16x2 the count form converts a full 16-pixel tile but
   * advances the pointers by count only, so the following tile overlaps it.
   * NOTE(review): the tile loop always runs at least once and every tile
   * touches 16 pixels, so this presumably relies on the caller guaranteeing
   * width >= 16 - confirm against the C call site.
   */
  .macro  loop_420sp s_fmt, d_fmt, init, kernel, precision
  
  function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
      prologue
  
      alias_loop_420sp
  
      load_arg    height,         4
      load_arg    y_stride,       5
      load_arg    c_stride,       6
      load_arg    src_stride,     7
      load_arg    coeff_table,    8
  
      /* must run before y1 is written: coeff_table and y1 share r12 */
      \init       coeff_table
  
      /* distance from the end of one processed row to the start of the next;
       * the source has 4 bytes per pixel, hence width * 4 */
      sub         y_padding,      y_stride,       width
      sub         c_padding,      c_stride,       width
      sub         src_padding,    src_stride,     width, LSL #2
  
      /* header overwrites width (same register); width is not read below */
      add         y0_end,         y0,             width
      and         header,         width,          #15
  
      add         y1,             y0,             y_stride
      add         src1,           src0,           src_stride
  
  0:
      /* header is width mod 16: zero means the row is a whole number of tiles */
      cmp         header,     #0
      beq         1f
  
      \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header
  
  1:
      \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma
  
      /* y0 and y0_end are addresses, so the comparison must be unsigned:
       * a signed test ends the row early when it straddles 0x80000000 */
      cmp         y0,         y0_end
      blo         1b
  2:
      /* y1 now points at the end of the second row of the pair; step every
       * pointer to the start of the next pair */
      add         y0,         y1,         y_padding
      add         y0_end,     y1,         y_stride
      add         chroma,     chroma,     c_padding
      add         src0,       src1,       src_padding
  
      add         y1,         y0,         y_stride
      add         src1,       src0,       src_stride
  
      subs        height,     height,     #2
  
      bgt         0b
  
      epilogue
  
      alias_loop_420sp 0
  
  endfunc
  .endm
  
  /*
   * Horizontal 2:1 reduction of one row: add each pair of adjacent unsigned
   * 8-bit samples in r8x16/g8x16/b8x16 and write the 16-bit sums to
   * r16x8/g16x8/b16x8, overwriting their previous contents.
   */
  .macro downsample
      vpaddl.u8   r16x8,  r8x16
      vpaddl.u8   g16x8,  g8x16
      vpaddl.u8   b16x8,  b8x16
  .endm
  
  
  /*
   * Accumulate and right shift by 2.
   *
   * Add the pairwise sums of a second row of 8-bit samples onto the 16-bit
   * values already in r16x8/g16x8/b16x8 (as left there by downsample), then
   * divide by 4 with rounding. Each 16-bit lane ends up holding the rounded
   * mean of a 2x2 block of samples.
   */
  .macro downsample_ars2
      vpadal.u8   r16x8,  r8x16
      vpadal.u8   g16x8,  g8x16
      vpadal.u8   b16x8,  b8x16
  
      vrshr.u16   r16x8,  r16x8,  #2
      vrshr.u16   g16x8,  g16x8,  #2
      vrshr.u16   b16x8,  b16x8,  #2
  .endm
  
  /*
   * store_y8_16x1 dst, count
   *
   * Write the 16 bytes held in y8x16 to the address in dst.
   *   count given:   dst is advanced by the register count afterwards
   *   count omitted: dst is advanced by the 16 bytes just written
   */
  .macro store_y8_16x1            dst, count
  .ifnc "\count",""
      vstmia      \dst,   {y8x16}
      add         \dst,   \dst,           \count
  .else
      vstmia      \dst!,  {y8x16}
  .endif
  .endm
  
  /*
   * store_chroma_nv12_8x1 dst, count
   *
   * Write u8x8 and v8x8 interleaved, U byte first (16 bytes in total), to
   * the address in dst.
   *   count given:   dst is post-incremented by the register count
   *   count omitted: dst is post-incremented by the 16 bytes written
   */
  .macro store_chroma_nv12_8x1    dst, count
  .ifnc "\count",""
      vst2.i8     {u8x8, v8x8},   [\dst], \count
  .else
      vst2.i8     {u8x8, v8x8},   [\dst]!
  .endif
  .endm
  
  /*
   * store_chroma_nv21_8x1 dst, count
   *
   * Write v8x8 and u8x8 interleaved, V byte first (16 bytes in total), to
   * the address in dst.
   *   count given:   dst is post-incremented by the register count
   *   count omitted: dst is post-incremented by the 16 bytes written
   */
  .macro store_chroma_nv21_8x1    dst, count
  .ifnc "\count",""
      vst2.i8     {v8x8, u8x8},   [\dst], \count
  .else
      vst2.i8     {v8x8, u8x8},   [\dst]!
  .endif
  .endm
  
  /*
   * load_8888_16x1 a, b, c, d, src, count
   *
   * Load 16 pixels of 4 bytes each from the address in src and split them by
   * byte position: the first byte of every pixel goes to <a>8x16, the second
   * to <b>8x16, the third to <c>8x16 and the fourth to <d>8x16. Pixels 0-7
   * fill the _l halves and pixels 8-15 the _h halves.
   *
   *   count omitted: src ends up 64 bytes past its starting value
   *   count given:   src ends up count * 4 bytes past its starting value;
   *                  64 bytes are read regardless
   */
  .macro load_8888_16x1   a, b, c, d, src, count
      /* pixels 0-7: identical in both variants, src moves on by 32 bytes */
      vld4.8      {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l},  [\src]!
  .ifc "\count",""
      vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h},  [\src]!
  .else
      vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h},  [\src]
      /* undo the 32-byte step of the first load, then advance by count pixels */
      sub         \src,   \src,   #32
      add         \src,   \src,   \count, LSL #2
  .endif
  .endm
  
  /*
   * load_rgbx_16x1 src, count
   *
   * Load 16 pixels whose bytes are ordered R, G, B, X in memory into
   * r8x16, g8x16, b8x16 and x8x16. src and count are passed unchanged to
   * load_8888_16x1.
   */
  .macro load_rgbx_16x1   src, count
      load_8888_16x1  r, g, b, x, \src, \count
  .endm
  
  /*
   * load_bgrx_16x1 src, count
   *
   * Load 16 pixels whose bytes are ordered B, G, R, X in memory into
   * b8x16, g8x16, r8x16 and x8x16. src and count are passed unchanged to
   * load_8888_16x1.
   */
  .macro load_bgrx_16x1   src, count
      load_8888_16x1  b, g, r, x, \src, \count
  .endm
  
  /*
   * alias_src_rgbx set=1
   *
   * Bind (or, with set equal to 0, unbind) the source channel names for
   * R, G, B, X byte order: r8x16 = q3, g8x16 = q4, b8x16 = q5, x8x16 = q6.
   */
  .macro alias_src_rgbx   set=1
      alias_src_8888  r, g, b, x, \set
  .endm
  
  /*
   * alias_src_bgrx set=1
   *
   * Bind (or, with set equal to 0, unbind) the source channel names for
   * B, G, R, X byte order: b8x16 = q3, g8x16 = q4, r8x16 = q5, x8x16 = q6.
   */
  .macro alias_src_bgrx   set=1
      alias_src_8888  b, g, r, x, \set
  .endm
  
  /*
   * alias_dst_nv12 set=1
   *
   * Bind (or, with set equal to 0, unbind) the chroma output names with U in
   * the low half and V in the high half of c8x8x2. c8x8x2_l and c8x8x2_h are
   * not defined in this section and must already exist when this is invoked.
   */
  .macro alias_dst_nv12   set=1
      alias   u8x8, c8x8x2_l, \set
      alias   v8x8, c8x8x2_h, \set
  .endm
  
  /*
   * alias_dst_nv21 set=1
   *
   * Bind (or, with set equal to 0, unbind) the chroma output names with V in
   * the low half and U in the high half of c8x8x2. c8x8x2_l and c8x8x2_h are
   * not defined in this section and must already exist when this is invoked.
   */
  .macro alias_dst_nv21   set=1
      alias   v8x8, c8x8x2_l, \set
      alias   u8x8, c8x8x2_h, \set
  .endm
  
  
  // common aliases
  
  /*
   * Coefficient registers, one D register per input channel. Signed 16-bit
   * lanes 0, 1 and 2 of each are named for that channel's Y, U and V term.
   * The registers are filled by the init macro passed to loop_420sp, which
   * is not visible in this section.
   */
  alias   CO_R    d0
  CO_RY   .dn     d0.s16[0]
  CO_RU   .dn     d0.s16[1]
  CO_RV   .dn     d0.s16[2]
  
  alias   CO_G    d1
  CO_GY   .dn     d1.s16[0]
  CO_GU   .dn     d1.s16[1]
  CO_GV   .dn     d1.s16[2]
  
  alias   CO_B    d2
  CO_BY   .dn     d2.s16[0]
  CO_BU   .dn     d2.s16[1]
  CO_BV   .dn     d2.s16[2]
  
  /* BIAS_U and BIAS_V are two names for the same register, d3 */
  alias   BIAS_U, d3
  alias   BIAS_V, BIAS_U
  
  /* q2 is d4/d5, so it does not overlap d0-d3 above */
  alias   BIAS_Y, q2
  
  
  /* q3-q6 R8G8B8X8 x16 */
  
  /*
   * alias_src_8888 a, b, c, d, set
   *
   * Bind (or, with set equal to 0, unbind) the four source channel vectors
   * <a>8x16, <b>8x16, <c>8x16 and <d>8x16 to q3, q4, q5 and q6, together
   * with their _l and _h halves. The order of a..d is the byte order of a
   * pixel in memory. set has no default here and must always be supplied.
   */
  .macro alias_src_8888   a, b, c, d, set
      alias_qw  \a\()8x16, q3, \set
      alias_qw  \b\()8x16, q4, \set
      alias_qw  \c\()8x16, q5, \set
      alias_qw  \d\()8x16, q6, \set
  .endm
  
  /*
   * kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
   *
   * Convert one tile of 16 pixels by 2 rows.
   *
   *   rgb_fmt  source format name; selects alias_src_<rgb_fmt> and
   *            load_<rgb_fmt>_16x1
   *   yuv_fmt  destination format name; selects alias_dst_<yuv_fmt> and
   *            store_chroma_<yuv_fmt>_8x1
   *   rgb0, rgb1   source pointers for the upper and lower row
   *   y0, y1       luma output pointers for the upper and lower row
   *   chroma       interleaved chroma output pointer
   *   count        optional register; when given, the pointers advance by
   *                count pixels (count bytes of luma and chroma, count * 4
   *                bytes of source) instead of a full tile, although a full
   *                tile is still read and written
   *
   * compute_y_16x1 and compute_chroma_8x1 are not defined in this section;
   * they presumably read the channel vectors and write y8x16 / u8x8 / v8x8 -
   * confirm against their definitions.
   */
  .macro kernel_420_16x2  rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
      alias_src_\rgb_fmt
      alias_dst_\yuv_fmt
  
      /* upper row: luma, and start the horizontal chroma sums */
      load_\rgb_fmt\()_16x1   \rgb0, \count
  
      downsample
      compute_y_16x1
      store_y8_16x1   \y0, \count
  
  
      /* lower row: luma, and finish the 2x2 average started by downsample */
      load_\rgb_fmt\()_16x1   \rgb1, \count
      downsample_ars2
      compute_y_16x1
      store_y8_16x1   \y1, \count
  
      compute_chroma_8x1  u, U
      compute_chroma_8x1  v, V
  
      store_chroma_\yuv_fmt\()_8x1 \chroma, \count
  
      /* drop the per-format names so a later expansion can rebind them */
      alias_dst_\yuv_fmt 0
      alias_src_\rgb_fmt 0
  .endm