Blame view

3rdparty/ffmpeg-4.4.4/x264/common/aarch64/asm.S 7.7 KB
f244cbd5   Hu Chunming   ffmpeg支持h264编码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
  /*****************************************************************************
   * asm.S: AArch64 utility macros
   *****************************************************************************
   * Copyright (C) 2008-2024 x264 project
   *
   * Authors: Mans Rullgard <mans@mansr.com>
   *          David Conrad <lessen42@gmail.com>
   *          Janne Grunau <janne-x264@jannau.net>
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 2 of the License, or
   * (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
   *
   * This program is also available under a commercial proprietary license.
   * For more information, contact us at licensing@x264.com.
   *****************************************************************************/
  
  #include "config.h"
  
  /*
   * Symbol-name construction helpers.
   *
   * JOIN pastes two tokens together after macro-expanding them; the extra
   * GLUE level is what lets the arguments expand before ## is applied.
   */
  #define GLUE(lhs, rhs) lhs ## rhs
  #define JOIN(lhs, rhs) GLUE(lhs, rhs)

  /*
   * BASE is the library symbol prefix and SYM_PREFIX the leading token put in
   * front of plain symbol names. Both gain a leading underscore when PREFIX
   * is defined.
   */
  #if defined(PREFIX)
  #   define SYM_PREFIX _
  #   define BASE _x264_
  #else
  #   define SYM_PREFIX
  #   define BASE x264_
  #endif

  /*
   * EXTERN_ASM is the prefix of exported assembly symbols: BASE alone, or BASE
   * followed by the expansion of BIT_DEPTH and an underscore when BIT_DEPTH
   * is defined.
   */
  #if !defined(BIT_DEPTH)
  #   define EXTERN_ASM BASE
  #else
  #   define EXTERN_ASM JOIN(JOIN(BASE, BIT_DEPTH), _)
  #endif

  /* Name-mangling shorthands: each one prepends one of the prefixes above. */
  #define X(sym) JOIN(EXTERN_ASM, sym)
  #define X264(sym) JOIN(BASE, sym)
  #define EXT(sym) JOIN(SYM_PREFIX, sym)
  
  /*
   * Line-prefix guards.
   *
   * Each of ELF, MACH and FUNC is written as the first token of a directive
   * line. It expands to nothing when the matching condition holds, leaving the
   * directive active, and to a lone # otherwise, which turns the rest of that
   * line into an assembler comment.
   */

  /* ELF: active when the compiler defines __ELF__. */
  #ifndef __ELF__
  #   define ELF #
  #else
  #   define ELF
  #endif

  /* MACH: active when the compiler defines __MACH__. */
  #ifndef __MACH__
  #   define MACH #
  #else
  #   define MACH
  #endif

  /* FUNC: active when HAVE_AS_FUNC evaluates to non-zero. */
  #if !HAVE_AS_FUNC
  #   define FUNC #
  #else
  #   define FUNC
  #endif
  
  /*
   * function name, export=0, align=2
   *
   * Opens a function in the .text section and emits its entry label.
   *
   *   name    label to define.
   *   export  0: define the label exactly as written, without .global.
   *           non-zero: define EXTERN_ASM followed by name and make it .global.
   *   align   operand handed unchanged to .align (GNU as treats it as a
   *           power-of-two exponent on AArch64, so the default 2 is 4 bytes).
   *
   * Every invocation also defines a one-shot endfunc macro that closes the
   * function: it emits .size for the same symbol (ELF builds only) and
   * .endfunc (only when HAVE_AS_FUNC is set), then purges itself so that the
   * next function invocation can define endfunc again.
   */
  .macro  function name, export=0, align=2
      /* Closing macro; name and export are substituted when function expands. */
      .macro endfunc
  .if \export
  ELF     .size   EXTERN_ASM\name, . - EXTERN_ASM\name
  .else
  ELF     .size   \name, . - \name
  .endif
  FUNC    .endfunc
          .purgem endfunc
      .endm
          .text
          .align          \align
      /* Exported symbols carry the EXTERN_ASM prefix; local ones do not. */
      .if \export
          .global EXTERN_ASM\name
  ELF     .type   EXTERN_ASM\name, %function
  FUNC    .func   EXTERN_ASM\name
  EXTERN_ASM\name:
      .else
  ELF     .type   \name, %function
  FUNC    .func   \name
  \name:
      .endif
  .endm
  
  /*
   * const name, align=2
   *
   * Opens a read-only data object and emits its label.
   *
   *   name   label to define (used as written; no prefix and no .global).
   *   align  operand handed unchanged to .align.
   *
   * The section is selected with .section .rodata on ELF and .const_data on
   * Mach-O. When neither guard is active no section directive is emitted, so
   * the data is placed in whatever section is current at the point of use.
   *
   * Every invocation also defines a one-shot endconst macro that emits .size
   * for the label (ELF builds only) and then purges itself so that the next
   * const invocation can define endconst again.
   */
  .macro  const   name, align=2
      .macro endconst
  ELF     .size   \name, . - \name
          .purgem endconst
      .endm
  ELF     .section        .rodata
  MACH    .const_data
          .align          \align
  \name:
  .endm
  
  /*
   * movrel rd, val, offset=0
   *
   * Loads the address of symbol val plus the constant offset into register rd.
   *
   *   rd      destination general-purpose register.
   *   val     symbol whose address is wanted.
   *   offset  assemble-time constant added to the address (may be negative).
   *
   * The instruction sequence depends on the build target:
   *   - __APPLE__:          adrp/add with the @PAGE and @PAGEOFF operators.
   *   - PIC and _WIN32:     adrp/add with :lo12:.
   *   - PIC (other):        adrp/add with :lo12:, offset always folded into the
   *                         symbol expression.
   *   - otherwise:          ldr rd, =expr, i.e. the assembler places the
   *                         absolute address in a literal pool and loads it.
   *
   * On the Apple and Windows paths a negative offset is not folded into the
   * symbol expression; the plain symbol address is formed first and the
   * magnitude of the offset is subtracted afterwards.
   * NOTE(review): presumably because those object formats cannot encode a
   * negative addend in the page relocations - confirm.
   */
  .macro  movrel rd, val, offset=0
  #if defined(__APPLE__)
    .if \offset < 0
          adrp            \rd, \val@PAGE
          add             \rd, \rd, \val@PAGEOFF
          /* -(offset) is positive here, so this subtracts the magnitude. */
          sub             \rd, \rd, -(\offset)
    .else
          adrp            \rd, \val+(\offset)@PAGE
          add             \rd, \rd, \val+(\offset)@PAGEOFF
    .endif
  #elif defined(PIC) && defined(_WIN32)
    .if \offset < 0
          adrp            \rd, \val
          add             \rd, \rd, :lo12:\val
          /* -(offset) is positive here, so this subtracts the magnitude. */
          sub             \rd, \rd, -(\offset)
    .else
          adrp            \rd, \val+(\offset)
          add             \rd, \rd, :lo12:\val+(\offset)
    .endif
  #elif defined(PIC)
          adrp            \rd, \val+(\offset)
          add             \rd, \rd, :lo12:\val+(\offset)
  #else
          ldr             \rd, =\val+\offset
  #endif
  .endm
  
  /*
   * Stride constants made available to the assembly sources.
   * NOTE(review): presumably these mirror the C-side constants of the same
   * names and are byte/element strides of the decoded (FDEC) and encoded
   * (FENC) block buffers - confirm against the C headers.
   */
  #define FDEC_STRIDE 32
  #define FENC_STRIDE 16
  
  
  /*
   * SUMSUB_AB sum, sub, a, b
   *
   * Butterfly step: sum = a + b, sub = a - b.
   * Operands are passed through verbatim, so the caller supplies complete
   * operands (including any vector arrangement suffix).
   *
   * sum is written before the subtraction reads a and b; if sum names the same
   * register as a or b, the subtraction sees the already-updated value.
   */
  .macro SUMSUB_AB   sum, sub, a, b
      add         \sum,  \a,  \b
      sub         \sub,  \a,  \b
  .endm
  
  /*
   * unzip t1, t2, s1, s2
   *
   * De-interleaves the element sequence formed by s1 followed by s2:
   * t1 receives the even-indexed elements (uzp1) and t2 the odd-indexed
   * elements (uzp2). Operands are passed through verbatim, so the caller
   * supplies the arrangement suffix.
   *
   * t1 is written before uzp2 reads s1 and s2, so t1 must not name the same
   * register as either source.
   */
  .macro unzip t1, t2, s1, s2
      uzp1        \t1,  \s1,  \s2
      uzp2        \t2,  \s1,  \s2
  .endm
  
  /*
   * transpose t1, t2, s1, s2
   *
   * Elementary 2x2 transpose step on element pairs: t1 receives the
   * even-indexed elements of s1 and s2 interleaved (trn1), t2 the odd-indexed
   * elements interleaved (trn2). Operands are passed through verbatim, so the
   * caller supplies the arrangement suffix.
   *
   * t1 is written before trn2 reads s1 and s2, so t1 must not name the same
   * register as either source.
   */
  .macro transpose t1, t2, s1, s2
      trn1        \t1,  \s1,  \s2
      trn2        \t2,  \s1,  \s2
  .endm
  
  /*
   * transpose4x4.h v0, v1, v2, v3, t0, t1, t2, t3
   *
   * In-place transpose of a 4x4 matrix of 16-bit elements using the 64-bit
   * arrangements (.2s / .4h). Arguments are register names without an
   * arrangement suffix; the macro appends it.
   *
   *   v0-v3  rows on entry, columns on exit (v0 = column 0 ... v3 = column 3).
   *   t0-t3  scratch registers, overwritten.
   *
   * The first two lines swap 32-bit pairs between rows 0/2 and rows 1/3 into
   * the temporaries; the last two swap 16-bit elements back into v0-v3.
   */
  .macro transpose4x4.h v0, v1, v2, v3, t0, t1, t2, t3
      transpose   \t0\().2s,  \t2\().2s,  \v0\().2s,  \v2\().2s
      transpose   \t1\().2s,  \t3\().2s,  \v1\().2s,  \v3\().2s
      transpose   \v0\().4h,  \v1\().4h,  \t0\().4h,  \t1\().4h
      transpose   \v2\().4h,  \v3\().4h,  \t2\().4h,  \t3\().4h
  .endm
  
  /*
   * transpose4x8.h v0, v1, v2, v3, t0, t1, t2, t3
   *
   * Same sequence as transpose4x4.h but on the 128-bit arrangements
   * (.4s / .8h). Four rows of eight 16-bit elements are treated as two 4x4
   * tiles side by side (elements 0-3 and elements 4-7) and each tile is
   * transposed in place. Arguments are register names without an arrangement
   * suffix; the macro appends it.
   *
   *   v0-v3  rows on entry; on exit vN holds column N of the first tile in
   *          elements 0-3 and column N of the second tile in elements 4-7.
   *   t0-t3  scratch registers, overwritten.
   */
  .macro transpose4x8.h v0, v1, v2, v3, t0, t1, t2, t3
      transpose   \t0\().4s,  \t2\().4s,  \v0\().4s,  \v2\().4s
      transpose   \t1\().4s,  \t3\().4s,  \v1\().4s,  \v3\().4s
      transpose   \v0\().8h,  \v1\().8h,  \t0\().8h,  \t1\().8h
      transpose   \v2\().8h,  \v3\().8h,  \t2\().8h,  \t3\().8h
  .endm
  
  
  /*
   * transpose8x8.h r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
   *
   * In-place transpose of an 8x8 matrix of 16-bit elements. Arguments are
   * register names without an arrangement suffix; the macro appends it.
   *
   *   r0-r7  rows on entry, columns on exit (r0 = column 0 ... r7 = column 7).
   *   r8,r9  scratch registers, overwritten.
   *
   * Three trn1/trn2 passes are used, at 16-, 32- and 64-bit granularity.
   * Registers are overwritten as soon as their previous contents are dead, so
   * the statement order must be kept and all ten registers must be distinct.
   */
  .macro transpose8x8.h r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
      /* Pass 1: exchange 16-bit elements between adjacent row pairs. */
      trn1        \r8\().8h,  \r0\().8h,  \r1\().8h
      trn2        \r9\().8h,  \r0\().8h,  \r1\().8h
      trn1        \r1\().8h,  \r2\().8h,  \r3\().8h
      trn2        \r3\().8h,  \r2\().8h,  \r3\().8h
      trn1        \r0\().8h,  \r4\().8h,  \r5\().8h
      trn2        \r5\().8h,  \r4\().8h,  \r5\().8h
      trn1        \r2\().8h,  \r6\().8h,  \r7\().8h
      trn2        \r7\().8h,  \r6\().8h,  \r7\().8h
  
      /* Pass 2: exchange 32-bit pairs between the pass-1 results. */
      trn1        \r4\().4s,  \r0\().4s,  \r2\().4s
      trn2        \r2\().4s,  \r0\().4s,  \r2\().4s
      trn1        \r6\().4s,  \r5\().4s,  \r7\().4s
      trn2        \r7\().4s,  \r5\().4s,  \r7\().4s
      trn1        \r5\().4s,  \r9\().4s,  \r3\().4s
      trn2        \r9\().4s,  \r9\().4s,  \r3\().4s
      trn1        \r3\().4s,  \r8\().4s,  \r1\().4s
      trn2        \r8\().4s,  \r8\().4s,  \r1\().4s
  
      /* Pass 3: exchange 64-bit halves; this leaves column N in rN. */
      trn1        \r0\().2d,  \r3\().2d,  \r4\().2d
      trn2        \r4\().2d,  \r3\().2d,  \r4\().2d
  
      trn1        \r1\().2d,  \r5\().2d,  \r6\().2d
      trn2        \r5\().2d,  \r5\().2d,  \r6\().2d
  
      /* trn2 first here: r2 is still needed as a source for r6. */
      trn2        \r6\().2d,  \r8\().2d,  \r2\().2d
      trn1        \r2\().2d,  \r8\().2d,  \r2\().2d
  
      trn1        \r3\().2d,  \r9\().2d,  \r7\().2d
      trn2        \r7\().2d,  \r9\().2d,  \r7\().2d
  .endm
  
  /*
   * transpose_8x16.b r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
   *
   * Transposes eight rows of sixteen bytes as two 8x8 byte tiles side by side
   * (bytes 0-7 and bytes 8-15 of every row), in place. Arguments are register
   * names without an arrangement suffix; the macro appends it.
   *
   *   r0-r7  rows on entry; on exit rN holds column N of the first tile in
   *          bytes 0-7 and column N of the second tile in bytes 8-15.
   *   t0,t1  scratch registers, overwritten.
   *
   * Three trn1/trn2 passes are used, at 8-, 16- and 32-bit granularity.
   * Registers are overwritten as soon as their previous contents are dead, so
   * the statement order must be kept and all ten registers must be distinct.
   */
  .macro  transpose_8x16.b r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
      /* Pass 1: exchange bytes between adjacent row pairs. */
      trn1        \t0\().16b, \r0\().16b, \r1\().16b
      trn2        \t1\().16b, \r0\().16b, \r1\().16b
      trn1        \r1\().16b, \r2\().16b, \r3\().16b
      trn2        \r3\().16b, \r2\().16b, \r3\().16b
      trn1        \r0\().16b, \r4\().16b, \r5\().16b
      trn2        \r5\().16b, \r4\().16b, \r5\().16b
      trn1        \r2\().16b, \r6\().16b, \r7\().16b
      trn2        \r7\().16b, \r6\().16b, \r7\().16b
  
      /* Pass 2: exchange 16-bit pairs between the pass-1 results. */
      trn1        \r4\().8h,  \r0\().8h,  \r2\().8h
      trn2        \r2\().8h,  \r0\().8h,  \r2\().8h
      trn1        \r6\().8h,  \r5\().8h,  \r7\().8h
      trn2        \r7\().8h,  \r5\().8h,  \r7\().8h
      trn1        \r5\().8h,  \t1\().8h,  \r3\().8h
      trn2        \t1\().8h,  \t1\().8h,  \r3\().8h
      trn1        \r3\().8h,  \t0\().8h,  \r1\().8h
      trn2        \t0\().8h,  \t0\().8h,  \r1\().8h
  
      /* Pass 3: exchange 32-bit groups; this leaves column N in rN. */
      trn1        \r0\().4s,  \r3\().4s,  \r4\().4s
      trn2        \r4\().4s,  \r3\().4s,  \r4\().4s
  
      trn1        \r1\().4s,  \r5\().4s,  \r6\().4s
      trn2        \r5\().4s,  \r5\().4s,  \r6\().4s
  
      /* trn2 first here: r2 is still needed as a source for r6. */
      trn2        \r6\().4s,  \t0\().4s,  \r2\().4s
      trn1        \r2\().4s,  \t0\().4s,  \r2\().4s
  
      trn1        \r3\().4s,  \t1\().4s,  \r7\().4s
      trn2        \r7\().4s,  \t1\().4s,  \r7\().4s
  .endm
  
  /*
   * transpose_4x16.b r0, r1, r2, r3, t4, t5, t6, t7
   *
   * Transposes four rows of sixteen bytes as four 4x4 byte tiles side by side,
   * in place. Arguments are register names without an arrangement suffix.
   *
   *   r0-r3  rows on entry; on exit rN holds column N of every tile, one tile
   *          per 4-byte group.
   *   t4-t7  scratch registers, overwritten.
   *
   * Built from the transpose helper (one trn1/trn2 pair per line), which
   * yields the same instruction sequence as spelling the pairs out by hand.
   */
  .macro  transpose_4x16.b r0, r1, r2, r3, t4, t5, t6, t7
      /* Byte-level exchange of rows 0/1 and rows 2/3 into the temporaries. */
      transpose   \t4\().16b, \t5\().16b, \r0\().16b, \r1\().16b
      transpose   \t6\().16b, \t7\().16b, \r2\().16b, \r3\().16b

      /* 16-bit exchange back into the row registers. */
      transpose   \r0\().8h,  \r2\().8h,  \t4\().8h,  \t6\().8h
      transpose   \r1\().8h,  \r3\().8h,  \t5\().8h,  \t7\().8h
  .endm
  
  /*
   * transpose_4x8.b r0, r1, r2, r3, t4, t5, t6, t7
   *
   * Transposes four rows of eight bytes as two 4x4 byte tiles side by side,
   * in place, using the 64-bit arrangements (.8b / .4h). Arguments are
   * register names without an arrangement suffix.
   *
   *   r0-r3  rows on entry; on exit rN holds column N of both tiles, one tile
   *          per 4-byte group.
   *   t4-t7  scratch registers, overwritten.
   *
   * Built from the transpose helper (one trn1/trn2 pair per line), which
   * yields the same instruction sequence as spelling the pairs out by hand.
   */
  .macro  transpose_4x8.b  r0, r1, r2, r3, t4, t5, t6, t7
      /* Byte-level exchange of rows 0/1 and rows 2/3 into the temporaries. */
      transpose   \t4\().8b,  \t5\().8b,  \r0\().8b,  \r1\().8b
      transpose   \t6\().8b,  \t7\().8b,  \r2\().8b,  \r3\().8b

      /* 16-bit exchange back into the row registers. */
      transpose   \r0\().4h,  \r2\().4h,  \t4\().4h,  \t6\().4h
      transpose   \r1\().4h,  \r3\().4h,  \t5\().4h,  \t7\().4h
  .endm