  /*
   * Alpha optimized DSP utils
   * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
   *
   * This file is part of FFmpeg.
   *
   * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
   * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
   * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
  #include "regdef.h"
  
  /* Some nicer register names.  */
  #define ta t10
  #define tb t11
  #define tc t12
  #define td AT   /* the assembler temporary ($at); usable only under ".set noat" below */
  /* Danger: these overlap with the argument list and the return value */
  #define te a5
  #define tf a4   /* aliases the h argument; tf/tg/th are unused in this file */
  #define tg a3   /* aliases line_size */
  #define th v0   /* aliases the return-value / SAD accumulator */
  
          .set noat               /* allow explicit use of AT as the td scratch register */
          .set noreorder          /* keep the hand-tuned instruction schedule; do not let the assembler reorder */
          .arch pca56             /* pca56 provides the MVI extension (perr) used below */
          .text
  
  /*****************************************************************************
   * pix_abs16x16_mvi_asm: sum of absolute differences (SAD) between a
   * 16-pixel-wide, h-row block at pix1 and one at pix2, computed with the
   * Alpha MVI "perr" instruction (byte-wise absolute-difference sum of two
   * 8-byte quadwords).
   *
   * Arguments as actually consumed by this code (standard Alpha convention):
   *   a0 = opaque first argument, never read (reused below as scratch)
   *   a1 = pix1  (loaded with plain ldq -- assumed 8-byte aligned)
   *   a2 = pix2  (may be misaligned; handled by the $unaligned path)
   *   a3 = line_size (byte stride, applied to both pix1 and pix2)
   *   a4 = h     (row count; see loop-exit notes at each loop's bottom)
   * Returns the accumulated SAD in v0.
   * (NOTE(review): the comment previously advertised
   *  "int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)",
   *  which does not match the registers the code reads -- a4 is used as a
   *  row counter and a0 is clobbered as scratch in the aligned loop.)
   *
   * This code is written with a pca56 in mind. For ev6, one should
   * really take the increased latency of 3 cycles for MVI instructions
   * into account.
   *
   * It is important to keep the loading and first use of a register as
   * far apart as possible, because if a register is accessed before it
   * has been fetched from memory, the CPU will stall.
   */
          .align 4
          .globl pix_abs16x16_mvi_asm
          .ent pix_abs16x16_mvi_asm
  pix_abs16x16_mvi_asm:
          .frame sp, 0, ra, 0
          .prologue 0
  
          and     a2, 7, t0       # t0 = pix2 & 7: is pix2 8-byte aligned?
          clr     v0              # SAD accumulator = 0
          beq     t0, $aligned
          .align 4
  $unaligned:
          /* Two rows per iteration.  Registers:
             line 0:
             t0:  left_u -> left lo -> left
             t1:  mid
             t2:  right_u -> right hi -> right
             t3:  ref left
             t4:  ref right
             line 1:
             t5:  left_u -> left lo -> left
             t6:  mid
             t7:  right_u -> right hi -> right
             t8:  ref left
             t9:  ref right
             temp:
             ta:  left hi
             tb:  right lo
             tc:  error left
             td:  error right  */
  
          /* load line 0 */
          ldq_u   t0, 0(a2)       # left_u
          ldq_u   t1, 8(a2)       # mid
          ldq_u   t2, 16(a2)      # right_u
          ldq     t3, 0(a1)       # ref left
          ldq     t4, 8(a1)       # ref right
          addq    a1, a3, a1      # pix1 += line_size
          addq    a2, a3, a2      # pix2 += line_size
          /* load line 1 */
          ldq_u   t5, 0(a2)       # left_u
          ldq_u   t6, 8(a2)       # mid
          ldq_u   t7, 16(a2)      # right_u
          ldq     t8, 0(a1)       # ref left
          ldq     t9, 8(a1)       # ref right
          addq    a1, a3, a1      # pix1 += line_size
          addq    a2, a3, a2      # pix2 += line_size
          /* calc line 0 */
          /* NOTE(review): by now a2 has been advanced by 2*line_size, yet it
             is used below as the extql/extqh shift source for data loaded at
             the OLD a2.  Only the low 3 bits of a2 matter to ext*, so this is
             correct only if line_size is a multiple of 8 (pix2's misalignment
             is then identical on every row) -- confirm callers guarantee an
             8-byte-multiple stride. */
          extql   t0, a2, t0      # left lo
          extqh   t1, a2, ta      # left hi
          extql   t1, a2, tb      # right lo
          or      t0, ta, t0      # left  = realigned quad 0 of pix2 row 0
          extqh   t2, a2, t2      # right hi
          perr    t3, t0, tc      # error left  = SAD of left 8 bytes
          or      t2, tb, t2      # right = realigned quad 1 of pix2 row 0
          perr    t4, t2, td      # error right = SAD of right 8 bytes
          addq    v0, tc, v0      # add error left
          addq    v0, td, v0      # add error right
          /* calc line 1 */
          extql   t5, a2, t5      # left lo
          extqh   t6, a2, ta      # left hi
          extql   t6, a2, tb      # right lo
          or      t5, ta, t5      # left
          extqh   t7, a2, t7      # right hi
          perr    t8, t5, tc      # error left
          or      t7, tb, t7      # right
          perr    t9, t7, td      # error right
          addq    v0, tc, v0      # add error left
          addq    v0, td, v0      # add error right
          /* loop */
          subq    a4,  2, a4      # h -= 2
          bne     a4, $unaligned  # exits only when a4 reaches exactly 0:
                                  # h must be a positive multiple of 2
          ret
  
          .align 4
  $aligned:
          /* Four rows per iteration; pix2 is 8-byte aligned, so plain ldq
             and perr suffice -- no realignment needed. */
          /* load line 0 */
          ldq     t0, 0(a2)       # left
          ldq     t1, 8(a2)       # right
          addq    a2, a3, a2      # pix2 += line_size
          ldq     t2, 0(a1)       # ref left
          ldq     t3, 8(a1)       # ref right
          addq    a1, a3, a1      # pix1 += line_size
          /* load line 1 */
          ldq     t4, 0(a2)       # left
          ldq     t5, 8(a2)       # right
          addq    a2, a3, a2      # pix2 += line_size
          ldq     t6, 0(a1)       # ref left
          ldq     t7, 8(a1)       # ref right
          addq    a1, a3, a1      # pix1 += line_size
          /* load line 2 */
          ldq     t8, 0(a2)       # left
          ldq     t9, 8(a2)       # right
          addq    a2, a3, a2      # pix2 += line_size
          ldq     ta, 0(a1)       # ref left
          ldq     tb, 8(a1)       # ref right
          addq    a1, a3, a1      # pix1 += line_size
          /* load line 3 */
          ldq     tc, 0(a2)       # left
          ldq     td, 8(a2)       # right
          addq    a2, a3, a2      # pix2 += line_size
          ldq     te, 0(a1)       # ref left
          ldq     a0, 8(a1)       # ref right (a0 reused as scratch; original
                                  #            first argument is dead here)
          /* calc line 0 */
          perr    t0, t2, t0      # error left
          addq    a1, a3, a1      # pix1 += line_size
          perr    t1, t3, t1      # error right
          addq    v0, t0, v0      # add error left
          /* calc line 1 */
          perr    t4, t6, t0      # error left
          addq    v0, t1, v0      # add error right
          perr    t5, t7, t1      # error right
          addq    v0, t0, v0      # add error left
          /* calc line 2 */
          perr    t8, ta, t0      # error left
          addq    v0, t1, v0      # add error right
          perr    t9, tb, t1      # error right
          addq    v0, t0, v0      # add error left
          /* calc line 3 */
          perr    tc, te, t0      # error left
          addq    v0, t1, v0      # add error right
          perr    td, a0, t1      # error right
          addq    v0, t0, v0      # add error left
          addq    v0, t1, v0      # add error right
          /* loop */
          subq    a4,  4, a4      # h -= 4
          bne     a4, $aligned    # exits only when a4 reaches exactly 0:
                                  # h must be a positive multiple of 4
          ret
          .end pix_abs16x16_mvi_asm