cabac-a.S
4.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*****************************************************************************
* cabac-a.S: aarch64 cabac
*****************************************************************************
* Copyright (C) 2014-2024 x264 project
*
* Authors: Janne Grunau <janne-x264@jannau.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
#include "asm.S"
#include "asm-offsets.h"
// w11 holds x264_cabac_t.i_low
// w12 holds x264_cabac_t.i_range
function cabac_encode_decision_asm, export=1
add w10, w1, #CABAC_STATE
ldrb w3, [x0, w10, uxtw] // i_state
ldr w12, [x0, #CABAC_I_RANGE]
movrel x8, X264(cabac_range_lps), -4
movrel x9, X264(cabac_transition)
ubfx x4, x3, #1, #7
asr w5, w12, #6
add x8, x8, x4, lsl #2
orr w14, w2, w3, lsl #1
ldrb w4, [x8, w5, uxtw] // i_range_lps
ldr w11, [x0, #CABAC_I_LOW]
eor w6, w2, w3 // b ^ i_state
ldrb w9, [x9, w14, uxtw]
sub w12, w12, w4
add w7, w11, w12
tst w6, #1 // (b ^ i_state) & 1
csel w12, w4, w12, ne
csel w11, w7, w11, ne
strb w9, [x0, w10, uxtw] // i_state
cabac_encode_renorm:
ldr w2, [x0, #CABAC_I_QUEUE]
clz w5, w12
sub w5, w5, #23
lsl w11, w11, w5
lsl w12, w12, w5
adds w2, w2, w5
b.ge cabac_putbyte
stp w11, w12, [x0, #CABAC_I_LOW] // store i_low, i_range
str w2, [x0, #CABAC_I_QUEUE]
ret
.align 5
cabac_putbyte:
ldr w6, [x0, #CABAC_I_BYTES_OUTSTANDING]
add w14, w2, #10
mov w13, #-1
sub w2, w2, #8
asr w4, w11, w14 // out
lsl w13, w13, w14
subs w5, w4, #0xff
bic w11, w11, w13
cinc w6, w6, eq
b.eq 0f
1:
ldr x7, [x0, #CABAC_P]
asr w5, w4, #8 // carry
ldurb w8, [x7, #-1]
add w8, w8, w5
sub w5, w5, #1
sturb w8, [x7, #-1]
cbz w6, 3f
2:
subs w6, w6, #1
strb w5, [x7], #1
b.gt 2b
3:
strb w4, [x7], #1
str x7, [x0, #CABAC_P]
0:
stp w11, w12, [x0, #CABAC_I_LOW] // store i_low, i_range
stp w2, w6, [x0, #CABAC_I_QUEUE] // store i_queue, i_bytes_outstanding
ret
endfunc
function cabac_encode_bypass_asm, export=1, align=5
ldr w12, [x0, #CABAC_I_RANGE]
ldr w11, [x0, #CABAC_I_LOW]
ldr w2, [x0, #CABAC_I_QUEUE]
and w1, w1, w12
add w11, w1, w11, lsl #1
adds w2, w2, #1
b.ge cabac_putbyte
str w11, [x0, #CABAC_I_LOW]
str w2, [x0, #CABAC_I_QUEUE]
ret
endfunc
function cabac_encode_terminal_asm, export=1, align=5
ldr w12, [x0, #CABAC_I_RANGE]
sub w12, w12, #2
tbz w12, #8, 1f
str w12, [x0, #CABAC_I_RANGE]
ret
1:
ldr w2, [x0, #CABAC_I_QUEUE]
ldr w11, [x0, #CABAC_I_LOW]
lsl w12, w12, #1
adds w2, w2, #1
lsl w11, w11, #1
b.ge cabac_putbyte
stp w11, w12, [x0, #CABAC_I_LOW] // store i_low, i_range
str w2, [x0, #CABAC_I_QUEUE]
ret
endfunc