Blame view

3rdparty/ffmpeg-4.4.4/x264/common/aarch64/cabac-a.S 4.3 KB
6fdcb6a5   Hu Chunming   初次提交,代码大体完成编写,完善中
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
  /*****************************************************************************
   * cabac-a.S: aarch64 cabac
   *****************************************************************************
   * Copyright (C) 2014-2024 x264 project
   *
   * Authors: Janne Grunau <janne-x264@jannau.net>
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 2 of the License, or
   * (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
   *
   * This program is also available under a commercial proprietary license.
   * For more information, contact us at licensing@x264.com.
   *****************************************************************************/
  
  #include "asm.S"
  #include "asm-offsets.h"
  
  // w11 holds x264_cabac_t.i_low
  // w12 holds x264_cabac_t.i_range
  
  // cabac_encode_decision_asm: encode one context-coded bin and renormalise.
  //
  // In:   x0 = base pointer of the CABAC state (all CABAC_* offsets are applied to it)
  //       w1 = context index (byte index into the state array at CABAC_STATE)
  //       w2 = b, the bin value being encoded
  // Out:  nothing in registers; the state byte, i_low, i_range, i_queue and, when a
  //       byte is flushed, i_bytes_outstanding / p / the output buffer are updated.
  // Uses: w2-w14 as scratch and the condition flags; the stack is not touched.
  //
  // The labels cabac_encode_renorm and cabac_putbyte below are not local labels;
  // cabac_putbyte is also the branch target of cabac_encode_bypass_asm and
  // cabac_encode_terminal_asm. Its entry contract is:
  //       x0 = CABAC state, w2 = already-updated i_queue,
  //       w11 = i_low, w12 = i_range.
  // NOTE(review): function/endfunc/movrel/X264 are macros defined outside this
  // file (presumably in asm.S) - their exact expansion is not visible here.
  function cabac_encode_decision_asm, export=1
      add         w10, w1,  #CABAC_STATE          // w10 = offset of this context's state byte
      ldrb        w3,  [x0,  w10, uxtw]           // w3 = i_state (zero-extended byte)
      ldr         w12, [x0,  #CABAC_I_RANGE]      // w12 = i_range
      movrel      x8,  X264(cabac_range_lps), -4  // table base biased by -4 bytes (see ldrb below)
      movrel      x9,  X264(cabac_transition)     // x9 = state transition table
      ubfx        x4,  x3,  #1,  #7               // x4 = i_state >> 1 (bits 7:1)
      asr         w5,  w12, #6                    // w5 = i_range >> 6
      add         x8,  x8,  x4, lsl #2            // advance to row (i_state >> 1), 4 bytes per row
      orr         w14, w2,  w3, lsl #1            // w14 = (i_state << 1) | b, transition table index
      // Effective byte index is (i_state >> 1) * 4 + (i_range >> 6) - 4; the -4
      // comes from the biased base. Presumably i_range >> 6 is in 4..7 here, so
      // the bias replaces an explicit subtract - TODO confirm against the C code.
      ldrb        w4,  [x8,  w5,  uxtw]           // w4 = i_range_lps
      ldr         w11, [x0,  #CABAC_I_LOW]        // w11 = i_low
      eor         w6,  w2,  w3                    // b ^ i_state
      ldrb        w9,  [x9,  w14, uxtw]           // w9 = next state byte
      sub         w12, w12, w4                    // w12 = i_range - i_range_lps
      add         w7,  w11, w12                   // w7 = i_low + (i_range - i_range_lps)
      tst         w6,  #1                         // (b ^ i_state) & 1
      // If bit 0 of b and i_state differ (ne): i_range = i_range_lps and
      // i_low += i_range - i_range_lps. Otherwise keep i_low and the reduced i_range.
      csel        w12, w4, w12, ne
      csel        w11, w7, w11, ne
      strb        w9,  [x0,  w10, uxtw]           // write back the new i_state
  
  // Renormalisation. Expects x0 = CABAC state, w11 = i_low, w12 = i_range.
  cabac_encode_renorm:
      ldr         w2,  [x0, #CABAC_I_QUEUE]       // w2 = i_queue
      clz         w5,  w12                        // leading zero count of i_range
      sub         w5,  w5,  #23                   // shift that moves the top set bit of i_range to bit 8
      lsl         w11, w11, w5                    // i_low   <<= shift
      lsl         w12, w12, w5                    // i_range <<= shift
      adds        w2,  w2,  w5                    // i_queue += shift (sets flags for b.ge)
      b.ge        cabac_putbyte                   // signed i_queue >= 0: flush via cabac_putbyte
  
      stp         w11, w12, [x0, #CABAC_I_LOW]    // store i_low, i_range
      str         w2,  [x0, #CABAC_I_QUEUE]       // store i_queue
      ret
  
  // Byte output path, shared with the bypass and terminal encoders.
  // Entry: x0 = CABAC state, w2 = i_queue (already incremented), w11 = i_low,
  // w12 = i_range. Always returns to the original caller with ret.
  .align 5
  cabac_putbyte:
      ldr         w6,  [x0, #CABAC_I_BYTES_OUTSTANDING]
      add         w14, w2,  #10                   // w14 = i_queue + 10, bit position where "out" starts
      mov         w13, #-1
      sub         w2,  w2,  #8                    // i_queue -= 8
      asr         w4,  w11, w14           // out = i_low >> (i_queue + 10)
      lsl         w13, w13, w14                   // mask of the bits at and above i_queue + 10
      subs        w5,  w4,  #0xff                 // Z set iff out == 0xff (bic below leaves flags intact)
      bic         w11, w11, w13                   // drop the extracted bits from i_low
      cinc        w6,  w6,  eq                    // out == 0xff: only count it as outstanding
      b.eq        0f                              // ... and skip writing to the buffer
  
  1:
      ldr         x7,  [x0, #CABAC_P]             // x7 = current output pointer p
      asr         w5,  w4,  #8            // carry = out >> 8
      ldurb       w8,  [x7, #-1]
      add         w8,  w8,  w5                    // propagate the carry into the last written byte
      sub         w5,  w5,  #1                    // carry - 1: low byte 0xff if carry == 0, 0x00 if carry == 1
      sturb       w8,  [x7, #-1]                  // p[-1] += carry
      cbz         w6,  3f                         // no outstanding bytes: go straight to writing out
  2:
      // Emit one (carry - 1) byte per outstanding byte, counting w6 down.
      subs        w6,  w6,  #1
      strb        w5,  [x7],  #1                  // *p++ = carry - 1 (low 8 bits)
      b.gt        2b
  3:
      strb        w4,  [x7],  #1                  // *p++ = out (low 8 bits)
      str         x7,  [x0, #CABAC_P]             // write back the advanced p
  0:
      // Both stp stores write two adjacent 32-bit fields starting at the named offset.
      stp         w11, w12, [x0, #CABAC_I_LOW]    // store i_low, i_range
      stp         w2,  w6,  [x0, #CABAC_I_QUEUE]  // store i_queue, i_bytes_outstanding
      ret
  endfunc
  
  // cabac_encode_bypass_asm: encode one bypass bin.
  //
  // In:   x0 = base pointer of the CABAC state, w1 = b
  // Does: i_low = (i_low << 1) + (b & i_range); i_queue += 1.
  //       If the new i_queue is >= 0 (signed), control transfers to
  //       cabac_putbyte with w2 = i_queue, w11 = i_low, w12 = i_range, which
  //       stores the state and returns to the caller. Otherwise only i_low and
  //       i_queue are written back (i_range is unchanged by this routine).
  // Uses: w2, w3, w11, w12 and the condition flags; the stack is not touched.
  function cabac_encode_bypass_asm, export=1, align=5
      // i_low and i_range are fetched as a pair; this relies on the same field
      // adjacency as the stp stores of i_low/i_range elsewhere in this file.
      ldp         w11, w12, [x0, #CABAC_I_LOW]    // w11 = i_low, w12 = i_range
      ldr         w2,  [x0, #CABAC_I_QUEUE]       // w2  = i_queue
      and         w3,  w12, w1                    // w3  = b & i_range
      add         w11, w3,  w11, lsl #1           // i_low = (i_low << 1) + (b & i_range)
      adds        w2,  w2,  #1                    // ++i_queue, flags consumed by b.ge
      b.ge        cabac_putbyte                   // i_queue >= 0: flush a byte
      str         w2,  [x0, #CABAC_I_QUEUE]       // store i_queue
      str         w11, [x0, #CABAC_I_LOW]         // store i_low
      ret
  endfunc
  
  // cabac_encode_terminal_asm: encode the terminal bin on its no-termination path.
  //
  // In:   x0 = base pointer of the CABAC state
  // Does: i_range -= 2. If bit 8 of the result is still set, only i_range is
  //       written back. Otherwise i_low and i_range are both shifted left by
  //       one and i_queue is incremented; when the new i_queue is >= 0
  //       (signed) control transfers to cabac_putbyte with w2 = i_queue,
  //       w11 = i_low, w12 = i_range, which stores the state and returns to
  //       the caller.
  // Uses: w2, w11, w12 and the condition flags; the stack is not touched.
  function cabac_encode_terminal_asm, export=1, align=5
      ldr         w12, [x0, #CABAC_I_RANGE]
      sub         w12, w12, #2                    // i_range -= 2
      tbnz        w12, #8, 2f                     // bit 8 still set: no renormalisation
  
      // Bit 8 is clear: renormalise by exactly one bit.
      ldr         w11, [x0, #CABAC_I_LOW]
      ldr         w2,  [x0, #CABAC_I_QUEUE]
      lsl         w11, w11, #1                    // i_low   <<= 1
      lsl         w12, w12, #1                    // i_range <<= 1
      adds        w2,  w2,  #1                    // ++i_queue, flags consumed by b.ge
      b.ge        cabac_putbyte                   // i_queue >= 0: flush a byte
  
      stp         w11, w12, [x0, #CABAC_I_LOW]    // store i_low, i_range
      str         w2,  [x0, #CABAC_I_QUEUE]       // store i_queue
      ret
  2:
      str         w12, [x0, #CABAC_I_RANGE]       // only i_range changed
      ret
  endfunc