rectangle.h
5.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
/*****************************************************************************
* rectangle.h: rectangle filling
*****************************************************************************
* Copyright (C) 2003-2024 x264 project
*
* Authors: Fiona Glaser <fiona@x264.com>
* Loren Merritt <lorenm@u.washington.edu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
/* Fill a rectangle of h rows, each w bytes wide, starting at dst.
 *
 * This function should only be called with constant w / h / s arguments!
 *
 *   dst  first byte of the top row
 *   w    row width in bytes: 2, 4, 8 or 16 (any other value hits assert(0))
 *   h    number of rows; the unrolled paths return after 1 or 2 rows and
 *        otherwise write 4 rows
 *   s    size in bytes of one element; consecutive rows are s*8 bytes apart
 *   v    element value; for s == 1 or s == 2 it is replicated so that every
 *        16/32/64/128-bit store holds whole copies of the element
 */
static ALWAYS_INLINE void x264_macroblock_cache_rect( void *dst, int w, int h, int s, uint32_t v )
{
    uint8_t *d = dst;
    /* Pre-replicated copies of v for each store width. */
    uint16_t v2 = s >= 2 ? v : v * 0x101;
    uint32_t v4 = s >= 4 ? v : s >= 2 ? v * 0x10001 : v * 0x1010101;
    uint64_t v8 = v4 + ((uint64_t)v4 << 32);
    /* From here on s is the byte distance between consecutive rows. */
    s *= 8;
    if( w == 2 )
    {
        M16( d+s*0 ) = v2;
        if( h == 1 ) return;
        M16( d+s*1 ) = v2;
        if( h == 2 ) return;
        M16( d+s*2 ) = v2;
        M16( d+s*3 ) = v2;
    }
    else if( w == 4 )
    {
        M32( d+s*0 ) = v4;
        if( h == 1 ) return;
        M32( d+s*1 ) = v4;
        if( h == 2 ) return;
        M32( d+s*2 ) = v4;
        M32( d+s*3 ) = v4;
    }
    else if( w == 8 )
    {
        if( WORD_SIZE == 8 )
        {
            /* One 64-bit store per row. */
            M64( d+s*0 ) = v8;
            if( h == 1 ) return;
            M64( d+s*1 ) = v8;
            if( h == 2 ) return;
            M64( d+s*2 ) = v8;
            M64( d+s*3 ) = v8;
        }
        else
        {
            /* Two 32-bit stores per row. */
            M32( d+s*0+0 ) = v4;
            M32( d+s*0+4 ) = v4;
            if( h == 1 ) return;
            M32( d+s*1+0 ) = v4;
            M32( d+s*1+4 ) = v4;
            if( h == 2 ) return;
            M32( d+s*2+0 ) = v4;
            M32( d+s*2+4 ) = v4;
            M32( d+s*3+0 ) = v4;
            M32( d+s*3+4 ) = v4;
        }
    }
    else if( w == 16 )
    {
        /* height 1, width 16 doesn't occur */
        assert( h != 1 );
#if HAVE_VECTOREXT && defined(__SSE__)
        /* Build the vector from the replicated v4 (identical to v when
         * s >= 4) so this path writes the same bytes as the scalar
         * fallbacks below for every element size. */
        v4si v16 = {v4,v4,v4,v4};
        M128( d+s*0+0 ) = (__m128)v16;
        M128( d+s*1+0 ) = (__m128)v16;
        if( h == 2 ) return;
        M128( d+s*2+0 ) = (__m128)v16;
        M128( d+s*3+0 ) = (__m128)v16;
#else
        if( WORD_SIZE == 8 )
        {
            /* Two rows per iteration; h reaches 0 only for even h. */
            do
            {
                M64( d+s*0+0 ) = v8;
                M64( d+s*0+8 ) = v8;
                M64( d+s*1+0 ) = v8;
                M64( d+s*1+8 ) = v8;
                h -= 2;
                d += s*2;
            } while( h );
        }
        else
        {
            /* One row per iteration, four 32-bit stores each. */
            do
            {
                M32( d+ 0 ) = v4;
                M32( d+ 4 ) = v4;
                M32( d+ 8 ) = v4;
                M32( d+12 ) = v4;
                d += s;
            } while( --h );
        }
#endif
    }
    else
    {
        /* Unsupported width. */
        assert(0);
    }
}
/* Dispatch tables for the mv / mvd / ref cache fills. Each table holds 10
 * pointers to functions taking a destination pointer and a 32-bit value.
 * The inline wrappers in this header index them with
 * width + (height<<1) - 3 whenever x264_nonconstant_p() reports width or
 * height as non-constant. The tables are only declared here; their
 * definitions live elsewhere.
 * NOTE(review): each symbol name is routed through x264_template();
 * presumably that adds a build-specific prefix -- confirm against its
 * definition. */
#define x264_cache_mv_func_table x264_template(cache_mv_func_table)
extern void (*x264_cache_mv_func_table[10])(void *, uint32_t);
#define x264_cache_mvd_func_table x264_template(cache_mvd_func_table)
extern void (*x264_cache_mvd_func_table[10])(void *, uint32_t);
#define x264_cache_ref_func_table x264_template(cache_ref_func_table)
extern void (*x264_cache_ref_func_table[10])(void *, uint32_t);
/* Pointer variant of x264_macroblock_cache_mv(): accesses mv through M32()
 * and passes the result as the mv argument. */
#define x264_macroblock_cache_mv_ptr( a, x, y, w, h, l, mv ) x264_macroblock_cache_mv( a, x, y, w, h, l, M32( mv ) )
/* Fill part of h->mb.cache.mv[i_list] with mv, starting at index
 * X264_SCAN8_0+x+8*y. When both width and height are seen as constant the
 * fill is done inline through x264_macroblock_cache_rect() with a row width
 * of width*4 bytes and an element size of 4; otherwise it is dispatched
 * through x264_cache_mv_func_table. */
static ALWAYS_INLINE void x264_macroblock_cache_mv( x264_t *h, int x, int y, int width, int height, int i_list, uint32_t mv )
{
    void *target = &h->mb.cache.mv[i_list][X264_SCAN8_0+x+8*y];

    if( !x264_nonconstant_p( width ) && !x264_nonconstant_p( height ) )
    {
        /* Constant dimensions: let the rect fill fold down inline. */
        x264_macroblock_cache_rect( target, width*4, height, 4, mv );
        return;
    }

    /* Runtime dimensions: go through the function table. */
    x264_cache_mv_func_table[width + (height<<1)-3]( target, mv );
}
/* Fill part of h->mb.cache.mvd[i_list] with mvd, starting at index
 * X264_SCAN8_0+x+8*y. When both width and height are seen as constant the
 * fill is done inline through x264_macroblock_cache_rect() with a row width
 * of width*2 bytes and an element size of 2; otherwise it is dispatched
 * through x264_cache_mvd_func_table. */
static ALWAYS_INLINE void x264_macroblock_cache_mvd( x264_t *h, int x, int y, int width, int height, int i_list, uint16_t mvd )
{
    void *target = &h->mb.cache.mvd[i_list][X264_SCAN8_0+x+8*y];

    if( !x264_nonconstant_p( width ) && !x264_nonconstant_p( height ) )
    {
        /* Constant dimensions: let the rect fill fold down inline. */
        x264_macroblock_cache_rect( target, width*2, height, 2, mvd );
        return;
    }

    /* Runtime dimensions: go through the function table. */
    x264_cache_mvd_func_table[width + (height<<1)-3]( target, mvd );
}
/* Fill part of h->mb.cache.ref[i_list] with ref, starting at index
 * X264_SCAN8_0+x+8*y. ref is converted to uint8_t before being passed on.
 * When both width and height are seen as constant the fill is done inline
 * through x264_macroblock_cache_rect() with a row width of width bytes and
 * an element size of 1; otherwise it is dispatched through
 * x264_cache_ref_func_table. */
static ALWAYS_INLINE void x264_macroblock_cache_ref( x264_t *h, int x, int y, int width, int height, int i_list, int8_t ref )
{
    void *target = &h->mb.cache.ref[i_list][X264_SCAN8_0+x+8*y];
    /* Drop the sign once so both paths receive the same byte value. */
    uint8_t ref_byte = (uint8_t)ref;

    if( !x264_nonconstant_p( width ) && !x264_nonconstant_p( height ) )
    {
        /* Constant dimensions: let the rect fill fold down inline. */
        x264_macroblock_cache_rect( target, width, height, 1, ref_byte );
        return;
    }

    /* Runtime dimensions: go through the function table. */
    x264_cache_ref_func_table[width + (height<<1)-3]( target, ref_byte );
}
/* Fill part of h->mb.cache.skip with b_skip, starting at index
 * X264_SCAN8_0+x+8*y, via x264_macroblock_cache_rect() with a row width of
 * width bytes and an element size of 1. */
static ALWAYS_INLINE void x264_macroblock_cache_skip( x264_t *h, int x, int y, int width, int height, int b_skip )
{
    void *target = &h->mb.cache.skip[X264_SCAN8_0+x+8*y];

    x264_macroblock_cache_rect( target, width, height, 1, b_skip );
}
/* Fill a 2x2 byte region of h->mb.cache.intra4x4_pred_mode with i_mode,
 * starting at index X264_SCAN8_0+x+8*y, via x264_macroblock_cache_rect()
 * with an element size of 1. */
static ALWAYS_INLINE void x264_macroblock_cache_intra8x8_pred( x264_t *h, int x, int y, int i_mode )
{
    void *target = &h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+x+8*y];

    x264_macroblock_cache_rect( target, 2, 2, 1, i_mode );
}