Blame view

ffmpeg-4.2.2/libavcodec/x86/huffyuvencdsp.asm 2.95 KB
aac5773f   hucm   功能基本完成,接口待打磨
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
  ;************************************************************************
  ;* SIMD-optimized HuffYUV encoding functions
  ;* Copyright (c) 2000, 2001 Fabrice Bellard
  ;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  ;*
  ;* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
  ;* Conversion to NASM format by Tiancheng "Timothy" Gu <timothygu99@gmail.com>
  ;*
  ;* This file is part of FFmpeg.
  ;*
  ;* FFmpeg is free software; you can redistribute it and/or
  ;* modify it under the terms of the GNU Lesser General Public
  ;* License as published by the Free Software Foundation; either
  ;* version 2.1 of the License, or (at your option) any later version.
  ;*
  ;* FFmpeg is distributed in the hope that it will be useful,
  ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  ;* Lesser General Public License for more details.
  ;*
  ;* You should have received a copy of the GNU Lesser General Public
  ;* License along with FFmpeg; if not, write to the Free Software
  ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  ;******************************************************************************
  
  %include "libavutil/x86/x86util.asm"
  
  SECTION .text
  
  %include "libavcodec/x86/huffyuvdsp_template.asm"
  
  ;------------------------------------------------------------------------------
  ; void ff_diff_int16(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
  ;                    unsigned mask, int w);
  ;
  ; DIFF_INT16 emits one diff_int16 function for whichever instruction set the
  ; preceding INIT_* line selected. All per-element work is done by INT16_LOOP
  ; invoked with the "sub" operation; INT16_LOOP is not defined in this file
  ; (presumably it comes from huffyuvdsp_template.asm - confirm), so the exact
  ; arithmetic is not visible here.
  ;
  ; What this macro itself contributes is the choice between two expansions of
  ; that loop: "a" when dst, src1 and src2 are all multiples of mmsize, and "u"
  ; when at least one of them is not.
  ;------------------------------------------------------------------------------
  
  %macro DIFF_INT16 0
  ; 5 arguments, 5 general-purpose registers, 5 vector registers. "tmp" is given
  ; a name as a sixth register but is neither loaded nor declared as used here.
  ; NOTE(review): any code that touches tmpq presumably has to save and restore
  ; it itself - confirm against INT16_LOOP.
  cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
  ; The alignment dispatch is only assembled for vectors wider than 8 bytes;
  ; the 8-byte build goes straight to the "a" expansion.
  %if mmsize > 8
      ; Any low address bit set below mmsize means that pointer is misaligned.
      test src1q, mmsize-1
      jnz .unaligned
      test src2q, mmsize-1
      jnz .unaligned
      test dstq, mmsize-1
      jnz .unaligned
  %endif
      ; All three pointers are aligned (or alignment is not checked for this build).
      INT16_LOOP a, sub
  %if mmsize > 8
  ; NOTE(review): nothing visible here separates the expansion above from this
  ; label, so INT16_LOOP presumably ends with its own return; otherwise the
  ; aligned path would fall through into the unaligned one - confirm.
  .unaligned:
      INT16_LOOP u, sub
  %endif
  %endmacro
  
  ; Instantiate diff_int16 once per instruction set. Each INIT_* line selects
  ; the register width and ISA used by the DIFF_INT16 expansion that follows it.

  ; The MMX variant is only assembled for 32-bit x86 builds.
  %if ARCH_X86_32
  INIT_MMX mmx
  DIFF_INT16
  %endif
  
  ; The SSE2 variant is assembled unconditionally.
  INIT_XMM sse2
  DIFF_INT16
  
  ; The AVX2 variant is assembled only when the build defines
  ; HAVE_AVX2_EXTERNAL as non-zero.
  %if HAVE_AVX2_EXTERNAL
  INIT_YMM avx2
  DIFF_INT16
  %endif
  
  ;------------------------------------------------------------------------------
  ; sub_hfyu_median_pred_int16(dst, src1, src2, mask, w, left, left_top)
  ;
  ; Median-prediction residual over 16-bit samples, four samples per pass.
  ; For every element i the code computes
  ;     L  = src2[i-1]   (for i == 0: the value loaded from *left)
  ;     T  = src1[i]
  ;     TL = src1[i-1]   (for i == 0: the value loaded from *left_top)
  ;     pred   = median(L, T, (L + T - TL) & mask)
  ;     dst[i] = (src2[i] - pred) & mask
  ; On exit *left_top = src1[w-1] and *left = src2[w-1], zero-extended to 32 bits.
  ;
  ; In:  dst, src1, src2  - buffers of 16-bit words
  ;      mask             - low 16 bits are broadcast and ANDed into the results
  ;      w                - element count (doubled to a byte count internally)
  ;      left, left_top   - pointers to 32-bit in/out carry values
  ;
  ; Register roles inside the loop:
  ;      mm7 = mask in every word lane      maskq = byte offset (reused after
  ;      mm0 = TL vector, mm2 = L vector            the mask is copied to mm7)
  ;      mm1 = T vector,  mm3 = current samples / result
  ;
  ; Notes for reviewers:
  ;  - The loop is do-while shaped and moves 8 bytes per pass, so it always runs
  ;    at least once and may read/write past w elements when w is not a multiple
  ;    of 4. Presumably callers supply padded buffers - confirm.
  ;  - *left and *left_top are loaded as 32 bits and ORed into lanes 0-1, so
  ;    their upper 16 bits are assumed to be zero - TODO confirm at call sites.
  ;  - pmaxsw/pminsw compare as signed words. NOTE(review): this looks like it
  ;    relies on samples staying below 0x8000 - confirm.
  ;  - No EMMS is visible in this function; presumably the caller clears MMX
  ;    state - confirm.
  ;------------------------------------------------------------------------------
  INIT_MMX mmxext
  cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
      ; w: element count -> byte count (2 bytes per sample).
      add      wd, wd
      ; mm7 = mask replicated across the word lanes (SPLATW comes from x86util).
      movd    mm7, maskd
      SPLATW  mm7, mm7
      ; Build the first TL and L vectors: take the first words of each row,
      ; shift them up by one word lane, and drop the carried-in value into lane 0.
      movq    mm0, [src1q]
      movq    mm2, [src2q]
      psllq   mm0, 16
      psllq   mm2, 16
      movd    mm6, [left_topq]
      por     mm0, mm6
      movd    mm6, [leftq]
      por     mm2, mm6
      ; The mask now lives in mm7, so its GPR is recycled as the byte offset.
      xor     maskq, maskq
  .loop:
      movq    mm1, [src1q + maskq]       ; T   = src1[i .. i+3]
      movq    mm3, [src2q + maskq]       ; cur = src2[i .. i+3]
      movq    mm4, mm2                   ; keep a copy of L
      ; mm2 = (L - TL + T) & mask, the gradient candidate.
      psubw   mm2, mm0
      paddw   mm2, mm1
      pand    mm2, mm7
      ; Median of three via min/max:
      ;   max(min(max(L, T), grad), min(L, T))
      movq    mm5, mm4
      pmaxsw  mm4, mm1
      pminsw  mm1, mm5
      pminsw  mm4, mm2
      pmaxsw  mm4, mm1
      ; Residual = (cur - pred) & mask.
      psubw   mm3, mm4
      pand    mm3, mm7
      movq    [dstq + maskq], mm3
      add     maskq, 8
      ; Reload TL and L for the next pass: the same rows, one word behind the
      ; new offset. These loads are issued even when the loop is about to exit.
      movq    mm0, [src1q + maskq - 2]
      movq    mm2, [src2q + maskq - 2]
      ; Unsigned compare of the byte offset against the byte count.
      cmp     maskq, wq
          jb .loop
      ; Carry the last sample of each row out for the caller.
      movzx maskd, word [src1q + wq - 2]
      mov [left_topq], maskd
      movzx maskd, word [src2q + wq - 2]
      mov [leftq], maskd
      RET