Blame view

ffmpeg-4.2.2/libavcodec/alpha/pixblockdsp_alpha.c 2.26 KB
aac5773f   hucm   功能基本完成,接口待打磨
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
  /*
   * SIMD-optimized pixel operations
   *
   * This file is part of FFmpeg.
   *
   * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
   * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
   * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
  #include "libavutil/attributes.h"
  #include "libavcodec/pixblockdsp.h"
  #include "asm.h"
  
  /*
   * Copy an 8-row block of 8-bit pixels into block as 16-bit values,
   * eight values per row, rows stored back to back.
   *
   * Each row is fetched with a single ldq() of 8 bytes and written with two
   * stq() calls of four int16_t each; successive source rows are stride
   * bytes apart.
   * NOTE(review): ldq/stq/unpkbw come from asm.h; alignment requirements of
   * pixels and block are presumably those of the underlying quadword
   * accesses -- confirm against asm.h.
   */
  static void get_pixels_mvi(int16_t *restrict block,
                             const uint8_t *restrict pixels, ptrdiff_t stride)
  {
      int row;

      for (row = 0; row < 8; row++) {
          /* All eight source bytes of the current row in one register. */
          const uint64_t packed = ldq(pixels);

          /* Low four bytes first, then the high four after shifting down. */
          stq(unpkbw(packed),       block);
          stq(unpkbw(packed >> 32), block + 4);

          block  += 8;
          pixels += stride;
      }
  }
  
  /*
   * Write the per-pixel difference s1 - s2 of two 8-row, 8-byte-wide pixel
   * blocks into block as 16-bit values (eight per row, rows back to back).
   * Both sources advance by stride bytes per row.
   *
   * All eight bytes of a row are subtracted with one 64-bit subtraction.
   * The borrows that leak from one byte lane into the next are then undone,
   * and the high byte of every 16-bit result is filled in from a byte-wise
   * comparison of the inputs.
   * NOTE(review): the lane-level description below assumes cmpbge/zap/unpkbw
   * from asm.h behave like the Alpha instructions of the same name -- confirm.
   */
  static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                              ptrdiff_t stride)
  {
      /* 0x40 replicated into every byte: 0x4040 -> 32 bits -> 64 bits. */
      uint64_t lane40 = 0x4040;
      int row;

      lane40 |= lane40 << 16;
      lane40 |= lane40 << 32;

      for (row = 0; row < 8; row++) {
          uint64_t lhs, rhs, ge, delta, fixup;
          uint64_t neg;

          lhs = ldq(s1);
          rhs = ldq(s2);

          /* Presumably one flag per byte lane, set where lhs >= rhs. */
          ge = cmpbge(lhs, rhs);

          /* Plain 64-bit subtract: lanes with lhs < rhs borrow from above. */
          delta = lhs - rhs;

          fixup = zap(lane40, ge);    /* We use 0x4040404040404040 here...  */
          delta += 4 * fixup;         /* ...so we can use s4addq here.      */

          /* 0xff in every lane whose difference is negative, 0 elsewhere. */
          neg = zap(-1, ge);

          /* Widen four lanes at a time and place the sign byte on top. */
          stq(unpkbw(delta)       | (unpkbw(neg)       << 8), block);
          stq(unpkbw(delta >> 32) | (unpkbw(neg >> 32) << 8), block + 4);

          block += 8;
          s1    += stride;
          s2    += stride;
      }
  }
  
  /*
   * Hook the MVI implementations from this file into the DSP context.
   *
   * Nothing is changed unless amask(AMASK_MVI) evaluates to 0.  When it
   * does, diff_pixels is always replaced, while get_pixels is replaced only
   * if high_bit_depth is zero.  avctx is not used here.
   */
  av_cold void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx,
                                         unsigned high_bit_depth)
  {
      /* Keep whatever function pointers the caller already installed. */
      if (amask(AMASK_MVI) != 0)
          return;

      c->diff_pixels = diff_pixels_mvi;

      /* get_pixels_mvi reads uint8_t samples, so skip it for high bit depth. */
      if (!high_bit_depth)
          c->get_pixels = get_pixels_mvi;
  }