FFmpeg  4.3
swscale_ppc_template.c
Go to the documentation of this file.
1 /*
2  * AltiVec-enhanced yuv2yuvX
3  *
4  * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5  * based on the equivalent C code in swscale.c
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
/**
 * Vertically filter one group of 16 output pixels and store them as 8-bit
 * samples at dest.
 *
 * The 16 accumulators start at dither[(x + i + offset) & 7] << 12, every
 * filter tap j is folded in through the LOAD_FILTER / LOAD_L1 /
 * yuv2planeX_8 macros (defined outside this view), and the sums are then
 * shifted right arithmetically by 19 and packed down to bytes with
 * vec_packsu.
 *
 * @param filter     vertical filter coefficients, read through LOAD_FILTER
 * @param filterSize number of taps; also the number of rows in src
 * @param src        array of filterSize source row pointers
 * @param dest       where the 16 result bytes are stored via VEC_ST; the
 *                   caller in this file passes dest + i with i chosen so
 *                   that the address is 16-byte aligned
 * @param dither     dither table, indexed modulo 8
 * @param offset     added to the pixel position before the dither lookup
 * @param x          index of the first of the 16 pixels within the rows
 */
24 static void FUNC(yuv2planeX_8_16)(const int16_t *filter, int filterSize,
25  const int16_t **src, uint8_t *dest,
26  const uint8_t *dither, int offset, int x)
27 {
28  register int i, j;
29  LOCAL_ALIGNED(16, int, val, [16]);
30  vector signed int vo1, vo2, vo3, vo4;
31  vector unsigned short vs1, vs2;
32  vector unsigned char vf;
/* Per-lane shift count 19, built as 10 + 9 (presumably because 19 does not
 * fit the small immediate accepted by vec_splat_u32 -- confirm against the
 * AltiVec reference). */
33  vector unsigned int altivec_vectorShiftInt19 =
34  vec_add(vec_splat_u32(10), vec_splat_u32(9));
35 
/* Seed the 16 accumulators with the dither value scaled by 1 << 12. */
36  for (i = 0; i < 16; i++)
37  val[i] = dither[(x + i + offset) & 7] << 12;
38 
/* Load the 16 ints as four vectors of four (byte offsets 0, 16, 32, 48). */
39  vo1 = vec_ld(0, val);
40  vo2 = vec_ld(16, val);
41  vo3 = vec_ld(32, val);
42  vo4 = vec_ld(48, val);
43 
44  for (j = 0; j < filterSize; j++) {
/* NOTE(review): joffset and xoffset are not referenced directly below;
 * presumably the LOAD_FILTER / LOAD_L1 macros pick them up by name, so do
 * not rename or remove them without checking the macro definitions. */
45  unsigned int joffset=j<<1;
46  unsigned int xoffset=x<<1;
47  vector unsigned char av_unused perm;
48  vector signed short l1,vLumFilter;
49  LOAD_FILTER(vLumFilter,filter);
/* Replicate element 0 of the loaded filter vector into every lane. */
50  vLumFilter = vec_splat(vLumFilter, 0);
51  LOAD_L1(l1,src[j],perm);
/* Two invocations: vo1/vo2 at position x, vo3/vo4 at position x + 8. */
52  yuv2planeX_8(vo1, vo2, l1, src[j], x, perm, vLumFilter);
53  yuv2planeX_8(vo3, vo4, l1, src[j], x + 8, perm, vLumFilter);
54  }
55 
/* Arithmetic right shift by 19, then narrow int -> unsigned short ->
 * unsigned char using the saturating pack vec_packsu. */
56  vo1 = vec_sra(vo1, altivec_vectorShiftInt19);
57  vo2 = vec_sra(vo2, altivec_vectorShiftInt19);
58  vo3 = vec_sra(vo3, altivec_vectorShiftInt19);
59  vo4 = vec_sra(vo4, altivec_vectorShiftInt19);
60  vs1 = vec_packsu(vo1, vo2);
61  vs2 = vec_packsu(vo3, vo4);
62  vf = vec_packsu(vs1, vs2);
63  VEC_ST(vf, 0, dest);
64 }
65 
66 
/**
 * Scalar vertical filter for the output pixels in [x, dstW).
 *
 * Each output starts from dither[(pos + offset) & 7] << 12, accumulates
 * src[tap][pos] * filter[tap] over all filterSize taps, is shifted right
 * by 19 and clipped to 8 bits with av_clip_uint8 before being written to
 * dest[pos].
 *
 * @param filter     filterSize vertical coefficients
 * @param filterSize number of taps, and of row pointers in src
 * @param src        array of filterSize source rows
 * @param dest       output row, indexed by absolute pixel position
 * @param dstW       end position (exclusive)
 * @param dither     dither table, indexed modulo 8
 * @param offset     added to the position before the dither lookup
 * @param x          first position to compute
 */
static inline void yuv2planeX_u(const int16_t *filter, int filterSize,
                                const int16_t **src, uint8_t *dest, int dstW,
                                const uint8_t *dither, int offset, int x)
{
    int pos = x;

    while (pos < dstW) {
        int acc = dither[(pos + offset) & 7] << 12;
        int tap;

        for (tap = 0; tap < filterSize; ++tap) {
            acc += src[tap][pos] * filter[tap];
        }

        dest[pos] = av_clip_uint8(acc >> 19);
        ++pos;
    }
}
80 
81 static void FUNC(yuv2planeX)(const int16_t *filter, int filterSize,
82  const int16_t **src, uint8_t *dest, int dstW,
83  const uint8_t *dither, int offset)
84 {
85  int dst_u = -(uintptr_t)dest & 15;
86  int i;
87 
88  yuv2planeX_u(filter, filterSize, src, dest, dst_u, dither, offset, 0);
89 
90  for (i = dst_u; i < dstW - 15; i += 16)
91  FUNC(yuv2planeX_8_16)(filter, filterSize, src, dest + i, dither,
92  offset, i);
93 
94  yuv2planeX_u(filter, filterSize, src, dest, dstW, dither, offset, i);
95 }
96 
/**
 * Horizontal scaler: for every output sample i, form the dot product of
 * filterSize source bytes starting at src[filterPos[i]] with that output's
 * filter coefficients, shift the sum right by 7 and store
 * FFMIN(sum >> 7, (1 << 15) - 1) in dst[i]. Only the upper bound is
 * clamped.
 *
 * A scalar loop is used when filterSize is not a multiple of 4. Otherwise
 * the work is dispatched on filterSize: dedicated vector loops for 4, 8
 * and 16 taps, and a generic vector loop for every other value.
 *
 * @param c          not referenced directly in this body
 * @param dst        output array, dstW entries
 * @param dstW       number of output samples
 * @param src        8-bit source samples
 * @param filter     coefficients; the scalar path reads
 *                   filter[filterSize * i + j], i.e. filterSize
 *                   consecutive coefficients per output
 * @param filterPos  for each output, index of its first source sample
 * @param filterSize taps per output sample
 *
 * The vector paths use helper macros (unaligned_load, VEC_MERGEH/L,
 * GET_VF4, FIRST_LOAD, LOAD_SRCV, LOAD_SRCV8, GET_VFD, UPDATE_PTR, VEC_ST)
 * and the zero vector vzero, all defined outside this view.
 */
97 static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW,
98  const uint8_t *src, const int16_t *filter,
99  const int32_t *filterPos, int filterSize)
100 {
101  register int i;
/* 16-byte aligned scratch for storing the vec_sums result; each vector path
 * reads the final sum back from tempo[3]. */
102  LOCAL_ALIGNED(16, int, tempo, [4]);
103 
/* Scalar path for tap counts that are not a multiple of 4. */
104  if (filterSize % 4) {
105  for (i = 0; i < dstW; i++) {
106  register int j;
107  register int srcPos = filterPos[i];
108  register int val = 0;
109  for (j = 0; j < filterSize; j++)
110  val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
111  dst[i] = FFMIN(val >> 7, (1 << 15) - 1);
112  }
113  } else
114  switch (filterSize) {
/* 4 taps: bytes are widened by merging with zero, moved to the even slots,
 * multiplied with vec_mule and summed across the vector. */
115  case 4:
116  for (i = 0; i < dstW; i++) {
117  register int srcPos = filterPos[i];
118 
119  vector unsigned char src_vF = unaligned_load(srcPos, src);
120  vector signed short src_v, filter_v;
121  vector signed int val_vEven, val_s;
122  src_v = // vec_unpackh sign-extends...
123  (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
124  // now put our elements in the even slots
125  src_v = vec_mergeh(src_v, (vector signed short)vzero);
126  GET_VF4(i, filter_v, filter);
127  val_vEven = vec_mule(src_v, filter_v);
128  val_s = vec_sums(val_vEven, vzero);
129  vec_st(val_s, 0, tempo);
130  dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
131  }
132  break;
/* 8 taps: one multiply-sum per output. The coefficients are fetched with a
 * plain vec_ld at byte offset i << 4, so filter is presumably 16-byte
 * aligned -- confirm against the caller. */
133  case 8:
134  for (i = 0; i < dstW; i++) {
135  register int srcPos = filterPos[i];
136  vector unsigned char src_vF, av_unused src_v0, av_unused src_v1;
137  vector unsigned char av_unused permS;
138  vector signed short src_v, filter_v;
139  vector signed int val_v, val_s;
140  FIRST_LOAD(src_v0, srcPos, src, permS);
141  LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF);
142  src_v = // vec_unpackh sign-extends...
143  (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
144  filter_v = vec_ld(i << 4, filter);
145  val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
146  val_s = vec_sums(val_v, vzero);
147  vec_st(val_s, 0, tempo);
148  dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
149  }
150  break;
151 
/* 16 taps: the high and low halves of the source vector are widened
 * separately and accumulated with two chained multiply-sums; coefficients
 * come from byte offsets i << 5 and (i << 5) + 16. */
152  case 16:
153  for (i = 0; i < dstW; i++) {
154  register int srcPos = filterPos[i];
155 
156  vector unsigned char src_vF = unaligned_load(srcPos, src);
157  vector signed short src_vA = // vec_unpackh sign-extends...
158  (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
159  vector signed short src_vB = // vec_unpackh sign-extends...
160  (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF));
161  vector signed short filter_v0 = vec_ld(i << 5, filter);
162  vector signed short filter_v1 = vec_ld((i << 5) + 16, filter);
163 
164  vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero);
165  vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc);
166 
167  vector signed int val_s = vec_sums(val_v, vzero);
168 
169  VEC_ST(val_s, 0, tempo);
170  dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
171  }
172  break;
173 
/* Generic path: consume taps 16 at a time, then at most one further group
 * of 8.
 * NOTE(review): nothing visible here handles a trailing group of 4 taps, so
 * a filterSize that is a multiple of 4 but not of 8 (e.g. 12) would leave
 * its last 4 taps out of the sum -- confirm that callers only pass
 * multiples of 8 on this path. */
174  default:
175  for (i = 0; i < dstW; i++) {
/* offset is the byte offset of this output's first coefficient
 * (2 bytes per int16_t coefficient, filterSize coefficients per output). */
176  register int j, av_unused offset = i * 2 * filterSize;
177  register int srcPos = filterPos[i];
178 
179  vector signed int val_s, val_v = (vector signed int)vzero;
180  vector signed short av_unused filter_v0R;
181  vector unsigned char av_unused permF, av_unused src_v0, av_unused permS;
182  FIRST_LOAD(filter_v0R, offset, filter, permF);
183  FIRST_LOAD(src_v0, srcPos, src, permS);
184 
185  for (j = 0; j < filterSize - 15; j += 16) {
186  vector unsigned char av_unused src_v1, src_vF;
187  vector signed short av_unused filter_v1R, av_unused filter_v2R,
188  filter_v0, filter_v1, src_vA, src_vB;
189  vector signed int val_acc;
190  LOAD_SRCV(srcPos, j, src, permS, src_v0, src_v1, src_vF);
191  src_vA = // vec_unpackh sign-extends...
192  (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
193  src_vB = // vec_unpackh sign-extends...
194  (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF));
195  GET_VFD(i, j, filter, filter_v0R, filter_v1R, permF, filter_v0, 0);
196  GET_VFD(i, j, filter, filter_v1R, filter_v2R, permF, filter_v1, 16);
197 
198  val_acc = vec_msums(src_vA, filter_v0, val_v);
199  val_v = vec_msums(src_vB, filter_v1, val_acc);
200  UPDATE_PTR(filter_v2R, filter_v0R, src_v1, src_v0);
201  }
202 
/* One extra group of 8 taps when at least 8 remain after the loop above. */
203  if (j < filterSize - 7) {
204  // loading src_v0 is useless, it's already done above
205  vector unsigned char av_unused src_v1, src_vF;
206  vector signed short src_v, av_unused filter_v1R, filter_v;
207  LOAD_SRCV8(srcPos, j, src, permS, src_v0, src_v1, src_vF);
208  src_v = // vec_unpackh sign-extends...
209  (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
210  GET_VFD(i, j, filter, filter_v0R, filter_v1R, permF, filter_v, 0);
211  val_v = vec_msums(src_v, filter_v, val_v);
212  }
213  val_s = vec_sums(val_v, vzero);
214 
215  VEC_ST(val_s, 0, tempo);
216  dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
217  }
218  }
219 }
av_unused
#define av_unused
Definition: attributes.h:131
yuv2planeX
static void FUNC() yuv2planeX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
Definition: swscale_ppc_template.c:81
filter
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
Definition: filter_design.txt:228
x
FFmpeg Automated Testing Environment ************************************Introduction Using FATE from your FFmpeg source directory Submitting the results to the FFmpeg result aggregation server Uploading new samples to the fate suite FATE makefile targets and variables Makefile targets Makefile variables Examples Introduction **************FATE is an extended regression suite on the client side and a means for results aggregation and presentation on the server side The first part of this document explains how you can use FATE from your FFmpeg source directory to test your ffmpeg binary The second part describes how you can run FATE to submit the results to FFmpeg’s FATE server In any way you can have a look at the publicly viewable FATE results by visiting this as it can be seen if some test on some platform broke with their recent contribution This usually happens on the platforms the developers could not test on The second part of this document describes how you can run FATE to submit your results to FFmpeg’s FATE server If you want to submit your results be sure to check that your combination of OS and compiler is not already listed on the above mentioned website In the third part you can find a comprehensive listing of FATE makefile targets and variables Using FATE from your FFmpeg source directory **********************************************If you want to run FATE on your machine you need to have the samples in place You can get the samples via the build target fate rsync Use this command from the top level source this will cause FATE to fail NOTE To use a custom wrapper to run the pass ‘ target exec’ to ‘configure’ or set the TARGET_EXEC Make variable Submitting the results to the FFmpeg result aggregation server ****************************************************************To submit your results to the server you should run fate through the shell script ‘tests fate sh’ from the FFmpeg sources This script needs to be invoked with a configuration file as 
its first argument tests fate sh path to fate_config A configuration file template with comments describing the individual configuration variables can be found at ‘doc fate_config sh template’ Create a configuration that suits your based on the configuration template The ‘slot’ configuration variable can be any string that is not yet but it is suggested that you name it adhering to the following pattern ‘ARCH OS COMPILER COMPILER VERSION’ The configuration file itself will be sourced in a shell therefore all shell features may be used This enables you to setup the environment as you need it for your build For your first test runs the ‘fate_recv’ variable should be empty or commented out This will run everything as normal except that it will omit the submission of the results to the server The following files should be present in $workdir as specified in the configuration it may help to try out the ‘ssh’ command with one or more ‘ v’ options You should get detailed output concerning your SSH configuration and the authentication process The only thing left is to automate the execution of the fate sh script and the synchronisation of the samples directory Uploading new samples to the fate suite *****************************************If you need a sample uploaded send a mail to samples request This is for developers who have an account on the fate suite server If you upload new please make sure they are as small as space on each network bandwidth and so on benefit from smaller test cases Also keep in mind older checkouts use existing sample that means in practice generally do not remove or overwrite files as it likely would break older checkouts or releases Also all needed samples for a commit should be ideally before the push If you need an account for frequently uploading samples or you wish to help others by doing that send a mail to ffmpeg devel rsync vauL Duo x
Definition: fate.txt:150
perm
perm
Definition: f_perms.c:74
val
static double val(void *priv, double ch)
Definition: aeval.c:76
int32_t
int32_t
Definition: audio_convert.c:194
src
#define src
Definition: vp8dsp.c:254
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
yuv2planeX_u
static void yuv2planeX_u(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset, int x)
Definition: swscale_ppc_template.c:67
FFMIN
#define FFMIN(a, b)
Definition: common.h:96
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
uint8_t
uint8_t
Definition: audio_convert.c:194
FUNC
#define FUNC(a)
Definition: bit_depth_template.c:104
hScale_real
static void FUNC() hScale_real(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale_ppc_template.c:97
LOCAL_ALIGNED
#define LOCAL_ALIGNED(a, t, v,...)
Definition: internal.h:114
int
int
Definition: ffmpeg_filter.c:192
SwsContext
Definition: swscale_internal.h:280
short
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option keep it simple and lowercase description are short
Definition: writing_filters.txt:89
yuv2planeX_8_16
static void FUNC() yuv2planeX_8_16(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, const uint8_t *dither, int offset, int x)
Definition: swscale_ppc_template.c:24
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:57