FFmpeg  4.3
simple_idct_alpha.c
Go to the documentation of this file.
1 /*
2  * Simple IDCT (Alpha optimized)
3  *
4  * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * based upon some outcommented C code from mpeg2dec (idct_mmx.c
7  * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
8  *
9  * Alpha optimizations by Måns Rullgård <mans@mansr.com>
10  * and Falk Hueffner <falk@debian.org>
11  *
12  * This file is part of FFmpeg.
13  *
14  * FFmpeg is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * FFmpeg is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with FFmpeg; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27  */
28 
29 #include "idctdsp_alpha.h"
30 #include "asm.h"
31 
32 // cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
33 // W4 is actually exactly 16384, but using 16383 works around
34 // accumulating rounding errors for some encoders
35 #define W1 22725
36 #define W2 21407
37 #define W3 19266
38 #define W4 16383
39 #define W5 12873
40 #define W6 8867
41 #define W7 4520
42 #define ROW_SHIFT 11
43 #define COL_SHIFT 20
44 
45 /* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */
46 static inline int idct_row(int16_t *row)
47 {
48  int a0, a1, a2, a3, b0, b1, b2, b3, t;
49  uint64_t l, r, t2;
50  l = ldq(row);
51  r = ldq(row + 4);
52 
53  if (l == 0 && r == 0)
54  return 0;
55 
56  a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
57 
58  if (((l & ~0xffffUL) | r) == 0) {
59  a0 >>= ROW_SHIFT;
60  t2 = (uint16_t) a0;
61  t2 |= t2 << 16;
62  t2 |= t2 << 32;
63 
64  stq(t2, row);
65  stq(t2, row + 4);
66  return 1;
67  }
68 
69  a1 = a0;
70  a2 = a0;
71  a3 = a0;
72 
73  t = extwl(l, 4); /* row[2] */
74  if (t != 0) {
75  t = sextw(t);
76  a0 += W2 * t;
77  a1 += W6 * t;
78  a2 -= W6 * t;
79  a3 -= W2 * t;
80  }
81 
82  t = extwl(r, 0); /* row[4] */
83  if (t != 0) {
84  t = sextw(t);
85  a0 += W4 * t;
86  a1 -= W4 * t;
87  a2 -= W4 * t;
88  a3 += W4 * t;
89  }
90 
91  t = extwl(r, 4); /* row[6] */
92  if (t != 0) {
93  t = sextw(t);
94  a0 += W6 * t;
95  a1 -= W2 * t;
96  a2 += W2 * t;
97  a3 -= W6 * t;
98  }
99 
100  t = extwl(l, 2); /* row[1] */
101  if (t != 0) {
102  t = sextw(t);
103  b0 = W1 * t;
104  b1 = W3 * t;
105  b2 = W5 * t;
106  b3 = W7 * t;
107  } else {
108  b0 = 0;
109  b1 = 0;
110  b2 = 0;
111  b3 = 0;
112  }
113 
114  t = extwl(l, 6); /* row[3] */
115  if (t) {
116  t = sextw(t);
117  b0 += W3 * t;
118  b1 -= W7 * t;
119  b2 -= W1 * t;
120  b3 -= W5 * t;
121  }
122 
123 
124  t = extwl(r, 2); /* row[5] */
125  if (t) {
126  t = sextw(t);
127  b0 += W5 * t;
128  b1 -= W1 * t;
129  b2 += W7 * t;
130  b3 += W3 * t;
131  }
132 
133  t = extwl(r, 6); /* row[7] */
134  if (t) {
135  t = sextw(t);
136  b0 += W7 * t;
137  b1 -= W5 * t;
138  b2 += W3 * t;
139  b3 -= W1 * t;
140  }
141 
142  row[0] = (a0 + b0) >> ROW_SHIFT;
143  row[1] = (a1 + b1) >> ROW_SHIFT;
144  row[2] = (a2 + b2) >> ROW_SHIFT;
145  row[3] = (a3 + b3) >> ROW_SHIFT;
146  row[4] = (a3 - b3) >> ROW_SHIFT;
147  row[5] = (a2 - b2) >> ROW_SHIFT;
148  row[6] = (a1 - b1) >> ROW_SHIFT;
149  row[7] = (a0 - b0) >> ROW_SHIFT;
150 
151  return 2;
152 }
153 
154 static inline void idct_col(int16_t *col)
155 {
156  int a0, a1, a2, a3, b0, b1, b2, b3;
157 
158  col[0] += (1 << (COL_SHIFT - 1)) / W4;
159 
160  a0 = W4 * col[8 * 0];
161  a1 = W4 * col[8 * 0];
162  a2 = W4 * col[8 * 0];
163  a3 = W4 * col[8 * 0];
164 
165  if (col[8 * 2]) {
166  a0 += W2 * col[8 * 2];
167  a1 += W6 * col[8 * 2];
168  a2 -= W6 * col[8 * 2];
169  a3 -= W2 * col[8 * 2];
170  }
171 
172  if (col[8 * 4]) {
173  a0 += W4 * col[8 * 4];
174  a1 -= W4 * col[8 * 4];
175  a2 -= W4 * col[8 * 4];
176  a3 += W4 * col[8 * 4];
177  }
178 
179  if (col[8 * 6]) {
180  a0 += W6 * col[8 * 6];
181  a1 -= W2 * col[8 * 6];
182  a2 += W2 * col[8 * 6];
183  a3 -= W6 * col[8 * 6];
184  }
185 
186  if (col[8 * 1]) {
187  b0 = W1 * col[8 * 1];
188  b1 = W3 * col[8 * 1];
189  b2 = W5 * col[8 * 1];
190  b3 = W7 * col[8 * 1];
191  } else {
192  b0 = 0;
193  b1 = 0;
194  b2 = 0;
195  b3 = 0;
196  }
197 
198  if (col[8 * 3]) {
199  b0 += W3 * col[8 * 3];
200  b1 -= W7 * col[8 * 3];
201  b2 -= W1 * col[8 * 3];
202  b3 -= W5 * col[8 * 3];
203  }
204 
205  if (col[8 * 5]) {
206  b0 += W5 * col[8 * 5];
207  b1 -= W1 * col[8 * 5];
208  b2 += W7 * col[8 * 5];
209  b3 += W3 * col[8 * 5];
210  }
211 
212  if (col[8 * 7]) {
213  b0 += W7 * col[8 * 7];
214  b1 -= W5 * col[8 * 7];
215  b2 += W3 * col[8 * 7];
216  b3 -= W1 * col[8 * 7];
217  }
218 
219  col[8 * 0] = (a0 + b0) >> COL_SHIFT;
220  col[8 * 7] = (a0 - b0) >> COL_SHIFT;
221  col[8 * 1] = (a1 + b1) >> COL_SHIFT;
222  col[8 * 6] = (a1 - b1) >> COL_SHIFT;
223  col[8 * 2] = (a2 + b2) >> COL_SHIFT;
224  col[8 * 5] = (a2 - b2) >> COL_SHIFT;
225  col[8 * 3] = (a3 + b3) >> COL_SHIFT;
226  col[8 * 4] = (a3 - b3) >> COL_SHIFT;
227 }
228 
229 /* If all rows but the first one are zero after row transformation,
230  all rows will be identical after column transformation. */
231 static inline void idct_col2(int16_t *col)
232 {
233  int i;
234  uint64_t l, r;
235 
236  for (i = 0; i < 8; ++i) {
237  int a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
238 
239  a0 *= W4;
240  col[i] = a0 >> COL_SHIFT;
241  }
242 
243  l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
244  stq(l, col + 2 * 4); stq(r, col + 3 * 4);
245  stq(l, col + 4 * 4); stq(r, col + 5 * 4);
246  stq(l, col + 6 * 4); stq(r, col + 7 * 4);
247  stq(l, col + 8 * 4); stq(r, col + 9 * 4);
248  stq(l, col + 10 * 4); stq(r, col + 11 * 4);
249  stq(l, col + 12 * 4); stq(r, col + 13 * 4);
250  stq(l, col + 14 * 4); stq(r, col + 15 * 4);
251 }
252 
/*
 * In-place inverse DCT of the 64 coefficients in block (8 rows of 8).
 *
 * All rows are transformed first; the classification returned by idct_row()
 * for each row then selects the cheapest applicable column pass.
 */
void ff_simple_idct_axp(int16_t *block)
{
    int only_first_row = 1; /* rows 1..7 were entirely zero */
    int rows_are_flat  = 1; /* no row needed the full transform */
    int n;

    for (n = 0; n < 8; n++) {
        const int kind = idct_row(block + 8 * n);

        if (n > 0 && kind > 0)
            only_first_row = 0;
        if (kind == 2)
            rows_are_flat = 0;
    }

    if (only_first_row) {
        idct_col2(block);
        return;
    }

    if (!rows_are_flat) {
        for (n = 0; n < 8; n++)
            idct_col(block + n);
        return;
    }

    /*
     * Every row holds a single repeated value, so all columns are equal:
     * transform column 0 only, then spread each row's first element across
     * that row, two rows per iteration.
     */
    idct_col(block);
    for (n = 0; n < 8; n += 2) {
        int16_t *pair     = block + 8 * n;
        uint64_t upper    = (uint16_t) pair[0];
        uint64_t lower    = (uint16_t) pair[8];

        upper |= upper << 16;
        lower |= lower << 16;
        upper |= upper << 32;
        lower |= lower << 32;
        stq(upper, pair + 0 * 4);
        stq(upper, pair + 1 * 4);
        stq(lower, pair + 2 * 4);
        stq(lower, pair + 3 * 4);
    }
}
292 
/*
 * Inverse-transform block in place, then hand the result to
 * put_pixels_clamped_axp_p together with dest and line_size.
 *
 * The transform must run before the store; without it the raw
 * coefficients would be passed on instead of reconstructed samples.
 */
void ff_simple_idct_put_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    ff_simple_idct_axp(block);
    put_pixels_clamped_axp_p(block, dest, line_size);
}
298 
/*
 * Inverse-transform block in place, then hand the result to
 * add_pixels_clamped_axp_p together with dest and line_size.
 *
 * The transform must run before the add; without it the raw
 * coefficients would be passed on instead of the reconstructed residual.
 */
void ff_simple_idct_add_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    ff_simple_idct_axp(block);
    add_pixels_clamped_axp_p(block, dest, line_size);
}
idct_col2
static void idct_col2(int16_t *col)
Definition: simple_idct_alpha.c:231
W5
#define W5
Definition: simple_idct_alpha.c:39
W1
#define W1
Definition: simple_idct_alpha.c:35
ldq
#define ldq(p)
Definition: asm.h:59
W6
#define W6
Definition: simple_idct_alpha.c:40
asm.h
sextw
#define sextw(x)
Definition: asm.h:56
b1
static double b1(void *priv, double x, double y)
Definition: vf_xfade.c:1332
ff_simple_idct_put_axp
void ff_simple_idct_put_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: simple_idct_alpha.c:293
W7
#define W7
Definition: simple_idct_alpha.c:41
a1
#define a1
Definition: regdef.h:47
ff_simple_idct_add_axp
void ff_simple_idct_add_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: simple_idct_alpha.c:299
extwl
#define extwl(a, b)
Definition: asm.h:109
ff_simple_idct_axp
void ff_simple_idct_axp(int16_t *block)
Definition: simple_idct_alpha.c:253
W2
#define W2
Definition: simple_idct_alpha.c:36
b3
static double b3(void *priv, double x, double y)
Definition: vf_xfade.c:1334
ROW_SHIFT
#define ROW_SHIFT
Definition: simple_idct_alpha.c:42
W4
#define W4
Definition: simple_idct_alpha.c:38
W3
#define W3
Definition: simple_idct_alpha.c:37
idct_row
static int idct_row(int16_t *row)
Definition: simple_idct_alpha.c:46
b2
static double b2(void *priv, double x, double y)
Definition: vf_xfade.c:1333
a0
#define a0
Definition: regdef.h:46
r
#define r
Definition: input.c:40
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
add_pixels_clamped_axp_p
void(* add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
Definition: idctdsp_alpha.c:33
a2
#define a2
Definition: regdef.h:48
uint8_t
uint8_t
Definition: audio_convert.c:194
idct_col
static void idct_col(int16_t *col)
Definition: simple_idct_alpha.c:154
w
FFmpeg Automated Testing Environment ************************************Introduction Using FATE from your FFmpeg source directory Submitting the results to the FFmpeg result aggregation server Uploading new samples to the fate suite FATE makefile targets and variables Makefile targets Makefile variables Examples Introduction **************FATE is an extended regression suite on the client side and a means for results aggregation and presentation on the server side The first part of this document explains how you can use FATE from your FFmpeg source directory to test your ffmpeg binary The second part describes how you can run FATE to submit the results to FFmpeg’s FATE server In any way you can have a look at the publicly viewable FATE results by visiting this as it can be seen if some test on some platform broke with their recent contribution This usually happens on the platforms the developers could not test on The second part of this document describes how you can run FATE to submit your results to FFmpeg’s FATE server If you want to submit your results be sure to check that your combination of OS and compiler is not already listed on the above mentioned website In the third part you can find a comprehensive listing of FATE makefile targets and variables Using FATE from your FFmpeg source directory **********************************************If you want to run FATE on your machine you need to have the samples in place You can get the samples via the build target fate rsync Use this command from the top level source this will cause FATE to fail NOTE To use a custom wrapper to run the pass ‘ target exec’ to ‘configure’ or set the TARGET_EXEC Make variable Submitting the results to the FFmpeg result aggregation server ****************************************************************To submit your results to the server you should run fate through the shell script ‘tests fate sh’ from the FFmpeg sources This script needs to be invoked with a configuration file as 
its first argument tests fate sh path to fate_config A configuration file template with comments describing the individual configuration variables can be found at ‘doc fate_config sh template’ Create a configuration that suits your based on the configuration template The ‘slot’ configuration variable can be any string that is not yet but it is suggested that you name it adhering to the following pattern ‘ARCH OS COMPILER COMPILER VERSION’ The configuration file itself will be sourced in a shell therefore all shell features may be used This enables you to setup the environment as you need it for your build For your first test runs the ‘fate_recv’ variable should be empty or commented out This will run everything as normal except that it will omit the submission of the results to the server The following files should be present in $workdir as specified in the configuration it may help to try out the ‘ssh’ command with one or more ‘ v’ options You should get detailed output concerning your SSH configuration and the authentication process The only thing left is to automate the execution of the fate sh script and the synchronisation of the samples directory Uploading new samples to the fate suite *****************************************If you need a sample uploaded send a mail to samples request This is for developers who have an account on the fate suite server If you upload new please make sure they are as small as space on each network bandwidth and so on benefit from smaller test cases Also keep in mind older checkouts use existing sample that means in practice generally do not remove or overwrite files as it likely would break older checkouts or releases Also all needed samples for a commit should be ideally before the push If you need an account for frequently uploading samples or you wish to help others by doing that send a mail to ffmpeg devel rsync vauL Duo ug o o w
Definition: fate.txt:150
stq
#define stq(l, p)
Definition: asm.h:69
t2
#define t2
Definition: regdef.h:30
COL_SHIFT
#define COL_SHIFT
Definition: simple_idct_alpha.c:43
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
b0
static double b0(void *priv, double x, double y)
Definition: vf_xfade.c:1331
put_pixels_clamped_axp_p
void(* put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
Definition: idctdsp_alpha.c:31
a3
#define a3
Definition: regdef.h:49
idctdsp_alpha.h