FFmpeg  4.3
videodsp_init.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2002-2012 Michael Niedermayer
3  * Copyright (C) 2012 Ronald S. Bultje
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "config.h"
23 #include "libavutil/attributes.h"
24 #include "libavutil/avassert.h"
25 #include "libavutil/common.h"
26 #include "libavutil/cpu.h"
27 #include "libavutil/mem.h"
28 #include "libavutil/x86/asm.h"
29 #include "libavutil/x86/cpu.h"
30 #include "libavcodec/videodsp.h"
31 
32 #if HAVE_X86ASM
33 typedef void emu_edge_vfix_func(uint8_t *dst, x86_reg dst_stride,
34  const uint8_t *src, x86_reg src_stride,
35  x86_reg start_y, x86_reg end_y, x86_reg bh);
36 typedef void emu_edge_vvar_func(uint8_t *dst, x86_reg dst_stride,
37  const uint8_t *src, x86_reg src_stride,
38  x86_reg start_y, x86_reg end_y, x86_reg bh,
39  x86_reg w);
40 
41 extern emu_edge_vfix_func ff_emu_edge_vfix1_mmx;
42 extern emu_edge_vfix_func ff_emu_edge_vfix2_mmx;
43 extern emu_edge_vfix_func ff_emu_edge_vfix3_mmx;
44 extern emu_edge_vfix_func ff_emu_edge_vfix4_mmx;
45 extern emu_edge_vfix_func ff_emu_edge_vfix5_mmx;
46 extern emu_edge_vfix_func ff_emu_edge_vfix6_mmx;
47 extern emu_edge_vfix_func ff_emu_edge_vfix7_mmx;
48 extern emu_edge_vfix_func ff_emu_edge_vfix8_mmx;
49 extern emu_edge_vfix_func ff_emu_edge_vfix9_mmx;
50 extern emu_edge_vfix_func ff_emu_edge_vfix10_mmx;
51 extern emu_edge_vfix_func ff_emu_edge_vfix11_mmx;
52 extern emu_edge_vfix_func ff_emu_edge_vfix12_mmx;
53 extern emu_edge_vfix_func ff_emu_edge_vfix13_mmx;
54 extern emu_edge_vfix_func ff_emu_edge_vfix14_mmx;
55 extern emu_edge_vfix_func ff_emu_edge_vfix15_mmx;
56 extern emu_edge_vfix_func ff_emu_edge_vfix16_mmx;
57 extern emu_edge_vfix_func ff_emu_edge_vfix17_mmx;
58 extern emu_edge_vfix_func ff_emu_edge_vfix18_mmx;
59 extern emu_edge_vfix_func ff_emu_edge_vfix19_mmx;
60 extern emu_edge_vfix_func ff_emu_edge_vfix20_mmx;
61 extern emu_edge_vfix_func ff_emu_edge_vfix21_mmx;
62 extern emu_edge_vfix_func ff_emu_edge_vfix22_mmx;
63 #if ARCH_X86_32
64 static emu_edge_vfix_func * const vfixtbl_mmx[22] = {
65  &ff_emu_edge_vfix1_mmx, &ff_emu_edge_vfix2_mmx, &ff_emu_edge_vfix3_mmx,
66  &ff_emu_edge_vfix4_mmx, &ff_emu_edge_vfix5_mmx, &ff_emu_edge_vfix6_mmx,
67  &ff_emu_edge_vfix7_mmx, &ff_emu_edge_vfix8_mmx, &ff_emu_edge_vfix9_mmx,
68  &ff_emu_edge_vfix10_mmx, &ff_emu_edge_vfix11_mmx, &ff_emu_edge_vfix12_mmx,
69  &ff_emu_edge_vfix13_mmx, &ff_emu_edge_vfix14_mmx, &ff_emu_edge_vfix15_mmx,
70  &ff_emu_edge_vfix16_mmx, &ff_emu_edge_vfix17_mmx, &ff_emu_edge_vfix18_mmx,
71  &ff_emu_edge_vfix19_mmx, &ff_emu_edge_vfix20_mmx, &ff_emu_edge_vfix21_mmx,
72  &ff_emu_edge_vfix22_mmx
73 };
74 #endif
75 extern emu_edge_vvar_func ff_emu_edge_vvar_mmx;
76 extern emu_edge_vfix_func ff_emu_edge_vfix16_sse;
77 extern emu_edge_vfix_func ff_emu_edge_vfix17_sse;
78 extern emu_edge_vfix_func ff_emu_edge_vfix18_sse;
79 extern emu_edge_vfix_func ff_emu_edge_vfix19_sse;
80 extern emu_edge_vfix_func ff_emu_edge_vfix20_sse;
81 extern emu_edge_vfix_func ff_emu_edge_vfix21_sse;
82 extern emu_edge_vfix_func ff_emu_edge_vfix22_sse;
83 static emu_edge_vfix_func * const vfixtbl_sse[22] = {
84  ff_emu_edge_vfix1_mmx, ff_emu_edge_vfix2_mmx, ff_emu_edge_vfix3_mmx,
85  ff_emu_edge_vfix4_mmx, ff_emu_edge_vfix5_mmx, ff_emu_edge_vfix6_mmx,
86  ff_emu_edge_vfix7_mmx, ff_emu_edge_vfix8_mmx, ff_emu_edge_vfix9_mmx,
87  ff_emu_edge_vfix10_mmx, ff_emu_edge_vfix11_mmx, ff_emu_edge_vfix12_mmx,
88  ff_emu_edge_vfix13_mmx, ff_emu_edge_vfix14_mmx, ff_emu_edge_vfix15_mmx,
89  ff_emu_edge_vfix16_sse, ff_emu_edge_vfix17_sse, ff_emu_edge_vfix18_sse,
90  ff_emu_edge_vfix19_sse, ff_emu_edge_vfix20_sse, ff_emu_edge_vfix21_sse,
91  ff_emu_edge_vfix22_sse
92 };
93 extern emu_edge_vvar_func ff_emu_edge_vvar_sse;
94 
95 typedef void emu_edge_hfix_func(uint8_t *dst, x86_reg dst_stride,
96  x86_reg start_x, x86_reg bh);
97 typedef void emu_edge_hvar_func(uint8_t *dst, x86_reg dst_stride,
98  x86_reg start_x, x86_reg n_words, x86_reg bh);
99 
100 extern emu_edge_hfix_func ff_emu_edge_hfix2_mmx;
101 extern emu_edge_hfix_func ff_emu_edge_hfix4_mmx;
102 extern emu_edge_hfix_func ff_emu_edge_hfix6_mmx;
103 extern emu_edge_hfix_func ff_emu_edge_hfix8_mmx;
104 extern emu_edge_hfix_func ff_emu_edge_hfix10_mmx;
105 extern emu_edge_hfix_func ff_emu_edge_hfix12_mmx;
106 extern emu_edge_hfix_func ff_emu_edge_hfix14_mmx;
107 extern emu_edge_hfix_func ff_emu_edge_hfix16_mmx;
108 extern emu_edge_hfix_func ff_emu_edge_hfix18_mmx;
109 extern emu_edge_hfix_func ff_emu_edge_hfix20_mmx;
110 extern emu_edge_hfix_func ff_emu_edge_hfix22_mmx;
111 #if ARCH_X86_32
112 static emu_edge_hfix_func * const hfixtbl_mmx[11] = {
113  ff_emu_edge_hfix2_mmx, ff_emu_edge_hfix4_mmx, ff_emu_edge_hfix6_mmx,
114  ff_emu_edge_hfix8_mmx, ff_emu_edge_hfix10_mmx, ff_emu_edge_hfix12_mmx,
115  ff_emu_edge_hfix14_mmx, ff_emu_edge_hfix16_mmx, ff_emu_edge_hfix18_mmx,
116  ff_emu_edge_hfix20_mmx, ff_emu_edge_hfix22_mmx
117 };
118 #endif
119 extern emu_edge_hvar_func ff_emu_edge_hvar_mmx;
120 extern emu_edge_hfix_func ff_emu_edge_hfix16_sse2;
121 extern emu_edge_hfix_func ff_emu_edge_hfix18_sse2;
122 extern emu_edge_hfix_func ff_emu_edge_hfix20_sse2;
123 extern emu_edge_hfix_func ff_emu_edge_hfix22_sse2;
124 static emu_edge_hfix_func * const hfixtbl_sse2[11] = {
125  ff_emu_edge_hfix2_mmx, ff_emu_edge_hfix4_mmx, ff_emu_edge_hfix6_mmx,
126  ff_emu_edge_hfix8_mmx, ff_emu_edge_hfix10_mmx, ff_emu_edge_hfix12_mmx,
127  ff_emu_edge_hfix14_mmx, ff_emu_edge_hfix16_sse2, ff_emu_edge_hfix18_sse2,
128  ff_emu_edge_hfix20_sse2, ff_emu_edge_hfix22_sse2
129 };
130 extern emu_edge_hvar_func ff_emu_edge_hvar_sse2;
#if HAVE_AVX2_EXTERNAL
/* AVX2 horizontal edge-extension routines; sizes 8..22 have one. */
extern emu_edge_hfix_func ff_emu_edge_hfix8_avx2,  ff_emu_edge_hfix10_avx2,
                          ff_emu_edge_hfix12_avx2, ff_emu_edge_hfix14_avx2,
                          ff_emu_edge_hfix16_avx2, ff_emu_edge_hfix18_avx2,
                          ff_emu_edge_hfix20_avx2, ff_emu_edge_hfix22_avx2;

/*
 * Mixed table: sizes 2..6 reuse the MMX routines, sizes 8..22 use the
 * AVX2 ones. emulated_edge_mc() calls entry [(n - 1) >> 1] for a fill
 * width n of 1..22.
 */
static emu_edge_hfix_func * const hfixtbl_avx2[11] = {
    /*  2..6: MMX */
    ff_emu_edge_hfix2_mmx,   ff_emu_edge_hfix4_mmx,
    ff_emu_edge_hfix6_mmx,
    /*  8..22: AVX2 */
    ff_emu_edge_hfix8_avx2,  ff_emu_edge_hfix10_avx2,
    ff_emu_edge_hfix12_avx2, ff_emu_edge_hfix14_avx2,
    ff_emu_edge_hfix16_avx2, ff_emu_edge_hfix18_avx2,
    ff_emu_edge_hfix20_avx2, ff_emu_edge_hfix22_avx2,
};

/* AVX2 routine that takes the fill size as a run-time argument. */
extern emu_edge_hvar_func ff_emu_edge_hvar_avx2;
#endif /* HAVE_AVX2_EXTERNAL */
148 
149 static av_always_inline void emulated_edge_mc(uint8_t *dst, const uint8_t *src,
150  ptrdiff_t dst_stride,
151  ptrdiff_t src_stride,
152  x86_reg block_w, x86_reg block_h,
153  x86_reg src_x, x86_reg src_y,
154  x86_reg w, x86_reg h,
155  emu_edge_vfix_func * const *vfix_tbl,
156  emu_edge_vvar_func *v_extend_var,
157  emu_edge_hfix_func * const *hfix_tbl,
158  emu_edge_hvar_func *h_extend_var)
159 {
160  x86_reg start_y, start_x, end_y, end_x, src_y_add = 0, p;
161 
162  if (!w || !h)
163  return;
164 
165  av_assert2(block_w <= FFABS(dst_stride));
166 
167  if (src_y >= h) {
168  src -= src_y*src_stride;
169  src_y_add = h - 1;
170  src_y = h - 1;
171  } else if (src_y <= -block_h) {
172  src -= src_y*src_stride;
173  src_y_add = 1 - block_h;
174  src_y = 1 - block_h;
175  }
176  if (src_x >= w) {
177  src += w - 1 - src_x;
178  src_x = w - 1;
179  } else if (src_x <= -block_w) {
180  src += 1 - block_w - src_x;
181  src_x = 1 - block_w;
182  }
183 
184  start_y = FFMAX(0, -src_y);
185  start_x = FFMAX(0, -src_x);
186  end_y = FFMIN(block_h, h-src_y);
187  end_x = FFMIN(block_w, w-src_x);
188  av_assert2(start_x < end_x && block_w > 0);
189  av_assert2(start_y < end_y && block_h > 0);
190 
191  // fill in the to-be-copied part plus all above/below
192  src += (src_y_add + start_y) * src_stride + start_x;
193  w = end_x - start_x;
194  if (w <= 22) {
195  vfix_tbl[w - 1](dst + start_x, dst_stride, src, src_stride,
196  start_y, end_y, block_h);
197  } else {
198  v_extend_var(dst + start_x, dst_stride, src, src_stride,
199  start_y, end_y, block_h, w);
200  }
201 
202  // fill left
203  if (start_x) {
204  if (start_x <= 22) {
205  hfix_tbl[(start_x - 1) >> 1](dst, dst_stride, start_x, block_h);
206  } else {
207  h_extend_var(dst, dst_stride,
208  start_x, (start_x + 1) >> 1, block_h);
209  }
210  }
211 
212  // fill right
213  p = block_w - end_x;
214  if (p) {
215  if (p <= 22) {
216  hfix_tbl[(p - 1) >> 1](dst + end_x - (p & 1), dst_stride,
217  -!(p & 1), block_h);
218  } else {
219  h_extend_var(dst + end_x - (p & 1), dst_stride,
220  -!(p & 1), (p + 1) >> 1, block_h);
221  }
222  }
223 }
224 
225 #if ARCH_X86_32
226 static av_noinline void emulated_edge_mc_mmx(uint8_t *buf, const uint8_t *src,
227  ptrdiff_t buf_stride,
228  ptrdiff_t src_stride,
229  int block_w, int block_h,
230  int src_x, int src_y, int w, int h)
231 {
232  emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
233  src_x, src_y, w, h, vfixtbl_mmx, &ff_emu_edge_vvar_mmx,
234  hfixtbl_mmx, &ff_emu_edge_hvar_mmx);
235 }
236 
237 static av_noinline void emulated_edge_mc_sse(uint8_t *buf, const uint8_t *src,
238  ptrdiff_t buf_stride,
239  ptrdiff_t src_stride,
240  int block_w, int block_h,
241  int src_x, int src_y, int w, int h)
242 {
243  emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
244  src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse,
245  hfixtbl_mmx, &ff_emu_edge_hvar_mmx);
246 }
247 #endif
248 
249 static av_noinline void emulated_edge_mc_sse2(uint8_t *buf, const uint8_t *src,
250  ptrdiff_t buf_stride,
251  ptrdiff_t src_stride,
252  int block_w, int block_h,
253  int src_x, int src_y, int w,
254  int h)
255 {
256  emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
257  src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse,
258  hfixtbl_sse2, &ff_emu_edge_hvar_sse2);
259 }
260 
261 #if HAVE_AVX2_EXTERNAL
262 static av_noinline void emulated_edge_mc_avx2(uint8_t *buf, const uint8_t *src,
263  ptrdiff_t buf_stride,
264  ptrdiff_t src_stride,
265  int block_w, int block_h,
266  int src_x, int src_y, int w,
267  int h)
268 {
269  emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
270  src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse,
271  hfixtbl_avx2, &ff_emu_edge_hvar_avx2);
272 }
273 #endif /* HAVE_AVX2_EXTERNAL */
274 #endif /* HAVE_X86ASM */
275 
/*
 * Prefetch routines defined outside this file. Either one may be
 * installed as the prefetch callback of a VideoDSPContext, depending on
 * the CPU flags.
 */
void ff_prefetch_3dnow(uint8_t *buf, ptrdiff_t stride, int h);
void ff_prefetch_mmxext(uint8_t *buf, ptrdiff_t stride, int h);
278 
280 {
281 #if HAVE_X86ASM
282  int cpu_flags = av_get_cpu_flags();
283 
284 #if ARCH_X86_32
285  if (EXTERNAL_MMX(cpu_flags) && bpc <= 8) {
286  ctx->emulated_edge_mc = emulated_edge_mc_mmx;
287  }
289  ctx->prefetch = ff_prefetch_3dnow;
290  }
291 #endif /* ARCH_X86_32 */
292  if (EXTERNAL_MMXEXT(cpu_flags)) {
293  ctx->prefetch = ff_prefetch_mmxext;
294  }
295 #if ARCH_X86_32
296  if (EXTERNAL_SSE(cpu_flags) && bpc <= 8) {
297  ctx->emulated_edge_mc = emulated_edge_mc_sse;
298  }
299 #endif /* ARCH_X86_32 */
300  if (EXTERNAL_SSE2(cpu_flags) && bpc <= 8) {
301  ctx->emulated_edge_mc = emulated_edge_mc_sse2;
302  }
303 #if HAVE_AVX2_EXTERNAL
304  if (EXTERNAL_AVX2(cpu_flags) && bpc <= 8) {
305  ctx->emulated_edge_mc = emulated_edge_mc_avx2;
306  }
307 #endif
308 #endif /* HAVE_X86ASM */
309 }
stride
int stride
Definition: mace.c:144
cpu.h
EXTERNAL_AMD3DNOW
#define EXTERNAL_AMD3DNOW(flags)
Definition: cpu.h:54
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:50
av_noinline
#define av_noinline
Definition: attributes.h:72
EXTERNAL_AVX2
#define EXTERNAL_AVX2(flags)
Definition: cpu.h:78
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
EXTERNAL_SSE
#define EXTERNAL_SSE(flags)
Definition: cpu.h:58
ctx
AVFormatContext * ctx
Definition: movenc.c:48
ff_videodsp_init_x86
av_cold void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc)
Definition: videodsp_init.c:279
FFABS
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:72
ff_prefetch_3dnow
void ff_prefetch_3dnow(uint8_t *buf, ptrdiff_t stride, int h)
src
#define src
Definition: vp8dsp.c:254
cpu.h
FFMAX
#define FFMAX(a, b)
Definition: common.h:94
asm.h
FFMIN
#define FFMIN(a, b)
Definition: common.h:96
attributes.h
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
av_assert2
#define av_assert2(cond)
assert() equivalent that is placed in speed-critical code.
Definition: avassert.h:64
common.h
av_always_inline
#define av_always_inline
Definition: attributes.h:49
uint8_t
uint8_t
Definition: audio_convert.c:194
w
FFmpeg Automated Testing Environment ************************************Introduction Using FATE from your FFmpeg source directory Submitting the results to the FFmpeg result aggregation server Uploading new samples to the fate suite FATE makefile targets and variables Makefile targets Makefile variables Examples Introduction **************FATE is an extended regression suite on the client side and a means for results aggregation and presentation on the server side The first part of this document explains how you can use FATE from your FFmpeg source directory to test your ffmpeg binary The second part describes how you can run FATE to submit the results to FFmpeg’s FATE server In any way you can have a look at the publicly viewable FATE results by visiting this as it can be seen if some test on some platform broke with their recent contribution This usually happens on the platforms the developers could not test on The second part of this document describes how you can run FATE to submit your results to FFmpeg’s FATE server If you want to submit your results be sure to check that your combination of OS and compiler is not already listed on the above mentioned website In the third part you can find a comprehensive listing of FATE makefile targets and variables Using FATE from your FFmpeg source directory **********************************************If you want to run FATE on your machine you need to have the samples in place You can get the samples via the build target fate rsync Use this command from the top level source this will cause FATE to fail NOTE To use a custom wrapper to run the pass ‘ target exec’ to ‘configure’ or set the TARGET_EXEC Make variable Submitting the results to the FFmpeg result aggregation server ****************************************************************To submit your results to the server you should run fate through the shell script ‘tests fate sh’ from the FFmpeg sources This script needs to be invoked with a configuration file as 
its first argument tests fate sh path to fate_config A configuration file template with comments describing the individual configuration variables can be found at ‘doc fate_config sh template’ Create a configuration that suits your based on the configuration template The ‘slot’ configuration variable can be any string that is not yet but it is suggested that you name it adhering to the following pattern ‘ARCH OS COMPILER COMPILER VERSION’ The configuration file itself will be sourced in a shell therefore all shell features may be used This enables you to setup the environment as you need it for your build For your first test runs the ‘fate_recv’ variable should be empty or commented out This will run everything as normal except that it will omit the submission of the results to the server The following files should be present in $workdir as specified in the configuration it may help to try out the ‘ssh’ command with one or more ‘ v’ options You should get detailed output concerning your SSH configuration and the authentication process The only thing left is to automate the execution of the fate sh script and the synchronisation of the samples directory Uploading new samples to the fate suite *****************************************If you need a sample uploaded send a mail to samples request This is for developers who have an account on the fate suite server If you upload new please make sure they are as small as space on each network bandwidth and so on benefit from smaller test cases Also keep in mind older checkouts use existing sample that means in practice generally do not remove or overwrite files as it likely would break older checkouts or releases Also all needed samples for a commit should be ideally before the push If you need an account for frequently uploading samples or you wish to help others by doing that send a mail to ffmpeg devel rsync vauL Duo ug o o w
Definition: fate.txt:150
config.h
VideoDSPContext
Definition: videodsp.h:41
mem.h
videodsp.h
x86_reg
int x86_reg
Definition: asm.h:72
h
h
Definition: vp9dsp_template.c:2038
EXTERNAL_MMX
#define EXTERNAL_MMX(flags)
Definition: cpu.h:56
ff_prefetch_mmxext
void ff_prefetch_mmxext(uint8_t *buf, ptrdiff_t stride, int h)
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57