FFmpeg  4.3
dirac_dwt_init.c
Go to the documentation of this file.
1 /*
2  * x86 optimized discrete wavelet transform
3  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
4  * Copyright (c) 2010 David Conrad
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "libavutil/x86/asm.h"
24 #include "libavutil/x86/cpu.h"
25 #include "libavcodec/dirac_dwt.h"
26 
/*
 * COMPOSE_VERTICAL(ext, align)
 *
 * Declares the external ff_*##ext routines and defines static C wrappers
 * around them that take uint8_t pointers and reinterpret them as int16_t.
 *
 *   ext   - suffix pasted onto every generated and referenced name.
 *   align - granularity, in int16_t coefficients, used to split the work
 *           between the external routine and the C loop; it is used as a
 *           bit mask, so it must be a power of two.
 *
 * Vertical wrappers: the trailing (width % align) coefficients are computed
 * in C with the COMPOSE_* macros first, then the external routine is called
 * with the width rounded down to a multiple of align.
 *
 * Horizontal Haar wrappers: the external routine is called with the full
 * width w first, then the C loop writes the outputs for the last
 * ((w >> 1) % align) coefficient pairs.
 */
#define COMPOSE_VERTICAL(ext, align) \
void ff_vertical_compose53iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
void ff_vertical_compose_dirac53iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
void ff_vertical_compose_dd137iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
void ff_vertical_compose_dd97iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
void ff_vertical_compose_haar##ext(int16_t *b0, int16_t *b1, int width); \
void ff_horizontal_compose_haar0i##ext(int16_t *b, int16_t *tmp, int w); \
void ff_horizontal_compose_haar1i##ext(int16_t *b, int16_t *tmp, int w); \
\
static void vertical_compose53iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
{ \
    int16_t *p0 = (int16_t *)_b0; \
    int16_t *p1 = (int16_t *)_b1; \
    int16_t *p2 = (int16_t *)_b2; \
    const int simd_width = width & ~((align) - 1); \
    int n; \
\
    /* C fallback for the coefficients past the last full block */ \
    for (n = simd_width; n < width; n++) \
        p1[n] = COMPOSE_53iL0(p0[n], p1[n], p2[n]); \
\
    ff_vertical_compose53iL0##ext(p0, p1, p2, simd_width); \
} \
\
static void vertical_compose_dirac53iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
{ \
    int16_t *p0 = (int16_t *)_b0; \
    int16_t *p1 = (int16_t *)_b1; \
    int16_t *p2 = (int16_t *)_b2; \
    const int simd_width = width & ~((align) - 1); \
    int n; \
\
    for (n = simd_width; n < width; n++) \
        p1[n] = COMPOSE_DIRAC53iH0(p0[n], p1[n], p2[n]); \
\
    ff_vertical_compose_dirac53iH0##ext(p0, p1, p2, simd_width); \
} \
\
static void vertical_compose_dd137iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
                                           uint8_t *_b3, uint8_t *_b4, int width) \
{ \
    int16_t *p0 = (int16_t *)_b0; \
    int16_t *p1 = (int16_t *)_b1; \
    int16_t *p2 = (int16_t *)_b2; \
    int16_t *p3 = (int16_t *)_b3; \
    int16_t *p4 = (int16_t *)_b4; \
    const int simd_width = width & ~((align) - 1); \
    int n; \
\
    /* the result lands in the middle buffer (_b2) */ \
    for (n = simd_width; n < width; n++) \
        p2[n] = COMPOSE_DD137iL0(p0[n], p1[n], p2[n], p3[n], p4[n]); \
\
    ff_vertical_compose_dd137iL0##ext(p0, p1, p2, p3, p4, simd_width); \
} \
\
static void vertical_compose_dd97iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
                                          uint8_t *_b3, uint8_t *_b4, int width) \
{ \
    int16_t *p0 = (int16_t *)_b0; \
    int16_t *p1 = (int16_t *)_b1; \
    int16_t *p2 = (int16_t *)_b2; \
    int16_t *p3 = (int16_t *)_b3; \
    int16_t *p4 = (int16_t *)_b4; \
    const int simd_width = width & ~((align) - 1); \
    int n; \
\
    for (n = simd_width; n < width; n++) \
        p2[n] = COMPOSE_DD97iH0(p0[n], p1[n], p2[n], p3[n], p4[n]); \
\
    ff_vertical_compose_dd97iH0##ext(p0, p1, p2, p3, p4, simd_width); \
} \
\
static void vertical_compose_haar##ext(uint8_t *_b0, uint8_t *_b1, int width) \
{ \
    int16_t *p0 = (int16_t *)_b0; \
    int16_t *p1 = (int16_t *)_b1; \
    const int simd_width = width & ~((align) - 1); \
    int n; \
\
    /* order matters: the second line reads the p0[n] just written */ \
    for (n = simd_width; n < width; n++) { \
        p0[n] = COMPOSE_HAARiL0(p0[n], p1[n]); \
        p1[n] = COMPOSE_HAARiH0(p1[n], p0[n]); \
    } \
\
    ff_vertical_compose_haar##ext(p0, p1, simd_width); \
} \
\
static void horizontal_compose_haar0i##ext(uint8_t *_b, uint8_t *_tmp, int w) \
{ \
    int16_t *buf = (int16_t *)_b; \
    int16_t *aux = (int16_t *)_tmp; \
    const int half = w >> 1; \
    int n; \
\
    ff_horizontal_compose_haar0i##ext(buf, aux, w); \
\
    /* interleave the remaining pairs in C, starting at the last block boundary */ \
    for (n = half & ~((align) - 1); n < half; n++) { \
        buf[2 * n]     = aux[n]; \
        buf[2 * n + 1] = COMPOSE_HAARiH0(buf[n + half], aux[n]); \
    } \
} \
\
static void horizontal_compose_haar1i##ext(uint8_t *_b, uint8_t *_tmp, int w) \
{ \
    int16_t *buf = (int16_t *)_b; \
    int16_t *aux = (int16_t *)_tmp; \
    const int half = w >> 1; \
    int n; \
\
    ff_horizontal_compose_haar1i##ext(buf, aux, w); \
\
    /* same as haar0i, but every output is additionally rounded and halved */ \
    for (n = half & ~((align) - 1); n < half; n++) { \
        buf[2 * n]     = (aux[n] + 1) >> 1; \
        buf[2 * n + 1] = (COMPOSE_HAARiH0(buf[n + half], aux[n]) + 1) >> 1; \
    } \
}
135 
#if HAVE_X86ASM
/* The MMX wrappers (blocks of 4 coefficients) are skipped on x86-64 builds. */
#if !ARCH_X86_64
COMPOSE_VERTICAL(_mmx, 4)
#endif
/* SSE2 wrappers work on blocks of 8 coefficients. */
COMPOSE_VERTICAL(_sse2, 8)


void ff_horizontal_compose_dd97i_ssse3(int16_t *_b, int16_t *_tmp, int w);

/**
 * C wrapper around ff_horizontal_compose_dd97i_ssse3().
 *
 * The external routine is called with the full width first. The loop below
 * then writes the interleaved outputs for the last ((w >> 1) % 8) coefficient
 * pairs in C.
 *
 * @param _b   coefficient buffer, accessed as int16_t; read at [x + w/2] and
 *             written at [2x] and [2x + 1]
 * @param _tmp second buffer, accessed as int16_t; the C loop only reads it
 * @param w    width in coefficients
 *
 * NOTE(review): the loop reads tmp[x - 1] through tmp[x + 2], so it relies on
 * the external routine having left valid data there (including just past
 * w/2) -- confirm against the asm.
 */
static void horizontal_compose_dd97i_ssse3(uint8_t *_b, uint8_t *_tmp, int w)
{
    int16_t *buf = (int16_t *)_b;
    int16_t *aux = (int16_t *)_tmp;
    const int half = w >> 1;
    int n;

    ff_horizontal_compose_dd97i_ssse3(buf, aux, w);

    /* Start at the last multiple of 8 below half; ascending order is
     * required because buf[n + half] is read before buf[2n + 1] may
     * overwrite that same slot. */
    for (n = half & ~7; n < half; n++) {
        buf[2 * n]     = (aux[n] + 1) >> 1;
        buf[2 * n + 1] = (COMPOSE_DD97iH0(aux[n - 1], aux[n], buf[n + half],
                                          aux[n + 1], aux[n + 2]) + 1) >> 1;
    }
}
#endif
160 
162 {
163 #if HAVE_X86ASM
164  int mm_flags = av_get_cpu_flags();
165 
166 #if !ARCH_X86_64
167  if (!(mm_flags & AV_CPU_FLAG_MMX))
168  return;
169 
170  switch (type) {
171  case DWT_DIRAC_DD9_7:
172  d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
173  d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
174  break;
175  case DWT_DIRAC_LEGALL5_3:
176  d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
177  d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_mmx;
178  break;
179  case DWT_DIRAC_DD13_7:
180  d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_mmx;
181  d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
182  break;
183  case DWT_DIRAC_HAAR0:
184  d->vertical_compose = (void*)vertical_compose_haar_mmx;
185  d->horizontal_compose = horizontal_compose_haar0i_mmx;
186  break;
187  case DWT_DIRAC_HAAR1:
188  d->vertical_compose = (void*)vertical_compose_haar_mmx;
189  d->horizontal_compose = horizontal_compose_haar1i_mmx;
190  break;
191  }
192 #endif
193 
194  if (!(mm_flags & AV_CPU_FLAG_SSE2))
195  return;
196 
197  switch (type) {
198  case DWT_DIRAC_DD9_7:
199  d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2;
200  d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2;
201  break;
202  case DWT_DIRAC_LEGALL5_3:
203  d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2;
204  d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_sse2;
205  break;
206  case DWT_DIRAC_DD13_7:
207  d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_sse2;
208  d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2;
209  break;
210  case DWT_DIRAC_HAAR0:
211  d->vertical_compose = (void*)vertical_compose_haar_sse2;
212  d->horizontal_compose = horizontal_compose_haar0i_sse2;
213  break;
214  case DWT_DIRAC_HAAR1:
215  d->vertical_compose = (void*)vertical_compose_haar_sse2;
216  d->horizontal_compose = horizontal_compose_haar1i_sse2;
217  break;
218  }
219 
220  if (!(mm_flags & AV_CPU_FLAG_SSSE3))
221  return;
222 
223  switch (type) {
224  case DWT_DIRAC_DD9_7:
225  d->horizontal_compose = horizontal_compose_dd97i_ssse3;
226  break;
227  }
228 #endif // HAVE_X86ASM
229 }
cpu.h
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:26
b
#define b
Definition: input.c:41
dirac_dwt.h
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
AV_CPU_FLAG_SSSE3
#define AV_CPU_FLAG_SSSE3
Conroe SSSE3 functions.
Definition: cpu.h:43
x
FFmpeg Automated Testing Environment ************************************Introduction Using FATE from your FFmpeg source directory Submitting the results to the FFmpeg result aggregation server Uploading new samples to the fate suite FATE makefile targets and variables Makefile targets Makefile variables Examples Introduction **************FATE is an extended regression suite on the client side and a means for results aggregation and presentation on the server side The first part of this document explains how you can use FATE from your FFmpeg source directory to test your ffmpeg binary The second part describes how you can run FATE to submit the results to FFmpeg’s FATE server In any way you can have a look at the publicly viewable FATE results by visiting this as it can be seen if some test on some platform broke with their recent contribution This usually happens on the platforms the developers could not test on The second part of this document describes how you can run FATE to submit your results to FFmpeg’s FATE server If you want to submit your results be sure to check that your combination of OS and compiler is not already listed on the above mentioned website In the third part you can find a comprehensive listing of FATE makefile targets and variables Using FATE from your FFmpeg source directory **********************************************If you want to run FATE on your machine you need to have the samples in place You can get the samples via the build target fate rsync Use this command from the top level source this will cause FATE to fail NOTE To use a custom wrapper to run the pass ‘ target exec’ to ‘configure’ or set the TARGET_EXEC Make variable Submitting the results to the FFmpeg result aggregation server ****************************************************************To submit your results to the server you should run fate through the shell script ‘tests fate sh’ from the FFmpeg sources This script needs to be invoked with a configuration file as 
its first argument tests fate sh path to fate_config A configuration file template with comments describing the individual configuration variables can be found at ‘doc fate_config sh template’ Create a configuration that suits your based on the configuration template The ‘slot’ configuration variable can be any string that is not yet but it is suggested that you name it adhering to the following pattern ‘ARCH OS COMPILER COMPILER VERSION’ The configuration file itself will be sourced in a shell therefore all shell features may be used This enables you to setup the environment as you need it for your build For your first test runs the ‘fate_recv’ variable should be empty or commented out This will run everything as normal except that it will omit the submission of the results to the server The following files should be present in $workdir as specified in the configuration it may help to try out the ‘ssh’ command with one or more ‘ v’ options You should get detailed output concerning your SSH configuration and the authentication process The only thing left is to automate the execution of the fate sh script and the synchronisation of the samples directory Uploading new samples to the fate suite *****************************************If you need a sample uploaded send a mail to samples request This is for developers who have an account on the fate suite server If you upload new please make sure they are as small as space on each network bandwidth and so on benefit from smaller test cases Also keep in mind older checkouts use existing sample that means in practice generally do not remove or overwrite files as it likely would break older checkouts or releases Also all needed samples for a commit should be ideally before the push If you need an account for frequently uploading samples or you wish to help others by doing that send a mail to ffmpeg devel rsync vauL Duo x
Definition: fate.txt:150
type
it's the only field you need to keep assuming you have a context There is some magic you don't need to care about around this just let it vf type
Definition: writing_filters.txt:86
DWT_DIRAC_HAAR1
@ DWT_DIRAC_HAAR1
Definition: dirac_dwt.h:81
DWT_DIRAC_HAAR0
@ DWT_DIRAC_HAAR0
Definition: dirac_dwt.h:80
DWTContext::horizontal_compose
void(* horizontal_compose)(uint8_t *b, uint8_t *tmp, int width)
Definition: dirac_dwt.h:69
dwt_type
dwt_type
Definition: dirac_dwt.h:74
DWTContext::vertical_compose_l0
void(* vertical_compose_l0)(void)
Definition: dirac_dwt.h:64
DWT_DIRAC_DD13_7
@ DWT_DIRAC_DD13_7
Definition: dirac_dwt.h:79
DWTContext::vertical_compose
void(* vertical_compose)(void)
one set of lowpass and highpass combined
Definition: dirac_dwt.h:68
AV_CPU_FLAG_SSE2
#define AV_CPU_FLAG_SSE2
PIV SSE2 functions.
Definition: cpu.h:36
COMPOSE_DD97iH0
#define COMPOSE_DD97iH0(b0, b1, b2, b3, b4)
Definition: dirac_dwt.h:101
COMPOSE_VERTICAL
#define COMPOSE_VERTICAL(ext, align)
Definition: dirac_dwt_init.c:27
asm.h
ff_spatial_idwt_init_x86
void ff_spatial_idwt_init_x86(DWTContext *d, enum dwt_type type)
Definition: dirac_dwt_init.c:161
uint8_t
uint8_t
Definition: audio_convert.c:194
DWTContext
Definition: dirac_dwt.h:54
w
FFmpeg Automated Testing Environment ************************************Introduction Using FATE from your FFmpeg source directory Submitting the results to the FFmpeg result aggregation server Uploading new samples to the fate suite FATE makefile targets and variables Makefile targets Makefile variables Examples Introduction **************FATE is an extended regression suite on the client side and a means for results aggregation and presentation on the server side The first part of this document explains how you can use FATE from your FFmpeg source directory to test your ffmpeg binary The second part describes how you can run FATE to submit the results to FFmpeg’s FATE server In any way you can have a look at the publicly viewable FATE results by visiting this as it can be seen if some test on some platform broke with their recent contribution This usually happens on the platforms the developers could not test on The second part of this document describes how you can run FATE to submit your results to FFmpeg’s FATE server If you want to submit your results be sure to check that your combination of OS and compiler is not already listed on the above mentioned website In the third part you can find a comprehensive listing of FATE makefile targets and variables Using FATE from your FFmpeg source directory **********************************************If you want to run FATE on your machine you need to have the samples in place You can get the samples via the build target fate rsync Use this command from the top level source this will cause FATE to fail NOTE To use a custom wrapper to run the pass ‘ target exec’ to ‘configure’ or set the TARGET_EXEC Make variable Submitting the results to the FFmpeg result aggregation server ****************************************************************To submit your results to the server you should run fate through the shell script ‘tests fate sh’ from the FFmpeg sources This script needs to be invoked with a configuration file as 
its first argument tests fate sh path to fate_config A configuration file template with comments describing the individual configuration variables can be found at ‘doc fate_config sh template’ Create a configuration that suits your based on the configuration template The ‘slot’ configuration variable can be any string that is not yet but it is suggested that you name it adhering to the following pattern ‘ARCH OS COMPILER COMPILER VERSION’ The configuration file itself will be sourced in a shell therefore all shell features may be used This enables you to setup the environment as you need it for your build For your first test runs the ‘fate_recv’ variable should be empty or commented out This will run everything as normal except that it will omit the submission of the results to the server The following files should be present in $workdir as specified in the configuration it may help to try out the ‘ssh’ command with one or more ‘ v’ options You should get detailed output concerning your SSH configuration and the authentication process The only thing left is to automate the execution of the fate sh script and the synchronisation of the samples directory Uploading new samples to the fate suite *****************************************If you need a sample uploaded send a mail to samples request This is for developers who have an account on the fate suite server If you upload new please make sure they are as small as space on each network bandwidth and so on benefit from smaller test cases Also keep in mind older checkouts use existing sample that means in practice generally do not remove or overwrite files as it likely would break older checkouts or releases Also all needed samples for a commit should be ideally before the push If you need an account for frequently uploading samples or you wish to help others by doing that send a mail to ffmpeg devel rsync vauL Duo ug o o w
Definition: fate.txt:150
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:31
void
typedef void(RENAME(mix_any_func_type))
Definition: rematrix_template.c:52
DWTContext::vertical_compose_h0
void(* vertical_compose_h0)(void)
Definition: dirac_dwt.h:65
DWT_DIRAC_LEGALL5_3
@ DWT_DIRAC_LEGALL5_3
Definition: dirac_dwt.h:78
DWT_DIRAC_DD9_7
@ DWT_DIRAC_DD9_7
Definition: dirac_dwt.h:77