FFmpeg  4.2.1
tx.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 Lynne <dev@lynne.ee>
3  * Power of two FFT:
4  * Copyright (c) 2008 Loren Merritt
5  * Copyright (c) 2002 Fabrice Bellard
6  * Partly based on libdjbfft by D. J. Bernstein
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include <stddef.h>
26 #include "tx.h"
27 #include "thread.h"
28 #include "mem.h"
29 #include "avassert.h"
30 
31 typedef float FFTSample;
33 
34 struct AVTXContext {
35  int n; /* Nptwo part */
36  int m; /* Ptwo part */
37 
38  FFTComplex *exptab; /* MDCT exptab */
39  FFTComplex *tmp; /* Temporary buffer needed for all compound transforms */
40  int *pfatab; /* Input/Output mapping for compound transforms */
41  int *revtab; /* Input mapping for power of two transforms */
42 };
43 
44 #define FFT_NAME(x) x
45 
46 #define COSTABLE(size) \
47  static DECLARE_ALIGNED(32, FFTSample, FFT_NAME(ff_cos_##size))[size/2]
48 
49 static FFTSample * const FFT_NAME(ff_cos_tabs)[18];
50 
51 COSTABLE(16);
52 COSTABLE(32);
53 COSTABLE(64);
54 COSTABLE(128);
55 COSTABLE(256);
56 COSTABLE(512);
57 COSTABLE(1024);
58 COSTABLE(2048);
59 COSTABLE(4096);
60 COSTABLE(8192);
61 COSTABLE(16384);
62 COSTABLE(32768);
63 COSTABLE(65536);
64 COSTABLE(131072);
65 
66 static av_cold void init_ff_cos_tabs(int index)
67 {
68  int m = 1 << index;
69  double freq = 2*M_PI/m;
70  FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
71  for(int i = 0; i <= m/4; i++)
72  tab[i] = cos(i*freq);
73  for(int i = 1; i < m/4; i++)
74  tab[m/2 - i] = tab[i];
75 }
76 
77 typedef struct CosTabsInitOnce {
78  void (*func)(void);
79  AVOnce control;
81 
82 #define INIT_FF_COS_TABS_FUNC(index, size) \
83 static av_cold void init_ff_cos_tabs_ ## size (void) \
84 { \
85  init_ff_cos_tabs(index); \
86 }
87 
94 INIT_FF_COS_TABS_FUNC(10, 1024)
95 INIT_FF_COS_TABS_FUNC(11, 2048)
96 INIT_FF_COS_TABS_FUNC(12, 4096)
97 INIT_FF_COS_TABS_FUNC(13, 8192)
98 INIT_FF_COS_TABS_FUNC(14, 16384)
99 INIT_FF_COS_TABS_FUNC(15, 32768)
100 INIT_FF_COS_TABS_FUNC(16, 65536)
101 INIT_FF_COS_TABS_FUNC(17, 131072)
102 
104  { NULL },
105  { NULL },
106  { NULL },
107  { NULL },
108  { init_ff_cos_tabs_16, AV_ONCE_INIT },
109  { init_ff_cos_tabs_32, AV_ONCE_INIT },
110  { init_ff_cos_tabs_64, AV_ONCE_INIT },
111  { init_ff_cos_tabs_128, AV_ONCE_INIT },
112  { init_ff_cos_tabs_256, AV_ONCE_INIT },
113  { init_ff_cos_tabs_512, AV_ONCE_INIT },
114  { init_ff_cos_tabs_1024, AV_ONCE_INIT },
115  { init_ff_cos_tabs_2048, AV_ONCE_INIT },
116  { init_ff_cos_tabs_4096, AV_ONCE_INIT },
117  { init_ff_cos_tabs_8192, AV_ONCE_INIT },
118  { init_ff_cos_tabs_16384, AV_ONCE_INIT },
119  { init_ff_cos_tabs_32768, AV_ONCE_INIT },
120  { init_ff_cos_tabs_65536, AV_ONCE_INIT },
121  { init_ff_cos_tabs_131072, AV_ONCE_INIT },
122 };
123 
124 static FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
125  NULL, NULL, NULL, NULL,
126  FFT_NAME(ff_cos_16),
127  FFT_NAME(ff_cos_32),
128  FFT_NAME(ff_cos_64),
129  FFT_NAME(ff_cos_128),
130  FFT_NAME(ff_cos_256),
131  FFT_NAME(ff_cos_512),
132  FFT_NAME(ff_cos_1024),
133  FFT_NAME(ff_cos_2048),
134  FFT_NAME(ff_cos_4096),
135  FFT_NAME(ff_cos_8192),
136  FFT_NAME(ff_cos_16384),
137  FFT_NAME(ff_cos_32768),
138  FFT_NAME(ff_cos_65536),
139  FFT_NAME(ff_cos_131072),
140 };
141 
143 {
144  ff_thread_once(&cos_tabs_init_once[index].control,
145  cos_tabs_init_once[index].func);
146 }
147 
150 
151 static av_cold void ff_init_53_tabs(void)
152 {
153  ff_53_tabs[0] = (FFTComplex){ cos(2 * M_PI / 12), cos(2 * M_PI / 12) };
154  ff_53_tabs[1] = (FFTComplex){ 0.5, 0.5 };
155  ff_53_tabs[2] = (FFTComplex){ cos(2 * M_PI / 5), sin(2 * M_PI / 5) };
156  ff_53_tabs[3] = (FFTComplex){ cos(2 * M_PI / 10), sin(2 * M_PI / 10) };
157 }
158 
159 #define BF(x, y, a, b) do { \
160  x = (a) - (b); \
161  y = (a) + (b); \
162  } while (0)
163 
164 #define CMUL(dre, dim, are, aim, bre, bim) do { \
165  (dre) = (are) * (bre) - (aim) * (bim); \
166  (dim) = (are) * (bim) + (aim) * (bre); \
167  } while (0)
168 
169 #define CMUL3(c, a, b) CMUL((c).re, (c).im, (a).re, (a).im, (b).re, (b).im)
170 
172  ptrdiff_t stride)
173 {
174  FFTComplex tmp[2];
175 
176  tmp[0].re = in[1].im - in[2].im;
177  tmp[0].im = in[1].re - in[2].re;
178  tmp[1].re = in[1].re + in[2].re;
179  tmp[1].im = in[1].im + in[2].im;
180 
181  out[0*stride].re = in[0].re + tmp[1].re;
182  out[0*stride].im = in[0].im + tmp[1].im;
183 
184  tmp[0].re *= ff_53_tabs[0].re;
185  tmp[0].im *= ff_53_tabs[0].im;
186  tmp[1].re *= ff_53_tabs[1].re;
187  tmp[1].im *= ff_53_tabs[1].re;
188 
189  out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
190  out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
191  out[2*stride].re = in[0].re - tmp[1].re - tmp[0].re;
192  out[2*stride].im = in[0].im - tmp[1].im + tmp[0].im;
193 }
194 
195 #define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
196 static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
197  ptrdiff_t stride) \
198 { \
199  FFTComplex z0[4], t[6]; \
200  \
201  t[0].re = in[1].re + in[4].re; \
202  t[0].im = in[1].im + in[4].im; \
203  t[1].im = in[1].re - in[4].re; \
204  t[1].re = in[1].im - in[4].im; \
205  t[2].re = in[2].re + in[3].re; \
206  t[2].im = in[2].im + in[3].im; \
207  t[3].im = in[2].re - in[3].re; \
208  t[3].re = in[2].im - in[3].im; \
209  \
210  out[D0*stride].re = in[0].re + in[1].re + in[2].re + \
211  in[3].re + in[4].re; \
212  out[D0*stride].im = in[0].im + in[1].im + in[2].im + \
213  in[3].im + in[4].im; \
214  \
215  t[4].re = ff_53_tabs[2].re * t[2].re - ff_53_tabs[3].re * t[0].re; \
216  t[4].im = ff_53_tabs[2].re * t[2].im - ff_53_tabs[3].re * t[0].im; \
217  t[0].re = ff_53_tabs[2].re * t[0].re - ff_53_tabs[3].re * t[2].re; \
218  t[0].im = ff_53_tabs[2].re * t[0].im - ff_53_tabs[3].re * t[2].im; \
219  t[5].re = ff_53_tabs[2].im * t[3].re - ff_53_tabs[3].im * t[1].re; \
220  t[5].im = ff_53_tabs[2].im * t[3].im - ff_53_tabs[3].im * t[1].im; \
221  t[1].re = ff_53_tabs[2].im * t[1].re + ff_53_tabs[3].im * t[3].re; \
222  t[1].im = ff_53_tabs[2].im * t[1].im + ff_53_tabs[3].im * t[3].im; \
223  \
224  z0[0].re = t[0].re - t[1].re; \
225  z0[0].im = t[0].im - t[1].im; \
226  z0[1].re = t[4].re + t[5].re; \
227  z0[1].im = t[4].im + t[5].im; \
228  \
229  z0[2].re = t[4].re - t[5].re; \
230  z0[2].im = t[4].im - t[5].im; \
231  z0[3].re = t[0].re + t[1].re; \
232  z0[3].im = t[0].im + t[1].im; \
233  \
234  out[D1*stride].re = in[0].re + z0[3].re; \
235  out[D1*stride].im = in[0].im + z0[0].im; \
236  out[D2*stride].re = in[0].re + z0[2].re; \
237  out[D2*stride].im = in[0].im + z0[1].im; \
238  out[D3*stride].re = in[0].re + z0[1].re; \
239  out[D3*stride].im = in[0].im + z0[2].im; \
240  out[D4*stride].re = in[0].re + z0[0].re; \
241  out[D4*stride].im = in[0].im + z0[3].im; \
242 }
243 
244 DECL_FFT5(fft5, 0, 1, 2, 3, 4)
245 DECL_FFT5(fft5_m1, 0, 6, 12, 3, 9)
246 DECL_FFT5(fft5_m2, 10, 1, 7, 13, 4)
247 DECL_FFT5(fft5_m3, 5, 11, 2, 8, 14)
248 
250  ptrdiff_t stride)
251 {
252  FFTComplex tmp[15];
253 
254  for (int i = 0; i < 5; i++)
255  fft3(tmp + i, in + i*3, 5);
256 
257  fft5_m1(out, tmp + 0, stride);
258  fft5_m2(out, tmp + 5, stride);
259  fft5_m3(out, tmp + 10, stride);
260 }
261 
262 #define BUTTERFLIES(a0,a1,a2,a3) {\
263  BF(t3, t5, t5, t1);\
264  BF(a2.re, a0.re, a0.re, t5);\
265  BF(a3.im, a1.im, a1.im, t3);\
266  BF(t4, t6, t2, t6);\
267  BF(a3.re, a1.re, a1.re, t4);\
268  BF(a2.im, a0.im, a0.im, t6);\
269 }
270 
271 // force loading all the inputs before storing any.
272 // this is slightly slower for small data, but avoids store->load aliasing
273 // for addresses separated by large powers of 2.
274 #define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
275  FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
276  BF(t3, t5, t5, t1);\
277  BF(a2.re, a0.re, r0, t5);\
278  BF(a3.im, a1.im, i1, t3);\
279  BF(t4, t6, t2, t6);\
280  BF(a3.re, a1.re, r1, t4);\
281  BF(a2.im, a0.im, i0, t6);\
282 }
283 
284 #define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
285  CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
286  CMUL(t5, t6, a3.re, a3.im, wre, wim);\
287  BUTTERFLIES(a0,a1,a2,a3)\
288 }
289 
290 #define TRANSFORM_ZERO(a0,a1,a2,a3) {\
291  t1 = a2.re;\
292  t2 = a2.im;\
293  t5 = a3.re;\
294  t6 = a3.im;\
295  BUTTERFLIES(a0,a1,a2,a3)\
296 }
297 
298 /* z[0...8n-1], w[1...2n-1] */
299 #define PASS(name)\
300 static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
301 {\
302  FFTSample t1, t2, t3, t4, t5, t6;\
303  int o1 = 2*n;\
304  int o2 = 4*n;\
305  int o3 = 6*n;\
306  const FFTSample *wim = wre+o1;\
307  n--;\
308 \
309  TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
310  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
311  do {\
312  z += 2;\
313  wre += 2;\
314  wim -= 2;\
315  TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
316  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
317  } while(--n);\
318 }
319 
320 PASS(pass)
321 #undef BUTTERFLIES
322 #define BUTTERFLIES BUTTERFLIES_BIG
323 PASS(pass_big)
324 
325 #define DECL_FFT(n,n2,n4)\
326 static void fft##n(FFTComplex *z)\
327 {\
328  fft##n2(z);\
329  fft##n4(z+n4*2);\
330  fft##n4(z+n4*3);\
331  pass(z,FFT_NAME(ff_cos_##n),n4/2);\
332 }
333 
334 static void fft4(FFTComplex *z)
335 {
336  FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
337 
338  BF(t3, t1, z[0].re, z[1].re);
339  BF(t8, t6, z[3].re, z[2].re);
340  BF(z[2].re, z[0].re, t1, t6);
341  BF(t4, t2, z[0].im, z[1].im);
342  BF(t7, t5, z[2].im, z[3].im);
343  BF(z[3].im, z[1].im, t4, t8);
344  BF(z[3].re, z[1].re, t3, t7);
345  BF(z[2].im, z[0].im, t2, t5);
346 }
347 
348 static void fft8(FFTComplex *z)
349 {
350  FFTSample t1, t2, t3, t4, t5, t6;
351 
352  fft4(z);
353 
354  BF(t1, z[5].re, z[4].re, -z[5].re);
355  BF(t2, z[5].im, z[4].im, -z[5].im);
356  BF(t5, z[7].re, z[6].re, -z[7].re);
357  BF(t6, z[7].im, z[6].im, -z[7].im);
358 
359  BUTTERFLIES(z[0],z[2],z[4],z[6]);
360  TRANSFORM(z[1],z[3],z[5],z[7],M_SQRT1_2,M_SQRT1_2);
361 }
362 
363 static void fft16(FFTComplex *z)
364 {
365  FFTSample t1, t2, t3, t4, t5, t6;
366  FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1];
367  FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3];
368 
369  fft8(z);
370  fft4(z+8);
371  fft4(z+12);
372 
373  TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
374  TRANSFORM(z[2],z[6],z[10],z[14],M_SQRT1_2,M_SQRT1_2);
375  TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
376  TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
377 }
378 
379 DECL_FFT(32,16,8)
380 DECL_FFT(64,32,16)
381 DECL_FFT(128,64,32)
382 DECL_FFT(256,128,64)
383 DECL_FFT(512,256,128)
384 #define pass pass_big
385 DECL_FFT(1024,512,256)
386 DECL_FFT(2048,1024,512)
387 DECL_FFT(4096,2048,1024)
388 DECL_FFT(8192,4096,2048)
389 DECL_FFT(16384,8192,4096)
390 DECL_FFT(32768,16384,8192)
391 DECL_FFT(65536,32768,16384)
392 DECL_FFT(131072,65536,32768)
393 
394 static void (* const fft_dispatch[])(FFTComplex*) = {
395  fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
396  fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, fft131072
397 };
398 
399 #define DECL_COMP_FFT(N) \
400 static void compound_fft_##N##xM(AVTXContext *s, void *_out, \
401  void *_in, ptrdiff_t stride) \
402 { \
403  const int m = s->m, *in_map = s->pfatab, *out_map = in_map + N*m; \
404  FFTComplex *in = _in; \
405  FFTComplex *out = _out; \
406  FFTComplex fft##N##in[N]; \
407  void (*fftp)(FFTComplex *z) = fft_dispatch[av_log2(m) - 2]; \
408  \
409  for (int i = 0; i < m; i++) { \
410  for (int j = 0; j < N; j++) \
411  fft##N##in[j] = in[in_map[i*N + j]]; \
412  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
413  } \
414  \
415  for (int i = 0; i < N; i++) \
416  fftp(s->tmp + m*i); \
417  \
418  for (int i = 0; i < N*m; i++) \
419  out[i] = s->tmp[out_map[i]]; \
420 }
421 
422 DECL_COMP_FFT(3)
423 DECL_COMP_FFT(5)
424 DECL_COMP_FFT(15)
425 
426 static void monolithic_fft(AVTXContext *s, void *_out, void *_in,
427  ptrdiff_t stride)
428 {
429  FFTComplex *in = _in;
430  FFTComplex *out = _out;
431  int m = s->m, mb = av_log2(m) - 2;
432  for (int i = 0; i < m; i++)
433  out[s->revtab[i]] = in[i];
434  fft_dispatch[mb](out);
435 }
436 
437 #define DECL_COMP_IMDCT(N) \
438 static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
439  ptrdiff_t stride) \
440 { \
441  FFTComplex fft##N##in[N]; \
442  FFTComplex *z = _dst, *exp = s->exptab; \
443  const int m = s->m, len8 = N*m >> 1; \
444  const int *in_map = s->pfatab, *out_map = in_map + N*m; \
445  const float *src = _src, *in1, *in2; \
446  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2]; \
447  \
448  stride /= sizeof(*src); /* To convert it from bytes */ \
449  in1 = src; \
450  in2 = src + ((N*m*2) - 1) * stride; \
451  \
452  for (int i = 0; i < m; i++) { \
453  for (int j = 0; j < N; j++) { \
454  const int k = in_map[i*N + j]; \
455  FFTComplex tmp = { in2[-k*stride], in1[k*stride] }; \
456  CMUL3(fft##N##in[j], tmp, exp[k >> 1]); \
457  } \
458  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
459  } \
460  \
461  for (int i = 0; i < N; i++) \
462  fftp(s->tmp + m*i); \
463  \
464  for (int i = 0; i < len8; i++) { \
465  const int i0 = len8 + i, i1 = len8 - i - 1; \
466  const int s0 = out_map[i0], s1 = out_map[i1]; \
467  FFTComplex src1 = { s->tmp[s1].im, s->tmp[s1].re }; \
468  FFTComplex src0 = { s->tmp[s0].im, s->tmp[s0].re }; \
469  \
470  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re); \
471  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re); \
472  } \
473 }
474 
477 DECL_COMP_IMDCT(15)
478 
479 #define DECL_COMP_MDCT(N) \
480 static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
481  ptrdiff_t stride) \
482 { \
483  float *src = _src, *dst = _dst; \
484  FFTComplex *exp = s->exptab, tmp, fft##N##in[N]; \
485  const int m = s->m, len4 = N*m, len3 = len4 * 3, len8 = len4 >> 1; \
486  const int *in_map = s->pfatab, *out_map = in_map + N*m; \
487  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2]; \
488  \
489  stride /= sizeof(*dst); \
490  \
491  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */ \
492  for (int j = 0; j < N; j++) { \
493  const int k = in_map[i*N + j]; \
494  if (k < len4) { \
495  tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k]; \
496  tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k]; \
497  } else { \
498  tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k]; \
499  tmp.im = src[-len4 + k] - src[1*len3 - 1 - k]; \
500  } \
501  CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
502  exp[k >> 1].re, exp[k >> 1].im); \
503  } \
504  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
505  } \
506  \
507  for (int i = 0; i < N; i++) \
508  fftp(s->tmp + m*i); \
509  \
510  for (int i = 0; i < len8; i++) { \
511  const int i0 = len8 + i, i1 = len8 - i - 1; \
512  const int s0 = out_map[i0], s1 = out_map[i1]; \
513  FFTComplex src1 = { s->tmp[s1].re, s->tmp[s1].im }; \
514  FFTComplex src0 = { s->tmp[s0].re, s->tmp[s0].im }; \
515  \
516  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im, \
517  exp[i0].im, exp[i0].re); \
518  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im, \
519  exp[i1].im, exp[i1].re); \
520  } \
521 }
522 
525 DECL_COMP_MDCT(15)
526 
527 static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src,
528  ptrdiff_t stride)
529 {
530  FFTComplex *z = _dst, *exp = s->exptab;
531  const int m = s->m, len8 = m >> 1;
532  const float *src = _src, *in1, *in2;
533  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2];
534 
535  stride /= sizeof(*src);
536  in1 = src;
537  in2 = src + ((m*2) - 1) * stride;
538 
539  for (int i = 0; i < m; i++) {
540  FFTComplex tmp = { in2[-2*i*stride], in1[2*i*stride] };
541  CMUL3(z[s->revtab[i]], tmp, exp[i]);
542  }
543 
544  fftp(z);
545 
546  for (int i = 0; i < len8; i++) {
547  const int i0 = len8 + i, i1 = len8 - i - 1;
548  FFTComplex src1 = { z[i1].im, z[i1].re };
549  FFTComplex src0 = { z[i0].im, z[i0].re };
550 
551  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);
552  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);
553  }
554 }
555 
556 static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
557  ptrdiff_t stride)
558 {
559  float *src = _src, *dst = _dst;
560  FFTComplex *exp = s->exptab, tmp, *z = _dst;
561  const int m = s->m, len4 = m, len3 = len4 * 3, len8 = len4 >> 1;
562  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2];
563 
564  stride /= sizeof(*dst);
565 
566  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */
567  const int k = 2*i;
568  if (k < len4) {
569  tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k];
570  tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k];
571  } else {
572  tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k];
573  tmp.im = src[-len4 + k] - src[1*len3 - 1 - k];
574  }
575  CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
576  exp[i].re, exp[i].im);
577  }
578 
579  fftp(z);
580 
581  for (int i = 0; i < len8; i++) {
582  const int i0 = len8 + i, i1 = len8 - i - 1;
583  FFTComplex src1 = { z[i1].re, z[i1].im };
584  FFTComplex src0 = { z[i0].re, z[i0].im };
585 
586  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,
587  exp[i0].im, exp[i0].re);
588  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,
589  exp[i1].im, exp[i1].re);
590  }
591 }
592 
593 /* Calculates the modular multiplicative inverse, not fast, replace */
594 static int mulinv(int n, int m)
595 {
596  n = n % m;
597  for (int x = 1; x < m; x++)
598  if (((n * x) % m) == 1)
599  return x;
600  av_assert0(0); /* Never reached */
601 }
602 
603 /* Guaranteed to work for any n, m where gcd(n, m) == 1 */
604 static int gen_compound_mapping(AVTXContext *s, int n, int m, int inv,
605  enum AVTXType type)
606 {
607  int *in_map, *out_map;
608  const int len = n*m;
609  const int m_inv = mulinv(m, n);
610  const int n_inv = mulinv(n, m);
611  const int mdct = type == AV_TX_FLOAT_MDCT;
612 
613  if (!(s->pfatab = av_malloc(2*len*sizeof(*s->pfatab))))
614  return AVERROR(ENOMEM);
615 
616  in_map = s->pfatab;
617  out_map = s->pfatab + n*m;
618 
619  /* Ruritanian map for input, CRT map for output, can be swapped */
620  for (int j = 0; j < m; j++) {
621  for (int i = 0; i < n; i++) {
622  /* Shifted by 1 to simplify forward MDCTs */
623  in_map[j*n + i] = ((i*m + j*n) % len) << mdct;
624  out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
625  }
626  }
627 
628  /* Change transform direction by reversing all ACs */
629  if (inv) {
630  for (int i = 0; i < m; i++) {
631  int *in = &in_map[i*n + 1]; /* Skip the DC */
632  for (int j = 0; j < ((n - 1) >> 1); j++)
633  FFSWAP(int, in[j], in[n - j - 2]);
634  }
635  }
636 
637  /* Our 15-point transform is also a compound one, so embed its input map */
638  if (n == 15) {
639  for (int k = 0; k < m; k++) {
640  int tmp[15];
641  memcpy(tmp, &in_map[k*15], 15*sizeof(*tmp));
642  for (int i = 0; i < 5; i++) {
643  for (int j = 0; j < 3; j++)
644  in_map[k*15 + i*3 + j] = tmp[(i*3 + j*5) % 15];
645  }
646  }
647  }
648 
649  return 0;
650 }
651 
652 static int split_radix_permutation(int i, int n, int inverse)
653 {
654  int m;
655  if (n <= 2)
656  return i & 1;
657  m = n >> 1;
658  if (!(i & m))
659  return split_radix_permutation(i, m, inverse)*2;
660  m >>= 1;
661  if (inverse == !(i & m))
662  return split_radix_permutation(i, m, inverse)*4 + 1;
663  else
664  return split_radix_permutation(i, m, inverse)*4 - 1;
665 }
666 
667 static int get_ptwo_revtab(AVTXContext *s, int m, int inv)
668 {
669  if (!(s->revtab = av_malloc(m*sizeof(*s->revtab))))
670  return AVERROR(ENOMEM);
671 
672  /* Default */
673  for (int i = 0; i < m; i++) {
674  int k = -split_radix_permutation(i, m, inv) & (m - 1);
675  s->revtab[k] = i;
676  }
677 
678  return 0;
679 }
680 
681 static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
682 {
683  const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
684 
685  if (!(s->exptab = av_malloc_array(len4, sizeof(*s->exptab))))
686  return AVERROR(ENOMEM);
687 
688  scale = sqrt(fabs(scale));
689  for (int i = 0; i < len4; i++) {
690  const double alpha = M_PI_2 * (i + theta) / len4;
691  s->exptab[i].re = cos(alpha) * scale;
692  s->exptab[i].im = sin(alpha) * scale;
693  }
694 
695  return 0;
696 }
697 
699 {
700  if (!(*ctx))
701  return;
702 
703  av_free((*ctx)->pfatab);
704  av_free((*ctx)->exptab);
705  av_free((*ctx)->revtab);
706  av_free((*ctx)->tmp);
707 
708  av_freep(ctx);
709 }
710 
712  int inv, int len, const void *scale, uint64_t flags)
713 {
714  int err, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) + 1);
715 
716  if (type == AV_TX_FLOAT_MDCT)
717  len >>= 1;
718 
719 #define CHECK_FACTOR(DST, FACTOR, SRC) \
720  if (DST == 1 && !(SRC % FACTOR)) { \
721  DST = FACTOR; \
722  SRC /= FACTOR; \
723  }
724  CHECK_FACTOR(n, 15, len)
725  CHECK_FACTOR(n, 5, len)
726  CHECK_FACTOR(n, 3, len)
727 #undef CHECK_NPTWO_FACTOR
728 
729  /* len must be a power of two now */
730  if (!(len & (len - 1)) && len >= 4 && len <= max_ptwo) {
731  m = len;
732  len = 1;
733  }
734 
735  /* Filter out direct 3, 5 and 15 transforms, too niche */
736  if (len > 1 || m == 1) {
737  av_log(NULL, AV_LOG_ERROR, "Unsupported transform size: n = %i, "
738  "m = %i, residual = %i!\n", n, m, len);
739  return AVERROR(EINVAL);
740  } else if (n > 1 && m > 1) { /* 2D transform case */
741  if ((err = gen_compound_mapping(s, n, m, inv, type)))
742  return err;
743  if (!(s->tmp = av_malloc(n*m*sizeof(*s->tmp))))
744  return AVERROR(ENOMEM);
745  *tx = n == 3 ? compound_fft_3xM :
746  n == 5 ? compound_fft_5xM :
747  compound_fft_15xM;
748  if (type == AV_TX_FLOAT_MDCT)
749  *tx = n == 3 ? inv ? compound_imdct_3xM : compound_mdct_3xM :
750  n == 5 ? inv ? compound_imdct_5xM : compound_mdct_5xM :
751  inv ? compound_imdct_15xM : compound_mdct_15xM;
752  } else { /* Direct transform case */
753  *tx = monolithic_fft;
754  if (type == AV_TX_FLOAT_MDCT)
755  *tx = inv ? monolithic_imdct : monolithic_mdct;
756  }
757 
758  if (n != 1)
760  if (m != 1) {
761  get_ptwo_revtab(s, m, inv);
762  for (int i = 4; i <= av_log2(m); i++)
764  }
765 
766  if (type == AV_TX_FLOAT_MDCT)
767  if ((err = gen_mdct_exptab(s, n*m, *((float *)scale))))
768  return err;
769 
770  s->n = n;
771  s->m = m;
772 
773  return 0;
774 }
775 
777  int inv, int len, const void *scale, uint64_t flags)
778 {
779  int err;
780  AVTXContext *s = av_mallocz(sizeof(*s));
781  if (!s)
782  return AVERROR(ENOMEM);
783 
784  switch (type) {
785  case AV_TX_FLOAT_FFT:
786  case AV_TX_FLOAT_MDCT:
787  if ((err = init_mdct_fft(s, tx, type, inv, len, scale, flags)))
788  goto fail;
789  break;
790  default:
791  err = AVERROR(EINVAL);
792  goto fail;
793  }
794 
795  *ctx = s;
796 
797  return 0;
798 
799 fail:
800  av_tx_uninit(&s);
801  *tx = NULL;
802  return err;
803 }
static av_always_inline void fft15(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx.c:249
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets ctx to NULL, does nothing when ctx == NULL.
Definition: tx.c:698
#define NULL
Definition: coverity.c:32
AVComplexFloat FFTComplex
Definition: tx.c:32
static void(*const fft_dispatch[])(FFTComplex *)
Definition: tx.c:394
static void monolithic_fft(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
Definition: tx.c:426
float re
Definition: fft.c:82
Memory handling functions.
static FFTComplex ff_53_tabs[4]
Definition: tx.c:149
#define M_SQRT1_2
Definition: mathematics.h:58
static av_cold void init_ff_cos_tabs(int index)
Definition: tx.c:66
static av_cold void ff_init_53_tabs(void)
Definition: tx.c:151
#define BUTTERFLIES(a0, a1, a2, a3)
Definition: tx.c:322
FFTSample re
Definition: avfft.h:38
static void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
Definition: mdct15.c:92
#define t8
Definition: regdef.h:53
#define src
Definition: vp8dsp.c:254
int stride
Definition: mace.c:144
static av_cold void ff_init_ff_cos_tabs(int index)
Definition: tx.c:142
#define pass
Definition: tx.c:384
static CosTabsInitOnce cos_tabs_init_once[]
Definition: tx.c:103
#define t7
Definition: regdef.h:35
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
#define DECL_COMP_FFT(N)
Definition: tx.c:399
#define av_cold
Definition: attributes.h:82
#define av_malloc(s)
#define mb
#define CMUL3(c, a, b)
Definition: tx.c:169
int m
Definition: tx.c:36
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
Definition: mem.h:112
static int split_radix_permutation(int i, int n, int inverse)
Definition: tx.c:652
#define AVOnce
Definition: thread.h:159
#define PASS(name)
Definition: tx.c:299
#define av_log(a,...)
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:56
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)
Definition: tx.c:195
#define AVERROR(e)
Definition: error.h:43
float FFTSample
Definition: tx.c:31
#define t1
Definition: regdef.h:29
AVTXType
Definition: tx.h:31
static int mulinv(int n, int m)
Definition: tx.c:594
simple assert() macros that are a bit more flexible than ISO C assert().
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:236
#define t3
Definition: regdef.h:31
#define CMUL(dre, dim, are, aim, bre, bim)
Definition: tx.c:164
static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx.c:556
float FFTSample
Definition: avfft.h:35
static void fft8(FFTComplex *z)
Definition: tx.c:348
#define fail()
Definition: checkasm.h:120
int8_t exp
Definition: eval.c:72
static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
Definition: tx.c:681
Standard complex to complex FFT with sample data type AVComplexFloat.
Definition: tx.h:36
FFTComplex * exptab
Definition: tx.c:38
#define COSTABLE(size)
Definition: tx.c:46
#define M_PI_2
Definition: mathematics.h:55
int * revtab
Definition: tx.c:41
AVFormatContext * ctx
Definition: movenc.c:48
#define s(width, name)
Definition: cbs_vp9.c:257
#define TRANSFORM_ZERO(a0, a1, a2, a3)
Definition: tx.c:290
Definition: tx.c:34
#define CHECK_FACTOR(DST, FACTOR, SRC)
int * pfatab
Definition: tx.c:40
#define DECL_COMP_MDCT(N)
Definition: tx.c:479
FFTComplex * tmp
Definition: tx.c:39
#define FF_ARRAY_ELEMS(a)
#define av_log2
Definition: intmath.h:83
#define DECL_COMP_IMDCT(N)
Definition: tx.c:437
#define src1
Definition: h264pred.c:139
#define AV_ONCE_INIT
Definition: thread.h:160
typedef void(RENAME(mix_any_func_type))
#define FFT_NAME(x)
Definition: tx.c:44
static const int16_t alpha[]
Definition: ilbcdata.h:55
static AVOnce tabs_53_once
Definition: tx.c:148
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! *ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
#define TRANSFORM(a0, a1, a2, a3, wre, wim)
Definition: tx.c:284
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:67
int index
Definition: gxfenc.c:89
static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx.c:527
float im
Definition: fft.c:82
cl_device_type type
static int gen_compound_mapping(AVTXContext *s, int n, int m, int inv, enum AVTXType type)
Definition: tx.c:604
#define src0
Definition: h264pred.c:138
#define t5
Definition: regdef.h:33
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration Currently power of two lengths from 4 to ...
Definition: tx.c:776
#define INIT_FF_COS_TABS_FUNC(index, size)
Definition: tx.c:82
static av_always_inline void fft3(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx.c:171
#define flags(name, subs,...)
Definition: cbs_av1.c:561
FFTSample im
Definition: avfft.h:38
#define t6
Definition: regdef.h:34
static int init_mdct_fft(AVTXContext *s, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Definition: tx.c:711
#define t4
Definition: regdef.h:32
Standard MDCT with sample data type of float and a scale type of float.
Definition: tx.h:41
#define av_free(p)
static int get_ptwo_revtab(AVTXContext *s, int m, int inv)
Definition: tx.c:667
int len
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:162
static const struct twinvq_data tab
#define DECL_FFT(n, n2, n4)
Definition: tx.c:325
FILE * out
Definition: movenc.c:54
#define av_freep(p)
#define BF(x, y, a, b)
Definition: tx.c:159
#define av_always_inline
Definition: attributes.h:39
#define M_PI
Definition: mathematics.h:52
static void fft16(FFTComplex *z)
Definition: tx.c:363
static uint32_t inverse(uint32_t v)
find multiplicative inverse modulo 2 ^ 32
Definition: asfcrypt.c:35
#define av_malloc_array(a, b)
#define FFSWAP(type, a, b)
Definition: common.h:99
static void fft4(FFTComplex *z)
Definition: tx.c:334
int n
Definition: tx.c:35
#define t2
Definition: regdef.h:30