FFmpeg  4.3
aaccoder_twoloop.h
Go to the documentation of this file.
1 /*
2  * AAC encoder twoloop coder
3  * Copyright (C) 2008-2009 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder twoloop coder
25  * @author Konstantin Shishkov, Claudio Freire
26  */
27 
28 /**
29  * This file contains a template for the twoloop coder function.
30  * The following functions from aacenc_quantization/util.h must be provided
31  * externally, as already included declarations. They're not included
32  * explicitly here to make it possible to provide alternative implementations:
33  * - quantize_band_cost
34  * - abs_pow34_v
35  * - find_max_val
36  * - find_min_book
37  * - find_form_factor
38  */
39 
40 #ifndef AVCODEC_AACCODER_TWOLOOP_H
41 #define AVCODEC_AACCODER_TWOLOOP_H
42 
43 #include <float.h>
44 #include "libavutil/mathematics.h"
45 #include "mathops.h"
46 #include "avcodec.h"
47 #include "put_bits.h"
48 #include "aac.h"
49 #include "aacenc.h"
50 #include "aactab.h"
51 #include "aacenctab.h"
52 
53 /** Frequency in Hz for lower limit of noise substitution **/
54 #define NOISE_LOW_LIMIT 4000
55 
56 #define sclip(x) av_clip(x,60,218)
57 
58 /* Reflects the cost to change codebooks */
59 static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
60 {
61  return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
62 }
63 
64 /**
65  * two-loop quantizers search taken from ISO 13818-7 Appendix C
66  */
70  const float lambda)
71 {
72  int start = 0, i, w, w2, g, recomprd;
73  int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
74  / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
75  * (lambda / 120.f);
76  int refbits = destbits;
77  int toomanybits, toofewbits;
78  char nzs[128];
79  uint8_t nextband[128];
80  int maxsf[128], minsf[128];
81  float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
82  float maxvals[128], spread_thr_r[128];
83  float min_spread_thr_r, max_spread_thr_r;
84 
85  /**
86  * rdlambda controls the maximum tolerated distortion. Twoloop
87  * will keep iterating until it fails to lower it or it reaches
88  * ulimit * rdlambda. Keeping it low increases quality on difficult
89  * signals, but lower it too much, and bits will be taken from weak
90  * signals, creating "holes". A balance is necessary.
91  * rdmax and rdmin specify the relative deviation from rdlambda
92  * allowed for tonality compensation
93  */
94  float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
95  const float nzslope = 1.5f;
96  float rdmin = 0.03125f;
97  float rdmax = 1.0f;
98 
99  /**
100  * sfoffs controls an offset of optimum allocation that will be
101  * applied based on lambda. Keep it real and modest, the loop
102  * will take care of the rest, this just accelerates convergence
103  */
104  float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
105 
106  int fflag, minscaler, maxscaler, nminscaler;
107  int its = 0;
108  int maxits = 30;
109  int allz = 0;
110  int tbits;
111  int cutoff = 1024;
112  int pns_start_pos;
113  int prev;
114 
115  /**
116  * zeroscale controls a multiplier of the threshold, if band energy
117  * is below this, a zero is forced. Keep it lower than 1, unless
118  * low lambda is used, because energy < threshold doesn't mean there's
119  * no audible signal outright, it's just energy. Also make it rise
120  * slower than rdlambda, as rdscale has due compensation with
121  * noisy band deprioritization below, whereas zeroing logic is rather dumb
122  */
123  float zeroscale;
124  if (lambda > 120.f) {
125  zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
126  } else {
127  zeroscale = 1.f;
128  }
129 
130  if (s->psy.bitres.alloc >= 0) {
131  /**
132  * Psy granted us extra bits to use from the reservoir;
133  * adjust for lambda, except what psy already did
134  */
135  destbits = s->psy.bitres.alloc
136  * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
137  }
138 
139  if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
140  /**
141  * Constant Q-scale doesn't compensate MS coding on its own
142  * No need to be overly precise, this only controls RD
143  * adjustment CB limits when going overboard
144  */
145  if (s->options.mid_side && s->cur_type == TYPE_CPE)
146  destbits *= 2;
147 
148  /**
149  * When using a constant Q-scale, don't adjust bits, just use RD
150  * Don't let it go overboard, though... 8x psy target is enough
151  */
152  toomanybits = 5800;
153  toofewbits = destbits / 16;
154 
155  /** Don't offset scalers, just RD */
156  sfoffs = sce->ics.num_windows - 1;
157  rdlambda = sqrtf(rdlambda);
158 
159  /** search further */
160  maxits *= 2;
161  } else {
162  /* When using ABR, be strict, but a reasonable leeway is
163  * critical to allow RC to smoothly track desired bitrate
164  * without sudden quality drops that cause audible artifacts.
165  * Symmetry is also desirable, to avoid systematic bias.
166  */
167  toomanybits = destbits + destbits/8;
168  toofewbits = destbits - destbits/8;
169 
170  sfoffs = 0;
171  rdlambda = sqrtf(rdlambda);
172  }
173 
174  /** and zero out above cutoff frequency */
175  {
176  int wlen = 1024 / sce->ics.num_windows;
177  int bandwidth;
178 
179  /**
180  * Scale, psy gives us constant quality, this LP only scales
181  * bitrate by lambda, so we save bits on subjectively unimportant HF
182  * rather than increase quantization noise. Adjust nominal bitrate
183  * to effective bitrate according to encoding parameters,
184  * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
185  */
186  float rate_bandwidth_multiplier = 1.5f;
187  int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE)
188  ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
189  : (avctx->bit_rate / avctx->channels);
190 
191  /** Compensate for extensions that increase efficiency */
192  if (s->options.pns || s->options.intensity_stereo)
193  frame_bit_rate *= 1.15f;
194 
195  if (avctx->cutoff > 0) {
196  bandwidth = avctx->cutoff;
197  } else {
198  bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
199  s->psy.cutoff = bandwidth;
200  }
201 
202  cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
203  pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
204  }
205 
206  /**
207  * for values above this the decoder might end up in an endless loop
208  * due to always having more bits than what can be encoded.
209  */
210  destbits = FFMIN(destbits, 5800);
211  toomanybits = FFMIN(toomanybits, 5800);
212  toofewbits = FFMIN(toofewbits, 5800);
213  /**
214  * XXX: some heuristic to determine initial quantizers will reduce search time
215  * determine zero bands and upper distortion limits
216  */
217  min_spread_thr_r = -1;
218  max_spread_thr_r = -1;
219  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
220  for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
221  int nz = 0;
222  float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
223  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
224  FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
225  if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
226  sce->zeroes[(w+w2)*16+g] = 1;
227  continue;
228  }
229  nz = 1;
230  }
231  if (!nz) {
232  uplim = 0.0f;
233  } else {
234  nz = 0;
235  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
236  FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
237  if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
238  continue;
239  uplim += band->threshold;
240  energy += band->energy;
241  spread += band->spread;
242  nz++;
243  }
244  }
245  uplims[w*16+g] = uplim;
246  energies[w*16+g] = energy;
247  nzs[w*16+g] = nz;
248  sce->zeroes[w*16+g] = !nz;
249  allz |= nz;
250  if (nz && sce->can_pns[w*16+g]) {
251  spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
252  if (min_spread_thr_r < 0) {
253  min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
254  } else {
255  min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
256  max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
257  }
258  }
259  }
260  }
261 
262  /** Compute initial scalers */
263  minscaler = 65535;
264  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
265  for (g = 0; g < sce->ics.num_swb; g++) {
266  if (sce->zeroes[w*16+g]) {
267  sce->sf_idx[w*16+g] = SCALE_ONE_POS;
268  continue;
269  }
270  /**
271  * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
272  * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
273  * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
274  * more robust.
275  */
276  sce->sf_idx[w*16+g] = av_clip(
278  + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
279  + sfoffs,
280  60, SCALE_MAX_POS);
281  minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
282  }
283  }
284 
285  /** Clip */
286  minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
287  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
288  for (g = 0; g < sce->ics.num_swb; g++)
289  if (!sce->zeroes[w*16+g])
290  sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
291 
292  if (!allz)
293  return;
294  s->abs_pow34(s->scoefs, sce->coeffs, 1024);
296 
297  for (i = 0; i < sizeof(minsf) / sizeof(minsf[0]); ++i)
298  minsf[i] = 0;
299  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
300  start = w*128;
301  for (g = 0; g < sce->ics.num_swb; g++) {
302  const float *scaled = s->scoefs + start;
303  int minsfidx;
304  maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
305  if (maxvals[w*16+g] > 0) {
306  minsfidx = coef2minsf(maxvals[w*16+g]);
307  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
308  minsf[(w+w2)*16+g] = minsfidx;
309  }
310  start += sce->ics.swb_sizes[g];
311  }
312  }
313 
314  /**
315  * Scale uplims to match rate distortion to quality
316  * by applying noisy band deprioritization and tonal band prioritization.
317  * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
318  * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
319  * rate distortion requirements.
320  */
321  memcpy(euplims, uplims, sizeof(euplims));
322  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
323  /** psy already prioritizes transients to some extent */
324  float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
325  start = w*128;
326  for (g = 0; g < sce->ics.num_swb; g++) {
327  if (nzs[g] > 0) {
328  float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
329  float energy2uplim = find_form_factor(
330  sce->ics.group_len[w], sce->ics.swb_sizes[g],
331  uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
332  sce->coeffs + start,
333  nzslope * cleanup_factor);
334  energy2uplim *= de_psy_factor;
335  if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {
336  /** In ABR, we need to prioritize less and let rate control do its thing */
337  energy2uplim = sqrtf(energy2uplim);
338  }
339  energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
340  uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
341  * sce->ics.group_len[w];
342 
343  energy2uplim = find_form_factor(
344  sce->ics.group_len[w], sce->ics.swb_sizes[g],
345  uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
346  sce->coeffs + start,
347  2.0f);
348  energy2uplim *= de_psy_factor;
349  if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {
350  /** In ABR, we need to prioritize less and let rate control do its thing */
351  energy2uplim = sqrtf(energy2uplim);
352  }
353  energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
354  euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
355  0.5f, 1.0f);
356  }
357  start += sce->ics.swb_sizes[g];
358  }
359  }
360 
361  for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
362  maxsf[i] = SCALE_MAX_POS;
363 
364  //perform two-loop search
365  //outer loop - improve quality
366  do {
367  //inner loop - quantize spectrum to fit into given number of bits
368  int overdist;
369  int qstep = its ? 1 : 32;
370  do {
371  int changed = 0;
372  prev = -1;
373  recomprd = 0;
374  tbits = 0;
375  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
376  start = w*128;
377  for (g = 0; g < sce->ics.num_swb; g++) {
378  const float *coefs = &sce->coeffs[start];
379  const float *scaled = &s->scoefs[start];
380  int bits = 0;
381  int cb;
382  float dist = 0.0f;
383  float qenergy = 0.0f;
384 
385  if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
386  start += sce->ics.swb_sizes[g];
387  if (sce->can_pns[w*16+g]) {
388  /** PNS isn't free */
389  tbits += ff_pns_bits(sce, w, g);
390  }
391  continue;
392  }
393  cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
394  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
395  int b;
396  float sqenergy;
397  dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
398  scaled + w2*128,
399  sce->ics.swb_sizes[g],
400  sce->sf_idx[w*16+g],
401  cb,
402  1.0f,
403  INFINITY,
404  &b, &sqenergy,
405  0);
406  bits += b;
407  qenergy += sqenergy;
408  }
409  dists[w*16+g] = dist - bits;
410  qenergies[w*16+g] = qenergy;
411  if (prev != -1) {
412  int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
413  bits += ff_aac_scalefactor_bits[sfdiff];
414  }
415  tbits += bits;
416  start += sce->ics.swb_sizes[g];
417  prev = sce->sf_idx[w*16+g];
418  }
419  }
420  if (tbits > toomanybits) {
421  recomprd = 1;
422  for (i = 0; i < 128; i++) {
423  if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
424  int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
425  int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
426  if (new_sf != sce->sf_idx[i]) {
427  sce->sf_idx[i] = new_sf;
428  changed = 1;
429  }
430  }
431  }
432  } else if (tbits < toofewbits) {
433  recomprd = 1;
434  for (i = 0; i < 128; i++) {
435  if (sce->sf_idx[i] > SCALE_ONE_POS) {
436  int new_sf = FFMAX3(minsf[i], SCALE_ONE_POS, sce->sf_idx[i] - qstep);
437  if (new_sf != sce->sf_idx[i]) {
438  sce->sf_idx[i] = new_sf;
439  changed = 1;
440  }
441  }
442  }
443  }
444  qstep >>= 1;
445  if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
446  qstep = 1;
447  } while (qstep);
448 
449  overdist = 1;
450  fflag = tbits < toofewbits;
451  for (i = 0; i < 2 && (overdist || recomprd); ++i) {
452  if (recomprd) {
453  /** Must recompute distortion */
454  prev = -1;
455  tbits = 0;
456  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
457  start = w*128;
458  for (g = 0; g < sce->ics.num_swb; g++) {
459  const float *coefs = sce->coeffs + start;
460  const float *scaled = s->scoefs + start;
461  int bits = 0;
462  int cb;
463  float dist = 0.0f;
464  float qenergy = 0.0f;
465 
466  if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
467  start += sce->ics.swb_sizes[g];
468  if (sce->can_pns[w*16+g]) {
469  /** PNS isn't free */
470  tbits += ff_pns_bits(sce, w, g);
471  }
472  continue;
473  }
474  cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
475  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
476  int b;
477  float sqenergy;
478  dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
479  scaled + w2*128,
480  sce->ics.swb_sizes[g],
481  sce->sf_idx[w*16+g],
482  cb,
483  1.0f,
484  INFINITY,
485  &b, &sqenergy,
486  0);
487  bits += b;
488  qenergy += sqenergy;
489  }
490  dists[w*16+g] = dist - bits;
491  qenergies[w*16+g] = qenergy;
492  if (prev != -1) {
493  int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
494  bits += ff_aac_scalefactor_bits[sfdiff];
495  }
496  tbits += bits;
497  start += sce->ics.swb_sizes[g];
498  prev = sce->sf_idx[w*16+g];
499  }
500  }
501  }
502  if (!i && s->options.pns && its > maxits/2 && tbits > toofewbits) {
503  float maxoverdist = 0.0f;
504  float ovrfactor = 1.f+(maxits-its)*16.f/maxits;
505  overdist = recomprd = 0;
506  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
507  for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
508  if (!sce->zeroes[w*16+g] && sce->sf_idx[w*16+g] > SCALE_ONE_POS && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
509  float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
510  maxoverdist = FFMAX(maxoverdist, ovrdist);
511  overdist++;
512  }
513  }
514  }
515  if (overdist) {
516  /* We have overdistorted bands, trade for zeroes (that can be noise)
517  * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
518  */
519  float minspread = max_spread_thr_r;
520  float maxspread = min_spread_thr_r;
521  float zspread;
522  int zeroable = 0;
523  int zeroed = 0;
524  int maxzeroed, zloop;
525  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
526  for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
527  if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
528  minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
529  maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
530  zeroable++;
531  }
532  }
533  }
534  zspread = (maxspread-minspread) * 0.0125f + minspread;
535  /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC,
536  * and forces the hand of the later search_for_pns step.
537  * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are,
538  * and leave further PNSing to search_for_pns if worthwhile.
539  */
540  zspread = FFMIN3(min_spread_thr_r * 8.f, zspread,
541  ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1));
542  maxzeroed = FFMIN(zeroable, FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits)));
543  for (zloop = 0; zloop < 2; zloop++) {
544  /* Two passes: first distorted stuff - two birds in one shot and all that,
545  * then anything viable. Viable means not zero, but either CB=zero-able
546  * (too high SF), not SF <= 1 (that means we'd be operating at very high
547  * quality, we don't want PNS when doing VHQ), PNS allowed, and within
548  * the lowest ranking percentile.
549  */
550  float loopovrfactor = (zloop) ? 1.0f : ovrfactor;
551  int loopminsf = (zloop) ? (SCALE_ONE_POS - SCALE_DIV_512) : SCALE_ONE_POS;
552  int mcb;
553  for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
554  if (sce->ics.swb_offset[g] < pns_start_pos)
555  continue;
556  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
557  if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread
558  && sce->sf_idx[w*16+g] > loopminsf
559  && (dists[w*16+g] > loopovrfactor*uplims[w*16+g] || !(mcb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]))
560  || (mcb <= 1 && dists[w*16+g] > FFMIN(uplims[w*16+g], euplims[w*16+g]))) ) {
561  sce->zeroes[w*16+g] = 1;
562  sce->band_type[w*16+g] = 0;
563  zeroed++;
564  }
565  }
566  }
567  }
568  if (zeroed)
569  recomprd = fflag = 1;
570  } else {
571  overdist = 0;
572  }
573  }
574  }
575 
576  minscaler = SCALE_MAX_POS;
577  maxscaler = 0;
578  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
579  for (g = 0; g < sce->ics.num_swb; g++) {
580  if (!sce->zeroes[w*16+g]) {
581  minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
582  maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
583  }
584  }
585  }
586 
587  minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
588  prev = -1;
589  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
590  /** Start with big steps, end up fine-tuning */
591  int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
592  int edepth = depth+2;
593  float uplmax = its / (maxits*0.25f) + 1.0f;
594  uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
595  start = w * 128;
596  for (g = 0; g < sce->ics.num_swb; g++) {
597  int prevsc = sce->sf_idx[w*16+g];
598  if (prev < 0 && !sce->zeroes[w*16+g])
599  prev = sce->sf_idx[0];
600  if (!sce->zeroes[w*16+g]) {
601  const float *coefs = sce->coeffs + start;
602  const float *scaled = s->scoefs + start;
603  int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
604  int mindeltasf = FFMAX(0, prev - SCALE_MAX_DIFF);
605  int maxdeltasf = FFMIN(SCALE_MAX_POS - SCALE_DIV_512, prev + SCALE_MAX_DIFF);
606  if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > FFMAX(mindeltasf, minsf[w*16+g])) {
607  /* Try to make sure there is some energy in every nonzero band
608  * NOTE: This algorithm must be forcibly imbalanced, pushing harder
609  * on holes or more distorted bands at first, otherwise there's
610  * no net gain (since the next iteration will offset all bands
611  * on the opposite direction to compensate for extra bits)
612  */
613  for (i = 0; i < edepth && sce->sf_idx[w*16+g] > mindeltasf; ++i) {
614  int cb, bits;
615  float dist, qenergy;
616  int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
617  cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
618  dist = qenergy = 0.f;
619  bits = 0;
620  if (!cb) {
621  maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
622  } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
623  break;
624  }
625  /* !g is the DC band, it's important, since quantization error here
626  * applies to less than a cycle, it creates horrible intermodulation
627  * distortion if it doesn't stick to what psy requests
628  */
629  if (!g && sce->ics.num_windows > 1 && dists[w*16+g] >= euplims[w*16+g])
630  maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
631  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
632  int b;
633  float sqenergy;
634  dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
635  scaled + w2*128,
636  sce->ics.swb_sizes[g],
637  sce->sf_idx[w*16+g]-1,
638  cb,
639  1.0f,
640  INFINITY,
641  &b, &sqenergy,
642  0);
643  bits += b;
644  qenergy += sqenergy;
645  }
646  sce->sf_idx[w*16+g]--;
647  dists[w*16+g] = dist - bits;
648  qenergies[w*16+g] = qenergy;
649  if (mb && (sce->sf_idx[w*16+g] < mindeltasf || (
650  (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
651  && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
652  ) )) {
653  break;
654  }
655  }
656  } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < FFMIN(maxdeltasf, maxsf[w*16+g])
657  && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
658  && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
659  ) {
660  /** Um... over target. Save bits for more important stuff. */
661  for (i = 0; i < depth && sce->sf_idx[w*16+g] < maxdeltasf; ++i) {
662  int cb, bits;
663  float dist, qenergy;
664  cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
665  if (cb > 0) {
666  dist = qenergy = 0.f;
667  bits = 0;
668  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
669  int b;
670  float sqenergy;
671  dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
672  scaled + w2*128,
673  sce->ics.swb_sizes[g],
674  sce->sf_idx[w*16+g]+1,
675  cb,
676  1.0f,
677  INFINITY,
678  &b, &sqenergy,
679  0);
680  bits += b;
681  qenergy += sqenergy;
682  }
683  dist -= bits;
684  if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
685  sce->sf_idx[w*16+g]++;
686  dists[w*16+g] = dist;
687  qenergies[w*16+g] = qenergy;
688  } else {
689  break;
690  }
691  } else {
692  maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
693  break;
694  }
695  }
696  }
697  prev = sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], mindeltasf, maxdeltasf);
698  if (sce->sf_idx[w*16+g] != prevsc)
699  fflag = 1;
700  nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
701  sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
702  }
703  start += sce->ics.swb_sizes[g];
704  }
705  }
706 
707  /** SF difference limit violation risk. Must re-clamp. */
708  prev = -1;
709  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
710  for (g = 0; g < sce->ics.num_swb; g++) {
711  if (!sce->zeroes[w*16+g]) {
712  int prevsf = sce->sf_idx[w*16+g];
713  if (prev < 0)
714  prev = prevsf;
715  sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF);
716  sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
717  prev = sce->sf_idx[w*16+g];
718  if (!fflag && prevsf != sce->sf_idx[w*16+g])
719  fflag = 1;
720  }
721  }
722  }
723 
724  its++;
725  } while (fflag && its < maxits);
726 
727  /** Scout out next nonzero bands */
728  ff_init_nextband_map(sce, nextband);
729 
730  prev = -1;
731  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
732  /** Make sure proper codebooks are set */
733  for (g = 0; g < sce->ics.num_swb; g++) {
734  if (!sce->zeroes[w*16+g]) {
735  sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
736  if (sce->band_type[w*16+g] <= 0) {
737  if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) {
738  /** Cannot zero out, make sure it's not attempted */
739  sce->band_type[w*16+g] = 1;
740  } else {
741  sce->zeroes[w*16+g] = 1;
742  sce->band_type[w*16+g] = 0;
743  }
744  }
745  } else {
746  sce->band_type[w*16+g] = 0;
747  }
748  /** Check that there's no SF delta range violations */
749  if (!sce->zeroes[w*16+g]) {
750  if (prev != -1) {
751  av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
752  av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
753  } else if (sce->zeroes[0]) {
754  /** Set global gain to something useful */
755  sce->sf_idx[0] = sce->sf_idx[w*16+g];
756  }
757  prev = sce->sf_idx[w*16+g];
758  }
759  }
760  }
761 }
762 
763 #endif /* AVCODEC_AACCODER_TWOLOOP_H */
INFINITY
#define INFINITY
Definition: mathematics.h:67
SingleChannelElement::can_pns
uint8_t can_pns[128]
band is allowed to PNS (informative)
Definition: aac.h:258
AVCodecContext::sample_rate
int sample_rate
samples per second
Definition: avcodec.h:1186
cb
static double cb(void *priv, double x, double y)
Definition: vf_geq.c:215
aacenctab.h
log2f
#define log2f(x)
Definition: libm.h:409
AV_CODEC_FLAG_QSCALE
#define AV_CODEC_FLAG_QSCALE
Use fixed qscale.
Definition: avcodec.h:275
SingleChannelElement::zeroes
uint8_t zeroes[128]
band is not coded (used by encoder)
Definition: aac.h:257
av_unused
#define av_unused
Definition: attributes.h:131
b
#define b
Definition: input.c:41
float.h
mathematics.h
ff_sfdelta_can_remove_band
static int ff_sfdelta_can_remove_band(const SingleChannelElement *sce, const uint8_t *nextband, int prev_sf, int band)
Definition: aacenc_utils.h:232
SCALE_MAX_POS
#define SCALE_MAX_POS
scalefactor index maximum value
Definition: aac.h:150
IndividualChannelStream::num_swb
int num_swb
number of scalefactor window bands
Definition: aac.h:183
SCALE_DIV_512
#define SCALE_DIV_512
scalefactor difference that corresponds to scale difference in 512 times
Definition: aac.h:148
TYPE_CPE
@ TYPE_CPE
Definition: aac.h:57
find_form_factor
static float find_form_factor(int group_len, int swb_size, float thresh, const float *scaled, float nzslope)
Definition: aacenc_utils.h:104
AVCodecContext::flags
int flags
AV_CODEC_FLAG_*.
Definition: avcodec.h:606
FFMIN3
#define FFMIN3(a, b, c)
Definition: common.h:97
SingleChannelElement::ics
IndividualChannelStream ics
Definition: aac.h:249
FFMAX3
#define FFMAX3(a, b, c)
Definition: common.h:95
s
#define s(width, name)
Definition: cbs_vp9.c:257
SingleChannelElement::coeffs
INTFLOAT coeffs[1024]
coefficients for IMDCT, maybe processed
Definition: aac.h:262
AVCodecContext::global_quality
int global_quality
Global quality for codecs which cannot change it per frame.
Definition: avcodec.h:592
IndividualChannelStream::swb_sizes
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aac.h:182
g
const char * g
Definition: vf_curves.c:115
bits
uint8_t bits
Definition: vp3data.h:202
IndividualChannelStream::group_len
uint8_t group_len[8]
Definition: aac.h:179
SCALE_DIFF_ZERO
#define SCALE_DIFF_ZERO
codebook index corresponding to zero scalefactor indices difference
Definition: aac.h:152
f
#define f(width, name)
Definition: cbs_vp9.c:255
IndividualChannelStream::swb_offset
const uint16_t * swb_offset
table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular wind...
Definition: aac.h:181
quantize_band_cost_cached
static float quantize_band_cost_cached(struct AACEncContext *s, int w, int g, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
Definition: aacenc_quantization_misc.h:31
AVCodecContext::bit_rate
int64_t bit_rate
the average bitrate
Definition: avcodec.h:576
mathops.h
FFPsyBand
single band psychoacoustic information
Definition: psymodel.h:50
aac.h
aactab.h
ff_init_nextband_map
static void ff_init_nextband_map(const SingleChannelElement *sce, uint8_t *nextband)
Definition: aacenc_utils.h:199
SingleChannelElement::sf_idx
int sf_idx[128]
scalefactor indices (used by encoder)
Definition: aac.h:256
for
for(j=16;j >0;--j)
Definition: h264pred_template.c:469
ff_aac_scalefactor_bits
const uint8_t ff_aac_scalefactor_bits[121]
Definition: aactab.c:92
coef2minsf
static uint8_t coef2minsf(float coef)
Return the minimum scalefactor where the quantized coef does not clip.
Definition: aacenc_utils.h:157
powf
#define powf(x, y)
Definition: libm.h:50
FFMAX
#define FFMAX(a, b)
Definition: common.h:94
FFMIN
#define FFMIN(a, b)
Definition: common.h:96
search_for_quantizers_twoloop
static void search_for_quantizers_twoloop(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, const float lambda)
two-loop quantizers search taken from ISO 13818-7 Appendix C
Definition: aaccoder_twoloop.h:67
mb
#define mb
Definition: vf_colormatrix.c:101
SCALE_MAX_DIFF
#define SCALE_MAX_DIFF
maximum scalefactor difference allowed by standard
Definition: aac.h:151
AVCodecContext::channels
int channels
number of audio channels
Definition: avcodec.h:1187
AAC_CUTOFF_FROM_BITRATE
#define AAC_CUTOFF_FROM_BITRATE(bit_rate, channels, sample_rate)
Definition: psymodel.h:35
SingleChannelElement
Single Channel Element - used for both SCE and LFE elements.
Definition: aac.h:248
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
IndividualChannelStream::num_windows
int num_windows
Definition: aac.h:184
SCALE_ONE_POS
#define SCALE_ONE_POS
scalefactor index that corresponds to scale=1.0
Definition: aac.h:149
find_min_book
static int find_min_book(float maxval, int sf)
Definition: aacenc_utils.h:92
FFPsyBand::threshold
float threshold
Definition: psymodel.h:53
AVCodecContext::cutoff
int cutoff
Audio cutoff bandwidth (0 means "automatic")
Definition: avcodec.h:1230
av_assert1
#define av_assert1(cond)
assert() equivalent for checks that are not located in speed-critical code.
Definition: avassert.h:53
uint8_t
uint8_t
Definition: audio_convert.c:194
NOISE_LOW_LIMIT
#define NOISE_LOW_LIMIT
Frequency in Hz for lower limit of noise substitution.
Definition: aaccoder_twoloop.h:54
ff_sqrf
static av_const float ff_sqrf(float a)
Definition: mathops.h:228
avcodec.h
w
FFmpeg Automated Testing Environment ************************************Introduction Using FATE from your FFmpeg source directory Submitting the results to the FFmpeg result aggregation server Uploading new samples to the fate suite FATE makefile targets and variables Makefile targets Makefile variables Examples Introduction **************FATE is an extended regression suite on the client side and a means for results aggregation and presentation on the server side The first part of this document explains how you can use FATE from your FFmpeg source directory to test your ffmpeg binary The second part describes how you can run FATE to submit the results to FFmpeg’s FATE server In any way you can have a look at the publicly viewable FATE results by visiting this as it can be seen if some test on some platform broke with their recent contribution This usually happens on the platforms the developers could not test on The second part of this document describes how you can run FATE to submit your results to FFmpeg’s FATE server If you want to submit your results be sure to check that your combination of OS and compiler is not already listed on the above mentioned website In the third part you can find a comprehensive listing of FATE makefile targets and variables Using FATE from your FFmpeg source directory **********************************************If you want to run FATE on your machine you need to have the samples in place You can get the samples via the build target fate rsync Use this command from the top level source this will cause FATE to fail NOTE To use a custom wrapper to run the pass ‘ target exec’ to ‘configure’ or set the TARGET_EXEC Make variable Submitting the results to the FFmpeg result aggregation server ****************************************************************To submit your results to the server you should run fate through the shell script ‘tests fate sh’ from the FFmpeg sources This script needs to be invoked with a configuration file as 
its first argument tests fate sh path to fate_config A configuration file template with comments describing the individual configuration variables can be found at ‘doc fate_config sh template’ Create a configuration that suits your based on the configuration template The ‘slot’ configuration variable can be any string that is not yet but it is suggested that you name it adhering to the following pattern ‘ARCH OS COMPILER COMPILER VERSION’ The configuration file itself will be sourced in a shell therefore all shell features may be used This enables you to setup the environment as you need it for your build For your first test runs the ‘fate_recv’ variable should be empty or commented out This will run everything as normal except that it will omit the submission of the results to the server The following files should be present in $workdir as specified in the configuration it may help to try out the ‘ssh’ command with one or more ‘ v’ options You should get detailed output concerning your SSH configuration and the authentication process The only thing left is to automate the execution of the fate sh script and the synchronisation of the samples directory Uploading new samples to the fate suite *****************************************If you need a sample uploaded send a mail to samples request This is for developers who have an account on the fate suite server If you upload new please make sure they are as small as space on each network bandwidth and so on benefit from smaller test cases Also keep in mind older checkouts use existing sample that means in practice generally do not remove or overwrite files as it likely would break older checkouts or releases Also all needed samples for a commit should be ideally before the push If you need an account for frequently uploading samples or you wish to help others by doing that send a mail to ffmpeg devel rsync vauL Duo ug o o w
Definition: fate.txt:150
AACEncContext
AAC encoder context.
Definition: aacenc.h:376
FFPsyBand::energy
float energy
Definition: psymodel.h:52
AVCodecContext
main external API structure.
Definition: avcodec.h:526
find_max_val
static float find_max_val(int group_len, int swb_size, const float *scaled)
Definition: aacenc_utils.h:80
FFPsyBand::spread
float spread
Definition: psymodel.h:54
put_bits.h
SingleChannelElement::band_type
enum BandType band_type[128]
band types
Definition: aac.h:252
ff_pns_bits
static int ff_pns_bits(SingleChannelElement *sce, int w, int g)
Definition: aaccoder_twoloop.h:59
ff_quantize_band_cost_cache_init
void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
Definition: aacenc.c:127
aacenc.h