FFmpeg  4.3
aacenc_is.c
Go to the documentation of this file.
1 /*
2  * AAC encoder intensity stereo
3  * Copyright (C) 2015 Rostislav Pehlivanov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder Intensity Stereo
25  * @author Rostislav Pehlivanov ( atomnuker gmail com )
26  */
27 
28 #include "aacenc.h"
29 #include "aacenc_utils.h"
30 #include "aacenc_is.h"
31 #include "aacenc_quantization.h"
32 
34  int start, int w, int g, float ener0,
35  float ener1, float ener01,
36  int use_pcoeffs, int phase)
37 {
38  int i, w2;
39  SingleChannelElement *sce0 = &cpe->ch[0];
40  SingleChannelElement *sce1 = &cpe->ch[1];
41  float *L = use_pcoeffs ? sce0->pcoeffs : sce0->coeffs;
42  float *R = use_pcoeffs ? sce1->pcoeffs : sce1->coeffs;
43  float *L34 = &s->scoefs[256*0], *R34 = &s->scoefs[256*1];
44  float *IS = &s->scoefs[256*2], *I34 = &s->scoefs[256*3];
45  float dist1 = 0.0f, dist2 = 0.0f;
46  struct AACISError is_error = {0};
47 
48  if (ener01 <= 0 || ener0 <= 0) {
49  is_error.pass = 0;
50  return is_error;
51  }
52 
53  for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
54  FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
55  FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
56  int is_band_type, is_sf_idx = FFMAX(1, sce0->sf_idx[w*16+g]-4);
57  float e01_34 = phase*pos_pow34(ener1/ener0);
58  float maxval, dist_spec_err = 0.0f;
59  float minthr = FFMIN(band0->threshold, band1->threshold);
60  for (i = 0; i < sce0->ics.swb_sizes[g]; i++)
61  IS[i] = (L[start+(w+w2)*128+i] + phase*R[start+(w+w2)*128+i])*sqrt(ener0/ener01);
62  s->abs_pow34(L34, &L[start+(w+w2)*128], sce0->ics.swb_sizes[g]);
63  s->abs_pow34(R34, &R[start+(w+w2)*128], sce0->ics.swb_sizes[g]);
64  s->abs_pow34(I34, IS, sce0->ics.swb_sizes[g]);
65  maxval = find_max_val(1, sce0->ics.swb_sizes[g], I34);
66  is_band_type = find_min_book(maxval, is_sf_idx);
67  dist1 += quantize_band_cost(s, &L[start + (w+w2)*128], L34,
68  sce0->ics.swb_sizes[g],
69  sce0->sf_idx[w*16+g],
70  sce0->band_type[w*16+g],
71  s->lambda / band0->threshold, INFINITY, NULL, NULL, 0);
72  dist1 += quantize_band_cost(s, &R[start + (w+w2)*128], R34,
73  sce1->ics.swb_sizes[g],
74  sce1->sf_idx[w*16+g],
75  sce1->band_type[w*16+g],
76  s->lambda / band1->threshold, INFINITY, NULL, NULL, 0);
77  dist2 += quantize_band_cost(s, IS, I34, sce0->ics.swb_sizes[g],
78  is_sf_idx, is_band_type,
79  s->lambda / minthr, INFINITY, NULL, NULL, 0);
80  for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
81  dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]);
82  dist_spec_err += (R34[i] - I34[i]*e01_34)*(R34[i] - I34[i]*e01_34);
83  }
84  dist_spec_err *= s->lambda / minthr;
85  dist2 += dist_spec_err;
86  }
87 
88  is_error.pass = dist2 <= dist1;
89  is_error.phase = phase;
90  is_error.error = dist2 - dist1;
91  is_error.dist1 = dist1;
92  is_error.dist2 = dist2;
93  is_error.ener01 = ener01;
94 
95  return is_error;
96 }
97 
99 {
100  SingleChannelElement *sce0 = &cpe->ch[0];
101  SingleChannelElement *sce1 = &cpe->ch[1];
102  int start = 0, count = 0, w, w2, g, i, prev_sf1 = -1, prev_bt = -1, prev_is = 0;
103  const float freq_mult = avctx->sample_rate/(1024.0f/sce0->ics.num_windows)/2.0f;
104  uint8_t nextband1[128];
105 
106  if (!cpe->common_window)
107  return;
108 
109  /** Scout out next nonzero bands */
110  ff_init_nextband_map(sce1, nextband1);
111 
112  for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
113  start = 0;
114  for (g = 0; g < sce0->ics.num_swb; g++) {
115  if (start*freq_mult > INT_STEREO_LOW_LIMIT*(s->lambda/170.0f) &&
116  cpe->ch[0].band_type[w*16+g] != NOISE_BT && !cpe->ch[0].zeroes[w*16+g] &&
117  cpe->ch[1].band_type[w*16+g] != NOISE_BT && !cpe->ch[1].zeroes[w*16+g] &&
118  ff_sfdelta_can_remove_band(sce1, nextband1, prev_sf1, w*16+g)) {
119  float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f, ener01p = 0.0f;
120  struct AACISError ph_err1, ph_err2, *best;
121  for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
122  for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
123  float coef0 = sce0->coeffs[start+(w+w2)*128+i];
124  float coef1 = sce1->coeffs[start+(w+w2)*128+i];
125  ener0 += coef0*coef0;
126  ener1 += coef1*coef1;
127  ener01 += (coef0 + coef1)*(coef0 + coef1);
128  ener01p += (coef0 - coef1)*(coef0 - coef1);
129  }
130  }
131  ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g,
132  ener0, ener1, ener01p, 0, -1);
133  ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g,
134  ener0, ener1, ener01, 0, +1);
135  best = (ph_err1.pass && ph_err1.error < ph_err2.error) ? &ph_err1 : &ph_err2;
136  if (best->pass) {
137  cpe->is_mask[w*16+g] = 1;
138  cpe->ms_mask[w*16+g] = 0;
139  cpe->ch[0].is_ener[w*16+g] = sqrt(ener0 / best->ener01);
140  cpe->ch[1].is_ener[w*16+g] = ener0/ener1;
141  cpe->ch[1].band_type[w*16+g] = (best->phase > 0) ? INTENSITY_BT : INTENSITY_BT2;
142  if (prev_is && prev_bt != cpe->ch[1].band_type[w*16+g]) {
143  /** Flip M/S mask and pick the other CB, since it encodes more efficiently */
144  cpe->ms_mask[w*16+g] = 1;
145  cpe->ch[1].band_type[w*16+g] = (best->phase > 0) ? INTENSITY_BT2 : INTENSITY_BT;
146  }
147  prev_bt = cpe->ch[1].band_type[w*16+g];
148  count++;
149  }
150  }
151  if (!sce1->zeroes[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
152  prev_sf1 = sce1->sf_idx[w*16+g];
153  prev_is = cpe->is_mask[w*16+g];
154  start += sce0->ics.swb_sizes[g];
155  }
156  }
157  cpe->is_mode = !!count;
158 }
INFINITY
#define INFINITY
Definition: mathematics.h:67
AACISError::dist2
float dist2
Definition: aacenc_is.h:41
AVCodecContext::sample_rate
int sample_rate
samples per second
Definition: avcodec.h:1186
SingleChannelElement::zeroes
uint8_t zeroes[128]
band is not coded (used by encoder)
Definition: aac.h:257
AACISError::pass
int pass
Definition: aacenc_is.h:37
R
#define R
Definition: huffyuvdsp.h:34
SingleChannelElement::pcoeffs
INTFLOAT pcoeffs[1024]
coefficients for IMDCT, pristine
Definition: aac.h:261
ff_sfdelta_can_remove_band
static int ff_sfdelta_can_remove_band(const SingleChannelElement *sce, const uint8_t *nextband, int prev_sf, int band)
Definition: aacenc_utils.h:232
IndividualChannelStream::num_swb
int num_swb
number of scalefactor window bands
Definition: aac.h:183
AACISError::dist1
float dist1
Definition: aacenc_is.h:40
SingleChannelElement::ics
IndividualChannelStream ics
Definition: aac.h:249
NOISE_BT
@ NOISE_BT
Spectral data are scaled white noise not coded in the bitstream.
Definition: aac.h:87
AACISError::error
float error
Definition: aacenc_is.h:39
s
#define s(width, name)
Definition: cbs_vp9.c:257
SingleChannelElement::coeffs
INTFLOAT coeffs[1024]
coefficients for IMDCT, maybe processed
Definition: aac.h:262
IndividualChannelStream::swb_sizes
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aac.h:182
g
const char * g
Definition: vf_curves.c:115
INTENSITY_BT2
@ INTENSITY_BT2
Scalefactor data are intensity stereo positions (out of phase).
Definition: aac.h:88
IndividualChannelStream::group_len
uint8_t group_len[8]
Definition: aac.h:179
f
#define f(width, name)
Definition: cbs_vp9.c:255
AACISError::ener01
float ener01
Definition: aacenc_is.h:42
INTENSITY_BT
@ INTENSITY_BT
Scalefactor data are intensity stereo positions (in phase).
Definition: aac.h:89
ChannelElement::is_mask
uint8_t is_mask[128]
Set if intensity stereo is used (used by encoder)
Definition: aac.h:282
NULL
#define NULL
Definition: coverity.c:32
SingleChannelElement::is_ener
float is_ener[128]
Intensity stereo pos (used by encoder)
Definition: aac.h:259
aacenc_quantization.h
FFPsyBand
single band psychoacoustic information
Definition: psymodel.h:50
ff_init_nextband_map
static void ff_init_nextband_map(const SingleChannelElement *sce, uint8_t *nextband)
Definition: aacenc_utils.h:199
SingleChannelElement::sf_idx
int sf_idx[128]
scalefactor indices (used by encoder)
Definition: aac.h:256
ChannelElement::ch
SingleChannelElement ch[2]
Definition: aac.h:284
FFMAX
#define FFMAX(a, b)
Definition: common.h:94
ChannelElement::common_window
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
Definition: aac.h:278
FFMIN
#define FFMIN(a, b)
Definition: common.h:96
ChannelElement::ms_mask
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
Definition: aac.h:281
pos_pow34
static float pos_pow34(float a)
Definition: aacenc_utils.h:49
aacenc_is.h
SingleChannelElement
Single Channel Element - used for both SCE and LFE elements.
Definition: aac.h:248
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
IndividualChannelStream::num_windows
int num_windows
Definition: aac.h:184
find_min_book
static int find_min_book(float maxval, int sf)
Definition: aacenc_utils.h:92
FFPsyBand::threshold
float threshold
Definition: psymodel.h:53
ChannelElement
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aac.h:275
uint8_t
uint8_t
Definition: audio_convert.c:194
ff_aac_search_for_is
void ff_aac_search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe)
Definition: aacenc_is.c:98
AACISError::phase
int phase
Definition: aacenc_is.h:38
w
FFmpeg Automated Testing Environment ************************************Introduction Using FATE from your FFmpeg source directory Submitting the results to the FFmpeg result aggregation server Uploading new samples to the fate suite FATE makefile targets and variables Makefile targets Makefile variables Examples Introduction **************FATE is an extended regression suite on the client side and a means for results aggregation and presentation on the server side The first part of this document explains how you can use FATE from your FFmpeg source directory to test your ffmpeg binary The second part describes how you can run FATE to submit the results to FFmpeg’s FATE server In any way you can have a look at the publicly viewable FATE results by visiting this as it can be seen if some test on some platform broke with their recent contribution This usually happens on the platforms the developers could not test on The second part of this document describes how you can run FATE to submit your results to FFmpeg’s FATE server If you want to submit your results be sure to check that your combination of OS and compiler is not already listed on the above mentioned website In the third part you can find a comprehensive listing of FATE makefile targets and variables Using FATE from your FFmpeg source directory **********************************************If you want to run FATE on your machine you need to have the samples in place You can get the samples via the build target fate rsync Use this command from the top level source this will cause FATE to fail NOTE To use a custom wrapper to run the pass ‘ target exec’ to ‘configure’ or set the TARGET_EXEC Make variable Submitting the results to the FFmpeg result aggregation server ****************************************************************To submit your results to the server you should run fate through the shell script ‘tests fate sh’ from the FFmpeg sources This script needs to be invoked with a configuration file as its first argument tests fate sh path to fate_config A configuration file template with comments describing the individual configuration variables can be found at ‘doc fate_config sh template’ Create a configuration that suits your based on the configuration template The ‘slot’ configuration variable can be any string that is not yet but it is suggested that you name it adhering to the following pattern ‘ARCH OS COMPILER COMPILER VERSION’ The configuration file itself will be sourced in a shell therefore all shell features may be used This enables you to setup the environment as you need it for your build For your first test runs the ‘fate_recv’ variable should be empty or commented out This will run everything as normal except that it will omit the submission of the results to the server The following files should be present in $workdir as specified in the configuration it may help to try out the ‘ssh’ command with one or more ‘ v’ options You should get detailed output concerning your SSH configuration and the authentication process The only thing left is to automate the execution of the fate sh script and the synchronisation of the samples directory Uploading new samples to the fate suite *****************************************If you need a sample uploaded send a mail to samples request This is for developers who have an account on the fate suite server If you upload new please make sure they are as small as space on each network bandwidth and so on benefit from smaller test cases Also keep in mind older checkouts use existing sample that means in practice generally do not remove or overwrite files as it likely would break older checkouts or releases Also all needed samples for a commit should be ideally before the push If you need an account for frequently uploading samples or you wish to help others by doing that send a mail to ffmpeg devel rsync vauL Duo ug o o w
Definition: fate.txt:150
RESERVED_BT
@ RESERVED_BT
Band types following are encoded differently from others.
Definition: aac.h:86
AACEncContext
AAC encoder context.
Definition: aacenc.h:376
L
#define L(x)
Definition: vp56_arith.h:36
ff_aac_is_encoding_err
struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe, int start, int w, int g, float ener0, float ener1, float ener01, int use_pcoeffs, int phase)
Definition: aacenc_is.c:33
INT_STEREO_LOW_LIMIT
#define INT_STEREO_LOW_LIMIT
Frequency in Hz for lower limit of intensity stereo.
Definition: aacenc_is.h:34
AVCodecContext
main external API structure.
Definition: avcodec.h:526
ChannelElement::is_mode
uint8_t is_mode
Set if any bands have been encoded using intensity stereo (used by encoder)
Definition: aac.h:280
find_max_val
static float find_max_val(int group_len, int swb_size, const float *scaled)
Definition: aacenc_utils.h:80
quantize_band_cost
static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
Definition: aacenc_quantization.h:250
aacenc_utils.h
SingleChannelElement::band_type
enum BandType band_type[128]
band types
Definition: aac.h:252
AACISError
Definition: aacenc_is.h:36
aacenc.h