FFmpeg  1.2.12
aacdec.c
Go to the documentation of this file.
1 /*
2  * AAC decoder
3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
5  *
6  * AAC LATM decoder
7  * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
8  * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25  */
26 
34 /*
35  * supported tools
36  *
37  * Support? Name
38  * N (code in SoC repo) gain control
39  * Y block switching
40  * Y window shapes - standard
41  * N window shapes - Low Delay
42  * Y filterbank - standard
43  * N (code in SoC repo) filterbank - Scalable Sample Rate
44  * Y Temporal Noise Shaping
45  * Y Long Term Prediction
46  * Y intensity stereo
47  * Y channel coupling
48  * Y frequency domain prediction
49  * Y Perceptual Noise Substitution
50  * Y Mid/Side stereo
51  * N Scalable Inverse AAC Quantization
52  * N Frequency Selective Switch
53  * N upsampling filter
54  * Y quantization & coding - AAC
55  * N quantization & coding - TwinVQ
56  * N quantization & coding - BSAC
57  * N AAC Error Resilience tools
58  * N Error Resilience payload syntax
59  * N Error Protection tool
60  * N CELP
61  * N Silence Compression
62  * N HVXC
63  * N HVXC 4kbits/s VR
64  * N Structured Audio tools
65  * N Structured Audio Sample Bank Format
66  * N MIDI
67  * N Harmonic and Individual Lines plus Noise
68  * N Text-To-Speech Interface
69  * Y Spectral Band Replication
70  * Y (not in this code) Layer-1
71  * Y (not in this code) Layer-2
72  * Y (not in this code) Layer-3
73  * N SinuSoidal Coding (Transient, Sinusoid, Noise)
74  * Y Parametric Stereo
75  * N Direct Stream Transfer
76  *
77  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
78  * - HE AAC v2 comprises LC AAC with Spectral Band Replication and
79  Parametric Stereo.
80  */
81 
82 #include "libavutil/float_dsp.h"
83 #include "libavutil/opt.h"
84 #include "avcodec.h"
85 #include "internal.h"
86 #include "get_bits.h"
87 #include "fft.h"
88 #include "fmtconvert.h"
89 #include "lpc.h"
90 #include "kbdwin.h"
91 #include "sinewin.h"
92 
93 #include "aac.h"
94 #include "aactab.h"
95 #include "aacdectab.h"
96 #include "cbrt_tablegen.h"
97 #include "sbr.h"
98 #include "aacsbr.h"
99 #include "mpeg4audio.h"
100 #include "aacadtsdec.h"
101 #include "libavutil/intfloat.h"
102 
103 #include <assert.h>
104 #include <errno.h>
105 #include <math.h>
106 #include <string.h>
107 
108 #if ARCH_ARM
109 # include "arm/aac.h"
110 #elif ARCH_MIPS
111 # include "mips/aacdec_mips.h"
112 #endif
113 
/* VLC tables, one per index 0..10, filled in by the AAC_INIT_VLC_STATIC()
 * invocations from ff_aac_spectral_bits[] / ff_aac_spectral_codes[]. */
115 static VLC vlc_spectral[11];
116 
/* Prototype so that output_configure() can be called before its definition. */
117 static int output_configure(AACContext *ac,
118  uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
119  enum OCStatus oc_type, int get_new_frame);
120 
/* Common tail of the error messages logged when a parser finds fewer bits
 * left in the input than it needs; callers prepend their own function name. */
121 #define overread_err "Input buffer exhausted before END element found\n"
122 
123 static int count_channels(uint8_t (*layout)[3], int tags)
124 {
125  int i, sum = 0;
126  for (i = 0; i < tags; i++) {
127  int syn_ele = layout[i][0];
128  int pos = layout[i][2];
129  sum += (1 + (syn_ele == TYPE_CPE)) *
130  (pos != AAC_CHANNEL_OFF && pos != AAC_CHANNEL_CC);
131  }
132  return sum;
133 }
134 
148  enum ChannelPosition che_pos,
149  int type, int id, int *channels)
150 {
151  if (che_pos) {
152  if (!ac->che[type][id]) {
153  if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
154  return AVERROR(ENOMEM);
155  ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
156  }
157  if (type != TYPE_CCE) {
158  if (*channels >= MAX_CHANNELS - (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))) {
159  av_log(ac->avctx, AV_LOG_ERROR, "Too many channels\n");
160  return AVERROR_INVALIDDATA;
161  }
162  ac->output_element[(*channels)++] = &ac->che[type][id]->ch[0];
163  if (type == TYPE_CPE ||
164  (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1)) {
165  ac->output_element[(*channels)++] = &ac->che[type][id]->ch[1];
166  }
167  }
168  } else {
169  if (ac->che[type][id])
170  ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
171  av_freep(&ac->che[type][id]);
172  }
173  return 0;
174 }
175 
177 {
178  AACContext *ac = avctx->priv_data;
179  int type, id, ch, ret;
180 
181  /* set channel pointers to internal buffers by default */
182  for (type = 0; type < 4; type++) {
183  for (id = 0; id < MAX_ELEM_ID; id++) {
184  ChannelElement *che = ac->che[type][id];
185  if (che) {
186  che->ch[0].ret = che->ch[0].ret_buf;
187  che->ch[1].ret = che->ch[1].ret_buf;
188  }
189  }
190  }
191 
192  if (!avctx->channels)
193  return 1;
194 
195  /* get output buffer */
196  ac->frame->nb_samples = 2048;
197  if ((ret = ff_get_buffer(avctx, ac->frame)) < 0) {
198  av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
199  return ret;
200  }
201 
202  /* map output channel pointers to AVFrame data */
203  for (ch = 0; ch < avctx->channels; ch++) {
204  if (ac->output_element[ch])
205  ac->output_element[ch]->ret = (float *)ac->frame->extended_data[ch];
206  }
207 
208  return 0;
209 }
210 
212  uint64_t av_position;
216 };
217 
218 static int assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID],
219  uint8_t (*layout_map)[3], int offset, uint64_t left,
220  uint64_t right, int pos)
221 {
222  if (layout_map[offset][0] == TYPE_CPE) {
223  e2c_vec[offset] = (struct elem_to_channel) {
224  .av_position = left | right, .syn_ele = TYPE_CPE,
225  .elem_id = layout_map[offset ][1], .aac_position = pos };
226  return 1;
227  } else {
228  e2c_vec[offset] = (struct elem_to_channel) {
229  .av_position = left, .syn_ele = TYPE_SCE,
230  .elem_id = layout_map[offset ][1], .aac_position = pos };
231  e2c_vec[offset + 1] = (struct elem_to_channel) {
232  .av_position = right, .syn_ele = TYPE_SCE,
233  .elem_id = layout_map[offset + 1][1], .aac_position = pos };
234  return 2;
235  }
236 }
237 
238 static int count_paired_channels(uint8_t (*layout_map)[3], int tags, int pos, int *current) {
239  int num_pos_channels = 0;
240  int first_cpe = 0;
241  int sce_parity = 0;
242  int i;
243  for (i = *current; i < tags; i++) {
244  if (layout_map[i][2] != pos)
245  break;
246  if (layout_map[i][0] == TYPE_CPE) {
247  if (sce_parity) {
248  if (pos == AAC_CHANNEL_FRONT && !first_cpe) {
249  sce_parity = 0;
250  } else {
251  return -1;
252  }
253  }
254  num_pos_channels += 2;
255  first_cpe = 1;
256  } else {
257  num_pos_channels++;
258  sce_parity ^= 1;
259  }
260  }
261  if (sce_parity &&
262  ((pos == AAC_CHANNEL_FRONT && first_cpe) || pos == AAC_CHANNEL_SIDE))
263  return -1;
264  *current = i;
265  return num_pos_channels;
266 }
267 
268 static uint64_t sniff_channel_order(uint8_t (*layout_map)[3], int tags)
269 {
270  int i, n, total_non_cc_elements;
271  struct elem_to_channel e2c_vec[4*MAX_ELEM_ID] = {{ 0 }};
272  int num_front_channels, num_side_channels, num_back_channels;
273  uint64_t layout;
274 
275  if (FF_ARRAY_ELEMS(e2c_vec) < tags)
276  return 0;
277 
278  i = 0;
279  num_front_channels =
280  count_paired_channels(layout_map, tags, AAC_CHANNEL_FRONT, &i);
281  if (num_front_channels < 0)
282  return 0;
283  num_side_channels =
284  count_paired_channels(layout_map, tags, AAC_CHANNEL_SIDE, &i);
285  if (num_side_channels < 0)
286  return 0;
287  num_back_channels =
288  count_paired_channels(layout_map, tags, AAC_CHANNEL_BACK, &i);
289  if (num_back_channels < 0)
290  return 0;
291 
292  i = 0;
293  if (num_front_channels & 1) {
294  e2c_vec[i] = (struct elem_to_channel) {
295  .av_position = AV_CH_FRONT_CENTER, .syn_ele = TYPE_SCE,
296  .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_FRONT };
297  i++;
298  num_front_channels--;
299  }
300  if (num_front_channels >= 4) {
301  i += assign_pair(e2c_vec, layout_map, i,
305  num_front_channels -= 2;
306  }
307  if (num_front_channels >= 2) {
308  i += assign_pair(e2c_vec, layout_map, i,
312  num_front_channels -= 2;
313  }
314  while (num_front_channels >= 2) {
315  i += assign_pair(e2c_vec, layout_map, i,
316  UINT64_MAX,
317  UINT64_MAX,
319  num_front_channels -= 2;
320  }
321 
322  if (num_side_channels >= 2) {
323  i += assign_pair(e2c_vec, layout_map, i,
327  num_side_channels -= 2;
328  }
329  while (num_side_channels >= 2) {
330  i += assign_pair(e2c_vec, layout_map, i,
331  UINT64_MAX,
332  UINT64_MAX,
334  num_side_channels -= 2;
335  }
336 
337  while (num_back_channels >= 4) {
338  i += assign_pair(e2c_vec, layout_map, i,
339  UINT64_MAX,
340  UINT64_MAX,
342  num_back_channels -= 2;
343  }
344  if (num_back_channels >= 2) {
345  i += assign_pair(e2c_vec, layout_map, i,
349  num_back_channels -= 2;
350  }
351  if (num_back_channels) {
352  e2c_vec[i] = (struct elem_to_channel) {
353  .av_position = AV_CH_BACK_CENTER, .syn_ele = TYPE_SCE,
354  .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_BACK };
355  i++;
356  num_back_channels--;
357  }
358 
359  if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
360  e2c_vec[i] = (struct elem_to_channel) {
362  .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_LFE };
363  i++;
364  }
365  while (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
366  e2c_vec[i] = (struct elem_to_channel) {
367  .av_position = UINT64_MAX, .syn_ele = TYPE_LFE,
368  .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_LFE };
369  i++;
370  }
371 
372  // Must choose a stable sort
373  total_non_cc_elements = n = i;
374  do {
375  int next_n = 0;
376  for (i = 1; i < n; i++) {
377  if (e2c_vec[i-1].av_position > e2c_vec[i].av_position) {
378  FFSWAP(struct elem_to_channel, e2c_vec[i-1], e2c_vec[i]);
379  next_n = i;
380  }
381  }
382  n = next_n;
383  } while (n > 0);
384 
385  layout = 0;
386  for (i = 0; i < total_non_cc_elements; i++) {
387  layout_map[i][0] = e2c_vec[i].syn_ele;
388  layout_map[i][1] = e2c_vec[i].elem_id;
389  layout_map[i][2] = e2c_vec[i].aac_position;
390  if (e2c_vec[i].av_position != UINT64_MAX) {
391  layout |= e2c_vec[i].av_position;
392  }
393  }
394 
395  return layout;
396 }
397 
402  if (ac->oc[1].status == OC_LOCKED) {
403  ac->oc[0] = ac->oc[1];
404  }
405  ac->oc[1].status = OC_NONE;
406 }
407 
413  if (ac->oc[1].status != OC_LOCKED && ac->oc[0].status != OC_NONE) {
414  ac->oc[1] = ac->oc[0];
415  ac->avctx->channels = ac->oc[1].channels;
416  ac->avctx->channel_layout = ac->oc[1].channel_layout;
417  output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
418  ac->oc[1].status, 0);
419  }
420 }
421 
428  uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
429  enum OCStatus oc_type, int get_new_frame)
430 {
431  AVCodecContext *avctx = ac->avctx;
432  int i, channels = 0, ret;
433  uint64_t layout = 0;
434 
435  if (ac->oc[1].layout_map != layout_map) {
436  memcpy(ac->oc[1].layout_map, layout_map, tags * sizeof(layout_map[0]));
437  ac->oc[1].layout_map_tags = tags;
438  }
439 
440  // Try to sniff a reasonable channel order, otherwise output the
441  // channels in the order the PCE declared them.
443  layout = sniff_channel_order(layout_map, tags);
444  for (i = 0; i < tags; i++) {
445  int type = layout_map[i][0];
446  int id = layout_map[i][1];
447  int position = layout_map[i][2];
448  // Allocate or free elements depending on if they are in the
449  // current program configuration.
450  ret = che_configure(ac, position, type, id, &channels);
451  if (ret < 0)
452  return ret;
453  }
454  if (ac->oc[1].m4ac.ps == 1 && channels == 2) {
455  if (layout == AV_CH_FRONT_CENTER) {
457  } else {
458  layout = 0;
459  }
460  }
461 
462  memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
463  if (layout) avctx->channel_layout = layout;
464  ac->oc[1].channel_layout = layout;
465  avctx->channels = ac->oc[1].channels = channels;
466  ac->oc[1].status = oc_type;
467 
468  if (get_new_frame) {
469  if ((ret = frame_configure_elements(ac->avctx)) < 0)
470  return ret;
471  }
472 
473  return 0;
474 }
475 
476 static void flush(AVCodecContext *avctx)
477 {
478  AACContext *ac= avctx->priv_data;
479  int type, i, j;
480 
481  for (type = 3; type >= 0; type--) {
482  for (i = 0; i < MAX_ELEM_ID; i++) {
483  ChannelElement *che = ac->che[type][i];
484  if (che) {
485  for (j = 0; j <= 1; j++) {
486  memset(che->ch[j].saved, 0, sizeof(che->ch[j].saved));
487  }
488  }
489  }
490  }
491 }
492 
500  uint8_t (*layout_map)[3],
501  int *tags,
502  int channel_config)
503 {
504  if (channel_config < 1 || channel_config > 7) {
505  av_log(avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
506  channel_config);
507  return -1;
508  }
509  *tags = tags_per_config[channel_config];
510  memcpy(layout_map, aac_channel_layout_map[channel_config-1], *tags * sizeof(*layout_map));
511  return 0;
512 }
513 
514 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
515 {
516  // For PCE based channel configurations map the channels solely based on tags.
517  if (!ac->oc[1].m4ac.chan_config) {
518  return ac->tag_che_map[type][elem_id];
519  }
520  // Allow single CPE stereo files to be signalled with mono configuration.
521  if (!ac->tags_mapped && type == TYPE_CPE && ac->oc[1].m4ac.chan_config == 1) {
522  uint8_t layout_map[MAX_ELEM_ID*4][3];
523  int layout_map_tags;
525 
526  av_log(ac->avctx, AV_LOG_DEBUG, "mono with CPE\n");
527 
528  if (set_default_channel_config(ac->avctx, layout_map, &layout_map_tags,
529  2) < 0)
530  return NULL;
531  if (output_configure(ac, layout_map, layout_map_tags,
532  OC_TRIAL_FRAME, 1) < 0)
533  return NULL;
534 
535  ac->oc[1].m4ac.chan_config = 2;
536  ac->oc[1].m4ac.ps = 0;
537  }
538  // And vice-versa
539  if (!ac->tags_mapped && type == TYPE_SCE && ac->oc[1].m4ac.chan_config == 2) {
540  uint8_t layout_map[MAX_ELEM_ID*4][3];
541  int layout_map_tags;
543 
544  av_log(ac->avctx, AV_LOG_DEBUG, "stereo with SCE\n");
545 
546  if (set_default_channel_config(ac->avctx, layout_map, &layout_map_tags,
547  1) < 0)
548  return NULL;
549  if (output_configure(ac, layout_map, layout_map_tags,
550  OC_TRIAL_FRAME, 1) < 0)
551  return NULL;
552 
553  ac->oc[1].m4ac.chan_config = 1;
554  if (ac->oc[1].m4ac.sbr)
555  ac->oc[1].m4ac.ps = -1;
556  }
557  // For indexed channel configurations map the channels solely based on position.
558  switch (ac->oc[1].m4ac.chan_config) {
559  case 7:
560  if (ac->tags_mapped == 3 && type == TYPE_CPE) {
561  ac->tags_mapped++;
562  return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
563  }
564  case 6:
565  /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
566  instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
567  encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
568  if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
569  ac->tags_mapped++;
570  return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
571  }
572  case 5:
573  if (ac->tags_mapped == 2 && type == TYPE_CPE) {
574  ac->tags_mapped++;
575  return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
576  }
577  case 4:
578  if (ac->tags_mapped == 2 && ac->oc[1].m4ac.chan_config == 4 && type == TYPE_SCE) {
579  ac->tags_mapped++;
580  return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
581  }
582  case 3:
583  case 2:
584  if (ac->tags_mapped == (ac->oc[1].m4ac.chan_config != 2) && type == TYPE_CPE) {
585  ac->tags_mapped++;
586  return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
587  } else if (ac->oc[1].m4ac.chan_config == 2) {
588  return NULL;
589  }
590  case 1:
591  if (!ac->tags_mapped && type == TYPE_SCE) {
592  ac->tags_mapped++;
593  return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
594  }
595  default:
596  return NULL;
597  }
598 }
599 
605 static void decode_channel_map(uint8_t layout_map[][3],
606  enum ChannelPosition type,
607  GetBitContext *gb, int n)
608 {
609  while (n--) {
610  enum RawDataBlockType syn_ele;
611  switch (type) {
612  case AAC_CHANNEL_FRONT:
613  case AAC_CHANNEL_BACK:
614  case AAC_CHANNEL_SIDE:
615  syn_ele = get_bits1(gb);
616  break;
617  case AAC_CHANNEL_CC:
618  skip_bits1(gb);
619  syn_ele = TYPE_CCE;
620  break;
621  case AAC_CHANNEL_LFE:
622  syn_ele = TYPE_LFE;
623  break;
624  default:
625  av_assert0(0);
626  }
627  layout_map[0][0] = syn_ele;
628  layout_map[0][1] = get_bits(gb, 4);
629  layout_map[0][2] = type;
630  layout_map++;
631  }
632 }
633 
639 static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
640  uint8_t (*layout_map)[3],
641  GetBitContext *gb)
642 {
643  int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
644  int comment_len;
645  int tags;
646 
647  skip_bits(gb, 2); // object_type
648 
649  sampling_index = get_bits(gb, 4);
650  if (m4ac->sampling_index != sampling_index)
651  av_log(avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
652 
653  num_front = get_bits(gb, 4);
654  num_side = get_bits(gb, 4);
655  num_back = get_bits(gb, 4);
656  num_lfe = get_bits(gb, 2);
657  num_assoc_data = get_bits(gb, 3);
658  num_cc = get_bits(gb, 4);
659 
660  if (get_bits1(gb))
661  skip_bits(gb, 4); // mono_mixdown_tag
662  if (get_bits1(gb))
663  skip_bits(gb, 4); // stereo_mixdown_tag
664 
665  if (get_bits1(gb))
666  skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
667 
668  if (get_bits_left(gb) < 4 * (num_front + num_side + num_back + num_lfe + num_assoc_data + num_cc)) {
669  av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
670  return -1;
671  }
672  decode_channel_map(layout_map , AAC_CHANNEL_FRONT, gb, num_front);
673  tags = num_front;
674  decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE, gb, num_side);
675  tags += num_side;
676  decode_channel_map(layout_map + tags, AAC_CHANNEL_BACK, gb, num_back);
677  tags += num_back;
678  decode_channel_map(layout_map + tags, AAC_CHANNEL_LFE, gb, num_lfe);
679  tags += num_lfe;
680 
681  skip_bits_long(gb, 4 * num_assoc_data);
682 
683  decode_channel_map(layout_map + tags, AAC_CHANNEL_CC, gb, num_cc);
684  tags += num_cc;
685 
686  align_get_bits(gb);
687 
688  /* comment field, first byte is length */
689  comment_len = get_bits(gb, 8) * 8;
690  if (get_bits_left(gb) < comment_len) {
691  av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
692  return -1;
693  }
694  skip_bits_long(gb, comment_len);
695  return tags;
696 }
697 
707  GetBitContext *gb,
708  MPEG4AudioConfig *m4ac,
709  int channel_config)
710 {
711  int extension_flag, ret;
712  uint8_t layout_map[MAX_ELEM_ID*4][3];
713  int tags = 0;
714 
715  if (get_bits1(gb)) { // frameLengthFlag
716  av_log_missing_feature(avctx, "960/120 MDCT window", 1);
717  return AVERROR_PATCHWELCOME;
718  }
719 
720  if (get_bits1(gb)) // dependsOnCoreCoder
721  skip_bits(gb, 14); // coreCoderDelay
722  extension_flag = get_bits1(gb);
723 
724  if (m4ac->object_type == AOT_AAC_SCALABLE ||
726  skip_bits(gb, 3); // layerNr
727 
728  if (channel_config == 0) {
729  skip_bits(gb, 4); // element_instance_tag
730  tags = decode_pce(avctx, m4ac, layout_map, gb);
731  if (tags < 0)
732  return tags;
733  } else {
734  if ((ret = set_default_channel_config(avctx, layout_map, &tags, channel_config)))
735  return ret;
736  }
737 
738  if (count_channels(layout_map, tags) > 1) {
739  m4ac->ps = 0;
740  } else if (m4ac->sbr == 1 && m4ac->ps == -1)
741  m4ac->ps = 1;
742 
743  if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0)))
744  return ret;
745 
746  if (extension_flag) {
747  switch (m4ac->object_type) {
748  case AOT_ER_BSAC:
749  skip_bits(gb, 5); // numOfSubFrame
750  skip_bits(gb, 11); // layer_length
751  break;
752  case AOT_ER_AAC_LC:
753  case AOT_ER_AAC_LTP:
754  case AOT_ER_AAC_SCALABLE:
755  case AOT_ER_AAC_LD:
756  skip_bits(gb, 3); /* aacSectionDataResilienceFlag
757  * aacScalefactorDataResilienceFlag
758  * aacSpectralDataResilienceFlag
759  */
760  break;
761  }
762  skip_bits1(gb); // extensionFlag3 (TBD in version 3)
763  }
764  return 0;
765 }
766 
780  AVCodecContext *avctx,
781  MPEG4AudioConfig *m4ac,
782  const uint8_t *data, int bit_size,
783  int sync_extension)
784 {
785  GetBitContext gb;
786  int i;
787  int ret;
788 
789  av_dlog(avctx, "audio specific config size %d\n", bit_size >> 3);
790  for (i = 0; i < bit_size >> 3; i++)
791  av_dlog(avctx, "%02x ", data[i]);
792  av_dlog(avctx, "\n");
793 
794  if ((ret = init_get_bits(&gb, data, bit_size)) < 0)
795  return ret;
796 
797  if ((i = avpriv_mpeg4audio_get_config(m4ac, data, bit_size, sync_extension)) < 0)
798  return -1;
799  if (m4ac->sampling_index > 12) {
800  av_log(avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", m4ac->sampling_index);
801  return -1;
802  }
803 
804  skip_bits_long(&gb, i);
805 
806  switch (m4ac->object_type) {
807  case AOT_AAC_MAIN:
808  case AOT_AAC_LC:
809  case AOT_AAC_LTP:
810  if (decode_ga_specific_config(ac, avctx, &gb, m4ac, m4ac->chan_config))
811  return -1;
812  break;
813  default:
814  av_log(avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
815  m4ac->sbr == 1? "SBR+" : "", m4ac->object_type);
816  return -1;
817  }
818 
819  av_dlog(avctx, "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
820  m4ac->object_type, m4ac->chan_config, m4ac->sampling_index,
821  m4ac->sample_rate, m4ac->sbr, m4ac->ps);
822 
823  return get_bits_count(&gb);
824 }
825 
833 static av_always_inline int lcg_random(unsigned previous_val)
834 {
835  union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
836  return v.s;
837 }
838 
840 {
841  ps->r0 = 0.0f;
842  ps->r1 = 0.0f;
843  ps->cor0 = 0.0f;
844  ps->cor1 = 0.0f;
845  ps->var0 = 1.0f;
846  ps->var1 = 1.0f;
847 }
848 
850 {
851  int i;
852  for (i = 0; i < MAX_PREDICTORS; i++)
853  reset_predict_state(&ps[i]);
854 }
855 
/**
 * Map a sample rate to an index in the range 0..11.
 *
 * The index is that of the first threshold in a descending list that the
 * rate reaches; rates below the last threshold (9391) map to 11.
 *
 * @param rate sample rate
 *
 * @return index between 0 and 11 inclusive
 */
static int sample_rate_idx (int rate)
{
    /* Smallest rate still assigned to index 0, 1, ..., 10. */
    static const int lower_bound[] = {
        92017, 75132, 55426, 46009, 37566, 27713,
        23004, 18783, 13856, 11502,  9391
    };
    const int count = (int)(sizeof(lower_bound) / sizeof(lower_bound[0]));
    int idx;

    for (idx = 0; idx < count; idx++) {
        if (rate >= lower_bound[idx])
            return idx;
    }
    return 11;
}
871 
872 static void reset_predictor_group(PredictorState *ps, int group_num)
873 {
874  int i;
875  for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
876  reset_predict_state(&ps[i]);
877 }
878 
/* Initialise vlc_spectral[num] through INIT_VLC_STATIC() from the
 * ff_aac_spectral_sizes/bits/codes tables of the same index; `size` is
 * forwarded as the last INIT_VLC_STATIC() argument.
 * Note that the expansion already ends in a semicolon. */
879 #define AAC_INIT_VLC_STATIC(num, size) \
880  INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
881  ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
882  ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
883  size);
884 
885 static void aacdec_init(AACContext *ac);
886 
888 {
889  AACContext *ac = avctx->priv_data;
890 
891  ac->avctx = avctx;
892  ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
893 
894  aacdec_init(ac);
895 
897 
898  if (avctx->extradata_size > 0) {
899  if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
900  avctx->extradata,
901  avctx->extradata_size*8, 1) < 0)
902  return -1;
903  } else {
904  int sr, i;
905  uint8_t layout_map[MAX_ELEM_ID*4][3];
906  int layout_map_tags;
907 
908  sr = sample_rate_idx(avctx->sample_rate);
909  ac->oc[1].m4ac.sampling_index = sr;
910  ac->oc[1].m4ac.channels = avctx->channels;
911  ac->oc[1].m4ac.sbr = -1;
912  ac->oc[1].m4ac.ps = -1;
913 
914  for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++)
915  if (ff_mpeg4audio_channels[i] == avctx->channels)
916  break;
918  i = 0;
919  }
920  ac->oc[1].m4ac.chan_config = i;
921 
922  if (ac->oc[1].m4ac.chan_config) {
923  int ret = set_default_channel_config(avctx, layout_map,
924  &layout_map_tags, ac->oc[1].m4ac.chan_config);
925  if (!ret)
926  output_configure(ac, layout_map, layout_map_tags,
927  OC_GLOBAL_HDR, 0);
928  else if (avctx->err_recognition & AV_EF_EXPLODE)
929  return AVERROR_INVALIDDATA;
930  }
931  }
932 
933  if (avctx->channels > MAX_CHANNELS) {
934  av_log(avctx, AV_LOG_ERROR, "Too many channels\n");
935  return AVERROR_INVALIDDATA;
936  }
937 
938  AAC_INIT_VLC_STATIC( 0, 304);
939  AAC_INIT_VLC_STATIC( 1, 270);
940  AAC_INIT_VLC_STATIC( 2, 550);
941  AAC_INIT_VLC_STATIC( 3, 300);
942  AAC_INIT_VLC_STATIC( 4, 328);
943  AAC_INIT_VLC_STATIC( 5, 294);
944  AAC_INIT_VLC_STATIC( 6, 306);
945  AAC_INIT_VLC_STATIC( 7, 268);
946  AAC_INIT_VLC_STATIC( 8, 510);
947  AAC_INIT_VLC_STATIC( 9, 366);
948  AAC_INIT_VLC_STATIC(10, 462);
949 
950  ff_aac_sbr_init();
951 
952  ff_fmt_convert_init(&ac->fmt_conv, avctx);
954 
955  ac->random_state = 0x1f2e3d4c;
956 
958 
962  352);
963 
964  ff_mdct_init(&ac->mdct, 11, 1, 1.0 / (32768.0 * 1024.0));
965  ff_mdct_init(&ac->mdct_small, 8, 1, 1.0 / (32768.0 * 128.0));
966  ff_mdct_init(&ac->mdct_ltp, 11, 0, -2.0 * 32768.0);
967  // window initialization
972 
973  cbrt_tableinit();
974 
975  return 0;
976 }
977 
982 {
983  int byte_align = get_bits1(gb);
984  int count = get_bits(gb, 8);
985  if (count == 255)
986  count += get_bits(gb, 8);
987  if (byte_align)
988  align_get_bits(gb);
989 
990  if (get_bits_left(gb) < 8 * count) {
991  av_log(ac->avctx, AV_LOG_ERROR, "skip_data_stream_element: "overread_err);
992  return -1;
993  }
994  skip_bits_long(gb, 8 * count);
995  return 0;
996 }
997 
999  GetBitContext *gb)
1000 {
1001  int sfb;
1002  if (get_bits1(gb)) {
1003  ics->predictor_reset_group = get_bits(gb, 5);
1004  if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
1005  av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
1006  return -1;
1007  }
1008  }
1009  for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]); sfb++) {
1010  ics->prediction_used[sfb] = get_bits1(gb);
1011  }
1012  return 0;
1013 }
1014 
1019  GetBitContext *gb, uint8_t max_sfb)
1020 {
1021  int sfb;
1022 
1023  ltp->lag = get_bits(gb, 11);
1024  ltp->coef = ltp_coef[get_bits(gb, 3)];
1025  for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
1026  ltp->used[sfb] = get_bits1(gb);
1027 }
1028 
1033  GetBitContext *gb)
1034 {
1035  if (get_bits1(gb)) {
1036  av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
1037  return AVERROR_INVALIDDATA;
1038  }
1039  ics->window_sequence[1] = ics->window_sequence[0];
1040  ics->window_sequence[0] = get_bits(gb, 2);
1041  ics->use_kb_window[1] = ics->use_kb_window[0];
1042  ics->use_kb_window[0] = get_bits1(gb);
1043  ics->num_window_groups = 1;
1044  ics->group_len[0] = 1;
1045  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1046  int i;
1047  ics->max_sfb = get_bits(gb, 4);
1048  for (i = 0; i < 7; i++) {
1049  if (get_bits1(gb)) {
1050  ics->group_len[ics->num_window_groups - 1]++;
1051  } else {
1052  ics->num_window_groups++;
1053  ics->group_len[ics->num_window_groups - 1] = 1;
1054  }
1055  }
1056  ics->num_windows = 8;
1060  ics->predictor_present = 0;
1061  } else {
1062  ics->max_sfb = get_bits(gb, 6);
1063  ics->num_windows = 1;
1067  ics->predictor_present = get_bits1(gb);
1068  ics->predictor_reset_group = 0;
1069  if (ics->predictor_present) {
1070  if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
1071  if (decode_prediction(ac, ics, gb)) {
1072  goto fail;
1073  }
1074  } else if (ac->oc[1].m4ac.object_type == AOT_AAC_LC) {
1075  av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
1076  goto fail;
1077  } else {
1078  if ((ics->ltp.present = get_bits(gb, 1)))
1079  decode_ltp(&ics->ltp, gb, ics->max_sfb);
1080  }
1081  }
1082  }
1083 
1084  if (ics->max_sfb > ics->num_swb) {
1085  av_log(ac->avctx, AV_LOG_ERROR,
1086  "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
1087  ics->max_sfb, ics->num_swb);
1088  goto fail;
1089  }
1090 
1091  return 0;
1092 fail:
1093  ics->max_sfb = 0;
1094  return AVERROR_INVALIDDATA;
1095 }
1096 
1105 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
1106  int band_type_run_end[120], GetBitContext *gb,
1108 {
1109  int g, idx = 0;
1110  const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
1111  for (g = 0; g < ics->num_window_groups; g++) {
1112  int k = 0;
1113  while (k < ics->max_sfb) {
1114  uint8_t sect_end = k;
1115  int sect_len_incr;
1116  int sect_band_type = get_bits(gb, 4);
1117  if (sect_band_type == 12) {
1118  av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
1119  return -1;
1120  }
1121  do {
1122  sect_len_incr = get_bits(gb, bits);
1123  sect_end += sect_len_incr;
1124  if (get_bits_left(gb) < 0) {
1125  av_log(ac->avctx, AV_LOG_ERROR, "decode_band_types: "overread_err);
1126  return -1;
1127  }
1128  if (sect_end > ics->max_sfb) {
1129  av_log(ac->avctx, AV_LOG_ERROR,
1130  "Number of bands (%d) exceeds limit (%d).\n",
1131  sect_end, ics->max_sfb);
1132  return -1;
1133  }
1134  } while (sect_len_incr == (1 << bits) - 1);
1135  for (; k < sect_end; k++) {
1136  band_type [idx] = sect_band_type;
1137  band_type_run_end[idx++] = sect_end;
1138  }
1139  }
1140  }
1141  return 0;
1142 }
1143 
1154 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
1155  unsigned int global_gain,
1157  enum BandType band_type[120],
1158  int band_type_run_end[120])
1159 {
1160  int g, i, idx = 0;
1161  int offset[3] = { global_gain, global_gain - 90, 0 };
1162  int clipped_offset;
1163  int noise_flag = 1;
1164  for (g = 0; g < ics->num_window_groups; g++) {
1165  for (i = 0; i < ics->max_sfb;) {
1166  int run_end = band_type_run_end[idx];
1167  if (band_type[idx] == ZERO_BT) {
1168  for (; i < run_end; i++, idx++)
1169  sf[idx] = 0.;
1170  } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
1171  for (; i < run_end; i++, idx++) {
1172  offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1173  clipped_offset = av_clip(offset[2], -155, 100);
1174  if (offset[2] != clipped_offset) {
1175  av_log_ask_for_sample(ac->avctx, "Intensity stereo "
1176  "position clipped (%d -> %d).\nIf you heard an "
1177  "audible artifact, there may be a bug in the "
1178  "decoder. ", offset[2], clipped_offset);
1179  }
1180  sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
1181  }
1182  } else if (band_type[idx] == NOISE_BT) {
1183  for (; i < run_end; i++, idx++) {
1184  if (noise_flag-- > 0)
1185  offset[1] += get_bits(gb, 9) - 256;
1186  else
1187  offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1188  clipped_offset = av_clip(offset[1], -100, 155);
1189  if (offset[1] != clipped_offset) {
1190  av_log_ask_for_sample(ac->avctx, "Noise gain clipped "
1191  "(%d -> %d).\nIf you heard an audible "
1192  "artifact, there may be a bug in the decoder. ",
1193  offset[1], clipped_offset);
1194  }
1195  sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
1196  }
1197  } else {
1198  for (; i < run_end; i++, idx++) {
1199  offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1200  if (offset[0] > 255U) {
1201  av_log(ac->avctx, AV_LOG_ERROR,
1202  "Scalefactor (%d) out of range.\n", offset[0]);
1203  return -1;
1204  }
1205  sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
1206  }
1207  }
1208  }
1209  }
1210  return 0;
1211 }
1212 
1216 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
1217  const uint16_t *swb_offset, int num_swb)
1218 {
1219  int i, pulse_swb;
1220  pulse->num_pulse = get_bits(gb, 2) + 1;
1221  pulse_swb = get_bits(gb, 6);
1222  if (pulse_swb >= num_swb)
1223  return -1;
1224  pulse->pos[0] = swb_offset[pulse_swb];
1225  pulse->pos[0] += get_bits(gb, 5);
1226  if (pulse->pos[0] > 1023)
1227  return -1;
1228  pulse->amp[0] = get_bits(gb, 4);
1229  for (i = 1; i < pulse->num_pulse; i++) {
1230  pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
1231  if (pulse->pos[i] > 1023)
1232  return -1;
1233  pulse->amp[i] = get_bits(gb, 4);
1234  }
1235  return 0;
1236 }
1237 
1244  GetBitContext *gb, const IndividualChannelStream *ics)
1245 {
1246  int w, filt, i, coef_len, coef_res, coef_compress;
1247  const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
1248  const int tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
1249  for (w = 0; w < ics->num_windows; w++) {
1250  if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
1251  coef_res = get_bits1(gb);
1252 
1253  for (filt = 0; filt < tns->n_filt[w]; filt++) {
1254  int tmp2_idx;
1255  tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
1256 
1257  if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
1258  av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
1259  tns->order[w][filt], tns_max_order);
1260  tns->order[w][filt] = 0;
1261  return -1;
1262  }
1263  if (tns->order[w][filt]) {
1264  tns->direction[w][filt] = get_bits1(gb);
1265  coef_compress = get_bits1(gb);
1266  coef_len = coef_res + 3 - coef_compress;
1267  tmp2_idx = 2 * coef_compress + coef_res;
1268 
1269  for (i = 0; i < tns->order[w][filt]; i++)
1270  tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
1271  }
1272  }
1273  }
1274  }
1275  return 0;
1276 }
1277 
1286  int ms_present)
1287 {
1288  int idx;
1289  if (ms_present == 1) {
1290  for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
1291  cpe->ms_mask[idx] = get_bits1(gb);
1292  } else if (ms_present == 2) {
1293  memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask[0]) * cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb);
1294  }
1295 }
1296 
#ifndef VMUL2
/**
 * Write two scaled codebook values to dst.
 *
 * idx packs two 4-bit indices into v (bits 0-3 and bits 4-7).
 *
 * @return pointer just past the two values written
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;

    dst[0] = v[idx & 15] * gain;
    dst[1] = v[(idx >> 4) & 15] * gain;
    return dst + 2;
}
#endif
1307 
#ifndef VMUL4
/**
 * Write four scaled codebook values to dst.
 *
 * idx packs four 2-bit indices into v, lowest bits first.
 *
 * @return pointer just past the four values written
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;
    int n;

    for (n = 0; n < 4; n++)
        dst[n] = v[(idx >> (2 * n)) & 3] * gain;
    return dst + 4;
}
#endif
1320 
1321 #ifndef VMUL2S
1322 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
1323  unsigned sign, const float *scale)
1324 {
1325  union av_intfloat32 s0, s1;
1326 
1327  s0.f = s1.f = *scale;
1328  s0.i ^= sign >> 1 << 31;
1329  s1.i ^= sign << 31;
1330 
1331  *dst++ = v[idx & 15] * s0.f;
1332  *dst++ = v[idx>>4 & 15] * s1.f;
1333 
1334  return dst;
1335 }
1336 #endif
1337 
1338 #ifndef VMUL4S
1339 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
1340  unsigned sign, const float *scale)
1341 {
1342  unsigned nz = idx >> 12;
1343  union av_intfloat32 s = { .f = *scale };
1344  union av_intfloat32 t;
1345 
1346  t.i = s.i ^ (sign & 1U<<31);
1347  *dst++ = v[idx & 3] * t.f;
1348 
1349  sign <<= nz & 1; nz >>= 1;
1350  t.i = s.i ^ (sign & 1U<<31);
1351  *dst++ = v[idx>>2 & 3] * t.f;
1352 
1353  sign <<= nz & 1; nz >>= 1;
1354  t.i = s.i ^ (sign & 1U<<31);
1355  *dst++ = v[idx>>4 & 3] * t.f;
1356 
1357  sign <<= nz & 1;
1358  t.i = s.i ^ (sign & 1U<<31);
1359  *dst++ = v[idx>>6 & 3] * t.f;
1360 
1361  return dst;
1362 }
1363 #endif
1364 
1377 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
1378  GetBitContext *gb, const float sf[120],
1379  int pulse_present, const Pulse *pulse,
1380  const IndividualChannelStream *ics,
1381  enum BandType band_type[120])
1382 {
1383  int i, k, g, idx = 0;
1384  const int c = 1024 / ics->num_windows;
1385  const uint16_t *offsets = ics->swb_offset;
1386  float *coef_base = coef;
1387 
1388  for (g = 0; g < ics->num_windows; g++)
1389  memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1390 
1391  for (g = 0; g < ics->num_window_groups; g++) {
1392  unsigned g_len = ics->group_len[g];
1393 
1394  for (i = 0; i < ics->max_sfb; i++, idx++) {
1395  const unsigned cbt_m1 = band_type[idx] - 1;
1396  float *cfo = coef + offsets[i];
1397  int off_len = offsets[i + 1] - offsets[i];
1398  int group;
1399 
1400  if (cbt_m1 >= INTENSITY_BT2 - 1) {
1401  for (group = 0; group < g_len; group++, cfo+=128) {
1402  memset(cfo, 0, off_len * sizeof(float));
1403  }
1404  } else if (cbt_m1 == NOISE_BT - 1) {
1405  for (group = 0; group < g_len; group++, cfo+=128) {
1406  float scale;
1407  float band_energy;
1408 
1409  for (k = 0; k < off_len; k++) {
1411  cfo[k] = ac->random_state;
1412  }
1413 
1414  band_energy = ac->fdsp.scalarproduct_float(cfo, cfo, off_len);
1415  scale = sf[idx] / sqrtf(band_energy);
1416  ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1417  }
1418  } else {
1419  const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1420  const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1421  VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1422  OPEN_READER(re, gb);
1423 
1424  switch (cbt_m1 >> 1) {
1425  case 0:
1426  for (group = 0; group < g_len; group++, cfo+=128) {
1427  float *cf = cfo;
1428  int len = off_len;
1429 
1430  do {
1431  int code;
1432  unsigned cb_idx;
1433 
1434  UPDATE_CACHE(re, gb);
1435  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1436  cb_idx = cb_vector_idx[code];
1437  cf = VMUL4(cf, vq, cb_idx, sf + idx);
1438  } while (len -= 4);
1439  }
1440  break;
1441 
1442  case 1:
1443  for (group = 0; group < g_len; group++, cfo+=128) {
1444  float *cf = cfo;
1445  int len = off_len;
1446 
1447  do {
1448  int code;
1449  unsigned nnz;
1450  unsigned cb_idx;
1451  uint32_t bits;
1452 
1453  UPDATE_CACHE(re, gb);
1454  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1455  cb_idx = cb_vector_idx[code];
1456  nnz = cb_idx >> 8 & 15;
1457  bits = nnz ? GET_CACHE(re, gb) : 0;
1458  LAST_SKIP_BITS(re, gb, nnz);
1459  cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1460  } while (len -= 4);
1461  }
1462  break;
1463 
1464  case 2:
1465  for (group = 0; group < g_len; group++, cfo+=128) {
1466  float *cf = cfo;
1467  int len = off_len;
1468 
1469  do {
1470  int code;
1471  unsigned cb_idx;
1472 
1473  UPDATE_CACHE(re, gb);
1474  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1475  cb_idx = cb_vector_idx[code];
1476  cf = VMUL2(cf, vq, cb_idx, sf + idx);
1477  } while (len -= 2);
1478  }
1479  break;
1480 
1481  case 3:
1482  case 4:
1483  for (group = 0; group < g_len; group++, cfo+=128) {
1484  float *cf = cfo;
1485  int len = off_len;
1486 
1487  do {
1488  int code;
1489  unsigned nnz;
1490  unsigned cb_idx;
1491  unsigned sign;
1492 
1493  UPDATE_CACHE(re, gb);
1494  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1495  cb_idx = cb_vector_idx[code];
1496  nnz = cb_idx >> 8 & 15;
1497  sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0;
1498  LAST_SKIP_BITS(re, gb, nnz);
1499  cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1500  } while (len -= 2);
1501  }
1502  break;
1503 
1504  default:
1505  for (group = 0; group < g_len; group++, cfo+=128) {
1506  float *cf = cfo;
1507  uint32_t *icf = (uint32_t *) cf;
1508  int len = off_len;
1509 
1510  do {
1511  int code;
1512  unsigned nzt, nnz;
1513  unsigned cb_idx;
1514  uint32_t bits;
1515  int j;
1516 
1517  UPDATE_CACHE(re, gb);
1518  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1519 
1520  if (!code) {
1521  *icf++ = 0;
1522  *icf++ = 0;
1523  continue;
1524  }
1525 
1526  cb_idx = cb_vector_idx[code];
1527  nnz = cb_idx >> 12;
1528  nzt = cb_idx >> 8;
1529  bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1530  LAST_SKIP_BITS(re, gb, nnz);
1531 
1532  for (j = 0; j < 2; j++) {
1533  if (nzt & 1<<j) {
1534  uint32_t b;
1535  int n;
1536  /* The total length of escape_sequence must be < 22 bits according
1537  to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1538  UPDATE_CACHE(re, gb);
1539  b = GET_CACHE(re, gb);
1540  b = 31 - av_log2(~b);
1541 
1542  if (b > 8) {
1543  av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1544  return -1;
1545  }
1546 
1547  SKIP_BITS(re, gb, b + 1);
1548  b += 4;
1549  n = (1 << b) + SHOW_UBITS(re, gb, b);
1550  LAST_SKIP_BITS(re, gb, b);
1551  *icf++ = cbrt_tab[n] | (bits & 1U<<31);
1552  bits <<= 1;
1553  } else {
1554  unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1555  *icf++ = (bits & 1U<<31) | v;
1556  bits <<= !!v;
1557  }
1558  cb_idx >>= 4;
1559  }
1560  } while (len -= 2);
1561 
1562  ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1563  }
1564  }
1565 
1566  CLOSE_READER(re, gb);
1567  }
1568  }
1569  coef += g_len << 7;
1570  }
1571 
1572  if (pulse_present) {
1573  idx = 0;
1574  for (i = 0; i < pulse->num_pulse; i++) {
1575  float co = coef_base[ pulse->pos[i] ];
1576  while (offsets[idx + 1] <= pulse->pos[i])
1577  idx++;
1578  if (band_type[idx] != NOISE_BT && sf[idx]) {
1579  float ico = -pulse->amp[i];
1580  if (co) {
1581  co /= sf[idx];
1582  ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1583  }
1584  coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1585  }
1586  }
1587  }
1588  return 0;
1589 }
1590 
1591 static av_always_inline float flt16_round(float pf)
1592 {
1593  union av_intfloat32 tmp;
1594  tmp.f = pf;
1595  tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1596  return tmp.f;
1597 }
1598 
1599 static av_always_inline float flt16_even(float pf)
1600 {
1601  union av_intfloat32 tmp;
1602  tmp.f = pf;
1603  tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1604  return tmp.f;
1605 }
1606 
1607 static av_always_inline float flt16_trunc(float pf)
1608 {
1609  union av_intfloat32 pun;
1610  pun.f = pf;
1611  pun.i &= 0xFFFF0000U;
1612  return pun.f;
1613 }
1614 
1615 static av_always_inline void predict(PredictorState *ps, float *coef,
1616  int output_enable)
1617 {
1618  const float a = 0.953125; // 61.0 / 64
1619  const float alpha = 0.90625; // 29.0 / 32
1620  float e0, e1;
1621  float pv;
1622  float k1, k2;
1623  float r0 = ps->r0, r1 = ps->r1;
1624  float cor0 = ps->cor0, cor1 = ps->cor1;
1625  float var0 = ps->var0, var1 = ps->var1;
1626 
1627  k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
1628  k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
1629 
1630  pv = flt16_round(k1 * r0 + k2 * r1);
1631  if (output_enable)
1632  *coef += pv;
1633 
1634  e0 = *coef;
1635  e1 = e0 - k1 * r0;
1636 
1637  ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
1638  ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
1639  ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
1640  ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
1641 
1642  ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
1643  ps->r0 = flt16_trunc(a * e0);
1644 }
1645 
1650 {
1651  int sfb, k;
1652 
1653  if (!sce->ics.predictor_initialized) {
1655  sce->ics.predictor_initialized = 1;
1656  }
1657 
1658  if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1659  for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]; sfb++) {
1660  for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1661  predict(&sce->predictor_state[k], &sce->coeffs[k],
1662  sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1663  }
1664  }
1665  if (sce->ics.predictor_reset_group)
1667  } else
1669 }
1670 
1680  GetBitContext *gb, int common_window, int scale_flag)
1681 {
1682  Pulse pulse;
1683  TemporalNoiseShaping *tns = &sce->tns;
1684  IndividualChannelStream *ics = &sce->ics;
1685  float *out = sce->coeffs;
1686  int global_gain, pulse_present = 0;
1687 
1688  /* This assignment is to silence a GCC warning about the variable being used
1689  * uninitialized when in fact it always is.
1690  */
1691  pulse.num_pulse = 0;
1692 
1693  global_gain = get_bits(gb, 8);
1694 
1695  if (!common_window && !scale_flag) {
1696  if (decode_ics_info(ac, ics, gb) < 0)
1697  return AVERROR_INVALIDDATA;
1698  }
1699 
1700  if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1701  return -1;
1702  if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1703  return -1;
1704 
1705  pulse_present = 0;
1706  if (!scale_flag) {
1707  if ((pulse_present = get_bits1(gb))) {
1708  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1709  av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1710  return -1;
1711  }
1712  if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1713  av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1714  return -1;
1715  }
1716  }
1717  if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1718  return -1;
1719  if (get_bits1(gb)) {
1720  av_log_missing_feature(ac->avctx, "SSR", 1);
1721  return AVERROR_PATCHWELCOME;
1722  }
1723  }
1724 
1725  if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1726  return -1;
1727 
1728  if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN && !common_window)
1729  apply_prediction(ac, sce);
1730 
1731  return 0;
1732 }
1733 
1738 {
1739  const IndividualChannelStream *ics = &cpe->ch[0].ics;
1740  float *ch0 = cpe->ch[0].coeffs;
1741  float *ch1 = cpe->ch[1].coeffs;
1742  int g, i, group, idx = 0;
1743  const uint16_t *offsets = ics->swb_offset;
1744  for (g = 0; g < ics->num_window_groups; g++) {
1745  for (i = 0; i < ics->max_sfb; i++, idx++) {
1746  if (cpe->ms_mask[idx] &&
1747  cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1748  for (group = 0; group < ics->group_len[g]; group++) {
1749  ac->fdsp.butterflies_float(ch0 + group * 128 + offsets[i],
1750  ch1 + group * 128 + offsets[i],
1751  offsets[i+1] - offsets[i]);
1752  }
1753  }
1754  }
1755  ch0 += ics->group_len[g] * 128;
1756  ch1 += ics->group_len[g] * 128;
1757  }
1758 }
1759 
1767 static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_present)
1768 {
1769  const IndividualChannelStream *ics = &cpe->ch[1].ics;
1770  SingleChannelElement *sce1 = &cpe->ch[1];
1771  float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1772  const uint16_t *offsets = ics->swb_offset;
1773  int g, group, i, idx = 0;
1774  int c;
1775  float scale;
1776  for (g = 0; g < ics->num_window_groups; g++) {
1777  for (i = 0; i < ics->max_sfb;) {
1778  if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1779  const int bt_run_end = sce1->band_type_run_end[idx];
1780  for (; i < bt_run_end; i++, idx++) {
1781  c = -1 + 2 * (sce1->band_type[idx] - 14);
1782  if (ms_present)
1783  c *= 1 - 2 * cpe->ms_mask[idx];
1784  scale = c * sce1->sf[idx];
1785  for (group = 0; group < ics->group_len[g]; group++)
1786  ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
1787  coef0 + group * 128 + offsets[i],
1788  scale,
1789  offsets[i + 1] - offsets[i]);
1790  }
1791  } else {
1792  int bt_run_end = sce1->band_type_run_end[idx];
1793  idx += bt_run_end - i;
1794  i = bt_run_end;
1795  }
1796  }
1797  coef0 += ics->group_len[g] * 128;
1798  coef1 += ics->group_len[g] * 128;
1799  }
1800 }
1801 
1808 {
1809  int i, ret, common_window, ms_present = 0;
1810 
1811  common_window = get_bits1(gb);
1812  if (common_window) {
1813  if (decode_ics_info(ac, &cpe->ch[0].ics, gb))
1814  return AVERROR_INVALIDDATA;
1815  i = cpe->ch[1].ics.use_kb_window[0];
1816  cpe->ch[1].ics = cpe->ch[0].ics;
1817  cpe->ch[1].ics.use_kb_window[1] = i;
1818  if (cpe->ch[1].ics.predictor_present && (ac->oc[1].m4ac.object_type != AOT_AAC_MAIN))
1819  if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
1820  decode_ltp(&cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
1821  ms_present = get_bits(gb, 2);
1822  if (ms_present == 3) {
1823  av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1824  return -1;
1825  } else if (ms_present)
1826  decode_mid_side_stereo(cpe, gb, ms_present);
1827  }
1828  if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1829  return ret;
1830  if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1831  return ret;
1832 
1833  if (common_window) {
1834  if (ms_present)
1835  apply_mid_side_stereo(ac, cpe);
1836  if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
1837  apply_prediction(ac, &cpe->ch[0]);
1838  apply_prediction(ac, &cpe->ch[1]);
1839  }
1840  }
1841 
1842  apply_intensity_stereo(ac, cpe, ms_present);
1843  return 0;
1844 }
1845 
1846 static const float cce_scale[] = {
1847  1.09050773266525765921, //2^(1/8)
1848  1.18920711500272106672, //2^(1/4)
1849  M_SQRT2,
1850  2,
1851 };
1852 
1859 {
1860  int num_gain = 0;
1861  int c, g, sfb, ret;
1862  int sign;
1863  float scale;
1864  SingleChannelElement *sce = &che->ch[0];
1865  ChannelCoupling *coup = &che->coup;
1866 
1867  coup->coupling_point = 2 * get_bits1(gb);
1868  coup->num_coupled = get_bits(gb, 3);
1869  for (c = 0; c <= coup->num_coupled; c++) {
1870  num_gain++;
1871  coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1872  coup->id_select[c] = get_bits(gb, 4);
1873  if (coup->type[c] == TYPE_CPE) {
1874  coup->ch_select[c] = get_bits(gb, 2);
1875  if (coup->ch_select[c] == 3)
1876  num_gain++;
1877  } else
1878  coup->ch_select[c] = 2;
1879  }
1880  coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1881 
1882  sign = get_bits(gb, 1);
1883  scale = cce_scale[get_bits(gb, 2)];
1884 
1885  if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1886  return ret;
1887 
1888  for (c = 0; c < num_gain; c++) {
1889  int idx = 0;
1890  int cge = 1;
1891  int gain = 0;
1892  float gain_cache = 1.;
1893  if (c) {
1894  cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1895  gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1896  gain_cache = powf(scale, -gain);
1897  }
1898  if (coup->coupling_point == AFTER_IMDCT) {
1899  coup->gain[c][0] = gain_cache;
1900  } else {
1901  for (g = 0; g < sce->ics.num_window_groups; g++) {
1902  for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1903  if (sce->band_type[idx] != ZERO_BT) {
1904  if (!cge) {
1905  int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1906  if (t) {
1907  int s = 1;
1908  t = gain += t;
1909  if (sign) {
1910  s -= 2 * (t & 0x1);
1911  t >>= 1;
1912  }
1913  gain_cache = powf(scale, -t) * s;
1914  }
1915  }
1916  coup->gain[c][idx] = gain_cache;
1917  }
1918  }
1919  }
1920  }
1921  }
1922  return 0;
1923 }
1924 
1931  GetBitContext *gb)
1932 {
1933  int i;
1934  int num_excl_chan = 0;
1935 
1936  do {
1937  for (i = 0; i < 7; i++)
1938  che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1939  } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1940 
1941  return num_excl_chan / 7;
1942 }
1943 
1950  GetBitContext *gb)
1951 {
1952  int n = 1;
1953  int drc_num_bands = 1;
1954  int i;
1955 
1956  /* pce_tag_present? */
1957  if (get_bits1(gb)) {
1958  che_drc->pce_instance_tag = get_bits(gb, 4);
1959  skip_bits(gb, 4); // tag_reserved_bits
1960  n++;
1961  }
1962 
1963  /* excluded_chns_present? */
1964  if (get_bits1(gb)) {
1965  n += decode_drc_channel_exclusions(che_drc, gb);
1966  }
1967 
1968  /* drc_bands_present? */
1969  if (get_bits1(gb)) {
1970  che_drc->band_incr = get_bits(gb, 4);
1971  che_drc->interpolation_scheme = get_bits(gb, 4);
1972  n++;
1973  drc_num_bands += che_drc->band_incr;
1974  for (i = 0; i < drc_num_bands; i++) {
1975  che_drc->band_top[i] = get_bits(gb, 8);
1976  n++;
1977  }
1978  }
1979 
1980  /* prog_ref_level_present? */
1981  if (get_bits1(gb)) {
1982  che_drc->prog_ref_level = get_bits(gb, 7);
1983  skip_bits1(gb); // prog_ref_level_reserved_bits
1984  n++;
1985  }
1986 
1987  for (i = 0; i < drc_num_bands; i++) {
1988  che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1989  che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1990  n++;
1991  }
1992 
1993  return n;
1994 }
1995 
1996 static int decode_fill(AACContext *ac, GetBitContext *gb, int len) {
1997  uint8_t buf[256];
1998  int i, major, minor;
1999 
2000  if (len < 13+7*8)
2001  goto unknown;
2002 
2003  get_bits(gb, 13); len -= 13;
2004 
2005  for(i=0; i+1<sizeof(buf) && len>=8; i++, len-=8)
2006  buf[i] = get_bits(gb, 8);
2007 
2008  buf[i] = 0;
2009  if (ac->avctx->debug & FF_DEBUG_PICT_INFO)
2010  av_log(ac->avctx, AV_LOG_DEBUG, "FILL:%s\n", buf);
2011 
2012  if (sscanf(buf, "libfaac %d.%d", &major, &minor) == 2){
2013  ac->avctx->internal->skip_samples = 1024;
2014  }
2015 
2016 unknown:
2017  skip_bits_long(gb, len);
2018 
2019  return 0;
2020 }
2021 
2030  ChannelElement *che, enum RawDataBlockType elem_type)
2031 {
2032  int crc_flag = 0;
2033  int res = cnt;
2034  switch (get_bits(gb, 4)) { // extension type
2035  case EXT_SBR_DATA_CRC:
2036  crc_flag++;
2037  case EXT_SBR_DATA:
2038  if (!che) {
2039  av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
2040  return res;
2041  } else if (!ac->oc[1].m4ac.sbr) {
2042  av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
2043  skip_bits_long(gb, 8 * cnt - 4);
2044  return res;
2045  } else if (ac->oc[1].m4ac.sbr == -1 && ac->oc[1].status == OC_LOCKED) {
2046  av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
2047  skip_bits_long(gb, 8 * cnt - 4);
2048  return res;
2049  } else if (ac->oc[1].m4ac.ps == -1 && ac->oc[1].status < OC_LOCKED && ac->avctx->channels == 1) {
2050  ac->oc[1].m4ac.sbr = 1;
2051  ac->oc[1].m4ac.ps = 1;
2052  output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
2053  ac->oc[1].status, 1);
2054  } else {
2055  ac->oc[1].m4ac.sbr = 1;
2056  }
2057  res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
2058  break;
2059  case EXT_DYNAMIC_RANGE:
2060  res = decode_dynamic_range(&ac->che_drc, gb);
2061  break;
2062  case EXT_FILL:
2063  decode_fill(ac, gb, 8 * cnt - 4);
2064  break;
2065  case EXT_FILL_DATA:
2066  case EXT_DATA_ELEMENT:
2067  default:
2068  skip_bits_long(gb, 8 * cnt - 4);
2069  break;
2070  };
2071  return res;
2072 }
2073 
2080 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
2081  IndividualChannelStream *ics, int decode)
2082 {
2083  const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
2084  int w, filt, m, i;
2085  int bottom, top, order, start, end, size, inc;
2086  float lpc[TNS_MAX_ORDER];
2087  float tmp[TNS_MAX_ORDER+1];
2088 
2089  for (w = 0; w < ics->num_windows; w++) {
2090  bottom = ics->num_swb;
2091  for (filt = 0; filt < tns->n_filt[w]; filt++) {
2092  top = bottom;
2093  bottom = FFMAX(0, top - tns->length[w][filt]);
2094  order = tns->order[w][filt];
2095  if (order == 0)
2096  continue;
2097 
2098  // tns_decode_coef
2099  compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
2100 
2101  start = ics->swb_offset[FFMIN(bottom, mmm)];
2102  end = ics->swb_offset[FFMIN( top, mmm)];
2103  if ((size = end - start) <= 0)
2104  continue;
2105  if (tns->direction[w][filt]) {
2106  inc = -1;
2107  start = end - 1;
2108  } else {
2109  inc = 1;
2110  }
2111  start += w * 128;
2112 
2113  if (decode) {
2114  // ar filter
2115  for (m = 0; m < size; m++, start += inc)
2116  for (i = 1; i <= FFMIN(m, order); i++)
2117  coef[start] -= coef[start - i * inc] * lpc[i - 1];
2118  } else {
2119  // ma filter
2120  for (m = 0; m < size; m++, start += inc) {
2121  tmp[0] = coef[start];
2122  for (i = 1; i <= FFMIN(m, order); i++)
2123  coef[start] += tmp[i] * lpc[i - 1];
2124  for (i = order; i > 0; i--)
2125  tmp[i] = tmp[i - 1];
2126  }
2127  }
2128  }
2129  }
2130 }
2131 
2136 static void windowing_and_mdct_ltp(AACContext *ac, float *out,
2137  float *in, IndividualChannelStream *ics)
2138 {
2139  const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2140  const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2141  const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2142  const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
2143 
2144  if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
2145  ac->fdsp.vector_fmul(in, in, lwindow_prev, 1024);
2146  } else {
2147  memset(in, 0, 448 * sizeof(float));
2148  ac->fdsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
2149  }
2150  if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
2151  ac->fdsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
2152  } else {
2153  ac->fdsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
2154  memset(in + 1024 + 576, 0, 448 * sizeof(float));
2155  }
2156  ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
2157 }
2158 
2163 {
2164  const LongTermPrediction *ltp = &sce->ics.ltp;
2165  const uint16_t *offsets = sce->ics.swb_offset;
2166  int i, sfb;
2167 
2168  if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
2169  float *predTime = sce->ret;
2170  float *predFreq = ac->buf_mdct;
2171  int16_t num_samples = 2048;
2172 
2173  if (ltp->lag < 1024)
2174  num_samples = ltp->lag + 1024;
2175  for (i = 0; i < num_samples; i++)
2176  predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
2177  memset(&predTime[i], 0, (2048 - i) * sizeof(float));
2178 
2179  ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
2180 
2181  if (sce->tns.present)
2182  ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);
2183 
2184  for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
2185  if (ltp->used[sfb])
2186  for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
2187  sce->coeffs[i] += predFreq[i];
2188  }
2189 }
2190 
2195 {
2196  IndividualChannelStream *ics = &sce->ics;
2197  float *saved = sce->saved;
2198  float *saved_ltp = sce->coeffs;
2199  const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2200  const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2201  int i;
2202 
2203  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2204  memcpy(saved_ltp, saved, 512 * sizeof(float));
2205  memset(saved_ltp + 576, 0, 448 * sizeof(float));
2206  ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
2207  for (i = 0; i < 64; i++)
2208  saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
2209  } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2210  memcpy(saved_ltp, ac->buf_mdct + 512, 448 * sizeof(float));
2211  memset(saved_ltp + 576, 0, 448 * sizeof(float));
2212  ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
2213  for (i = 0; i < 64; i++)
2214  saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
2215  } else { // LONG_STOP or ONLY_LONG
2216  ac->fdsp.vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512);
2217  for (i = 0; i < 512; i++)
2218  saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i];
2219  }
2220 
2221  memcpy(sce->ltp_state, sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
2222  memcpy(sce->ltp_state+1024, sce->ret, 1024 * sizeof(*sce->ltp_state));
2223  memcpy(sce->ltp_state+2048, saved_ltp, 1024 * sizeof(*sce->ltp_state));
2224 }
2225 
2230 {
2231  IndividualChannelStream *ics = &sce->ics;
2232  float *in = sce->coeffs;
2233  float *out = sce->ret;
2234  float *saved = sce->saved;
2235  const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2236  const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2237  const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
2238  float *buf = ac->buf_mdct;
2239  float *temp = ac->temp;
2240  int i;
2241 
2242  // imdct
2243  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2244  for (i = 0; i < 1024; i += 128)
2245  ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
2246  } else
2247  ac->mdct.imdct_half(&ac->mdct, buf, in);
2248 
2249  /* window overlapping
2250  * NOTE: To simplify the overlapping code, all 'meaningless' short to long
2251  * and long to short transitions are considered to be short to short
2252  * transitions. This leaves just two cases (long to long and short to short)
2253  * with a little special sauce for EIGHT_SHORT_SEQUENCE.
2254  */
2255  if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
2257  ac->fdsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512);
2258  } else {
2259  memcpy( out, saved, 448 * sizeof(float));
2260 
2261  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2262  ac->fdsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64);
2263  ac->fdsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64);
2264  ac->fdsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64);
2265  ac->fdsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64);
2266  ac->fdsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64);
2267  memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
2268  } else {
2269  ac->fdsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
2270  memcpy( out + 576, buf + 64, 448 * sizeof(float));
2271  }
2272  }
2273 
2274  // buffer update
2275  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2276  memcpy( saved, temp + 64, 64 * sizeof(float));
2277  ac->fdsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
2278  ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
2279  ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
2280  memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
2281  } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2282  memcpy( saved, buf + 512, 448 * sizeof(float));
2283  memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
2284  } else { // LONG_STOP or ONLY_LONG
2285  memcpy( saved, buf + 512, 512 * sizeof(float));
2286  }
2287 }
2288 
2295  SingleChannelElement *target,
2296  ChannelElement *cce, int index)
2297 {
2298  IndividualChannelStream *ics = &cce->ch[0].ics;
2299  const uint16_t *offsets = ics->swb_offset;
2300  float *dest = target->coeffs;
2301  const float *src = cce->ch[0].coeffs;
2302  int g, i, group, k, idx = 0;
2303  if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
2304  av_log(ac->avctx, AV_LOG_ERROR,
2305  "Dependent coupling is not supported together with LTP\n");
2306  return;
2307  }
2308  for (g = 0; g < ics->num_window_groups; g++) {
2309  for (i = 0; i < ics->max_sfb; i++, idx++) {
2310  if (cce->ch[0].band_type[idx] != ZERO_BT) {
2311  const float gain = cce->coup.gain[index][idx];
2312  for (group = 0; group < ics->group_len[g]; group++) {
2313  for (k = offsets[i]; k < offsets[i + 1]; k++) {
2314  // XXX dsputil-ize
2315  dest[group * 128 + k] += gain * src[group * 128 + k];
2316  }
2317  }
2318  }
2319  }
2320  dest += ics->group_len[g] * 128;
2321  src += ics->group_len[g] * 128;
2322  }
2323 }
2324 
/**
 * Independent channel coupling: add cce->ch[0].ret, scaled by a single gain,
 * to target->ret.
 *
 * NOTE(review): the first line of the signature is not present in this
 * listing (the numbering jumps from 2324 to 2331); the body uses a parameter
 * named `ac` that is declared there.
 *
 * @param target channel whose ret[] buffer is updated in place
 * @param cce    coupling element supplying ch[0].ret and the gain
 * @param index  row of cce->coup.gain[][]; only element [index][0] is used
 */
2331  SingleChannelElement *target,
2332  ChannelElement *cce, int index)
2333 {
2334  int i;
2335  const float gain = cce->coup.gain[index][0];
2336  const float *src = cce->ch[0].ret;
2337  float *dest = target->ret;
// 1024 samples, doubled to 2048 when m4ac.sbr == 1.
2338  const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
2339 
2340  for (i = 0; i < len; i++)
2341  dest[i] += gain * src[i];
2342 }
2343 
/**
 * Run a coupling routine for every coupling channel element (CCE) that
 * targets the given element at the given coupling point.
 *
 * NOTE(review): the first line of the signature is not present in this
 * listing (the numbering jumps from 2343 to 2350). The body uses `ac` and
 * `cc`, which are presumably declared there; `cc` is the element whose
 * ch[0]/ch[1] receive the coupling.
 *
 * @param type                  element type to match against coup->type[c]
 * @param elem_id               element id to match against coup->id_select[c]
 * @param coupling_point        only CCEs with this coupling point are used
 * @param apply_coupling_method routine invoked once per coupled channel
 */
2350  enum RawDataBlockType type, int elem_id,
2351  enum CouplingPoint coupling_point,
2352  void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
2353 {
2354  int i, c;
2355 
2356  for (i = 0; i < MAX_ELEM_ID; i++) {
2357  ChannelElement *cce = ac->che[TYPE_CCE][i];
// index is the gain-list index passed on to the coupling routine; it is
// advanced for skipped targets too, so it stays aligned with the CCE's lists.
2358  int index = 0;
2359 
2360  if (cce && cce->coup.coupling_point == coupling_point) {
2361  ChannelCoupling *coup = &cce->coup;
2362 
// Note the inclusive bound: num_coupled + 1 entries are visited.
2363  for (c = 0; c <= coup->num_coupled; c++) {
2364  if (coup->type[c] == type && coup->id_select[c] == elem_id) {
// ch_select == 1 skips ch[0]; ch_select == 2 skips ch[1].
2365  if (coup->ch_select[c] != 1) {
2366  apply_coupling_method(ac, &cc->ch[0], cce, index);
// Any non-zero ch_select gives ch[1] its own gain list.
2367  if (coup->ch_select[c] != 0)
2368  index++;
2369  }
2370  if (coup->ch_select[c] != 2)
2371  apply_coupling_method(ac, &cc->ch[1], cce, index++);
2372  } else
// Non-matching target: skip its one gain list, or two when ch_select == 3.
2373  index += 1 + (coup->ch_select[c] == 3);
2374  }
2375  }
2376  }
2377 }
2378 
/**
 * Post-process every allocated channel element: LTP, TNS, IMDCT and
 * windowing, LTP state update and SBR, in that order.
 *
 * NOTE(review): the signature line is not present in this listing (the
 * numbering jumps from 2378 to 2383); the function is called elsewhere in
 * the file as spectral_to_sample(ac). Listing lines 2390, 2404 and 2419 are
 * also absent, which leaves three `if (type <= ...)` statements below without
 * their original controlled statement. Do not read the control flow of those
 * three spots from this listing; check the complete file.
 */
2383 {
2384  int i, type;
// Element types are visited from 3 down to 0.
2385  for (type = 3; type >= 0; type--) {
2386  for (i = 0; i < MAX_ELEM_ID; i++) {
2387  ChannelElement *che = ac->che[type][i];
2388  if (che) {
// NOTE(review): the statement controlled by this `if` (listing line 2390) is missing.
2389  if (type <= TYPE_CPE)
2391  if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
2392  if (che->ch[0].ics.predictor_present) {
2393  if (che->ch[0].ics.ltp.present)
2394  ac->apply_ltp(ac, &che->ch[0]);
// The second channel is only considered for channel pair elements.
2395  if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
2396  ac->apply_ltp(ac, &che->ch[1]);
2397  }
2398  }
2399  if (che->ch[0].tns.present)
2400  ac->apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
2401  if (che->ch[1].tns.present)
2402  ac->apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
// NOTE(review): the statement controlled by this `if` (listing line 2404) is missing.
2403  if (type <= TYPE_CPE)
// Coupling elements go through the IMDCT only when they couple AFTER_IMDCT.
2405  if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
2406  ac->imdct_and_windowing(ac, &che->ch[0]);
2407  if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
2408  ac->update_ltp(ac, &che->ch[0]);
2409  if (type == TYPE_CPE) {
2410  ac->imdct_and_windowing(ac, &che->ch[1]);
2411  if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
2412  ac->update_ltp(ac, &che->ch[1]);
2413  }
2414  if (ac->oc[1].m4ac.sbr > 0) {
2415  ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
2416  }
2417  }
// NOTE(review): the statement controlled by this `if` (listing line 2419) is missing.
2418  if (type <= TYPE_CCE)
2420  }
2421  }
2422  }
2423 }
2424 
/**
 * Parse an ADTS frame header and update the trial output configuration
 * ac->oc[1] from it.
 *
 * NOTE(review): the signature line is not present in this listing (the
 * numbering jumps from 2424 to 2426); the function is called elsewhere in
 * the file as parse_adts_frame_header(ac, gb).
 *
 * @return the value returned by avpriv_aac_parse_header() (the header fields
 *         are only applied when it is > 0), or -7 if configuring the channel
 *         layout fails
 */
2426 {
2427  int size;
2428  AACADTSHeaderInfo hdr_info;
2429  uint8_t layout_map[MAX_ELEM_ID*4][3];
2430  int layout_map_tags;
2431 
2432  size = avpriv_aac_parse_header(gb, &hdr_info);
2433  if (size > 0) {
// Warn only once per decoder instance.
2434  if (!ac->warned_num_aac_frames && hdr_info.num_aac_frames != 1) {
2435  // This is 2 for "VLB " audio in NSV files.
2436  // See samples/nsv/vlb_audio.
2437  av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame", 0);
2438  ac->warned_num_aac_frames = 1;
2439  }
// Non-zero chan_config: derive the layout from the default channel configuration.
2441  if (hdr_info.chan_config) {
2442  ac->oc[1].m4ac.chan_config = hdr_info.chan_config;
2443  if (set_default_channel_config(ac->avctx, layout_map,
2444  &layout_map_tags, hdr_info.chan_config))
2445  return -7;
2446  if (output_configure(ac, layout_map, layout_map_tags,
2447  FFMAX(ac->oc[1].status, OC_TRIAL_FRAME), 0))
2448  return -7;
2449  } else {
2450  ac->oc[1].m4ac.chan_config = 0;
// chan_config == 0 with dual-mono requested and no configuration yet:
// set up a trial layout of two SCEs (ids 0 and 1), both AAC_CHANNEL_FRONT.
2456  if (ac->dmono_mode && ac->oc[0].status == OC_NONE) {
2457  layout_map_tags = 2;
2458  layout_map[0][0] = layout_map[1][0] = TYPE_SCE;
2459  layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT;
2460  layout_map[0][1] = 0;
2461  layout_map[1][1] = 1;
2462  if (output_configure(ac, layout_map, layout_map_tags,
2463  OC_TRIAL_FRAME, 0))
2464  return -7;
2465  }
2466  }
2467  ac->oc[1].m4ac.sample_rate = hdr_info.sample_rate;
2468  ac->oc[1].m4ac.sampling_index = hdr_info.sampling_index;
2469  ac->oc[1].m4ac.object_type = hdr_info.object_type;
// Reset sbr/ps to -1 unless the saved configuration oc[0] is locked and
// agrees with this header on channel configuration and sample rate.
2470  if (ac->oc[0].status != OC_LOCKED ||
2471  ac->oc[0].m4ac.chan_config != hdr_info.chan_config ||
2472  ac->oc[0].m4ac.sample_rate != hdr_info.sample_rate) {
2473  ac->oc[1].m4ac.sbr = -1;
2474  ac->oc[1].m4ac.ps = -1;
2475  }
// A 16-bit CRC follows the header when crc_absent is 0; it is skipped, not verified.
2476  if (!hdr_info.crc_absent)
2477  skip_bits(gb, 16);
2478  }
2479  return size;
2480 }
2481 
/**
 * Decode one AAC frame from an already initialised bit reader.
 *
 * If the next 12 bits are 0xfff an ADTS header is parsed first. Syntactic
 * elements are then read until TYPE_END, spectral_to_sample() converts the
 * decoded elements, and the frame and codec context fields are updated.
 *
 * NOTE(review): listing lines 2556, 2611 and 2638 are absent from this
 * listing. This leaves the `is_dmono` expression below truncated and the
 * `fail:` label without its first statement; check the complete file.
 *
 * @param avctx         codec context; priv_data is the AACContext
 * @param data          output frame, stored in ac->frame
 * @param got_frame_ptr set to 1 when a non-zero sample count was produced, else 0
 * @param gb            bit reader positioned at the start of the frame
 * @param avpkt         packet; only its AV_PKT_DATA_SKIP_SAMPLES side data is used
 * @return 0 on success, otherwise the non-zero error value that stopped decoding
 */
2482 static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
2483  int *got_frame_ptr, GetBitContext *gb, AVPacket *avpkt)
2484 {
2485  AACContext *ac = avctx->priv_data;
2486  ChannelElement *che = NULL, *che_prev = NULL;
2487  enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
2488  int err, elem_id;
2489  int samples = 0, multiplier, audio_found = 0, pce_found = 0;
2490  int is_dmono, sce_count = 0;
2491 
2492  ac->frame = data;
2493 
// A 12-bit 0xfff pattern is treated as the start of an ADTS header.
2494  if (show_bits(gb, 12) == 0xfff) {
2495  if (parse_adts_frame_header(ac, gb) < 0) {
2496  av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
2497  err = -1;
2498  goto fail;
2499  }
2500  if (ac->oc[1].m4ac.sampling_index > 12) {
2501  av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index);
2502  err = -1;
2503  goto fail;
2504  }
2505  }
2506 
2507  if (frame_configure_elements(avctx) < 0) {
2508  err = -1;
2509  goto fail;
2510  }
2511 
2512  ac->tags_mapped = 0;
2513  // parse
// Each element starts with a 3-bit type and a 4-bit id.
2514  while ((elem_type = get_bits(gb, 3)) != TYPE_END) {
2515  elem_id = get_bits(gb, 4);
2516 
// Types below TYPE_DSE need an allocated channel element and make the
// frame carry 1024 samples.
2517  if (elem_type < TYPE_DSE) {
2518  if (!(che=get_che(ac, elem_type, elem_id))) {
2519  av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
2520  elem_type, elem_id);
2521  err = -1;
2522  goto fail;
2523  }
2524  samples = 1024;
2525  }
2526 
2527  switch (elem_type) {
2528 
2529  case TYPE_SCE:
2530  err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2531  audio_found = 1;
// Counted for the dual-mono check after the loop.
2532  sce_count++;
2533  break;
2534 
2535  case TYPE_CPE:
2536  err = decode_cpe(ac, gb, che);
2537  audio_found = 1;
2538  break;
2539 
2540  case TYPE_CCE:
2541  err = decode_cce(ac, gb, che);
2542  break;
2543 
2544  case TYPE_LFE:
2545  err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2546  audio_found = 1;
2547  break;
2548 
2549  case TYPE_DSE:
2550  err = skip_data_stream_element(ac, gb);
2551  break;
2552 
2553  case TYPE_PCE: {
2554  uint8_t layout_map[MAX_ELEM_ID*4][3];
2555  int tags;
// NOTE(review): listing line 2556 is missing here.
2557  tags = decode_pce(avctx, &ac->oc[1].m4ac, layout_map, gb);
2558  if (tags < 0) {
2559  err = tags;
2560  break;
2561  }
// Only the first PCE of a frame is applied; later ones are parsed and logged.
2562  if (pce_found) {
2563  av_log(avctx, AV_LOG_ERROR,
2564  "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2565  } else {
2566  err = output_configure(ac, layout_map, tags, OC_TRIAL_PCE, 1);
2567  if (!err)
2568  ac->oc[1].m4ac.chan_config = 0;
2569  pce_found = 1;
2570  }
2571  break;
2572  }
2573 
2574  case TYPE_FIL:
// For fill elements elem_id is a byte count; 15 is an escape that adds
// a further 8-bit value minus 1.
2575  if (elem_id == 15)
2576  elem_id += get_bits(gb, 8) - 1;
2577  if (get_bits_left(gb) < 8 * elem_id) {
2578  av_log(avctx, AV_LOG_ERROR, "TYPE_FIL: "overread_err);
2579  err = -1;
2580  goto fail;
2581  }
// Extension payloads are attached to the previously decoded element.
2582  while (elem_id > 0)
2583  elem_id -= decode_extension_payload(ac, gb, elem_id, che_prev, elem_type_prev);
2584  err = 0; /* FIXME */
2585  break;
2586 
2587  default:
2588  err = -1; /* should not happen, but keeps compiler happy */
2589  break;
2590  }
2591 
2592  che_prev = che;
2593  elem_type_prev = elem_type;
2594 
2595  if (err)
2596  goto fail;
2597 
// At least 3 bits must remain for the next element type.
2598  if (get_bits_left(gb) < 3) {
2599  av_log(avctx, AV_LOG_ERROR, overread_err);
2600  err = -1;
2601  goto fail;
2602  }
2603  }
2604 
2605  spectral_to_sample(ac);
2606 
// multiplier is 1 only when SBR is active (sbr == 1) and the extension
// sample rate exceeds the core rate; it then doubles the sample count
// and, further down, the reported sample rate.
2607  multiplier = (ac->oc[1].m4ac.sbr == 1) ? ac->oc[1].m4ac.ext_sample_rate > ac->oc[1].m4ac.sample_rate : 0;
2608  samples <<= multiplier;
2609  /* for dual-mono audio (SCE + SCE) */
// NOTE(review): the continuation of this expression (listing line 2611) is missing.
2610  is_dmono = ac->dmono_mode && sce_count == 2 &&
2612 
2613  if (samples)
2614  ac->frame->nb_samples = samples;
2615  *got_frame_ptr = !!samples;
2616 
// Dual mono: mode 1 points data[1] at data[0], mode 2 points data[0] at data[1].
2617  if (is_dmono) {
2618  if (ac->dmono_mode == 1)
2619  ((AVFrame *)data)->data[1] =((AVFrame *)data)->data[0];
2620  else if (ac->dmono_mode == 2)
2621  ((AVFrame *)data)->data[0] =((AVFrame *)data)->data[1];
2622  }
2623 
// Lock the trial configuration once audio has been decoded with it.
2624  if (ac->oc[1].status && audio_found) {
2625  avctx->sample_rate = ac->oc[1].m4ac.sample_rate << multiplier;
2626  avctx->frame_size = samples;
2627  ac->oc[1].status = OC_LOCKED;
2628  }
2629 
// With a doubled output rate, the first 32-bit little-endian value of the
// skip-samples side data is doubled in place as well.
2630  if (multiplier) {
2631  int side_size;
2632  const uint8_t *side = av_packet_get_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, &side_size);
2633  if (side && side_size>=4)
2634  AV_WL32(side, 2*AV_RL32(side));
2635  }
2636  return 0;
2637 fail:
// NOTE(review): listing line 2638 is missing here.
2639  return err;
2640 }
2641 
/**
 * Decode callback for raw/ADTS AAC packets.
 *
 * Sets the dual-mono mode for this packet, wraps the packet data in a bit
 * reader and hands it to aac_decode_frame_int().
 *
 * NOTE(review): listing lines 2654, 2658, 2664, 2669 and 2673 are absent
 * from this listing, so both av_packet_get_side_data() calls and the
 * av_mallocz() call below appear without all of their arguments.
 *
 * @return a negative error code on failure; otherwise the number of bytes
 *         consumed, or the whole packet size when only zero bytes follow
 *         the consumed data
 */
2642 static int aac_decode_frame(AVCodecContext *avctx, void *data,
2643  int *got_frame_ptr, AVPacket *avpkt)
2644 {
2645  AACContext *ac = avctx->priv_data;
2646  const uint8_t *buf = avpkt->data;
2647  int buf_size = avpkt->size;
2648  GetBitContext gb;
2649  int buf_consumed;
2650  int buf_offset;
2651  int err;
2652  int new_extradata_size;
2653  const uint8_t *new_extradata = av_packet_get_side_data(avpkt,
2655  &new_extradata_size);
2656  int jp_dualmono_size;
2657  const uint8_t *jp_dualmono = av_packet_get_side_data(avpkt,
2659  &jp_dualmono_size);
2660 
// The constant `&& 0` disables this whole branch: new extradata found in
// side data is currently never applied.
2661  if (new_extradata && 0) {
2662  av_free(avctx->extradata);
2663  avctx->extradata = av_mallocz(new_extradata_size +
2665  if (!avctx->extradata)
2666  return AVERROR(ENOMEM);
2667  avctx->extradata_size = new_extradata_size;
2668  memcpy(avctx->extradata, new_extradata, new_extradata_size);
2670  if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
2671  avctx->extradata,
2672  avctx->extradata_size*8, 1) < 0) {
2674  return AVERROR_INVALIDDATA;
2675  }
2676  }
2677 
// Dual-mono mode: 0 by default, 1 + the first side-data byte when side
// data is present, and overridden by the force_dmono_mode option when
// that is >= 0.
2678  ac->dmono_mode = 0;
2679  if (jp_dualmono && jp_dualmono_size > 0)
2680  ac->dmono_mode = 1 + *jp_dualmono;
2681  if (ac->force_dmono_mode >= 0)
2682  ac->dmono_mode = ac->force_dmono_mode;
2683 
// Guard the buf_size * 8 bit count below against int overflow.
2684  if (INT_MAX / 8 <= buf_size)
2685  return AVERROR_INVALIDDATA;
2686 
2687  init_get_bits(&gb, buf, buf_size * 8);
2688 
2689  if ((err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb, avpkt)) < 0)
2690  return err;
2691 
// Round the consumed bit count up to whole bytes, then look for the
// first non-zero byte after the consumed data.
2692  buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2693  for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2694  if (buf[buf_offset])
2695  break;
2696 
// If only zero bytes remain, report the whole packet as consumed.
2697  return buf_size > buf_offset ? buf_consumed : buf_size;
2698 }
2699 
/**
 * Release decoder resources: the SBR context and storage of every allocated
 * channel element, and the three MDCT contexts.
 *
 * NOTE(review): the signature line is not present in this listing (the
 * numbering jumps from 2699 to 2701); the body uses a parameter `avctx`, and
 * the function is referenced elsewhere in the file as aac_decode_close.
 *
 * @return always 0
 */
2701 {
2702  AACContext *ac = avctx->priv_data;
2703  int i, type;
2704 
2705  for (i = 0; i < MAX_ELEM_ID; i++) {
2706  for (type = 0; type < 4; type++) {
// The SBR context is closed only for allocated elements; av_freep() is
// called for every slot regardless.
2707  if (ac->che[type][i])
2708  ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2709  av_freep(&ac->che[type][i]);
2710  }
2711  }
2712 
2713  ff_mdct_end(&ac->mdct);
2714  ff_mdct_end(&ac->mdct_small);
2715  ff_mdct_end(&ac->mdct_ltp);
2716  return 0;
2717 }
2718 
2719 
/* Value that latm_decode_frame() requires in the first 11 bits of every packet. */
2720 #define LOAS_SYNC_WORD 0x2b7
2721 
/**
 * Private context of the LATM decoder.
 *
 * NOTE(review): the member declarations (listing lines 2723-2724 and
 * 2727-2729) are absent from this listing. Code elsewhere in the file
 * accesses the members aac_ctx, initialized, audio_mux_version_A,
 * frame_length_type and frame_length; their types and order must be taken
 * from the complete file.
 */
2722 struct LATMContext {
2725 
2726  // parser data
2730 };
2731 
2732 static inline uint32_t latm_get_value(GetBitContext *b)
2733 {
2734  int length = get_bits(b, 2);
2735 
2736  return get_bits_long(b, (length+1)*8);
2737 }
2738 
/**
 * Parse the AudioSpecificConfig embedded in a LATM stream and, when the
 * decoder is not yet initialised or the sample rate / channel configuration
 * changed, copy it into avctx->extradata.
 *
 * NOTE(review): the first line of the signature (listing line 2739) and
 * listing lines 2756 and 2784 are absent from this listing. The body uses a
 * parameter `latmctx` declared on the missing signature line; the missing
 * line 2784 presumably re-allocates avctx->extradata, since the pointer is
 * checked for NULL directly afterwards.
 *
 * @param gb     bit reader positioned at the start of the config
 * @param asclen config length in bits, or 0 if unknown (the remaining bits
 *               are then used)
 * @return number of bits consumed, or a negative AVERROR code
 */
2740  GetBitContext *gb, int asclen)
2741 {
2742  AACContext *ac = &latmctx->aac_ctx;
2743  AVCodecContext *avctx = ac->avctx;
2744  MPEG4AudioConfig m4ac = { 0 };
2745  int config_start_bit = get_bits_count(gb);
2746  int sync_extension = 0;
2747  int bits_consumed, esize;
2748 
// A known length enables sync_extension and is clamped to what is left.
2749  if (asclen) {
2750  sync_extension = 1;
2751  asclen = FFMIN(asclen, get_bits_left(gb));
2752  } else
2753  asclen = get_bits_left(gb);
2754 
// The config is parsed straight from gb->buffer, so it must start on a
// byte boundary.
2755  if (config_start_bit % 8) {
2757  "Non-byte-aligned audio-specific config", 1);
2758  return AVERROR_PATCHWELCOME;
2759  }
2760  if (asclen <= 0)
2761  return AVERROR_INVALIDDATA;
// Parse into a local m4ac with a NULL AACContext; the decoder state in
// `ac` is only read for the comparison below.
2762  bits_consumed = decode_audio_specific_config(NULL, avctx, &m4ac,
2763  gb->buffer + (config_start_bit / 8),
2764  asclen, sync_extension);
2765 
2766  if (bits_consumed < 0)
2767  return AVERROR_INVALIDDATA;
2768 
2769  if (!latmctx->initialized ||
2770  ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
2771  ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
2772 
2773  if(latmctx->initialized) {
2774  av_log(avctx, AV_LOG_INFO, "audio config changed\n");
2775  } else {
2776  av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
2777  }
// Clearing the flag makes latm_decode_frame() re-run the config parse on
// the new extradata.
2778  latmctx->initialized = 0;
2779 
// Extradata size in whole bytes.
2780  esize = (bits_consumed+7) / 8;
2781 
2782  if (avctx->extradata_size < esize) {
2783  av_free(avctx->extradata);
2785  if (!avctx->extradata)
2786  return AVERROR(ENOMEM);
2787  }
2788 
2789  avctx->extradata_size = esize;
2790  memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
2791  memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2792  }
2793  skip_bits_long(gb, bits_consumed);
2794 
2795  return bits_consumed;
2796 }
2797 
/**
 * Parse a LATM StreamMuxConfig. Only one program with one layer is
 * supported; the frame length type (and the fixed frame length for type 1)
 * is stored in the context.
 *
 * NOTE(review): listing lines 2816 and 2825 (the beginning of two calls
 * whose string arguments are still visible) are absent from this listing.
 *
 * @return 0 on success, a negative AVERROR code otherwise
 */
2798 static int read_stream_mux_config(struct LATMContext *latmctx,
2799  GetBitContext *gb)
2800 {
2801  int ret, audio_mux_version = get_bits(gb, 1);
2802 
// audio_mux_version_A is only transmitted when audio_mux_version is set.
2803  latmctx->audio_mux_version_A = 0;
2804  if (audio_mux_version)
2805  latmctx->audio_mux_version_A = get_bits(gb, 1);
2806 
// Nothing further is parsed when audio_mux_version_A is non-zero.
2807  if (!latmctx->audio_mux_version_A) {
2808 
2809  if (audio_mux_version)
2810  latm_get_value(gb); // taraFullness
2811 
2812  skip_bits(gb, 1); // allStreamSameTimeFraming
2813  skip_bits(gb, 6); // numSubFrames
2814  // numPrograms
2815  if (get_bits(gb, 4)) { // numPrograms
2817  "Multiple programs", 1);
2818  return AVERROR_PATCHWELCOME;
2819  }
2820 
2821  // for each program (which there is only one in DVB)
2822 
2823  // for each layer (which there is only one in DVB)
2824  if (get_bits(gb, 3)) { // numLayer
2826  "Multiple layers", 1);
2827  return AVERROR_PATCHWELCOME;
2828  }
2829 
2830  // for all but first stream: use_same_config = get_bits(gb, 1);
2831  if (!audio_mux_version) {
2832  if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
2833  return ret;
2834  } else {
// With an explicit length, skip whatever the config parser left unread.
2835  int ascLen = latm_get_value(gb);
2836  if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
2837  return ret;
2838  ascLen -= ret;
2839  skip_bits_long(gb, ascLen);
2840  }
2841 
2842  latmctx->frame_length_type = get_bits(gb, 3);
// Type 2 has no case and reads nothing.
2843  switch (latmctx->frame_length_type) {
2844  case 0:
2845  skip_bits(gb, 8); // latmBufferFullness
2846  break;
2847  case 1:
2848  latmctx->frame_length = get_bits(gb, 9);
2849  break;
2850  case 3:
2851  case 4:
2852  case 5:
2853  skip_bits(gb, 6); // CELP frame length table index
2854  break;
2855  case 6:
2856  case 7:
2857  skip_bits(gb, 1); // HVXC frame length table index
2858  break;
2859  }
2860 
2861  if (get_bits(gb, 1)) { // other data
2862  if (audio_mux_version) {
2863  latm_get_value(gb); // other_data_bits
2864  } else {
// Escape-coded length: each 1-bit escape flag is followed by 8 bits,
// repeated while the flag is set. The value itself is discarded.
2865  int esc;
2866  do {
2867  esc = get_bits(gb, 1);
2868  skip_bits(gb, 8);
2869  } while (esc);
2870  }
2871  }
2872 
2873  if (get_bits(gb, 1)) // crc present
2874  skip_bits(gb, 8); // config_crc
2875  }
2876 
2877  return 0;
2878 }
2879 
/**
 * Read the payload length information for the current frame length type.
 *
 * NOTE(review): the signature line is not present in this listing (the
 * numbering jumps from 2879 to 2881); the body uses parameters `ctx` and
 * `gb`, and the function is called elsewhere in the file as
 * read_payload_length_info(latmctx, gb).
 *
 * @return the payload length in bytes for frame length types 0 and 1,
 *         0 for every other type
 */
2881 {
2882  uint8_t tmp;
2883 
2884  if (ctx->frame_length_type == 0) {
// The length is a sum of bytes; a byte of 255 means another byte follows.
2885  int mux_slot_length = 0;
2886  do {
2887  tmp = get_bits(gb, 8);
2888  mux_slot_length += tmp;
2889  } while (tmp == 255);
2890  return mux_slot_length;
2891  } else if (ctx->frame_length_type == 1) {
// Fixed length stored by read_stream_mux_config().
2892  return ctx->frame_length;
2893  } else if (ctx->frame_length_type == 3 ||
2894  ctx->frame_length_type == 5 ||
2895  ctx->frame_length_type == 7) {
2896  skip_bits(gb, 2); // mux_slot_length_coded
2897  }
2898  return 0;
2899 }
2900 
2901 static int read_audio_mux_element(struct LATMContext *latmctx,
2902  GetBitContext *gb)
2903 {
2904  int err;
2905  uint8_t use_same_mux = get_bits(gb, 1);
2906  if (!use_same_mux) {
2907  if ((err = read_stream_mux_config(latmctx, gb)) < 0)
2908  return err;
2909  } else if (!latmctx->aac_ctx.avctx->extradata) {
2910  av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
2911  "no decoder config found\n");
2912  return AVERROR(EAGAIN);
2913  }
2914  if (latmctx->audio_mux_version_A == 0) {
2915  int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
2916  if (mux_slot_length_bytes * 8 > get_bits_left(gb)) {
2917  av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
2918  return AVERROR_INVALIDDATA;
2919  } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
2920  av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
2921  "frame length mismatch %d << %d\n",
2922  mux_slot_length_bytes * 8, get_bits_left(gb));
2923  return AVERROR_INVALIDDATA;
2924  }
2925  }
2926  return 0;
2927 }
2928 
2929 
/**
 * Decode callback for LOAS/LATM packets: check the LOAS sync word, parse
 * the AudioMuxElement header, (re)initialise the AAC decoder from extradata
 * if needed, then decode the AAC payload.
 *
 * NOTE(review): listing line 2957 is absent from this listing; it sits
 * directly before the decode_audio_specific_config() call whose failure
 * path calls pop_output_configuration().
 *
 * @return the mux length in bytes (13-bit length field + 3) on success,
 *         avpkt->size when no decoder configuration is available yet,
 *         or a negative AVERROR code
 */
2930 static int latm_decode_frame(AVCodecContext *avctx, void *out,
2931  int *got_frame_ptr, AVPacket *avpkt)
2932 {
2933  struct LATMContext *latmctx = avctx->priv_data;
2934  int muxlength, err;
2935  GetBitContext gb;
2936 
2937  if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
2938  return err;
2939 
2940  // check for LOAS sync word
2941  if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
2942  return AVERROR_INVALIDDATA;
2943 
// The 13-bit length excludes the 3 header bytes (11-bit sync + 13-bit length).
2944  muxlength = get_bits(&gb, 13) + 3;
2945  // not enough data, the parser should have sorted this out
2946  if (muxlength > avpkt->size)
2947  return AVERROR_INVALIDDATA;
2948 
2949  if ((err = read_audio_mux_element(latmctx, &gb)) < 0)
2950  return err;
2951 
2952  if (!latmctx->initialized) {
// Without extradata the packet is dropped and reported as fully consumed.
2953  if (!avctx->extradata) {
2954  *got_frame_ptr = 0;
2955  return avpkt->size;
2956  } else {
2958  if ((err = decode_audio_specific_config(
2959  &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
2960  avctx->extradata, avctx->extradata_size*8, 1)) < 0) {
2961  pop_output_configuration(&latmctx->aac_ctx);
2962  return err;
2963  }
2964  latmctx->initialized = 1;
2965  }
2966  }
2967 
// aac_decode_frame_int() would treat 0xfff as an ADTS header, so reject
// it here.
2968  if (show_bits(&gb, 12) == 0xfff) {
2969  av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
2970  "ADTS header detected, probably as result of configuration "
2971  "misparsing\n");
2972  return AVERROR_INVALIDDATA;
2973  }
2974 
2975  if ((err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt)) < 0)
2976  return err;
2977 
2978  return muxlength;
2979 }
2980 
/**
 * Init callback of the LATM decoder: runs aac_decode_init() and marks the
 * LATM context as initialised when extradata was supplied and that call
 * returned 0.
 *
 * NOTE(review): the signature line is not present in this listing (the
 * numbering jumps from 2980 to 2982); the body uses a parameter `avctx`.
 *
 * @return the value returned by aac_decode_init()
 */
2982 {
2983  struct LATMContext *latmctx = avctx->priv_data;
2984  int ret = aac_decode_init(avctx);
2985 
// Without extradata the flag is left untouched.
2986  if (avctx->extradata_size > 0)
2987  latmctx->initialized = !ret;
2988 
2989  return ret;
2990 }
2991 
/**
 * Install the decoder's processing callbacks in the context.
 *
 * NOTE(review): listing lines 2994, 2997 and 3001 are absent from this
 * listing. Only the apply_ltp, apply_tns and update_ltp assignments are
 * visible; spectral_to_sample() also calls c->imdct_and_windowing, so the
 * missing lines presumably include that assignment. The statement
 * controlled by `if(ARCH_MIPS)` is missing as well.
 */
2992 static void aacdec_init(AACContext *c)
2993 {
2995  c->apply_ltp = apply_ltp;
2996  c->apply_tns = apply_tns;
2998  c->update_ltp = update_ltp;
2999 
3000  if(ARCH_MIPS)
3002 }
/* Flags shared by all decoder options below: decoding + audio parameter. */
3006 #define AACDEC_FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
/**
 * Private options of the AAC decoder.
 *
 * "dual_mono_mode" is an integer in [-1, 2] (default -1) stored in
 * AACContext.force_dmono_mode. The following entries are named constants
 * for it: auto = -1, main = 1, sub = 2, both = 0.
 */
3007 static const AVOption options[] = {
3008  {"dual_mono_mode", "Select the channel to decode for dual mono",
3009  offsetof(AACContext, force_dmono_mode), AV_OPT_TYPE_INT, {.i64=-1}, -1, 2,
3010  AACDEC_FLAGS, "dual_mono_mode"},
3011 
3012  {"auto", "autoselection", 0, AV_OPT_TYPE_CONST, {.i64=-1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3013  {"main", "Select Main/Left channel", 0, AV_OPT_TYPE_CONST, {.i64= 1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3014  {"sub" , "Select Sub/Right channel", 0, AV_OPT_TYPE_CONST, {.i64= 2}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3015  {"both", "Select both channels", 0, AV_OPT_TYPE_CONST, {.i64= 0}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3016 
// Terminating entry.
3017  {NULL},
3018 };
3019 
/* AVClass of the AAC decoder; exposes the `options` table above and is
 * referenced by the "aac" codec definition via .priv_class. */
3020 static const AVClass aac_decoder_class = {
3021  .class_name = "AAC decoder",
3022  .item_name = av_default_item_name,
3023  .option = options,
3024  .version = LIBAVUTIL_VERSION_INT,
3025 };
3026 
/**
 * Codec definition for the "aac" decoder (AV_CODEC_ID_AAC).
 *
 * NOTE(review): the declarator line (listing line 3027) and listing lines
 * 3033, 3034 and 3037 are absent from this listing. Judging by the
 * "aac_latm" definition further down, the missing initialisers presumably
 * include .close and .decode, plus the contents of the sample_fmts list;
 * take them from the complete file.
 */
3028  .name = "aac",
3029  .type = AVMEDIA_TYPE_AUDIO,
3030  .id = AV_CODEC_ID_AAC,
3031  .priv_data_size = sizeof(AACContext),
3032  .init = aac_decode_init,
3035  .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
3036  .sample_fmts = (const enum AVSampleFormat[]) {
3038  },
3039  .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
3040  .channel_layouts = aac_channel_layout,
3041  .flush = flush,
// Only this codec definition sets a priv_class; the "aac_latm" one does not.
3042  .priv_class = &aac_decoder_class,
3043 };
3044 
3045 /*
3046  Note: This decoder filter is intended to decode LATM streams transferred
3047  in MPEG transport streams which only contain one program.
3048  To do a more complex LATM demuxing a separate LATM demuxer should be used.
3049 */
/**
 * Codec definition for the "aac_latm" decoder (AV_CODEC_ID_AAC_LATM). It
 * uses struct LATMContext as private data and shares the close and flush
 * callbacks with the plain AAC decoder.
 *
 * NOTE(review): the declarator line (listing line 3050) and the contents of
 * the sample_fmts list (listing line 3060) are absent from this listing.
 */
3051  .name = "aac_latm",
3052  .type = AVMEDIA_TYPE_AUDIO,
3053  .id = AV_CODEC_ID_AAC_LATM,
3054  .priv_data_size = sizeof(struct LATMContext),
3055  .init = latm_decode_init,
3056  .close = aac_decode_close,
3057  .decode = latm_decode_frame,
3058  .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
3059  .sample_fmts = (const enum AVSampleFormat[]) {
3061  },
3062  .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
3063  .channel_layouts = aac_channel_layout,
3064  .flush = flush,
3065 };