Go to the documentation of this file.
44 #define PSY_3GPP_THR_SPREAD_HI 1.5f // spreading factor for low-to-hi threshold spreading (15 dB/Bark)
45 #define PSY_3GPP_THR_SPREAD_LOW 3.0f // spreading factor for hi-to-low threshold spreading (30 dB/Bark)
47 #define PSY_3GPP_EN_SPREAD_HI_L1 2.0f
49 #define PSY_3GPP_EN_SPREAD_HI_L2 1.5f
51 #define PSY_3GPP_EN_SPREAD_HI_S 1.5f
53 #define PSY_3GPP_EN_SPREAD_LOW_L 3.0f
55 #define PSY_3GPP_EN_SPREAD_LOW_S 2.0f
57 #define PSY_3GPP_RPEMIN 0.01f
58 #define PSY_3GPP_RPELEV 2.0f
60 #define PSY_3GPP_C1 3.0f // numerically equal to log2(8)
61 #define PSY_3GPP_C2 1.3219281f // numerically equal to log2(2.5)
62 #define PSY_3GPP_C3 0.55935729f // numerically equal to 1 - PSY_3GPP_C2 / PSY_3GPP_C1
64 #define PSY_SNR_1DB 7.9432821e-1f // numerically equal to 10^(-1/10)
65 #define PSY_SNR_25DB 3.1622776e-3f // numerically equal to 10^(-25/10)
67 #define PSY_3GPP_SAVE_SLOPE_L -0.46666667f
68 #define PSY_3GPP_SAVE_SLOPE_S -0.36363637f
69 #define PSY_3GPP_SAVE_ADD_L -0.84285712f
70 #define PSY_3GPP_SAVE_ADD_S -0.75f
71 #define PSY_3GPP_SPEND_SLOPE_L 0.66666669f
72 #define PSY_3GPP_SPEND_SLOPE_S 0.81818181f
73 #define PSY_3GPP_SPEND_ADD_L -0.35f
74 #define PSY_3GPP_SPEND_ADD_S -0.26111111f
75 #define PSY_3GPP_CLIP_LO_L 0.2f
76 #define PSY_3GPP_CLIP_LO_S 0.2f
77 #define PSY_3GPP_CLIP_HI_L 0.95f
78 #define PSY_3GPP_CLIP_HI_S 0.75f
80 #define PSY_3GPP_AH_THR_LONG 0.5f
81 #define PSY_3GPP_AH_THR_SHORT 0.63f
83 #define PSY_PE_FORGET_SLOPE 511
91 #define PSY_3GPP_BITS_TO_PE(bits) ((bits) * 1.18f) // scales a bit count by 1.18 to obtain a PE value
92 #define PSY_3GPP_PE_TO_BITS(bits) ((bits) / 1.18f) // inverse of PSY_3GPP_BITS_TO_PE (divides by 1.18); NOTE(review): the argument is presumably a PE value despite being named "bits"
95 #define PSY_LAME_FIR_LEN 21 // LAME psy model FIR order
96 #define AAC_BLOCK_SIZE_LONG 1024 // long block size
97 #define AAC_BLOCK_SIZE_SHORT 128 // short block size
98 #define AAC_NUM_BLOCKS_SHORT 8 // number of blocks in a short sequence
99 #define PSY_LAME_NUM_SUBBLOCKS 3 // number of sub-blocks in each short block
220 -8.65163e-18 * 2, -0.00851586 * 2, -6.74764e-18 * 2, 0.0209036 * 2,
221 -3.36639e-17 * 2, -0.0438162 * 2, -1.54175e-17 * 2, 0.0931738 * 2,
222 -5.52212e-17 * 2, -0.313819 * 2
235 int lower_range = 12, upper_range = 12;
243 for (
i = 1;
i < 13;
i++) {
284 return 13.3f *
atanf(0.00076
f *
f) + 3.5f *
atanf((
f / 7500.0
f) * (
f / 7500.0
f));
295 return 3.64 * pow(
f, -0.8)
296 - 6.8 *
exp(-0.6 * (
f - 3.4) * (
f - 3.4))
297 + 6.0 *
exp(-0.15 * (
f - 8.7) * (
f - 8.7))
298 + (0.6 + 0.04 * add) * 0.001 *
f *
f *
f *
f;
305 float prev, minscale, minath, minsnr, pe_min;
309 const float num_bark =
calc_bark((
float)bandwidth);
312 if (!
ctx->model_priv_data)
314 pctx =
ctx->model_priv_data;
319 chan_bitrate = (
int)(chan_bitrate / 120.0 * (
ctx->avctx->global_quality ?
ctx->avctx->global_quality : 120));
327 ctx->bitres.size -=
ctx->bitres.size % 8;
330 for (j = 0; j < 2; j++) {
333 float line_to_frequency =
ctx->avctx->sample_rate / (j ? 256.f : 2048.0f);
334 float avg_chan_bits = chan_bitrate * (j ? 128.0f : 1024.0f) /
ctx->avctx->sample_rate;
343 for (
g = 0;
g <
ctx->num_bands[j];
g++) {
346 coeffs[
g].
barks = (bark + prev) / 2.0;
349 for (
g = 0;
g <
ctx->num_bands[j] - 1;
g++) {
351 float bark_width = coeffs[
g+1].
barks - coeffs->
barks;
354 coeff->spread_low[1] =
ff_exp10(-bark_width * en_spread_low);
356 pe_min = bark_pe * bark_width;
357 minsnr =
exp2(pe_min / band_sizes[
g]) - 1.5f;
361 for (
g = 0;
g <
ctx->num_bands[j];
g++) {
362 minscale =
ath(start * line_to_frequency,
ATH_ADD);
363 for (
i = 1;
i < band_sizes[
g];
i++)
365 coeffs[
g].
ath = minscale - minath;
366 start += band_sizes[
g];
398 0xB6, 0x6C, 0xD8, 0xB2, 0x66, 0xC6, 0x96, 0x36, 0x36
406 const int16_t *audio,
412 int attack_ratio = br <= 16000 ? 18 : 10;
416 int next_type = pch->next_window_seq;
421 int switch_to_eight = 0;
422 float sum = 0.0, sum2 = 0.0;
425 for (
i = 0;
i < 8;
i++) {
426 for (j = 0; j < 128; j++) {
433 for (
i = 0;
i < 8;
i++) {
434 if (
s[
i] > pch->win_energy * attack_ratio) {
440 pch->win_energy = pch->win_energy*7/8 + sum2/64;
442 wi.window_type[1] = prev_type;
450 grouping = pch->next_grouping;
466 pch->next_window_seq = next_type;
468 for (
i = 0;
i < 3;
i++)
469 wi.window_type[
i] = prev_type;
480 for (
i = 0;
i < 8;
i++) {
481 if (!((grouping >>
i) & 1))
483 wi.grouping[lastgrp]++;
500 float clipped_pe, bit_save, bit_spend, bit_factor, fill_level, forgetful_min_pe;
503 ctx->fill_level = av_clip(
ctx->fill_level, 0,
size);
504 fill_level = av_clipf((
float)
ctx->fill_level /
size, clip_low, clip_high);
505 clipped_pe = av_clipf(pe,
ctx->pe.min,
ctx->pe.max);
506 bit_save = (fill_level + bitsave_add) * bitsave_slope;
507 assert(bit_save <= 0.3f && bit_save >= -0.05000001
f);
508 bit_spend = (fill_level + bitspend_add) * bitspend_slope;
509 assert(bit_spend <= 0.5f && bit_spend >= -0.1
f);
516 bit_factor = 1.0f - bit_save + ((bit_spend - bit_save) / (
ctx->pe.max -
ctx->pe.min)) * (clipped_pe -
ctx->pe.min);
524 ctx->pe.min =
FFMIN(pe, forgetful_min_pe);
530 ctx->frame_bits * bit_factor,
560 float thr_avg, reduction;
562 if(active_lines == 0.0)
565 thr_avg =
exp2f((
a - pe) / (4.0
f * active_lines));
566 reduction =
exp2f((
a - desired_pe) / (4.0
f * active_lines)) - thr_avg;
568 return FFMAX(reduction, 0.0
f);
574 float thr = band->
thr;
578 thr = sqrtf(thr) + reduction;
596 #ifndef calc_thr_3gpp
598 const uint8_t *band_sizes,
const float *coefs,
const int cutoff)
601 int start = 0, wstart = 0;
604 for (
g = 0;
g < num_bands;
g++) {
607 float form_factor = 0.0f;
610 if (wstart < cutoff) {
611 for (
i = 0;
i < band_sizes[
g];
i++) {
612 band->
energy += coefs[start+
i] * coefs[start+
i];
613 form_factor += sqrtf(fabs(coefs[start+
i]));
616 Temp = band->
energy > 0 ? sqrtf((
float)band_sizes[
g] / band->
energy) : 0;
618 band->
nz_lines = form_factor * sqrtf(Temp);
620 start += band_sizes[
g];
621 wstart += band_sizes[
g];
627 #ifndef psy_hp_filter
641 hpfsmpl[
i] = (sum1 + sum2) * 32768.0
f;
655 float desired_bits, desired_pe, delta_pe, reduction=
NAN, spread_en[128] = {0};
656 float a = 0.0f, active_lines = 0.0f, norm_fac = 0.0f;
657 float pe = pctx->chan_bitrate > 32000 ? 0.0f :
FFMAX(50.0
f, 100.0
f - pctx->chan_bitrate * 100.0f / 32000.0f);
658 const int num_bands =
ctx->num_bands[wi->num_windows == 8];
659 const uint8_t *band_sizes =
ctx->bands[wi->num_windows == 8];
660 AacPsyCoeffs *coeffs = pctx->psy_coef[wi->num_windows == 8];
663 const int cutoff = bandwidth * 2048 / wi->num_windows /
ctx->avctx->sample_rate;
666 calc_thr_3gpp(wi, num_bands, pch, band_sizes, coefs, cutoff);
669 for (
w = 0;
w < wi->num_windows*16;
w += 16) {
673 spread_en[0] =
bands[0].energy;
674 for (
g = 1;
g < num_bands;
g++) {
676 spread_en[
w+
g] =
FFMAX(
bands[
g].energy, spread_en[
w+
g-1] * coeffs[
g].spread_hi[1]);
678 for (
g = num_bands - 2;
g >= 0;
g--) {
680 spread_en[
w+
g] =
FFMAX(spread_en[
w+
g], spread_en[
w+
g+1] * coeffs[
g].spread_low[1]);
683 for (
g = 0;
g < num_bands;
g++) {
698 if (spread_en[
w+
g] * avoid_hole_thr > band->
energy || coeffs[
g].min_snr > 1.0f)
711 desired_pe = pe * (
ctx->avctx->global_quality ?
ctx->avctx->global_quality : 120) / (2 * 2.5
f * 120.0
f);
716 if (
ctx->bitres.bits > 0) {
721 pctx->pe.max =
FFMAX(pe, pctx->pe.max);
722 pctx->pe.min =
FFMIN(pe, pctx->pe.min);
731 if (
ctx->bitres.bits > 0)
736 ctx->bitres.alloc = desired_bits;
738 if (desired_pe < pe) {
740 for (
w = 0;
w < wi->num_windows*16;
w += 16) {
745 for (
g = 0;
g < num_bands;
g++) {
757 for (
i = 0;
i < 2;
i++) {
758 float pe_no_ah = 0.0f, desired_pe_no_ah;
759 active_lines =
a = 0.0f;
760 for (
w = 0;
w < wi->num_windows*16;
w += 16) {
761 for (
g = 0;
g < num_bands;
g++) {
765 pe_no_ah += band->
pe;
771 desired_pe_no_ah =
FFMAX(desired_pe - (pe - pe_no_ah), 0.0
f);
772 if (active_lines > 0.0
f)
776 for (
w = 0;
w < wi->num_windows*16;
w += 16) {
777 for (
g = 0;
g < num_bands;
g++) {
780 if (active_lines > 0.0
f)
783 if (band->
thr > 0.0f)
790 delta_pe = desired_pe - pe;
791 if (fabs(delta_pe) > 0.05
f * desired_pe)
795 if (pe < 1.15
f * desired_pe) {
797 norm_fac = 1.0f / norm_fac;
798 for (
w = 0;
w < wi->num_windows*16;
w += 16) {
799 for (
g = 0;
g < num_bands;
g++) {
803 float delta_sfb_pe = band->
norm_fac * norm_fac * delta_pe;
804 float thr = band->
thr;
816 while (pe > desired_pe &&
g--) {
817 for (
w = 0;
w < wi->num_windows*16;
w+= 16) {
830 for (
w = 0;
w < wi->num_windows*16;
w += 16) {
831 for (
g = 0;
g < num_bands;
g++) {
842 memcpy(pch->prev_band, pch->band,
sizeof(pch->band));
877 ctx->next_window_seq = blocktype;
881 const float *la,
int channel,
int prev_type)
886 int uselongblock = 1;
893 const float *pf = hpfsmpl;
908 energy_short[0] += energy_subshort[
i];
914 for (; pf < pfe; pf++)
915 p =
FFMAX(p, fabsf(*pf));
925 if (p > energy_subshort[
i + 1])
926 p = p / energy_subshort[
i + 1];
927 else if (energy_subshort[
i + 1] > p * 10.0
f)
928 p = energy_subshort[
i + 1] / (p * 10.0f);
937 if (attack_intensity[
i] > pch->attack_threshold)
945 const float u = energy_short[
i - 1];
946 const float v = energy_short[
i];
947 const float m =
FFMAX(
u, v);
949 if (
u < 1.7
f * v && v < 1.7
f *
u) {
950 if (
i == 1 && attacks[0] < attacks[
i])
955 att_sum += attacks[
i];
958 if (attacks[0] <= pch->prev_attack)
961 att_sum += attacks[0];
963 if (pch->prev_attack == 3 || att_sum) {
967 if (attacks[
i] && attacks[
i-1])
992 for (
i = 0;
i < 8;
i++) {
993 if (!((pch->next_grouping >>
i) & 1))
1005 for (
i = 0;
i < 9;
i++) {
1013 pch->prev_attack = attacks[8];
1020 .
name =
"3GPP TS 26.403-inspired model",
float spread_low[2]
spreading factor for high-to-low threshold spreading in long frame
static av_always_inline double ff_exp10(double x)
Compute 10^x for floating point values.
static av_cold int psy_3gpp_init(FFPsyContext *ctx)
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
static av_unused FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type)
Tell encoder which window types to use.
static float lame_calc_attack_threshold(int bitrate)
Calculate the ABR attack threshold from the above LAME psymodel table.
#define u(width, name, range_min, range_max)
#define PSY_PE_FORGET_SLOPE
static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)
float thr
energy threshold
static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch, const uint8_t *band_sizes, const float *coefs, const int cutoff)
#define PSY_3GPP_PE_TO_BITS(bits)
#define AV_CODEC_FLAG_QSCALE
Use fixed qscale.
static av_cold float calc_bark(float f)
Calculate Bark value for given line.
float nz_lines
number of non-zero spectral lines
#define PSY_3GPP_CLIP_LO_S
#define PSY_3GPP_AH_THR_LONG
void * av_mallocz_array(size_t nmemb, size_t size)
Allocate a memory block for an array with av_mallocz().
int window_shape
window shape (sine/KBD/whatever)
static float calc_pe_3gpp(AacPsyBand *band)
float min
minimum allowed PE for bit factor calculation
#define PSY_3GPP_SPEND_SLOPE_L
#define PSY_3GPP_THR_SPREAD_HI
constants for 3GPP AAC psychoacoustic model
int fill_level
bit reservoir fill level
float spread_hi[2]
spreading factor for low-to-high threshold spreading in long frame
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about quality
static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
psychoacoustic model frame type-dependent coefficients
static av_cold void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx)
LAME psy model specific initialization.
float st_lrm
short threshold for L, R, and M channels
#define PSY_3GPP_EN_SPREAD_HI_S
#define PSY_3GPP_SPEND_ADD_L
int flags
AV_CODEC_FLAG_*.
float barks
Bark value for each spectral band in long frame.
float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT *PSY_LAME_NUM_SUBBLOCKS]
windowing related information
int64_t bit_rate
Total stream bitrate in bit/s, 0 if not available.
float previous
allowed PE of the previous frame
const FFPsyModel ff_aac_psy_model
uint8_t num_ch
number of channels in this group
LAME psy model preset struct.
#define PSY_3GPP_CLIP_HI_S
information for single band used by 3GPP TS26.403-inspired psychoacoustic model
int global_quality
Global quality for codecs which cannot change it per frame.
int flags
Flags modifying the (de)muxer behaviour.
int quality
Quality to map the rest of the values to.
float pe_const
constant part of the PE calculation
3GPP TS26.403-inspired psychoacoustic model specific data
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if available on the CPU) and zero all the bytes of the block.
static float calc_reduction_3gpp(float a, float desired_pe, float pe, float active_lines)
static const uint8_t window_grouping[9]
window grouping information stored as bits (0 - new group, 1 - group continues)
#define AAC_BLOCK_SIZE_SHORT
short block size
static const float bands[]
static av_cold float ath(float f, float add)
Calculate ATH value for given frequency.
static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size, int short_window)
#define PSY_3GPP_AH_THR_SHORT
static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
static float iir_filter(int in, float state[2])
IIR filter used in block switching decision.
uint8_t pi<< 24) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8,(uint64_t)((*(const uint8_t *) pi - 0x80U))<< 56) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16,(*(const int16_t *) pi >>8)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16,(uint64_t)(*(const int16_t *) pi)<< 48) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32,(*(const int32_t *) pi >>24)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32,(uint64_t)(*(const int32_t *) pi)<< 32) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S64,(*(const int64_t *) pi >>56)+0x80) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S64, *(const int64_t *) pi *(1.0f/(UINT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S64, *(const int64_t *) pi *(1.0/(UINT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, llrintf(*(const float *) pi *(UINT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, 
av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, llrint(*(const double *) pi *(UINT64_C(1)<< 63))) #define FMT_PAIR_FUNC(out, in) static conv_func_type *const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB *AV_SAMPLE_FMT_NB]={ FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_DBL), 
FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S64), };static void cpy1(uint8_t **dst, const uint8_t **src, int len){ memcpy(*dst, *src, len);} static void cpy2(uint8_t **dst, const uint8_t **src, int len){ memcpy(*dst, *src, 2 *len);} static void cpy4(uint8_t **dst, const uint8_t **src, int len){ memcpy(*dst, *src, 4 *len);} static void cpy8(uint8_t **dst, const uint8_t **src, int len){ memcpy(*dst, *src, 8 *len);} AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, const int *ch_map, int flags) { AudioConvert *ctx;conv_func_type *f=fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt)+AV_SAMPLE_FMT_NB *av_get_packed_sample_fmt(in_fmt)];if(!f) return NULL;ctx=av_mallocz(sizeof(*ctx));if(!ctx) return NULL;if(channels==1){ in_fmt=av_get_planar_sample_fmt(in_fmt);out_fmt=av_get_planar_sample_fmt(out_fmt);} ctx->channels=channels;ctx->conv_f=f;ctx->ch_map=ch_map;if(in_fmt==AV_SAMPLE_FMT_U8||in_fmt==AV_SAMPLE_FMT_U8P) memset(ctx->silence, 0x80, sizeof(ctx->silence));if(out_fmt==in_fmt &&!ch_map) { switch(av_get_bytes_per_sample(in_fmt)){ case 1:ctx->simd_f=cpy1;break;case 2:ctx->simd_f=cpy2;break;case 4:ctx->simd_f=cpy4;break;case 8:ctx->simd_f=cpy8;break;} } if(HAVE_X86ASM &&HAVE_MMX) swri_audio_convert_init_x86(ctx, out_fmt, in_fmt, channels);if(ARCH_ARM) swri_audio_convert_init_arm(ctx, out_fmt, in_fmt, channels);if(ARCH_AARCH64) swri_audio_convert_init_aarch64(ctx, out_fmt, in_fmt, channels);return ctx;} void swri_audio_convert_free(AudioConvert **ctx) { av_freep(ctx);} int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len) { int ch;int off=0;const int os=(out->planar ? 
1 :out->ch_count) *out->bps;unsigned misaligned=0;av_assert0(ctx->channels==out->ch_count);if(ctx->in_simd_align_mask) { int planes=in->planar ? in->ch_count :1;unsigned m=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) in->ch[ch];misaligned|=m &ctx->in_simd_align_mask;} if(ctx->out_simd_align_mask) { int planes=out->planar ? out->ch_count :1;unsigned m=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) out->ch[ch];misaligned|=m &ctx->out_simd_align_mask;} if(ctx->simd_f &&!ctx->ch_map &&!misaligned){ off=len &~15;av_assert1(off >=0);av_assert1(off<=len);av_assert2(ctx->channels==SWR_CH_MAX||!in->ch[ctx->channels]);if(off >0){ if(out->planar==in->planar){ int planes=out->planar ? out->ch_count :1;for(ch=0;ch< planes;ch++){ ctx->simd_f(out-> ch ch
static const PsyLamePreset psy_vbr_map[]
LAME psy model preset table for constant quality.
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
static const PsyLamePreset psy_abr_map[]
LAME psy model preset table for ABR.
int64_t bit_rate
the average bitrate
static av_cold void psy_3gpp_end(FFPsyContext *apc)
#define PSY_3GPP_BITS_TO_PE(bits)
single band psychoacoustic information
int grouping[8]
window grouping (for e.g. AAC)
float max
maximum allowed PE for bit factor calculation
float iir_state[2]
hi-pass IIR filter state
AacPsyCoeffs psy_coef[2][64]
float thr_quiet
threshold in quiet
#define AAC_BLOCK_SIZE_LONG
long block size
AacPsyBand band[128]
bands information
static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr, float reduction)
float ath
absolute threshold of hearing per bands
float active_lines
number of active spectral lines
#define AAC_NUM_BLOCKS_SHORT
number of blocks in a short sequence
#define PSY_LAME_FIR_LEN
LAME psy model FIR order.
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
#define PSY_3GPP_CLIP_LO_L
int avoid_holes
hole avoidance flag
#define PSY_3GPP_THR_SPREAD_LOW
#define PSY_3GPP_SAVE_ADD_S
#define PSY_3GPP_SPEND_ADD_S
int channels
number of audio channels
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
struct AacPsyContext::@9 pe
static const float psy_fir_coeffs[]
LAME psy model FIR coefficient table.
float attack_threshold
attack threshold for this channel
#define i(width, name, range_min, range_max)
float norm_fac
normalization factor for linearization
#define PSY_3GPP_CLIP_HI_L
float pe
perceptual entropy
psychoacoustic information for an arbitrary group of channels
enum WindowSequence next_window_seq
window sequence to be used in the next frame
float win_energy
sliding average of channel energy
single/pair channel context for psychoacoustic model
float correction
PE correction factor.
FFmpeg Automated Testing Environment ************************************Introduction Using FATE from your FFmpeg source directory Submitting the results to the FFmpeg result aggregation server Uploading new samples to the fate suite FATE makefile targets and variables Makefile targets Makefile variables Examples Introduction **************FATE is an extended regression suite on the client side and a means for results aggregation and presentation on the server side The first part of this document explains how you can use FATE from your FFmpeg source directory to test your ffmpeg binary The second part describes how you can run FATE to submit the results to FFmpeg’s FATE server In any way you can have a look at the publicly viewable FATE results by visiting this as it can be seen if some test on some platform broke with their recent contribution This usually happens on the platforms the developers could not test on The second part of this document describes how you can run FATE to submit your results to FFmpeg’s FATE server If you want to submit your results be sure to check that your combination of OS and compiler is not already listed on the above mentioned website In the third part you can find a comprehensive listing of FATE makefile targets and variables Using FATE from your FFmpeg source directory **********************************************If you want to run FATE on your machine you need to have the samples in place You can get the samples via the build target fate rsync Use this command from the top level source this will cause FATE to fail NOTE To use a custom wrapper to run the pass ‘ target exec’ to ‘configure’ or set the TARGET_EXEC Make variable Submitting the results to the FFmpeg result aggregation server ****************************************************************To submit your results to the server you should run fate through the shell script ‘tests fate sh’ from the FFmpeg sources This script needs to be invoked with a configuration file as 
its first argument tests fate sh path to fate_config A configuration file template with comments describing the individual configuration variables can be found at ‘doc fate_config sh template’ Create a configuration that suits your based on the configuration template The ‘slot’ configuration variable can be any string that is not yet but it is suggested that you name it adhering to the following pattern ‘ARCH OS COMPILER COMPILER VERSION’ The configuration file itself will be sourced in a shell therefore all shell features may be used This enables you to setup the environment as you need it for your build For your first test runs the ‘fate_recv’ variable should be empty or commented out This will run everything as normal except that it will omit the submission of the results to the server The following files should be present in $workdir as specified in the configuration it may help to try out the ‘ssh’ command with one or more ‘ v’ options You should get detailed output concerning your SSH configuration and the authentication process The only thing left is to automate the execution of the fate sh script and the synchronisation of the samples directory Uploading new samples to the fate suite *****************************************If you need a sample uploaded send a mail to samples request This is for developers who have an account on the fate suite server If you upload new please make sure they are as small as space on each network bandwidth and so on benefit from smaller test cases Also keep in mind older checkouts use existing sample that means in practice generally do not remove or overwrite files as it likely would break older checkouts or releases Also all needed samples for a commit should be ideally before the push If you need an account for frequently uploading samples or you wish to help others by doing that send a mail to ffmpeg devel rsync vauL Duo ug o o w
void * model_priv_data
psychoacoustic model implementation private data
#define PSY_3GPP_SAVE_SLOPE_S
#define PSY_3GPP_EN_SPREAD_HI_L1
uint8_t next_grouping
stored grouping scheme for the next frame (in case of 8 short window sequence)
main external API structure.
#define PSY_LAME_NUM_SUBBLOCKS
Number of sub-blocks in each short block.
float global_quality
normalized global quality taken from avctx
static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, const float *coefs, const FFPsyWindowInfo *wi)
Calculate band thresholds as suggested in 3GPP TS26.403.
codec-specific psychoacoustic model implementation
int frame_bits
average bits per frame
FFPsyChannelGroup * ff_psy_find_group(FFPsyContext *ctx, int channel)
Determine what group a channel belongs to.
static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)
#define PSY_3GPP_EN_SPREAD_LOW_L
int chan_bitrate
bitrate per channel
#define PSY_3GPP_SAVE_SLOPE_L
static const double coeff[2][5]
#define PSY_3GPP_SPEND_SLOPE_S
#define FF_QP2LAMBDA
factor to convert from H.263 QP to lambda
#define PSY_3GPP_EN_SPREAD_LOW_S
int prev_attack
attack value for the last short block in the previous sequence
context used by psychoacoustic model
AacPsyBand prev_band[128]
bands information from the previous frame
int num_windows
number of windows in a frame
#define PSY_3GPP_SAVE_ADD_L