FFmpeg  4.2.3
af_rubberband.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include <rubberband/rubberband-c.h>
20 
22 #include "libavutil/common.h"
23 #include "libavutil/opt.h"
24 
25 #include "audio.h"
26 #include "avfilter.h"
27 #include "filters.h"
28 #include "formats.h"
29 #include "internal.h"
30 
31 typedef struct RubberBandContext {
32  const AVClass *class;
33  RubberBandState rbs;
34 
35  double tempo, pitch;
38  int64_t nb_samples_out;
39  int64_t nb_samples_in;
40  int64_t first_pts;
43 
44 #define OFFSET(x) offsetof(RubberBandContext, x)
45 #define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
46 
47 static const AVOption rubberband_options[] = {
48  { "tempo", "set tempo scale factor", OFFSET(tempo), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0.01, 100, A },
49  { "pitch", "set pitch scale factor", OFFSET(pitch), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0.01, 100, A },
50  { "transients", "set transients", OFFSET(transients), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "transients" },
51  { "crisp", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionTransientsCrisp}, 0, 0, A, "transients" },
52  { "mixed", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionTransientsMixed}, 0, 0, A, "transients" },
53  { "smooth", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionTransientsSmooth}, 0, 0, A, "transients" },
54  { "detector", "set detector", OFFSET(detector), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "detector" },
55  { "compound", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionDetectorCompound}, 0, 0, A, "detector" },
56  { "percussive", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionDetectorPercussive}, 0, 0, A, "detector" },
57  { "soft", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionDetectorSoft}, 0, 0, A, "detector" },
58  { "phase", "set phase", OFFSET(phase), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "phase" },
59  { "laminar", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPhaseLaminar}, 0, 0, A, "phase" },
60  { "independent", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPhaseIndependent}, 0, 0, A, "phase" },
61  { "window", "set window", OFFSET(window), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "window" },
62  { "standard", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionWindowStandard}, 0, 0, A, "window" },
63  { "short", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionWindowShort}, 0, 0, A, "window" },
64  { "long", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionWindowLong}, 0, 0, A, "window" },
65  { "smoothing", "set smoothing", OFFSET(smoothing), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "smoothing" },
66  { "off", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionSmoothingOff}, 0, 0, A, "smoothing" },
67  { "on", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionSmoothingOn}, 0, 0, A, "smoothing" },
68  { "formant", "set formant", OFFSET(formant), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "formant" },
69  { "shifted", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionFormantShifted}, 0, 0, A, "formant" },
70  { "preserved", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionFormantPreserved}, 0, 0, A, "formant" },
71  { "pitchq", "set pitch quality", OFFSET(opitch), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "pitch" },
72  { "quality", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPitchHighQuality}, 0, 0, A, "pitch" },
73  { "speed", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPitchHighSpeed}, 0, 0, A, "pitch" },
74  { "consistency", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPitchHighConsistency}, 0, 0, A, "pitch" },
75  { "channels", "set channels", OFFSET(channels), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "channels" },
76  { "apart", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionChannelsApart}, 0, 0, A, "channels" },
77  { "together", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionChannelsTogether}, 0, 0, A, "channels" },
78  { NULL },
79 };
80 
81 AVFILTER_DEFINE_CLASS(rubberband);
82 
84 {
85  RubberBandContext *s = ctx->priv;
86 
87  if (s->rbs)
88  rubberband_delete(s->rbs);
89 }
90 
92 {
95  static const enum AVSampleFormat sample_fmts[] = {
98  };
99  int ret;
100 
101  layouts = ff_all_channel_counts();
102  if (!layouts)
103  return AVERROR(ENOMEM);
104  ret = ff_set_common_channel_layouts(ctx, layouts);
105  if (ret < 0)
106  return ret;
107 
108  formats = ff_make_format_list(sample_fmts);
109  if (!formats)
110  return AVERROR(ENOMEM);
111  ret = ff_set_common_formats(ctx, formats);
112  if (ret < 0)
113  return ret;
114 
115  formats = ff_all_samplerates();
116  if (!formats)
117  return AVERROR(ENOMEM);
118  return ff_set_common_samplerates(ctx, formats);
119 }
120 
121 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
122 {
123  RubberBandContext *s = inlink->dst->priv;
124  AVFilterLink *outlink = inlink->dst->outputs[0];
125  AVFrame *out;
126  int ret = 0, nb_samples;
127 
128  if (s->first_pts == AV_NOPTS_VALUE)
129  s->first_pts = in->pts;
130 
131  rubberband_process(s->rbs, (const float *const *)in->data, in->nb_samples, ff_outlink_get_status(inlink));
132  s->nb_samples_in += in->nb_samples;
133 
134  nb_samples = rubberband_available(s->rbs);
135  if (nb_samples > 0) {
136  out = ff_get_audio_buffer(outlink, nb_samples);
137  if (!out) {
138  av_frame_free(&in);
139  return AVERROR(ENOMEM);
140  }
141  out->pts = s->first_pts + av_rescale_q(s->nb_samples_out,
142  (AVRational){ 1, outlink->sample_rate },
143  outlink->time_base);
144  nb_samples = rubberband_retrieve(s->rbs, (float *const *)out->data, nb_samples);
145  out->nb_samples = nb_samples;
146  ret = ff_filter_frame(outlink, out);
148  }
149 
150  av_frame_free(&in);
151  return ret < 0 ? ret : nb_samples;
152 }
153 
154 static int config_input(AVFilterLink *inlink)
155 {
156  AVFilterContext *ctx = inlink->dst;
157  RubberBandContext *s = ctx->priv;
158  int opts = s->transients|s->detector|s->phase|s->window|
159  s->smoothing|s->formant|s->opitch|s->channels|
160  RubberBandOptionProcessRealTime;
161 
162  if (s->rbs)
163  rubberband_delete(s->rbs);
164  s->rbs = rubberband_new(inlink->sample_rate, inlink->channels, opts, 1. / s->tempo, s->pitch);
165  if (!s->rbs)
166  return AVERROR(ENOMEM);
167 
168  s->nb_samples = rubberband_get_samples_required(s->rbs);
170 
171  return 0;
172 }
173 
175 {
176  AVFilterLink *inlink = ctx->inputs[0];
177  AVFilterLink *outlink = ctx->outputs[0];
178  RubberBandContext *s = ctx->priv;
179  AVFrame *in = NULL;
180  int ret;
181 
182  FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
183 
184  ret = ff_inlink_consume_samples(inlink, s->nb_samples, s->nb_samples, &in);
185  if (ret < 0)
186  return ret;
187  if (ret > 0) {
188  ret = filter_frame(inlink, in);
189  if (ret != 0)
190  return ret;
191  }
192 
193  FF_FILTER_FORWARD_STATUS(inlink, outlink);
194  FF_FILTER_FORWARD_WANTED(outlink, inlink);
195 
196  return FFERROR_NOT_READY;
197 }
198 
199 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
200  char *res, int res_len, int flags)
201 {
202  RubberBandContext *s = ctx->priv;
203 
204  if (!strcmp(cmd, "tempo")) {
205  double arg;
206 
207  sscanf(args, "%lf", &arg);
208  if (arg < 0.01 || arg > 100) {
209  av_log(ctx, AV_LOG_ERROR,
210  "Tempo scale factor '%f' out of range\n", arg);
211  return AVERROR(EINVAL);
212  }
213  rubberband_set_time_ratio(s->rbs, 1. / arg);
214  }
215 
216  if (!strcmp(cmd, "pitch")) {
217  double arg;
218 
219  sscanf(args, "%lf", &arg);
220  if (arg < 0.01 || arg > 100) {
221  av_log(ctx, AV_LOG_ERROR,
222  "Pitch scale factor '%f' out of range\n", arg);
223  return AVERROR(EINVAL);
224  }
225  rubberband_set_pitch_scale(s->rbs, arg);
226  }
227 
228  return 0;
229 }
230 
231 static const AVFilterPad rubberband_inputs[] = {
232  {
233  .name = "default",
234  .type = AVMEDIA_TYPE_AUDIO,
235  .config_props = config_input,
236  },
237  { NULL }
238 };
239 
240 static const AVFilterPad rubberband_outputs[] = {
241  {
242  .name = "default",
243  .type = AVMEDIA_TYPE_AUDIO,
244  },
245  { NULL }
246 };
247 
249  .name = "rubberband",
250  .description = NULL_IF_CONFIG_SMALL("Apply time-stretching and pitch-shifting."),
251  .query_formats = query_formats,
252  .priv_size = sizeof(RubberBandContext),
253  .priv_class = &rubberband_class,
254  .uninit = uninit,
255  .activate = activate,
256  .inputs = rubberband_inputs,
257  .outputs = rubberband_outputs,
259 };
static const AVFilterPad rubberband_outputs[]
float, planar
Definition: samplefmt.h:69
int64_t nb_samples_out
Definition: af_rubberband.c:38
#define NULL
Definition: coverity.c:32
int ff_set_common_channel_layouts(AVFilterContext *ctx, AVFilterChannelLayouts *layouts)
A helper for query_formats() which sets all links to the same list of channel layouts/sample rates...
Definition: formats.c:549
This structure describes decoded (raw) audio or video data.
Definition: frame.h:295
AVOption.
Definition: opt.h:246
static int config_input(AVFilterLink *inlink)
#define A
Definition: af_rubberband.c:45
Main libavfilter public API header.
channels
Definition: aptx.c:30
#define FFERROR_NOT_READY
Filters implementation helper functions.
Definition: filters.h:34
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:283
const char * name
Pad name.
Definition: internal.h:60
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:346
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1080
#define av_cold
Definition: attributes.h:82
AVOptions.
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:388
#define OFFSET(x)
Definition: af_rubberband.c:44
#define av_log(a,...)
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
Definition: filters.h:199
A filter pad used for either input or output.
Definition: internal.h:54
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
Definition: mathematics.c:142
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:568
static int activate(AVFilterContext *ctx)
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
Definition: audio.c:86
#define AVERROR(e)
Definition: error.h:43
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:202
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void * priv
private data for use by the filter
Definition: avfilter.h:353
const char * arg
Definition: jacosubdec.c:66
static const AVOption rubberband_options[]
Definition: af_rubberband.c:47
static const AVFilterPad rubberband_inputs[]
AVDictionary * opts
Definition: movenc.c:50
#define FF_FILTER_FORWARD_WANTED(outlink, inlink)
Forward the frame_wanted_out flag from an output link to an input link.
Definition: filters.h:254
audio channel layout utility functions
const char AVS_Value args
Definition: avisynth_c.h:872
AVFormatContext * ctx
Definition: movenc.c:48
#define s(width, name)
Definition: cbs_vp9.c:257
static int query_formats(AVFilterContext *ctx)
Definition: af_rubberband.c:91
RubberBandState rbs
Definition: af_rubberband.c:33
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
A list of supported channel layouts.
Definition: formats.h:85
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
int ff_inlink_consume_samples(AVFilterLink *link, unsigned min, unsigned max, AVFrame **rframe)
Take samples from the link's FIFO and update the link's stats.
Definition: avfilter.c:1500
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
Describe the class of an AVClass context structure.
Definition: log.h:67
Filter definition.
Definition: avfilter.h:144
int ff_outlink_get_status(AVFilterLink *link)
Get the status on an output link.
Definition: avfilter.c:1630
Rational number (pair of numerator and denominator).
Definition: rational.h:58
static av_cold void uninit(AVFilterContext *ctx)
Definition: af_rubberband.c:83
const char * name
Filter name.
Definition: avfilter.h:148
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
enum MovChannelLayoutTag * layouts
Definition: mov_chan.c:434
#define FF_FILTER_FORWARD_STATUS(inlink, outlink)
Acknowledge the status on an input link and forward it to an output link.
Definition: filters.h:226
AVFilterFormats * ff_all_samplerates(void)
Definition: formats.c:395
#define flags(name, subs,...)
Definition: cbs_av1.c:564
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:309
AVFilter ff_af_rubberband
common internal and external API header
AVFILTER_DEFINE_CLASS(rubberband)
A list of supported formats for one end of a filter link.
Definition: formats.h:64
An instance of a filter.
Definition: avfilter.h:338
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:701
FILE * out
Definition: movenc.c:54
formats
Definition: signature.h:48
internal API functions
AVFilterChannelLayouts * ff_all_channel_counts(void)
Construct an AVFilterChannelLayouts coding for any channel layout, with known or unknown disposition...
Definition: formats.c:410
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:361
int ff_set_common_samplerates(AVFilterContext *ctx, AVFilterFormats *samplerates)
Definition: formats.c:556
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:248