FFmpeg  4.2.3
vf_vmafmotion.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
3  * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Calculate VMAF Motion score.
25  */
26 
27 #include "libavutil/opt.h"
28 #include "libavutil/pixdesc.h"
29 #include "avfilter.h"
30 #include "formats.h"
31 #include "internal.h"
32 #include "vmaf_motion.h"
33 
34 #define BIT_SHIFT 15
35 
/*
 * Symmetric 5-tap smoothing kernel in floating point. The taps add up to
 * roughly 1.0; they are scaled by (1 << BIT_SHIFT) and rounded to integers
 * before being used by the convolution routines.
 */
static const float FILTER_5[5] = {
    0.054488685, 0.244201342, 0.402619947, 0.244201342, 0.054488685
};
43 
44 typedef struct VMAFMotionContext {
45  const AVClass *class;
47  FILE *stats_file;
50 
51 #define OFFSET(x) offsetof(VMAFMotionContext, x)
52 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
53 
54 static const AVOption vmafmotion_options[] = {
55  {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
56  { NULL }
57 };
58 
59 AVFILTER_DEFINE_CLASS(vmafmotion);
60 
/**
 * Sum of absolute differences between two 16-bit sample planes.
 *
 * @param img1         first plane
 * @param img2         second plane
 * @param w            number of samples compared per row
 * @param h            number of rows compared
 * @param _img1_stride distance between rows of img1, in bytes
 * @param _img2_stride distance between rows of img2, in bytes
 * @return the accumulated |img1 - img2| over the w x h area
 */
static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
{
    /* Byte strides are converted to element strides once, up front. */
    ptrdiff_t step1 = _img1_stride / sizeof(*img1);
    ptrdiff_t step2 = _img2_stride / sizeof(*img2);
    const uint16_t *row1 = img1;
    const uint16_t *row2 = img2;
    uint64_t total = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < w; x++) {
            /* Samples promote to int, so the difference cannot wrap. */
            int diff = row1[x] - row2[x];
            total += abs(diff);
        }
        row1 += step1;
        row2 += step2;
    }

    return total;
}
79 
80 static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
81  uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
82  ptrdiff_t _dst_stride)
83 {
84  ptrdiff_t src_stride = _src_stride / sizeof(*src);
85  ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
86  int radius = filt_w / 2;
87  int borders_left = radius;
88  int borders_right = w - (filt_w - radius);
89  int i, j, k;
90  int sum = 0;
91 
92  for (i = 0; i < h; i++) {
93  for (j = 0; j < borders_left; j++) {
94  sum = 0;
95  for (k = 0; k < filt_w; k++) {
96  int j_tap = FFABS(j - radius + k);
97  if (j_tap >= w) {
98  j_tap = w - (j_tap - w + 1);
99  }
100  sum += filter[k] * src[i * src_stride + j_tap];
101  }
102  dst[i * dst_stride + j] = sum >> BIT_SHIFT;
103  }
104 
105  for (j = borders_left; j < borders_right; j++) {
106  int sum = 0;
107  for (k = 0; k < filt_w; k++) {
108  sum += filter[k] * src[i * src_stride + j - radius + k];
109  }
110  dst[i * dst_stride + j] = sum >> BIT_SHIFT;
111  }
112 
113  for (j = borders_right; j < w; j++) {
114  sum = 0;
115  for (k = 0; k < filt_w; k++) {
116  int j_tap = FFABS(j - radius + k);
117  if (j_tap >= w) {
118  j_tap = w - (j_tap - w + 1);
119  }
120  sum += filter[k] * src[i * src_stride + j_tap];
121  }
122  dst[i * dst_stride + j] = sum >> BIT_SHIFT;
123  }
124  }
125 }
126 
/*
 * conv_y_fn(type, bits) defines convolution_y_<bits>bit(): the vertical pass
 * of the separable filter. Input samples are read as `type`, each output is
 * the weighted sum of filt_w vertically neighbouring samples shifted right by
 * `bits`, and the result is stored as uint16_t. Rows near the top and bottom
 * reflect out-of-range tap rows back into the picture. Strides are in bytes.
 */
#define conv_y_fn(type, bits) \
static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
                                      const uint8_t *_src, uint16_t *dst, \
                                      int w, int h, ptrdiff_t _src_stride, \
                                      ptrdiff_t _dst_stride) \
{ \
    const type *src = (const type *) _src; \
    const ptrdiff_t src_step = _src_stride / sizeof(*src); \
    const ptrdiff_t dst_step = _dst_stride / sizeof(*dst); \
    const int radius       = filt_w / 2; \
    const int top_end      = radius; \
    const int bottom_start = h - (filt_w - radius); \
    int y, x, tap; \
    \
    /* Top rows: taps above row 0 are mirrored via FFABS(). */ \
    for (y = 0; y < top_end; y++) { \
        for (x = 0; x < w; x++) { \
            int acc = 0; \
            for (tap = 0; tap < filt_w; tap++) { \
                int row = FFABS(y - radius + tap); \
                if (row >= h) { \
                    row = h - (row - h + 1); \
                } \
                acc += filter[tap] * src[row * src_step + x]; \
            } \
            dst[y * dst_step + x] = acc >> bits; \
        } \
    } \
    /* Interior rows: tap rows are used directly, without mirroring. */ \
    for (y = top_end; y < bottom_start; y++) { \
        for (x = 0; x < w; x++) { \
            int acc = 0; \
            for (tap = 0; tap < filt_w; tap++) { \
                acc += filter[tap] * src[(y - radius + tap) * src_step + x]; \
            } \
            dst[y * dst_step + x] = acc >> bits; \
        } \
    } \
    /* Bottom rows: taps at or beyond row h are mirrored back. */ \
    for (y = bottom_start; y < h; y++) { \
        for (x = 0; x < w; x++) { \
            int acc = 0; \
            for (tap = 0; tap < filt_w; tap++) { \
                int row = FFABS(y - radius + tap); \
                if (row >= h) { \
                    row = h - (row - h + 1); \
                } \
                acc += filter[tap] * src[row * src_step + x]; \
            } \
            dst[y * dst_step + x] = acc >> bits; \
        } \
    } \
}

conv_y_fn(uint8_t, 8);
conv_y_fn(uint16_t, 10);
181 
182 static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
184  dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit;
185  dsp->sad = image_sad;
186 }
187 
189 {
190  double score;
191 
192  s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
193  s->width, s->height, ref->linesize[0], s->stride);
194  s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
195  s->width, s->height, s->stride, s->stride);
196 
197  if (!s->nb_frames) {
198  score = 0.0;
199  } else {
200  uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
201  s->width, s->height, s->stride, s->stride);
202  // the output score is always normalized to 8 bits
203  score = (double) (sad * 1.0 / (s->width * s->height << (BIT_SHIFT - 8)));
204  }
205 
206  FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
207  s->nb_frames++;
208  s->motion_sum += score;
209 
210  return score;
211 }
212 
213 static void set_meta(AVDictionary **metadata, const char *key, float d)
214 {
215  char value[128];
216  snprintf(value, sizeof(value), "%0.2f", d);
217  av_dict_set(metadata, key, value, 0);
218 }
219 
221 {
222  VMAFMotionContext *s = ctx->priv;
223  double score;
224 
225  score = ff_vmafmotion_process(&s->data, ref);
226  set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
227  if (s->stats_file) {
228  fprintf(s->stats_file,
229  "n:%"PRId64" motion:%0.2lf\n", s->data.nb_frames, score);
230  }
231 }
232 
233 
235  int w, int h, enum AVPixelFormat fmt)
236 {
237  size_t data_sz;
238  int i;
240 
241  s->width = w;
242  s->height = h;
243  s->stride = FFALIGN(w * sizeof(uint16_t), 32);
244 
245  data_sz = (size_t) s->stride * h;
246  if (!(s->blur_data[0] = av_malloc(data_sz)) ||
247  !(s->blur_data[1] = av_malloc(data_sz)) ||
248  !(s->temp_data = av_malloc(data_sz))) {
249  return AVERROR(ENOMEM);
250  }
251 
252  for (i = 0; i < 5; i++) {
253  s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
254  }
255 
256  vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);
257 
258  return 0;
259 }
260 
262 {
263  AVFilterFormats *fmts_list = NULL;
264  int format, ret;
265 
266  for (format = 0; av_pix_fmt_desc_get(format); format++) {
269  (desc->flags & AV_PIX_FMT_FLAG_PLANAR || desc->nb_components == 1) &&
270  (!(desc->flags & AV_PIX_FMT_FLAG_BE) == !HAVE_BIGENDIAN || desc->comp[0].depth == 8) &&
271  (desc->comp[0].depth == 8 || desc->comp[0].depth == 10) &&
272  (ret = ff_add_format(&fmts_list, format)) < 0)
273  return ret;
274  }
275 
276  return ff_set_common_formats(ctx, fmts_list);
277 }
278 
279 static int config_input_ref(AVFilterLink *inlink)
280 {
281  AVFilterContext *ctx = inlink->dst;
282  VMAFMotionContext *s = ctx->priv;
283 
284  return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
285  ctx->inputs[0]->h, ctx->inputs[0]->format);
286 }
287 
289 {
290  av_free(s->blur_data[0]);
291  av_free(s->blur_data[1]);
292  av_free(s->temp_data);
293 
294  return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
295 }
296 
297 static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
298 {
299  AVFilterContext *ctx = inlink->dst;
300  do_vmafmotion(ctx, ref);
301  return ff_filter_frame(ctx->outputs[0], ref);
302 }
303 
305 {
306  VMAFMotionContext *s = ctx->priv;
307 
308  if (s->stats_file_str) {
309  if (!strcmp(s->stats_file_str, "-")) {
310  s->stats_file = stdout;
311  } else {
312  s->stats_file = fopen(s->stats_file_str, "w");
313  if (!s->stats_file) {
314  int err = AVERROR(errno);
315  char buf[128];
316  av_strerror(err, buf, sizeof(buf));
317  av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
318  s->stats_file_str, buf);
319  return err;
320  }
321  }
322  }
323 
324  return 0;
325 }
326 
328 {
329  VMAFMotionContext *s = ctx->priv;
330  double avg_motion = ff_vmafmotion_uninit(&s->data);
331 
332  if (s->data.nb_frames > 0) {
333  av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
334  }
335 
336  if (s->stats_file && s->stats_file != stdout)
337  fclose(s->stats_file);
338 }
339 
340 static const AVFilterPad vmafmotion_inputs[] = {
341  {
342  .name = "reference",
343  .type = AVMEDIA_TYPE_VIDEO,
344  .filter_frame = filter_frame,
345  .config_props = config_input_ref,
346  },
347  { NULL }
348 };
349 
350 static const AVFilterPad vmafmotion_outputs[] = {
351  {
352  .name = "default",
353  .type = AVMEDIA_TYPE_VIDEO,
354  },
355  { NULL }
356 };
357 
359  .name = "vmafmotion",
360  .description = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
361  .init = init,
362  .uninit = uninit,
363  .query_formats = query_formats,
364  .priv_size = sizeof(VMAFMotionContext),
365  .priv_class = &vmafmotion_class,
366  .inputs = vmafmotion_inputs,
367  .outputs = vmafmotion_outputs,
368 };
#define AV_PIX_FMT_FLAG_PAL
Pixel format has a palette in data[1], values are indexes in this palette.
Definition: pixdesc.h:132
#define NULL
Definition: coverity.c:32
static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
static const char * format[]
Definition: af_aiir.c:338
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2522
This structure describes decoded (raw) audio or video data.
Definition: frame.h:295
AVOption.
Definition: opt.h:246
static const AVFilterPad vmafmotion_inputs[]
const char * fmt
Definition: avisynth_c.h:861
Main libavfilter public API header.
uint16_t * blur_data[2]
Definition: vmaf_motion.h:47
AVFILTER_DEFINE_CLASS(vmafmotion)
const char * desc
Definition: nvenc.c:68
uint64_t(* sad)(const uint16_t *img1, const uint16_t *img2, int w, int h, ptrdiff_t img1_stride, ptrdiff_t img2_stride)
Definition: vmaf_motion.h:30
static av_cold void uninit(AVFilterContext *ctx)
static uint8_t img2[WIDTH *HEIGHT]
Definition: motion.c:43
const char * key
static const AVFilterPad vmafmotion_outputs[]
#define src
Definition: vp8dsp.c:254
static uint8_t img1[WIDTH *HEIGHT]
Definition: motion.c:42
const char * name
Pad name.
Definition: internal.h:60
#define OFFSET(x)
Definition: vf_vmafmotion.c:51
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:346
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1080
static void filter(int16_t *output, ptrdiff_t out_stride, int16_t *low, ptrdiff_t low_stride, int16_t *high, ptrdiff_t high_stride, int len, int clip)
Definition: cfhd.c:153
AVComponentDescriptor comp[4]
Parameters that describe how pixels are packed.
Definition: pixdesc.h:117
uint8_t
#define av_cold
Definition: attributes.h:82
#define av_malloc(s)
AVOptions.
uint64_t nb_frames
Definition: vmaf_motion.h:50
VMAFMotionData data
Definition: vf_vmafmotion.c:46
AVDictionary * metadata
metadata.
Definition: frame.h:581
static int query_formats(AVFilterContext *ctx)
#define FFALIGN(x, a)
Definition: macros.h:48
#define av_log(a,...)
void(* convolution_x)(const uint16_t *filter, int filt_w, const uint16_t *src, uint16_t *dst, int w, int h, ptrdiff_t src_stride, ptrdiff_t dst_stride)
Definition: vmaf_motion.h:32
#define FLAGS
Definition: vf_vmafmotion.c:52
A filter pad used for either input or output.
Definition: internal.h:54
static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp)
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:568
#define AVERROR(e)
Definition: error.h:43
#define AV_PIX_FMT_FLAG_RGB
The pixel format contains RGB-like data (as opposed to YUV/grayscale).
Definition: pixdesc.h:148
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void * priv
private data for use by the filter
Definition: avfilter.h:353
static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w, int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
Definition: vf_vmafmotion.c:61
#define AV_PIX_FMT_FLAG_HWACCEL
Pixel format is an HW accelerated format.
Definition: pixdesc.h:140
int ff_add_format(AVFilterFormats **avff, int64_t fmt)
Add fmt to the list of media formats contained in *avff.
Definition: formats.c:337
static av_cold int init(AVFilterContext *ctx)
uint64_t flags
Combination of AV_PIX_FMT_FLAG_...
Definition: pixdesc.h:106
double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
uint8_t nb_components
The number of components each pixel has, (1-4)
Definition: pixdesc.h:83
uint8_t w
Definition: llviddspenc.c:38
AVFormatContext * ctx
Definition: movenc.c:48
static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:72
#define s(width, name)
Definition: cbs_vp9.c:257
double ff_vmafmotion_uninit(VMAFMotionData *s)
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
if(ret< 0)
Definition: vf_mcdeint.c:279
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
#define BIT_SHIFT
Definition: vf_vmafmotion.c:34
#define AV_LOG_INFO
Standard information.
Definition: log.h:187
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:326
#define abs(x)
Definition: cuda_runtime.h:35
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:81
void * buf
Definition: avisynth_c.h:766
int av_dict_set(AVDictionary **pm, const char *key, const char *value, int flags)
Set the given entry in *pm, overwriting an existing entry.
Definition: dict.c:70
static int config_input_ref(AVFilterLink *inlink)
double value
Definition: eval.c:98
Describe the class of an AVClass context structure.
Definition: log.h:67
Filter definition.
Definition: avfilter.h:144
const char * name
Filter name.
Definition: avfilter.h:148
ptrdiff_t stride
Definition: vmaf_motion.h:46
#define snprintf
Definition: snprintf.h:34
AVFilter ff_vf_vmafmotion
#define AV_PIX_FMT_FLAG_BITSTREAM
All values of a component are bit-wise packed end to end.
Definition: pixdesc.h:136
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
uint16_t filter[5]
Definition: vmaf_motion.h:43
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:309
int ff_vmafmotion_init(VMAFMotionData *s, int w, int h, enum AVPixelFormat fmt)
static void set_meta(AVDictionary **metadata, const char *key, float d)
int av_strerror(int errnum, char *errbuf, size_t errbuf_size)
Put a description of the AVERROR code errnum in errbuf.
Definition: error.c:105
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:107
static const AVOption vmafmotion_options[]
Definition: vf_vmafmotion.c:54
uint16_t * temp_data
Definition: vmaf_motion.h:48
#define AV_PIX_FMT_FLAG_BE
Pixel format is big-endian.
Definition: pixdesc.h:128
#define av_free(p)
void(* convolution_y)(const uint16_t *filter, int filt_w, const uint8_t *src, uint16_t *dst, int w, int h, ptrdiff_t src_stride, ptrdiff_t dst_stride)
Definition: vmaf_motion.h:35
static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src, uint16_t *dst, int w, int h, ptrdiff_t _src_stride, ptrdiff_t _dst_stride)
Definition: vf_vmafmotion.c:80
A list of supported formats for one end of a filter link.
Definition: formats.h:64
#define lrint
Definition: tablegen.h:53
An instance of a filter.
Definition: avfilter.h:338
static const float FILTER_5[5]
Definition: vf_vmafmotion.c:36
VMAFMotionDSPContext vmafdsp
Definition: vmaf_motion.h:51
#define FFSWAP(type, a, b)
Definition: common.h:99
#define HAVE_BIGENDIAN
Definition: config.h:199
internal API functions
int depth
Number of bits in the component.
Definition: pixdesc.h:58
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
#define AV_PIX_FMT_FLAG_PLANAR
At least one pixel component is not in the first data plane.
Definition: pixdesc.h:144
#define conv_y_fn(type, bits)
double motion_sum
Definition: vmaf_motion.h:49