FFmpeg  4.3
vf_overlay_cuda.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Yaroslav Pogrebnyak <yyyaroslav@gmail.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * Overlay one video on top of another using cuda hardware acceleration
24  */
25 
26 #include "libavutil/log.h"
27 #include "libavutil/mem.h"
28 #include "libavutil/opt.h"
29 #include "libavutil/pixdesc.h"
30 #include "libavutil/hwcontext.h"
32 #include "libavutil/cuda_check.h"
33 
34 #include "avfilter.h"
35 #include "framesync.h"
36 #include "internal.h"
37 
/* Run a CUDA driver call x through FF_CUDA_CHECK_DL. Expects a variable named ctx in scope whose hwctx->internal->cuda_dl is usable. */
38 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, ctx->hwctx->internal->cuda_dl, x)
/* Integer division of a by b rounded up (assumes a >= 0 and b > 0). */
39 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
40 
/* Thread-block dimensions handed to cuLaunchKernel; the launch grid is sized with DIV_UP over these. */
41 #define BLOCK_X 32
42 #define BLOCK_Y 16
43 
44 static const enum AVPixelFormat supported_main_formats[] = {
48 };
49 
55 };
56 
57 /**
58  * OverlayCUDAContext
59  */
60 typedef struct OverlayCUDAContext {
61  const AVClass *class;
62 
65 
67 
68  CUcontext cu_ctx;
69  CUmodule cu_module;
70  CUfunction cu_func;
71  CUstream cu_stream;
72 
74 
77 
79 
80 /**
81  * Helper to find out if provided format is supported by filter
82  */
83 static int format_is_supported(const enum AVPixelFormat formats[], enum AVPixelFormat fmt)
84 {
85  for (int i = 0; formats[i] != AV_PIX_FMT_NONE; i++)
86  if (formats[i] == fmt)
87  return 1;
88  return 0;
89 }
90 
91 /**
92  * Helper checks if we can process main and overlay pixel formats
93  */
94 static int formats_match(const enum AVPixelFormat format_main, const enum AVPixelFormat format_overlay) {
95  switch(format_main) {
96  case AV_PIX_FMT_NV12:
97  return format_overlay == AV_PIX_FMT_NV12;
98  case AV_PIX_FMT_YUV420P:
99  return format_overlay == AV_PIX_FMT_YUV420P ||
100  format_overlay == AV_PIX_FMT_YUVA420P;
101  default:
102  return 0;
103  }
104 }
105 
106 /**
107  * Call the overlay kernel for a single plane
108  */
111  int x_position, int y_position,
112  uint8_t* main_data, int main_linesize,
113  int main_width, int main_height,
114  uint8_t* overlay_data, int overlay_linesize,
115  int overlay_width, int overlay_height,
116  uint8_t* alpha_data, int alpha_linesize,
117  int alpha_adj_x, int alpha_adj_y) {
118 
119  CudaFunctions *cu = ctx->hwctx->internal->cuda_dl;
120 
/*
 * Array of pointers to the kernel arguments, handed to cuLaunchKernel below.
 * NOTE(review): this listing jumps from line 121 to 123, so one entry line
 * (presumably the x/y position pair) is not visible here; confirm against
 * the real file before relying on the argument order.
 */
121  void* kernel_args[] = {
123  &main_data, &main_linesize,
124  &overlay_data, &overlay_linesize,
125  &overlay_width, &overlay_height,
126  &alpha_data, &alpha_linesize,
127  &alpha_adj_x, &alpha_adj_y,
128  };
129 
/*
 * Launch ctx->cu_func on ctx->cu_stream. The grid is DIV_UP(main_width, BLOCK_X)
 * by DIV_UP(main_height, BLOCK_Y) blocks of BLOCK_X x BLOCK_Y threads, so it
 * covers the whole main plane; the 0 is the dynamic shared-memory size.
 * The CHECK_CU result is returned to the caller unchanged.
 */
130  return CHECK_CU(cu->cuLaunchKernel(
131  ctx->cu_func,
132  DIV_UP(main_width, BLOCK_X), DIV_UP(main_height, BLOCK_Y), 1,
133  BLOCK_X, BLOCK_Y, 1,
134  0, ctx->cu_stream, kernel_args, NULL));
135 }
136 
137 /**
138  * Blend the overlay picture over the main picture
139  */
/*
 * Fetches the current main/overlay frame pair from the frame sync, runs the
 * overlay kernel on each plane of the main frame, and forwards the main frame
 * to the first output link. Returns a negative error code on failure,
 * otherwise whatever ff_filter_frame returns.
 * NOTE(review): the signature line and the four call-opener lines (listing
 * lines 179, 191, 201, 209) are missing from this extract.
 */
141 {
142  int ret;
143 
144  AVFilterContext *avctx = fs->parent;
145  OverlayCUDAContext *ctx = avctx->priv;
146  AVFilterLink *outlink = avctx->outputs[0];
147 
148  CudaFunctions *cu = ctx->hwctx->internal->cuda_dl;
149  CUcontext dummy, cuda_ctx = ctx->hwctx->cuda_ctx;
150 
151  AVFrame *input_main, *input_overlay;
152 
/* Remember the device CUDA context in the filter state. */
153  ctx->cu_ctx = cuda_ctx;
154 
155  // read main and overlay frames from inputs
156  ret = ff_framesync_dualinput_get(fs, &input_main, &input_overlay);
157  if (ret < 0)
158  return ret;
159 
/*
 * NOTE(review): this path returns without freeing input_main, unlike the
 * error paths below. Confirm whether ff_framesync_dualinput_get hands the
 * main frame to the caller, and whether a missing overlay frame should pass
 * the main frame through instead of reporting AVERROR_BUG.
 */
160  if (!input_main || !input_overlay)
161  return AVERROR_BUG;
162 
/* The kernel writes into the main frame, so it must be writable. */
163  ret = av_frame_make_writable(input_main);
164  if (ret < 0) {
165  av_frame_free(&input_main);
166  return ret;
167  }
168 
169  // push cuda context
170 
171  ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
172  if (ret < 0) {
173  av_frame_free(&input_main);
174  return ret;
175  }
176 
177  // overlay first plane
178 
/*
 * Arguments of the first-plane call: plane 0 of both frames at full size,
 * with overlay plane 3 passed as the alpha plane and alpha adjust factors 1, 1.
 * NOTE(review): the call opener is not visible, so it cannot be seen here
 * whether the return value of the kernel call is checked.
 */
180  ctx->x_position, ctx->y_position,
181  input_main->data[0], input_main->linesize[0],
182  input_main->width, input_main->height,
183  input_overlay->data[0], input_overlay->linesize[0],
184  input_overlay->width, input_overlay->height,
185  input_overlay->data[3], input_overlay->linesize[3], 1, 1);
186 
187  // overlay the remaining planes depending on the overlay pixel format
188 
189  switch(ctx->in_format_overlay) {
190  case AV_PIX_FMT_NV12:
/* Plane 1 at full width and half height; y position halved; no alpha plane is passed. */
192  ctx->x_position, ctx->y_position / 2,
193  input_main->data[1], input_main->linesize[1],
194  input_main->width, input_main->height / 2,
195  input_overlay->data[1], input_overlay->linesize[1],
196  input_overlay->width, input_overlay->height / 2,
197  0, 0, 0, 0);
198  break;
199  case AV_PIX_FMT_YUV420P:
200  case AV_PIX_FMT_YUVA420P:
/* Plane 1 at half width and half height; position halved; alpha plane passed with adjust factors 2, 2. */
202  ctx->x_position / 2 , ctx->y_position / 2,
203  input_main->data[1], input_main->linesize[1],
204  input_main->width / 2, input_main->height / 2,
205  input_overlay->data[1], input_overlay->linesize[1],
206  input_overlay->width / 2, input_overlay->height / 2,
207  input_overlay->data[3], input_overlay->linesize[3], 2, 2);
208 
/* Plane 2, same geometry as plane 1. */
210  ctx->x_position / 2 , ctx->y_position / 2,
211  input_main->data[2], input_main->linesize[2],
212  input_main->width / 2, input_main->height / 2,
213  input_overlay->data[2], input_overlay->linesize[2],
214  input_overlay->width / 2, input_overlay->height / 2,
215  input_overlay->data[3], input_overlay->linesize[3], 2, 2);
216  break;
217  default:
/* Unknown overlay format: drop the main frame, restore the CUDA context stack, and fail. */
218  av_log(ctx, AV_LOG_ERROR, "Passed unsupported overlay pixel format\n");
219  av_frame_free(&input_main);
220  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
221  return AVERROR_BUG;
222  }
223 
/* Pop the context pushed above; the result of the pop is not propagated. */
224  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
225 
226  return ff_filter_frame(outlink, input_main);
227 }
228 
229 /**
230  * Initialize overlay_cuda
231  */
/*
 * NOTE(review): listing line 235 is absent from this extract, so whatever
 * this function does with ctx is not visible here. The visible part only
 * fetches the private context and returns 0.
 */
233 {
234  OverlayCUDAContext* ctx = avctx->priv;
236 
237  return 0;
238 }
239 
240 /**
241  * Uninitialize overlay_cuda
242  */
244 {
245  OverlayCUDAContext* ctx = avctx->priv;
246 
247  ff_framesync_uninit(&ctx->fs);
248 
249  if (ctx->hwctx && ctx->cu_module) {
250  CUcontext dummy;
251  CudaFunctions *cu = ctx->hwctx->internal->cuda_dl;
252  CHECK_CU(cu->cuCtxPushCurrent(ctx->cu_ctx));
253  CHECK_CU(cu->cuModuleUnload(ctx->cu_module));
254  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
255  }
256 }
257 
258 /**
259  * Activate overlay_cuda
260  */
262 {
263  OverlayCUDAContext *ctx = avctx->priv;
264 
265  return ff_framesync_activate(&ctx->fs);
266 }
267 
268 /**
269  * Query formats
270  */
/*
 * Builds a format list from pixel_formats and applies it to all links of the
 * filter through ff_set_common_formats, whose result is returned.
 * NOTE(review): the entries of pixel_formats (listing line 274) are missing
 * from this extract.
 */
272 {
273  static const enum AVPixelFormat pixel_formats[] = {
275  };
276 
/* NOTE(review): the list pointer is passed on unchecked; confirm that ff_set_common_formats handles a NULL list. */
277  AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
278 
279  return ff_set_common_formats(avctx, pix_fmts);
280 }
281 
282 /**
283  * Configure output
284  */
/*
 * Validates the software formats of both hardware inputs, stores the CUDA
 * device context and stream in the filter state, loads the PTX module and
 * looks up the Overlay_Cuda kernel, then sets up and configures the dual-input
 * frame sync. Returns a negative error code on failure, otherwise the result
 * of ff_framesync_configure.
 * NOTE(review): listing lines 311, 313, 325, 327, 333 and 335 (the if
 * conditions and the trailing av_log arguments) are missing from this extract.
 */
286 {
287 
/* PTX image of the kernel; declared here and defined outside this file. */
288  extern char vf_overlay_cuda_ptx[];
289 
290  int err;
291  AVFilterContext* avctx = outlink->src;
292  OverlayCUDAContext* ctx = avctx->priv;
293 
/*
 * NOTE(review): hw_frames_ctx is dereferenced here, before any check. If
 * either input link has no hw_frames_ctx this reads through NULL, and the
 * later frames_ctx checks test the data pointer rather than the buffer
 * reference itself.
 */
294  AVFilterLink *inlink = avctx->inputs[0];
295  AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
296 
297  AVFilterLink *inlink_overlay = avctx->inputs[1];
298  AVHWFramesContext *frames_ctx_overlay = (AVHWFramesContext*)inlink_overlay->hw_frames_ctx->data;
299 
300  CUcontext dummy, cuda_ctx;
301  CudaFunctions *cu;
302 
303  // check main input formats
304 
305  if (!frames_ctx) {
306  av_log(ctx, AV_LOG_ERROR, "No hw context provided on main input\n");
307  return AVERROR(EINVAL);
308  }
309 
/* Record the software pixel format of the main input; the check that follows is only partly visible. */
310  ctx->in_format_main = frames_ctx->sw_format;
312  av_log(ctx, AV_LOG_ERROR, "Unsupported main input format: %s\n",
314  return AVERROR(ENOSYS);
315  }
316 
317  // check overlay input formats
318 
319  if (!frames_ctx_overlay) {
320  av_log(ctx, AV_LOG_ERROR, "No hw context provided on overlay input\n");
321  return AVERROR(EINVAL);
322  }
323 
/* Record the software pixel format of the overlay input; the check that follows is only partly visible. */
324  ctx->in_format_overlay = frames_ctx_overlay->sw_format;
326  av_log(ctx, AV_LOG_ERROR, "Unsupported overlay input format: %s\n",
328  return AVERROR(ENOSYS);
329  }
330 
331  // check we can overlay pictures with those pixel formats
332 
334  av_log(ctx, AV_LOG_ERROR, "Can't overlay %s on %s \n",
336  return AVERROR(EINVAL);
337  }
338 
339  // initialize
340 
/* Take the CUDA device context and stream from the main input, and use its time base for the frame sync. */
341  ctx->hwctx = frames_ctx->device_ctx->hwctx;
342  cuda_ctx = ctx->hwctx->cuda_ctx;
343  ctx->fs.time_base = inlink->time_base;
344 
345  ctx->cu_stream = ctx->hwctx->stream;
346 
/* The output shares the frames context of the main input. NOTE(review): the av_buffer_ref result is not checked for NULL. */
347  outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx);
348 
349  // load functions
350 
351  cu = ctx->hwctx->internal->cuda_dl;
352 
353  err = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
354  if (err < 0) {
355  return err;
356  }
357 
358  err = CHECK_CU(cu->cuModuleLoadData(&ctx->cu_module, vf_overlay_cuda_ptx));
359  if (err < 0) {
360  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
361  return err;
362  }
363 
/* On failure here the loaded module stays in ctx->cu_module; it is not unloaded on this path. */
364  err = CHECK_CU(cu->cuModuleGetFunction(&ctx->cu_func, ctx->cu_module, "Overlay_Cuda"));
365  if (err < 0) {
366  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
367  return err;
368  }
369 
370  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
371 
372  // init dual input
373 
374  err = ff_framesync_init_dualinput(&ctx->fs, avctx);
375  if (err < 0) {
376  return err;
377  }
378 
379  return ff_framesync_configure(&ctx->fs);
380 }
381 
382 
/* Byte offset of field x inside OverlayCUDAContext, used to bind options to fields. */
383 #define OFFSET(x) offsetof(OverlayCUDAContext, x)
/* Option flags shared by every entry of the table below. */
384 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
385 
/*
 * Option table of the filter, terminated by a NULL entry.
 * NOTE(review): listing line 392 (the middle of the eof_action entry) is
 * missing from this extract.
 */
386 static const AVOption overlay_cuda_options[] = {
/* Overlay position; both default to 0 and accept the full int range. */
387  { "x", "Overlay x position",
388  OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, .flags = FLAGS },
389  { "y", "Overlay y position",
390  OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, .flags = FLAGS },
/* eof_action and its three named constants (repeat, endall, pass), all in the eof_action unit. */
391  { "eof_action", "Action to take when encountering EOF from secondary input ",
393  EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, "eof_action" },
394  { "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" },
395  { "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" },
396  { "pass", "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_PASS }, .flags = FLAGS, "eof_action" },
/* Boolean switches stored directly in the embedded frame sync structure. */
397  { "shortest", "force termination when the shortest input terminates", OFFSET(fs.opt_shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
398  { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(fs.opt_repeatlast), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
399  { NULL },
400 };
401 
403 
405  {
406  .name = "main",
407  .type = AVMEDIA_TYPE_VIDEO,
408  },
409  {
410  .name = "overlay",
411  .type = AVMEDIA_TYPE_VIDEO,
412  },
413  { NULL }
414 };
415 
417  {
418  .name = "default",
419  .type = AVMEDIA_TYPE_VIDEO,
420  .config_props = &overlay_cuda_config_output,
421  },
422  { NULL }
423 };
424 
426  .name = "overlay_cuda",
427  .description = NULL_IF_CONFIG_SMALL("Overlay one video on top of another using CUDA"),
428  .priv_size = sizeof(OverlayCUDAContext),
429  .priv_class = &overlay_cuda_class,
434  .inputs = overlay_cuda_inputs,
435  .outputs = overlay_cuda_outputs,
436  .preinit = overlay_cuda_framesync_preinit,
437  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
438 };
#define NULL
Definition: coverity.c:32
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:365
#define FLAGS
FRAMESYNC_DEFINE_CLASS(overlay_cuda, OverlayCUDAContext, fs)
This structure describes decoded (raw) audio or video data.
Definition: frame.h:300
AVOption.
Definition: opt.h:246
Main libavfilter public API header.
Memory handling functions.
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
AVCUDADeviceContextInternal * internal
static enum AVPixelFormat supported_overlay_formats[]
int ff_framesync_configure(FFFrameSync *fs)
Configure a frame sync structure.
Definition: framesync.c:117
#define BLOCK_X
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:283
const char * name
Pad name.
Definition: internal.h:60
AVFilterContext * parent
Parent filter context.
Definition: framesync.h:152
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:346
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1075
static int overlay_cuda_call_kernel(OverlayCUDAContext *ctx, int x_position, int y_position, uint8_t *main_data, int main_linesize, int main_width, int main_height, uint8_t *overlay_data, int overlay_linesize, int overlay_width, int overlay_height, uint8_t *alpha_data, int alpha_linesize, int alpha_adj_x, int alpha_adj_y)
Call the overlay kernel for a single plane.
planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
Definition: pixfmt.h:101
uint8_t
#define av_cold
Definition: attributes.h:88
static av_cold int uninit(AVCodecContext *avctx)
Definition: crystalhd.c:279
AVOptions.
int ff_framesync_init_dualinput(FFFrameSync *fs, AVFilterContext *parent)
Initialize a frame sync structure for dualinput.
Definition: framesync.c:351
#define BLOCK_Y
int ff_framesync_dualinput_get(FFFrameSync *fs, AVFrame **f0, AVFrame **f1)
Definition: framesync.c:369
static const AVFilterPad overlay_cuda_inputs[]
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
Definition: hwcontext.h:92
#define av_log(a,...)
A filter pad used for either input or output.
Definition: internal.h:54
static int format_is_supported(const enum AVPixelFormat formats[], enum AVPixelFormat fmt)
Helper to find out if provided format is supported by filter.
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
int width
Definition: frame.h:358
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:600
void ff_framesync_uninit(FFFrameSync *fs)
Free all memory currently allocated.
Definition: framesync.c:283
Frame sync structure.
Definition: framesync.h:146
#define AVERROR(e)
Definition: error.h:43
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:203
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void * priv
private data for use by the filter
Definition: avfilter.h:353
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
int opt_shortest
Definition: framesync.h:206
int ff_framesync_activate(FFFrameSync *fs)
Examine the frames in the filter's input and try to produce output.
Definition: framesync.c:334
int(* on_event)(struct FFFrameSync *fs)
Callback called when a frame event is ready.
Definition: framesync.h:172
int opt_repeatlast
Definition: framesync.h:205
OverlayCUDAContext.
static const AVOption overlay_cuda_options[]
enum AVPixelFormat in_format_main
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Definition: hwcontext.h:149
AVFormatContext * ctx
Definition: movenc.c:48
AVRational time_base
Time base for the output events.
Definition: framesync.h:162
static int activate(AVFilterContext *ctx)
Definition: af_adeclick.c:622
FFmpeg internal API for CUDA.
int dummy
Definition: motion.c:64
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
HW acceleration through CUDA.
Definition: pixfmt.h:235
static av_cold int overlay_cuda_init(AVFilterContext *avctx)
Initialize overlay_cuda.
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
static int overlay_cuda_activate(AVFilterContext *avctx)
Activate overlay_cuda.
enum AVPixelFormat in_format_overlay
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:331
uint8_t * data
The data buffer.
Definition: buffer.h:89
#define AVERROR_BUG
Internal bug, also see AVERROR_BUG2.
Definition: error.h:50
static av_cold void overlay_cuda_uninit(AVFilterContext *avctx)
Uninitialize overlay_cuda.
This struct is allocated as AVHWDeviceContext.hwctx.
Describe the class of an AVClass context structure.
Definition: log.h:67
Filter definition.
Definition: avfilter.h:144
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:124
const char * name
Filter name.
Definition: avfilter.h:148
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
#define OFFSET(x)
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:275
int av_frame_make_writable(AVFrame *frame)
Ensure that the frame data is writable, avoiding data copy if possible.
Definition: frame.c:612
static const AVFilterPad overlay_cuda_outputs[]
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:314
int opt_eof_action
Definition: framesync.h:207
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
static int query_formats(AVFilterContext *ctx)
Definition: aeval.c:244
AVBufferRef * av_buffer_ref(AVBufferRef *buf)
Create a new reference to an AVBuffer.
Definition: buffer.c:93
#define DIV_UP(a, b)
static int overlay_cuda_blend(FFFrameSync *fs)
Blend the overlay picture over the main picture.
static int formats_match(const enum AVPixelFormat format_main, const enum AVPixelFormat format_overlay)
Helper checks if we can process main and overlay pixel formats.
A list of supported formats for one end of a filter link.
Definition: formats.h:64
static int overlay_cuda_config_output(AVFilterLink *outlink)
Configure output.
An instance of a filter.
Definition: avfilter.h:338
AVCUDADeviceContext * hwctx
#define CHECK_CU(x)
int height
Definition: frame.h:358
formats
Definition: signature.h:48
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2465
internal API functions
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:222
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
AVFilter ff_vf_overlay_cuda
static int overlay_cuda_query_formats(AVFilterContext *avctx)
Query formats.
static enum AVPixelFormat supported_main_formats[]