FFmpeg  4.2.2
hwcontext_cuda.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "buffer.h"
20 #include "common.h"
21 #include "hwcontext.h"
22 #include "hwcontext_internal.h"
24 #include "cuda_check.h"
25 #include "mem.h"
26 #include "pixdesc.h"
27 #include "pixfmt.h"
28 #include "imgutils.h"
29 
30 #define CUDA_FRAME_ALIGNMENT 256
31 
32 typedef struct CUDAFramesContext {
35 
36 static const enum AVPixelFormat supported_formats[] = {
45 };
46 
47 #define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x)
48 
50  const void *hwconfig,
51  AVHWFramesConstraints *constraints)
52 {
53  int i;
54 
56  sizeof(*constraints->valid_sw_formats));
57  if (!constraints->valid_sw_formats)
58  return AVERROR(ENOMEM);
59 
60  for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
61  constraints->valid_sw_formats[i] = supported_formats[i];
62  constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE;
63 
64  constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
65  if (!constraints->valid_hw_formats)
66  return AVERROR(ENOMEM);
67 
68  constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA;
69  constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
70 
71  return 0;
72 }
73 
74 static void cuda_buffer_free(void *opaque, uint8_t *data)
75 {
76  AVHWFramesContext *ctx = opaque;
77  AVHWDeviceContext *device_ctx = ctx->device_ctx;
78  AVCUDADeviceContext *hwctx = device_ctx->hwctx;
79  CudaFunctions *cu = hwctx->internal->cuda_dl;
80 
81  CUcontext dummy;
82 
83  CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
84 
85  CHECK_CU(cu->cuMemFree((CUdeviceptr)data));
86 
87  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
88 }
89 
90 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
91 {
92  AVHWFramesContext *ctx = opaque;
93  AVHWDeviceContext *device_ctx = ctx->device_ctx;
94  AVCUDADeviceContext *hwctx = device_ctx->hwctx;
95  CudaFunctions *cu = hwctx->internal->cuda_dl;
96 
97  AVBufferRef *ret = NULL;
98  CUcontext dummy = NULL;
99  CUdeviceptr data;
100  int err;
101 
102  err = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
103  if (err < 0)
104  return NULL;
105 
106  err = CHECK_CU(cu->cuMemAlloc(&data, size));
107  if (err < 0)
108  goto fail;
109 
110  ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
111  if (!ret) {
112  CHECK_CU(cu->cuMemFree(data));
113  goto fail;
114  }
115 
116 fail:
117  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
118  return ret;
119 }
120 
122 {
123  CUDAFramesContext *priv = ctx->internal->priv;
124  int i;
125 
126  for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
127  if (ctx->sw_format == supported_formats[i])
128  break;
129  }
130  if (i == FF_ARRAY_ELEMS(supported_formats)) {
131  av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
133  return AVERROR(ENOSYS);
134  }
135 
137 
138  if (!ctx->pool) {
140  if (size < 0)
141  return size;
142 
144  if (!ctx->internal->pool_internal)
145  return AVERROR(ENOMEM);
146  }
147 
148  return 0;
149 }
150 
152 {
153  int res;
154 
155  frame->buf[0] = av_buffer_pool_get(ctx->pool);
156  if (!frame->buf[0])
157  return AVERROR(ENOMEM);
158 
159  res = av_image_fill_arrays(frame->data, frame->linesize, frame->buf[0]->data,
160  ctx->sw_format, ctx->width, ctx->height, CUDA_FRAME_ALIGNMENT);
161  if (res < 0)
162  return res;
163 
164  // YUV420P is a special case.
165  // Nvenc expects the U/V planes in swapped order from how ffmpeg expects them, also chroma is half-aligned
166  if (ctx->sw_format == AV_PIX_FMT_YUV420P) {
167  frame->linesize[1] = frame->linesize[2] = frame->linesize[0] / 2;
168  frame->data[2] = frame->data[1];
169  frame->data[1] = frame->data[2] + frame->linesize[2] * ctx->height / 2;
170  }
171 
172  frame->format = AV_PIX_FMT_CUDA;
173  frame->width = ctx->width;
174  frame->height = ctx->height;
175 
176  return 0;
177 }
178 
181  enum AVPixelFormat **formats)
182 {
183  enum AVPixelFormat *fmts;
184 
185  fmts = av_malloc_array(2, sizeof(*fmts));
186  if (!fmts)
187  return AVERROR(ENOMEM);
188 
189  fmts[0] = ctx->sw_format;
190  fmts[1] = AV_PIX_FMT_NONE;
191 
192  *formats = fmts;
193 
194  return 0;
195 }
196 
198  const AVFrame *src)
199 {
200  CUDAFramesContext *priv = ctx->internal->priv;
201  AVHWDeviceContext *device_ctx = ctx->device_ctx;
202  AVCUDADeviceContext *hwctx = device_ctx->hwctx;
203  CudaFunctions *cu = hwctx->internal->cuda_dl;
204 
205  CUcontext dummy;
206  int i, ret;
207 
208  ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
209  if (ret < 0)
210  return ret;
211 
212  for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
213  CUDA_MEMCPY2D cpy = {
214  .srcMemoryType = CU_MEMORYTYPE_DEVICE,
215  .dstMemoryType = CU_MEMORYTYPE_HOST,
216  .srcDevice = (CUdeviceptr)src->data[i],
217  .dstHost = dst->data[i],
218  .srcPitch = src->linesize[i],
219  .dstPitch = dst->linesize[i],
220  .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
221  .Height = src->height >> (i ? priv->shift_height : 0),
222  };
223 
224  ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
225  if (ret < 0)
226  goto exit;
227  }
228 
229  ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream));
230  if (ret < 0)
231  goto exit;
232 
233 exit:
234  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
235 
236  return 0;
237 }
238 
240  const AVFrame *src)
241 {
242  CUDAFramesContext *priv = ctx->internal->priv;
243  AVHWDeviceContext *device_ctx = ctx->device_ctx;
244  AVCUDADeviceContext *hwctx = device_ctx->hwctx;
245  CudaFunctions *cu = hwctx->internal->cuda_dl;
246 
247  CUcontext dummy;
248  int i, ret;
249 
250  ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
251  if (ret < 0)
252  return ret;
253 
254  for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
255  CUDA_MEMCPY2D cpy = {
256  .srcMemoryType = CU_MEMORYTYPE_HOST,
257  .dstMemoryType = CU_MEMORYTYPE_DEVICE,
258  .srcHost = src->data[i],
259  .dstDevice = (CUdeviceptr)dst->data[i],
260  .srcPitch = src->linesize[i],
261  .dstPitch = dst->linesize[i],
262  .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
263  .Height = src->height >> (i ? priv->shift_height : 0),
264  };
265 
266  ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
267  if (ret < 0)
268  goto exit;
269  }
270 
271 exit:
272  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
273 
274  return 0;
275 }
276 
277 static void cuda_device_uninit(AVHWDeviceContext *device_ctx)
278 {
279  AVCUDADeviceContext *hwctx = device_ctx->hwctx;
280 
281  if (hwctx->internal) {
282  CudaFunctions *cu = hwctx->internal->cuda_dl;
283  if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
284  CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
285  hwctx->cuda_ctx = NULL;
286  }
287  cuda_free_functions(&hwctx->internal->cuda_dl);
288  }
289 
290  av_freep(&hwctx->internal);
291 }
292 
294 {
295  AVCUDADeviceContext *hwctx = ctx->hwctx;
296  int ret;
297 
298  if (!hwctx->internal) {
299  hwctx->internal = av_mallocz(sizeof(*hwctx->internal));
300  if (!hwctx->internal)
301  return AVERROR(ENOMEM);
302  }
303 
304  if (!hwctx->internal->cuda_dl) {
305  ret = cuda_load_functions(&hwctx->internal->cuda_dl, ctx);
306  if (ret < 0) {
307  av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n");
308  goto error;
309  }
310  }
311 
312  return 0;
313 
314 error:
315  cuda_device_uninit(ctx);
316  return ret;
317 }
318 
319 static int cuda_device_create(AVHWDeviceContext *device_ctx,
320  const char *device,
321  AVDictionary *opts, int flags)
322 {
323  AVCUDADeviceContext *hwctx = device_ctx->hwctx;
324  CudaFunctions *cu;
325  CUdevice cu_device;
326  CUcontext dummy;
327  int ret, device_idx = 0;
328 
329  if (device)
330  device_idx = strtol(device, NULL, 0);
331 
332  if (cuda_device_init(device_ctx) < 0)
333  goto error;
334 
335  cu = hwctx->internal->cuda_dl;
336 
337  ret = CHECK_CU(cu->cuInit(0));
338  if (ret < 0)
339  goto error;
340 
341  ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx));
342  if (ret < 0)
343  goto error;
344 
345  ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device));
346  if (ret < 0)
347  goto error;
348 
349  // Setting stream to NULL will make functions automatically use the default CUstream
350  hwctx->stream = NULL;
351 
352  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
353 
354  hwctx->internal->is_allocated = 1;
355 
356  return 0;
357 
358 error:
359  cuda_device_uninit(device_ctx);
360  return AVERROR_UNKNOWN;
361 }
362 
365  .name = "CUDA",
366 
367  .device_hwctx_size = sizeof(AVCUDADeviceContext),
368  .frames_priv_size = sizeof(CUDAFramesContext),
369 
370  .device_create = cuda_device_create,
371  .device_init = cuda_device_init,
372  .device_uninit = cuda_device_uninit,
373  .frames_get_constraints = cuda_frames_get_constraints,
374  .frames_init = cuda_frames_init,
375  .frames_get_buffer = cuda_get_buffer,
376  .transfer_get_formats = cuda_transfer_get_formats,
377  .transfer_data_to = cuda_transfer_data_to,
378  .transfer_data_from = cuda_transfer_data_from,
379 
380  .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
381 };
This struct aggregates all the (hardware/vendor-specific) "high-level" state, i.e.
Definition: hwcontext.h:60
#define NULL
Definition: coverity.c:32
int size
This structure describes decoded (raw) audio or video data.
Definition: frame.h:295
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
misc image utilities
Memory handling functions.
AVBufferRef * buf[AV_NUM_DATA_POINTERS]
AVBuffer references backing the data for this frame.
Definition: frame.h:486
AVCUDADeviceContextInternal * internal
int av_image_fill_arrays(uint8_t *dst_data[4], int dst_linesize[4], const uint8_t *src, enum AVPixelFormat pix_fmt, int width, int height, int align)
Setup the data pointers and linesizes based on the specified image parameters and the provided array...
Definition: imgutils.c:411
int width
The allocated dimensions of the frames in this pool.
Definition: hwcontext.h:228
static int cuda_frames_get_constraints(AVHWDeviceContext *ctx, const void *hwconfig, AVHWFramesConstraints *constraints)
static int cuda_frames_init(AVHWFramesContext *ctx)
#define src
Definition: vp8dsp.c:254
#define AV_PIX_FMT_P016
Definition: pixfmt.h:437
#define AV_PIX_FMT_P010
Definition: pixfmt.h:436
AVBufferPool * pool_internal
enum AVHWDeviceType type
uint8_t
static enum AVPixelFormat supported_formats[]
static AVFrame * frame
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
Definition: hwcontext.h:91
const char data[16]
Definition: mxf.c:91
static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
#define AV_PIX_FMT_YUV444P16
Definition: pixfmt.h:400
#define CHECK_CU(x)
#define av_log(a,...)
static int cuda_device_create(AVHWDeviceContext *device_ctx, const char *device, AVDictionary *opts, int flags)
static void cuda_buffer_free(void *opaque, uint8_t *data)
int av_image_get_buffer_size(enum AVPixelFormat pix_fmt, int width, int height, int align)
Return the size in bytes of the amount of data required to store an image with the given parameters...
Definition: imgutils.c:431
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
int width
Definition: frame.h:353
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
#define AVERROR(e)
Definition: error.h:43
int av_pix_fmt_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift)
Utility function to access log2_chroma_w log2_chroma_h from the pixel format AVPixFmtDescriptor.
Definition: pixdesc.c:2550
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:236
#define AV_PIX_FMT_0BGR32
Definition: pixfmt.h:365
AVBufferRef * av_buffer_create(uint8_t *data, int size, void(*free)(void *opaque, uint8_t *data), void *opaque, int flags)
Create an AVBuffer from an existing array.
Definition: buffer.c:28
static int cuda_device_init(AVHWDeviceContext *ctx)
#define fail()
Definition: checkasm.h:120
AVDictionary * opts
Definition: movenc.c:50
static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
#define FFMIN(a, b)
Definition: common.h:96
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Definition: hwcontext.h:148
static int cuda_transfer_get_formats(AVHWFramesContext *ctx, enum AVHWFrameTransferDirection dir, enum AVPixelFormat **formats)
static AVBufferRef * cuda_pool_alloc(void *opaque, int size)
AVFormatContext * ctx
Definition: movenc.c:48
FFmpeg internal API for CUDA.
int dummy
Definition: motion.c:64
HW acceleration through CUDA.
Definition: pixfmt.h:235
AVBufferPool * av_buffer_pool_init2(int size, void *opaque, AVBufferRef *(*alloc)(void *opaque, int size), void(*pool_free)(void *opaque))
Allocate and initialize a buffer pool with a more complex allocator.
Definition: buffer.c:218
static void error(const char *err)
#define FF_ARRAY_ELEMS(a)
#define CUDA_FRAME_ALIGNMENT
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames...
Definition: frame.h:368
This struct describes the constraints on hardware frames attached to a given device with a hardware-s...
Definition: hwcontext.h:432
static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:326
const HWContextType ff_hwcontext_type_cuda
uint8_t * data
The data buffer.
Definition: buffer.h:89
This struct is allocated as AVHWDeviceContext.hwctx.
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:123
refcounted data buffer API
enum AVPixelFormat * valid_hw_formats
A list of possible values for format in the hw_frames_ctx, terminated by AV_PIX_FMT_NONE.
Definition: hwcontext.h:437
AVHWFramesInternal * internal
Private data used internally by libavutil.
Definition: hwcontext.h:133
#define flags(name, subs,...)
Definition: cbs_av1.c:561
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:309
A reference to a data buffer.
Definition: buffer.h:81
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
common internal and external API header
static void cuda_device_uninit(AVHWDeviceContext *device_ctx)
#define AVERROR_UNKNOWN
Unknown error, typically from an external library.
Definition: error.h:71
AVHWFrameTransferDirection
Definition: hwcontext.h:394
pixel format definitions
AVBufferPool * pool
A pool from which the frames are allocated by av_hwframe_get_buffer().
Definition: hwcontext.h:189
enum AVPixelFormat * valid_sw_formats
A list of possible values for sw_format in the hw_frames_ctx, terminated by AV_PIX_FMT_NONE.
Definition: hwcontext.h:444
int height
Definition: frame.h:353
#define av_freep(p)
AVBufferRef * av_buffer_pool_get(AVBufferPool *pool)
Allocate a new AVBuffer, reusing an old buffer from the pool when available.
Definition: buffer.c:334
#define av_malloc_array(a, b)
formats
Definition: signature.h:48
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2438
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:221
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
#define AV_PIX_FMT_0RGB32
Definition: pixfmt.h:364