FFmpeg  4.2.2
cuda_runtime.h
Go to the documentation of this file.
1 /*
2  * Minimum CUDA compatibility definitions header
3  *
4  * Copyright (c) 2019 Rodger Combs
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #ifndef COMPAT_CUDA_CUDA_RUNTIME_H
24 #define COMPAT_CUDA_CUDA_RUNTIME_H
25 
26 // Common macros
27 #define __global__ __attribute__((global))
28 #define __device__ __attribute__((device))
29 #define __device_builtin__ __attribute__((device_builtin))
30 #define __align__(N) __attribute__((aligned(N)))
31 #define __inline__ __inline__ __attribute__((always_inline))
32 
33 #define max(a, b) ((a) > (b) ? (a) : (b))
34 #define min(a, b) ((a) < (b) ? (a) : (b))
35 #define abs(x) ((x) < 0 ? -(x) : (x))
36 
37 #define atomicAdd(a, b) (__atomic_fetch_add(a, b, __ATOMIC_SEQ_CST))
38 
39 // Basic typedefs
40 typedef __device_builtin__ unsigned long long cudaTextureObject_t;
41 
43 {
44  unsigned char x, y;
46 
48 {
49  unsigned short x, y;
51 
52 typedef struct __device_builtin__ uint3
53 {
54  unsigned int x, y, z;
55 } uint3;
56 
57 typedef struct uint3 dim3;
58 
60 {
61  int x, y;
62 } int2;
63 
64 typedef struct __device_builtin__ __align__(4) uchar4
65 {
66  unsigned char x, y, z, w;
68 
69 typedef struct __device_builtin__ __align__(8) ushort4
70 {
71  unsigned char x, y, z, w;
73 
74 typedef struct __device_builtin__ __align__(16) int4
75 {
76  int x, y, z, w;
77 } int4;
78 
79 // Accessors for special registers
80 #define GETCOMP(reg, comp) \
81  asm("mov.u32 %0, %%" #reg "." #comp ";" : "=r"(tmp)); \
82  ret.comp = tmp;
83 
84 #define GET(name, reg) static inline __device__ uint3 name() {\
85  uint3 ret; \
86  unsigned tmp; \
87  GETCOMP(reg, x) \
88  GETCOMP(reg, y) \
89  GETCOMP(reg, z) \
90  return ret; \
91 }
92 
93 GET(getBlockIdx, ctaid)
94 GET(getBlockDim, ntid)
95 GET(getThreadIdx, tid)
96 
97 // Instead of externs for these registers, we turn access to them into calls into trivial ASM
98 #define blockIdx (getBlockIdx())
99 #define blockDim (getBlockDim())
100 #define threadIdx (getThreadIdx())
101 
102 // Basic initializers (simple macros rather than inline functions)
103 #define make_uchar2(a, b) ((uchar2){.x = a, .y = b})
104 #define make_ushort2(a, b) ((ushort2){.x = a, .y = b})
105 #define make_uchar4(a, b, c, d) ((uchar4){.x = a, .y = b, .z = c, .w = d})
106 #define make_ushort4(a, b, c, d) ((ushort4){.x = a, .y = b, .z = c, .w = d})
107 
108 // Conversions from the tex instruction's 4-register output to various types
109 #define TEX2D(type, ret) static inline __device__ void conv(type* out, unsigned a, unsigned b, unsigned c, unsigned d) {*out = (ret);}
110 
111 TEX2D(unsigned char, a & 0xFF)
112 TEX2D(unsigned short, a & 0xFFFF)
113 TEX2D(uchar2, make_uchar2(a & 0xFF, b & 0xFF))
114 TEX2D(ushort2, make_ushort2(a & 0xFFFF, b & 0xFFFF))
115 TEX2D(uchar4, make_uchar4(a & 0xFF, b & 0xFF, c & 0xFF, d & 0xFF))
116 TEX2D(ushort4, make_ushort4(a & 0xFFFF, b & 0xFFFF, c & 0xFFFF, d & 0xFFFF))
117 
118 // Template calling tex instruction and converting the output to the selected type
119 template <class T>
120 static inline __device__ T tex2D(cudaTextureObject_t texObject, float x, float y)
121 {
122  T ret;
123  unsigned ret1, ret2, ret3, ret4;
124  asm("tex.2d.v4.u32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" :
125  "=r"(ret1), "=r"(ret2), "=r"(ret3), "=r"(ret4) :
126  "l"(texObject), "f"(x), "f"(y));
127  conv(&ret, ret1, ret2, ret3, ret4);
128  return ret;
129 }
130 
131 #endif /* COMPAT_CUDA_CUDA_RUNTIME_H */
static int conv(int samples, float **pcm, char *buf, int channels)
Definition: libvorbisdec.c:131
ushort2
Definition: cuda_runtime.h:50
const char * b
Definition: vf_curves.c:116
uchar2
Definition: cuda_runtime.h:45
#define GET(name, reg)
Definition: cuda_runtime.h:84
#define __align__(N)
Definition: cuda_runtime.h:30
#define TEX2D(type, ret)
Definition: cuda_runtime.h:109
#define make_ushort2(a, b)
Definition: cuda_runtime.h:104
int2
Definition: cuda_runtime.h:62
unsigned int z
Definition: cuda_runtime.h:54
ushort4
Definition: cuda_runtime.h:72
__device_builtin__ unsigned long long cudaTextureObject_t
Definition: cuda_runtime.h:40
static __device__ T tex2D(cudaTextureObject_t texObject, float x, float y)
Definition: cuda_runtime.h:120
#define make_uchar4(a, b, c, d)
Definition: cuda_runtime.h:105
#define __device__
Definition: cuda_runtime.h:28
#define T(x)
Definition: vp56_arith.h:29
uint8_t w
Definition: llviddspenc.c:38
uchar4
Definition: cuda_runtime.h:67
#define __device_builtin__
Definition: cuda_runtime.h:29
#define make_uchar2(a, b)
Definition: cuda_runtime.h:103
static double c[64]
#define make_ushort4(a, b, c, d)
Definition: cuda_runtime.h:106
int4
Definition: cuda_runtime.h:77