Skip to content

Commit eef7904

Browse files
Cuda for descript (#5)
* checkpoint: Adding support for CUDA-powered lut3d, colorspace_cuda, and tonemap_cuda filters * Adding float3 definitions
1 parent c9f0693 commit eef7904

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

libavfilter/cuda/vector_helpers.cuh

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#ifndef AVFILTER_CUDA_VECTORHELPERS_H
2424
#define AVFILTER_CUDA_VECTORHELPERS_H
2525

26+
#include <vector_types.h>
27+
2628
/* Short aliases for the built-in unsigned char and unsigned short types. */
typedef unsigned char uchar;
2729
typedef unsigned short ushort;
2830

@@ -35,7 +37,9 @@ template<> struct vector_helper<ushort2> { typedef float2 ftype; typedef int2 it
3537
/*
 * vector_helper specializations: for each listed type, ftype names the float
 * type and itype names the int type with the same number of components.
 * The primary vector_helper template is declared outside this view.
 */
template<> struct vector_helper<ushort4> {
    typedef float4 ftype;
    typedef int4 itype;
};

template<> struct vector_helper<int> {
    typedef float ftype;
    typedef int itype;
};

template<> struct vector_helper<int2> {
    typedef float2 ftype;
    typedef int2 itype;
};

template<> struct vector_helper<int3> {
    typedef float3 ftype;
    typedef int3 itype;
};

template<> struct vector_helper<int4> {
    typedef float4 ftype;
    typedef int4 itype;
};

template<> struct vector_helper<float3> {
    typedef float3 ftype;
    typedef int3 itype;
};
3943

4044
/*
 * Shorthands that expand to the ftype / itype member of vector_helper<T>.
 * The expansion refers to T literally, so a type named T must be in scope
 * wherever floatT or intT is used.
 */
#define floatT typename vector_helper<T>::ftype
4145
#define intT typename vector_helper<T>::itype
@@ -77,6 +81,20 @@ OPERATORS4(uchar4)
7781
/* Expand OPERATORS4 for ushort4 and float4; the macro's definition is outside this view. */
OPERATORS4(ushort4)
7882
OPERATORS4(float4)
7983

84+
/*
 * OPERATORS3(T): generates device helper functions for a 3-component vector
 * type T (members x, y, z; built through make_<T>):
 *   - operator+, operator-, operator+= : per-component, against an operand of
 *     type V that also exposes x, y and z
 *   - operator*, operator/             : every component against one value
 *   - vec_set / vec_set_scalar         : per-component copy / broadcast of one
 *     value into x, y and z
 *   - to_floatN / from_floatN          : explicit specializations converting
 *     T to and from float3 (the primary templates are not visible in this
 *     view and are assumed to be declared earlier in the file)
 */
#define OPERATORS3(T) \
template<typename V> \
inline __device__ T operator+(const T &lhs, const V &rhs) \
{ \
    return make_ ## T (lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z); \
} \
template<typename V> \
inline __device__ T operator-(const T &lhs, const V &rhs) \
{ \
    return make_ ## T (lhs.x - rhs.x, lhs.y - rhs.y, lhs.z - rhs.z); \
} \
template<typename V> \
inline __device__ T operator*(const T &vec, V factor) \
{ \
    return make_ ## T (vec.x * factor, vec.y * factor, vec.z * factor); \
} \
template<typename V> \
inline __device__ T operator/(const T &vec, V divisor) \
{ \
    return make_ ## T (vec.x / divisor, vec.y / divisor, vec.z / divisor); \
} \
template<typename V> \
inline __device__ T &operator+=(T &acc, const V &rhs) \
{ \
    acc.x += rhs.x; \
    acc.y += rhs.y; \
    acc.z += rhs.z; \
    return acc; \
} \
template<typename V> \
inline __device__ void vec_set(T &dst, const V &src) \
{ \
    dst.x = src.x; \
    dst.y = src.y; \
    dst.z = src.z; \
} \
template<typename V> \
inline __device__ void vec_set_scalar(T &dst, V value) \
{ \
    dst.x = value; \
    dst.y = value; \
    dst.z = value; \
} \
template<> \
inline __device__ float3 to_floatN<T, float3>(const T &src) \
{ \
    return make_float3(src.x, src.y, src.z); \
} \
template<> \
inline __device__ T from_floatN<T, float3>(const float3 &src) \
{ \
    return make_ ## T(src.x, src.y, src.z); \
}

/* Generate the 3-component helpers for int3 and float3. */
OPERATORS3(int3)
OPERATORS3(float3)
97+
8098
/*
 * Scalar vec_set overloads: store the second argument into the first by plain
 * assignment, relying on the implicit conversion from V to the destination
 * type.
 */
template<typename V>
inline __device__ void vec_set(int &dst, V src)
{
    dst = src;
}

template<typename V>
inline __device__ void vec_set(float &dst, V src)
{
    dst = src;
}

template<typename V>
inline __device__ void vec_set(uchar &dst, V src)
{
    dst = src;
}
@@ -99,6 +117,15 @@ inline __device__ float2 lerp_scalar<float2>(float2 v0, float2 v1, float t) {
99117
);
100118
}
101119

120+
/*
 * float3 specialization of lerp_scalar: calls lerp_scalar on the x, y and z
 * components of v0 and v1 separately, passing the same t each time, and
 * packs the three results into a float3.
 */
template<>
inline __device__ float3 lerp_scalar<float3>(float3 v0, float3 v1, float t) {
    const float x = lerp_scalar(v0.x, v1.x, t);
    const float y = lerp_scalar(v0.y, v1.y, t);
    const float z = lerp_scalar(v0.z, v1.z, t);

    return make_float3(x, y, z);
}
128+
102129
template<>
103130
inline __device__ float4 lerp_scalar<float4>(float4 v0, float4 v1, float t) {
104131
return make_float4(

0 commit comments

Comments
 (0)