@@ -38,6 +38,10 @@ int ppInitialize( Api api, ppU32 flags )
38
38
return hipewInit ( HIPEW_INIT_HIP );
39
39
return PP_ERROR_OPEN_FAILED;
40
40
}
41
+ Api ppGetCurAPI (ppU32 flags)
42
+ {
43
+ return s_api;
44
+ }
41
45
42
46
43
47
// =================================
@@ -53,6 +57,11 @@ ppError cu2pp( CUresult a )
53
57
return (ppError)a;
54
58
}
55
59
inline
60
+ ppError cuda2pp (cudaError_t a)
61
+ {
62
+ return (ppError)a;
63
+ }
64
+ inline
56
65
CUcontext* ppCtx2cu ( ppCtx* a )
57
66
{
58
67
return (CUcontext*)a;
@@ -74,12 +83,18 @@ pprtcResult nvrtc2pp( nvrtcResult a )
74
83
}
75
84
76
85
// Backend dispatch helpers.
//
// Each dispatch macro expands to two `if( s_api == ... ) return ...;` statements, so it
// must be used inside a function body. When s_api matches neither API_CUDA nor API_HIP
// control falls through to the statement following the macro invocation.
//
// There must be no whitespace between a macro name and its '(' in the #define:
// with a space the preprocessor treats the macro as object-like and the parameter
// list becomes part of the replacement text.

// Calls cu<cuname> (result through cu2pp) or hip<hipname> (result through hip2pp).
#define __PP_FUNC1( cuname, hipname ) if( s_api == API_CUDA ) return cu2pp( cu##cuname ); if( s_api == API_HIP ) return hip2pp( hip##hipname );
// Calls cuda<cudaname> (result through cuda2pp) or hip<hipname> (result through hip2pp).
#define __PP_FUNC2( cudaname, hipname ) if( s_api == API_CUDA ) return cuda2pp( cuda##cudaname ); if( s_api == API_HIP ) return hip2pp( hip##hipname );
// #define __PP_FUNC1( cuname, hipname ) if( s_api == API_CUDA || API == API_CUDA ) return cu2pp( cu##cuname ); if( s_api == API_HIP || API == API_HIP ) return hip2pp( hip##hipname );
// Same as __PP_FUNC1 when both backends share the suffix `name`.
#define __PP_FUNC( name ) if( s_api == API_CUDA ) return cu2pp( cu##name ); if( s_api == API_HIP ) return hip2pp( hip##name );
// Context calls: cuCtx<name> on the CUDA side, hip<name> on the HIP side.
#define __PP_CTXT_FUNC( name ) __PP_FUNC1(Ctx##name, name)
// #define __PP_CTXT_FUNC( name ) if( s_api == API_CUDA ) return cu2pp( cuCtx##name ); if( s_api == API_HIP ) return hip2pp( hip##name );
// Runtime-compiler calls: nvrtc<cuname> (through nvrtc2pp) or hiprtc<hipname> (through hiprtc2pp).
#define __PPRTC_FUNC1( cuname, hipname ) if( s_api == API_CUDA ) return nvrtc2pp( nvrtc##cuname ); if( s_api == API_HIP ) return hiprtc2pp( hiprtc##hipname );

// Emits explicit instantiations of the template `funcName` for API_AUTOMATIC,
// API_CUDA and API_HIP; `args` is the parenthesised parameter list.
#define __PP_FUNC_INSTANCE( funcName, args ) \
	template ppError PPAPI funcName <API_AUTOMATIC> args;\
	template ppError PPAPI funcName <API_CUDA> args;\
	template ppError PPAPI funcName <API_HIP> args;

97
+
83
98
84
99
ppError PPAPI ppGetErrorName (ppError error, const char ** pStr)
85
100
{
@@ -106,6 +121,9 @@ ppError PPAPI ppInit(unsigned int Flags)
106
121
__PP_FUNC ( Init (Flags) );
107
122
return ppErrorUnknown;
108
123
}
124
+
125
+ __PP_FUNC_INSTANCE ( ppInit, (unsigned int Flags) );
126
+
109
127
ppError PPAPI ppDriverGetVersion (int * driverVersion)
110
128
{
111
129
__PP_FUNC ( DriverGetVersion (driverVersion) );
@@ -125,12 +143,14 @@ ppError PPAPI ppGetDeviceProperties(ppDeviceProp* props, int deviceId)
125
143
{
126
144
if ( s_api == API_CUDA )
127
145
{
128
- CUdevprop p;
129
- cuDeviceGetProperties ( &p, deviceId );
146
+ cudaDeviceProp p;
147
+ cudaError_t e = cudaGetDeviceProperties ( &p, deviceId );
148
+ if (e != CUDA_SUCCESS)
149
+ return ppErrorUnknown;
130
150
char name[128 ];
131
- cuDeviceGetName ( name, 128 , deviceId );
132
- strcpy ( props->name , name );
151
+ strcpy ( props->name , p.name );
133
152
strcpy ( props->gcnArchName , " " );
153
+ props->totalGlobalMem = p.totalGlobalMem ;
134
154
printf (" todo. implement me\n " );
135
155
return ppSuccess;
136
156
}
@@ -268,6 +288,11 @@ ppError PPAPI ppMalloc(ppDeviceptr* dptr, size_t bytesize)
268
288
__PP_FUNC1 ( MemAlloc (dptr, bytesize), Malloc ( dptr, bytesize ) );
269
289
return ppErrorUnknown;
270
290
}
291
+ ppError PPAPI ppMalloc2 (ppDeviceptr* dptr, size_t bytesize)
292
+ {
293
+ __PP_FUNC2 ( Malloc ((CUdeviceptr*)dptr, bytesize), Malloc (dptr, bytesize) );
294
+ return ppErrorUnknown;
295
+ }
271
296
ppError PPAPI ppMemAllocPitch (ppDeviceptr* dptr, size_t * pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes)
272
297
{
273
298
return ppErrorUnknown;
@@ -277,8 +302,20 @@ ppError PPAPI ppFree(ppDeviceptr dptr)
277
302
__PP_FUNC1 ( MemFree ( dptr ), Free ( dptr ) );
278
303
return ppErrorUnknown;
279
304
}
305
+ ppError PPAPI ppFree2 (ppDeviceptr dptr)
306
+ {
307
+ __PP_FUNC2 ( Free ((CUdeviceptr)dptr), Free (dptr) );
308
+ return ppErrorUnknown;
309
+ }
280
310
281
311
// -------------------
312
+ ppError PPAPI ppMemcpy (void *dstDevice, void * srcHost, size_t ByteCount, ppMemcpyKind kind)
313
+ {
314
+ __PP_FUNC2 ( Memcpy (dstDevice, srcHost, ByteCount, (cudaMemcpyKind)kind),
315
+ Memcpy (dstDevice, srcHost, ByteCount, (hipMemcpyKind)kind) );
316
+ return ppErrorUnknown;
317
+ }
318
+
282
319
ppError PPAPI ppMemcpyHtoD (ppDeviceptr dstDevice, void * srcHost, size_t ByteCount)
283
320
{
284
321
__PP_FUNC1 ( MemcpyHtoD ( dstDevice, srcHost, ByteCount ),
@@ -299,7 +336,7 @@ ppError PPAPI ppMemcpyDtoD(ppDeviceptr dstDevice, ppDeviceptr srcDevice, size_t
299
336
300
337
// Forwards to cuMemsetD8 (API_CUDA) or hipMemset (API_HIP) with value `ui` and count `N`.
// Returns the converted backend status, or ppErrorUnknown when s_api is neither backend.
// NOTE(review): the 8-bit CUDA entry point presumably uses only the low byte of `ui` - confirm.
ppError PPAPI ppMemset( ppDeviceptr dstDevice, unsigned int ui, size_t N )
{
	switch( s_api )
	{
	case API_CUDA:
		return cu2pp( cuMemsetD8( (CUdeviceptr)dstDevice, ui, N ) );
	case API_HIP:
		return hip2pp( hipMemset( (void*)dstDevice, ui, N ) );
	default:
		return ppErrorUnknown;
	}
}
305
342
@@ -326,6 +363,12 @@ ppError PPAPI ppModuleLaunchKernel(ppFunction f, unsigned int gridDimX, unsigned
326
363
ModuleLaunchKernel ( (hipFunction_t)f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, (hipStream_t)hStream, kernelParams, extra ) );
327
364
return ppErrorUnknown;
328
365
}
366
+ ppError PPAPI ppGetLastError (ppError pp_error)
367
+ {
368
+ __PP_FUNC2 (GetLastError ((cudaError_t)pp_error),
369
+ GetLastError ((hipError_t)pp_error));
370
+ return ppErrorUnknown;
371
+ }
329
372
// -------------------
330
373
pprtcResult PPAPI pprtcGetErrorString (pprtcResult result)
331
374
{
@@ -401,8 +444,9 @@ ppError PPAPI ppPointerGetAttributes(ppPointerAttribute* attr, ppDeviceptr dptr)
401
444
// -----------------
402
445
// Forwards to cudaStreamCreate (API_CUDA) or hipStreamCreate (API_HIP), writing the new
// handle through `stream` after casting it to the backend's stream pointer type.
// Returns the converted backend status, or ppErrorUnknown when s_api is neither backend.
ppError PPAPI ppStreamCreate( ppStream* stream )
{
	switch( s_api )
	{
	case API_CUDA:
		return cuda2pp( cudaStreamCreate( (cudaStream_t*)stream ) );
	case API_HIP:
		return hip2pp( hipStreamCreate( (hipStream_t*)stream ) );
	default:
		return ppErrorUnknown;
	}
}
408
452
0 commit comments