Skip to content

Commit e191b73

Browse files
takahiroharadajammmshoikeda
authored
Adl changes (#3)
* Add some more APIs and cuda runtime stuff * Add missing hipMemcpy define * hipMemcpy fixes * Update cuew for cudart calls * Implement ppGetDeviceProperties for CUDA * Add getCurAPI * [RPRNEXT-0] Fix a build error on linux. * [POP-0] Fix a build error on linux. Co-authored-by: Aaryaman Vasishta <aaryaman.vasishta@amd.com> Co-authored-by: Sho Ikeda <Sho.Ikeda@amd.com>
1 parent 187fbe3 commit e191b73

File tree

6 files changed

+369
-22
lines changed

6 files changed

+369
-22
lines changed

Pop/Pop.cpp

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ int ppInitialize( Api api, ppU32 flags )
3838
return hipewInit( HIPEW_INIT_HIP );
3939
return PP_ERROR_OPEN_FAILED;
4040
}
41+
// Returns the current value of the file-level `s_api` variable.
// The `flags` argument is accepted but not read by this function.
Api ppGetCurAPI(ppU32 flags)
42+
{
43+
return s_api;
44+
}
4145

4246

4347
//=================================
@@ -53,6 +57,11 @@ ppError cu2pp( CUresult a )
5357
return (ppError)a;
5458
}
5559
// Converts a CUDA runtime error code (cudaError_t) to ppError with a plain
// cast; the numeric value is passed through without any remapping.
inline
60+
ppError cuda2pp(cudaError_t a)
61+
{
62+
return (ppError)a;
63+
}
64+
inline
5665
CUcontext* ppCtx2cu( ppCtx* a )
5766
{
5867
return (CUcontext*)a;
@@ -74,12 +83,18 @@ pprtcResult nvrtc2pp( nvrtcResult a )
7483
}
7584

7685
#define __PP_FUNC1( cuname, hipname ) if( s_api == API_CUDA ) return cu2pp( cu##cuname ); if( s_api == API_HIP ) return hip2pp( hip##hipname );
86+
#define __PP_FUNC2( cudaname, hipname ) if( s_api == API_CUDA ) return cuda2pp( cuda##cudaname ); if( s_api == API_HIP ) return hip2pp( hip##hipname );
7787
//#define __PP_FUNC1( cuname, hipname ) if( s_api == API_CUDA || API == API_CUDA ) return cu2pp( cu##cuname ); if( s_api == API_HIP || API == API_HIP ) return hip2pp( hip##hipname );
7888
#define __PP_FUNC( name ) if( s_api == API_CUDA ) return cu2pp( cu##name ); if( s_api == API_HIP ) return hip2pp( hip##name );
7989
#define __PP_CTXT_FUNC( name ) __PP_FUNC1(Ctx##name, name)
8090
//#define __PP_CTXT_FUNC( name ) if( s_api == API_CUDA ) return cu2pp( cuCtx##name ); if( s_api == API_HIP ) return hip2pp( hip##name );
8191
#define __PPRTC_FUNC1( cuname, hipname ) if( s_api == API_CUDA ) return nvrtc2pp( nvrtc##cuname ); if( s_api == API_HIP ) return hiprtc2pp( hiprtc##hipname );
8292

93+
#define __PP_FUNC_INSTANCE( funcName, args ) \
94+
template ppError PPAPI funcName <API_AUTOMATIC> args;\
95+
template ppError PPAPI funcName <API_CUDA> args;\
96+
template ppError PPAPI funcName <API_HIP> args;
97+
8398

8499
ppError PPAPI ppGetErrorName(ppError error, const char** pStr)
85100
{
@@ -106,6 +121,9 @@ ppError PPAPI ppInit(unsigned int Flags)
106121
__PP_FUNC( Init(Flags) );
107122
return ppErrorUnknown;
108123
}
124+
125+
__PP_FUNC_INSTANCE( ppInit, (unsigned int Flags) );
126+
109127
ppError PPAPI ppDriverGetVersion(int* driverVersion)
110128
{
111129
__PP_FUNC( DriverGetVersion(driverVersion) );
@@ -125,12 +143,14 @@ ppError PPAPI ppGetDeviceProperties(ppDeviceProp* props, int deviceId)
125143
{
126144
if( s_api == API_CUDA )
127145
{
128-
CUdevprop p;
129-
cuDeviceGetProperties( &p, deviceId );
146+
cudaDeviceProp p;
147+
cudaError_t e = cudaGetDeviceProperties( &p, deviceId );
148+
if (e != CUDA_SUCCESS)
149+
return ppErrorUnknown;
130150
char name[128];
131-
cuDeviceGetName( name, 128, deviceId );
132-
strcpy( props->name, name );
151+
strcpy( props->name, p.name );
133152
strcpy( props->gcnArchName, "" );
153+
props->totalGlobalMem = p.totalGlobalMem;
134154
printf("todo. implement me\n");
135155
return ppSuccess;
136156
}
@@ -268,6 +288,11 @@ ppError PPAPI ppMalloc(ppDeviceptr* dptr, size_t bytesize)
268288
__PP_FUNC1( MemAlloc(dptr, bytesize), Malloc( dptr, bytesize ) );
269289
return ppErrorUnknown;
270290
}
291+
// Runtime-API flavored allocation. Via __PP_FUNC2 this returns
// cuda2pp( cudaMalloc(...) ) when s_api == API_CUDA and
// hip2pp( hipMalloc(...) ) when s_api == API_HIP; if s_api is neither,
// control falls through and ppErrorUnknown is returned.
// NOTE(review): the official cudaMalloc takes a void**; the CUdeviceptr* cast
// presumably matches the cuew wrapper declaration -- confirm.
ppError PPAPI ppMalloc2(ppDeviceptr* dptr, size_t bytesize)
292+
{
293+
__PP_FUNC2( Malloc((CUdeviceptr*)dptr, bytesize), Malloc(dptr, bytesize) );
294+
return ppErrorUnknown;
295+
}
271296
ppError PPAPI ppMemAllocPitch(ppDeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes)
272297
{
273298
return ppErrorUnknown;
@@ -277,8 +302,20 @@ ppError PPAPI ppFree(ppDeviceptr dptr)
277302
__PP_FUNC1( MemFree( dptr ), Free( dptr ) );
278303
return ppErrorUnknown;
279304
}
305+
// Runtime-API flavored free. Via __PP_FUNC2 this returns
// cuda2pp( cudaFree(...) ) when s_api == API_CUDA and
// hip2pp( hipFree(...) ) when s_api == API_HIP; if s_api is neither,
// control falls through and ppErrorUnknown is returned.
// NOTE(review): the official cudaFree takes a void*; the CUdeviceptr cast
// presumably matches the cuew wrapper declaration -- confirm.
ppError PPAPI ppFree2(ppDeviceptr dptr)
306+
{
307+
__PP_FUNC2( Free((CUdeviceptr)dptr), Free(dptr) );
308+
return ppErrorUnknown;
309+
}
280310

281311
//-------------------
312+
// Generic copy that forwards to cudaMemcpy (s_api == API_CUDA) or hipMemcpy
// (s_api == API_HIP) through __PP_FUNC2, casting `kind` directly to the
// backend's memcpy-kind enum. Returns ppErrorUnknown when s_api matches
// neither backend.
// NOTE(review): the parameter names say dstDevice/srcHost, but the pointers
// are passed through unchanged and `kind` is what is handed to the backend,
// so the names presumably do not constrain the copy direction -- confirm.
ppError PPAPI ppMemcpy(void *dstDevice, void* srcHost, size_t ByteCount, ppMemcpyKind kind)
313+
{
314+
__PP_FUNC2( Memcpy(dstDevice, srcHost, ByteCount, (cudaMemcpyKind)kind),
315+
Memcpy(dstDevice, srcHost, ByteCount, (hipMemcpyKind)kind) );
316+
return ppErrorUnknown;
317+
}
318+
282319
ppError PPAPI ppMemcpyHtoD(ppDeviceptr dstDevice, void* srcHost, size_t ByteCount)
283320
{
284321
__PP_FUNC1( MemcpyHtoD( dstDevice, srcHost, ByteCount ),
@@ -299,7 +336,7 @@ ppError PPAPI ppMemcpyDtoD(ppDeviceptr dstDevice, ppDeviceptr srcDevice, size_t
299336

300337
ppError PPAPI ppMemset(ppDeviceptr dstDevice, unsigned int ui, size_t N)
301338
{
302-
__PP_FUNC( MemsetD32( dstDevice, ui, N ) );
339+
__PP_FUNC1( MemsetD8( (CUdeviceptr)dstDevice, ui, N ), Memset((void*)dstDevice, ui, N));
303340
return ppErrorUnknown;
304341
}
305342

@@ -326,6 +363,12 @@ ppError PPAPI ppModuleLaunchKernel(ppFunction f, unsigned int gridDimX, unsigned
326363
ModuleLaunchKernel( (hipFunction_t)f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, (hipStream_t)hStream, kernelParams, extra ) );
327364
return ppErrorUnknown;
328365
}
366+
// Dispatches to cudaGetLastError (s_api == API_CUDA) or hipGetLastError
// (s_api == API_HIP) through __PP_FUNC2 and returns the converted result;
// returns ppErrorUnknown when s_api matches neither backend.
// NOTE(review): `pp_error` is cast and passed as an argument, whereas the
// official cudaGetLastError / hipGetLastError take no parameters -- confirm
// that the cuew/hipew wrapper declarations really accept one.
ppError PPAPI ppGetLastError(ppError pp_error)
367+
{
368+
__PP_FUNC2(GetLastError((cudaError_t)pp_error),
369+
GetLastError((hipError_t)pp_error));
370+
return ppErrorUnknown;
371+
}
329372
//-------------------
330373
pprtcResult PPAPI pprtcGetErrorString(pprtcResult result)
331374
{
@@ -401,8 +444,9 @@ ppError PPAPI ppPointerGetAttributes(ppPointerAttribute* attr, ppDeviceptr dptr)
401444
//-----------------
402445
ppError PPAPI ppStreamCreate(ppStream* stream)
403446
{
404-
__PP_FUNC1( StreamCreate((CUstream*)stream, CU_STREAM_DEFAULT),
405-
StreamCreate((hipStream_t*)stream) );
447+
__PP_FUNC2(StreamCreate((cudaStream_t*)stream),
448+
StreamCreate((hipStream_t*)stream));
449+
406450
return ppErrorUnknown;
407451
}
408452

Pop/Pop.h

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
//
2222
#pragma once
2323

24+
#include <cstddef>
25+
2426
enum Api
2527
{
2628
API_AUTOMATIC,
@@ -34,6 +36,15 @@ enum ppError
3436
ppErrorUnknown = 999,
3537
};
3638

39+
// Kind selector accepted by ppMemcpy, which casts it unchanged to
// cudaMemcpyKind / hipMemcpyKind before calling the backend.
// NOTE(review): the numeric values presumably mirror those backend enums --
// confirm against the CUDA and HIP headers before changing any of them.
enum ppMemcpyKind
40+
{
41+
ppMemcpyHostToHost = 0,
42+
ppMemcpyHostToDevice = 1,
43+
ppMemcpyDeviceToHost = 2,
44+
ppMemcpyDeviceToDevice = 3,
45+
ppMemcpyDefault = 4
46+
};
47+
3748
typedef unsigned int ppU32;
3849
typedef unsigned long long ppDeviceptr;
3950

@@ -543,10 +554,7 @@ typedef enum hipError_t {
543554
* Stream CallBack struct
544555
*/
545556

546-
#define __PP_FUNC_DEC( funcName, args ) template<Api API=API_AUTOMATIC> ppError PPAPI funcName##args; \
547-
template ppError PPAPI funcName##<API_AUTOMATIC>##args;\
548-
template ppError PPAPI funcName##<API_CUDA>##args;\
549-
template ppError PPAPI funcName##<API_HIP>##args;
557+
#define __PP_FUNC_DEC( funcName, args ) template<Api API=API_AUTOMATIC> ppError PPAPI funcName args
550558

551559

552560
ppError PPAPI ppGetErrorName(ppError error, const char** pStr);
@@ -589,8 +597,10 @@ ppError PPAPI ppModuleGetGlobal(ppDeviceptr* dptr, size_t* bytes, ppModule hmod,
589597
//ppError PPAPI ppModuleGetTexRef(textureReference** pTexRef, ppModule hmod, const char* name);
590598
ppError PPAPI ppMemGetInfo(size_t* free, size_t* total);
591599
ppError PPAPI ppMalloc(ppDeviceptr* dptr, size_t bytesize);
600+
ppError PPAPI ppMalloc2(ppDeviceptr* dptr, size_t bytesize);
592601
ppError PPAPI ppMemAllocPitch(ppDeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
593602
ppError PPAPI ppFree(ppDeviceptr dptr);
603+
ppError PPAPI ppFree2(ppDeviceptr dptr);
594604
//ppError PPAPI ppMemGetAddressRange(ppDeviceptr* pbase, size_t* psize, ppDeviceptr dptr);
595605
//ppError PPAPI ppHostMalloc(void** pp, size_t bytesize, unsigned int flags);
596606
//ppError PPAPI ppHostFree(void* p);
@@ -601,7 +611,7 @@ ppError PPAPI ppFree(ppDeviceptr dptr);
601611
//ppError PPAPI ppDeviceGetByPCIBusId(hipDevice_t* dev, const char* pciBusId);
602612
//ppError PPAPI ppDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t dev);
603613
//ppError PPAPI ppMemHostUnregister(void* p);
604-
//ppError PPAPI ppMemcpy(ppDeviceptr dst, ppDeviceptr src, size_t ByteCount);
614+
ppError PPAPI ppMemcpy(void *dst, void *src, size_t ByteCount, ppMemcpyKind kind);
605615
//ppError PPAPI ppMemcpyPeer(ppDeviceptr dstDevice, hipCtx_t dstContext, ppDeviceptr srcDevice, hipCtx_t srcContext, size_t ByteCount);
606616
ppError PPAPI ppMemcpyHtoD(ppDeviceptr dstDevice, void* srcHost, size_t ByteCount);
607617
ppError PPAPI ppMemcpyDtoH(void* dstHost, ppDeviceptr srcDevice, size_t ByteCount);
@@ -672,6 +682,7 @@ ppError PPAPI ppModuleLaunchKernel(ppFunction f, unsigned int gridDimX, unsigned
672682
//ppError PPAPI ppGraphicsUnmapResources(unsigned int count, hipGraphicsResource* resources, ppStream hStream);
673683
//ppError PPAPI ppGraphicsGLRegisterBuffer(hipGraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags);
674684
//ppError PPAPI ppGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, unsigned int hipDeviceCount, hipGLDeviceList deviceList);
685+
ppError PPAPI ppGetLastError(ppError pp_error);
675686
pprtcResult PPAPI pprtcGetErrorString(pprtcResult result);
676687
pprtcResult PPAPI pprtcAddNameExpression(pprtcProgram prog, const char* name_expression);
677688
pprtcResult PPAPI pprtcCompileProgram(pprtcProgram prog, int numOptions, const char** options);
@@ -693,16 +704,17 @@ enum {
693704

694705

695706
int ppInitialize( Api api, ppU32 flags );
707+
Api ppGetCurAPI( ppU32 flags );
696708

697709

698710
#include <stdint.h>
699711

700-
typedef struct dim3 {
701-
uint32_t x; ///< x
702-
uint32_t y; ///< y
703-
uint32_t z; ///< z
704-
#ifdef __cplusplus
705-
constexpr dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){};
706-
#endif
707-
} dim3;
712+
//typedef struct dim3 {
713+
// uint32_t x; ///< x
714+
// uint32_t y; ///< y
715+
// uint32_t z; ///< z
716+
//#ifdef __cplusplus
717+
// constexpr dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){};
718+
//#endif
719+
//} dim3;
708720

0 commit comments

Comments
 (0)