Skip to content

Commit 40bb9c1

Browse files
Draft of the CUDA driver API (#26)
1 parent c00ae97 commit 40bb9c1

File tree

10 files changed

+490
-0
lines changed

10 files changed

+490
-0
lines changed

source/dcompute/driver/cuda650/TODO

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
cuLink.*
2+
cuIpc.*
3+
cuTexRef.*
4+
cuTexObj.*
5+
cuSurfRef.*
6+
cuSurfObj.*
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
module dcompute.driver.cuda650.buffer;
2+
3+
/// Typed handle to a device allocation sized for elements of type `T`.
struct Buffer(T)
{
    /// Device address written by `cuMemAlloc`.
    size_t raw;

    /**
     * Requests device memory for `elems` values of `T`.
     *
     * Params:
     *   elems = number of `T` elements; the byte count passed to
     *           `cuMemAlloc` is `elems * T.sizeof`.
     *
     * The driver result is stored in `status`, after which `checkErrors()`
     * is invoked.
     */
    this(size_t elems)
    {
        const size_t byteCount = elems * T.sizeof;
        status = cast(Status) cuMemAlloc(&raw, byteCount);
        checkErrors();
    }
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
module dcompute.driver.cuda650.context;
2+
3+
/// Thin wrapper around a CUDA driver context handle (`CUcontext`).
///
/// Every member stores the driver result in `status` and then calls
/// `checkErrors()`.
struct Context
{
    /// Opaque driver handle.
    void* raw;

    /**
     * Creates a context on the given device.
     *
     * Params:
     *   dev   = device the context is created on
     *   flags = creation flags, forwarded unchanged to `cuCtxCreate`
     */
    this(Device dev, uint flags)
    {
        // The driver expects the raw device ordinal, as the other call
        // sites in this package pass it (e.g. cuDeviceTotalMem).
        status = cast(Status)cuCtxCreate(&raw, flags, dev.raw);
        checkErrors();
    }

    /// Pushes `ctx` via `cuCtxPushCurrent`.
    static void push(Context ctx)
    {
        status = cast(Status)cuCtxPushCurrent(ctx.raw);
        checkErrors();
    }

    /// Pops via `cuCtxPopCurrent` and returns the popped context.
    static Context pop()
    {
        Context ret;
        status = cast(Status)cuCtxPopCurrent(&ret.raw);
        checkErrors();
        return ret;
    }

    /// Returns the context reported by `cuCtxGetCurrent`.
    static @property Context current()
    {
        Context ret;
        status = cast(Status)cuCtxGetCurrent(&ret.raw);
        checkErrors();
        return ret;
    }

    /// Passes `ctx` to `cuCtxSetCurrent`.
    static @property void current(Context ctx)
    {
        status = cast(Status)cuCtxSetCurrent(ctx.raw);
        checkErrors();
    }

    /// Calls `cuCtxSynchronize`.
    static void sync()
    {
        status = cast(Status)cuCtxSynchronize();
        checkErrors();
    }

    //CUlimit
    /// Resource limits; enumerator order mirrors the numeric values of `CUlimit`.
    enum Limit
    {
        stackSize,
        printfFifoSize,
        mallocHeapSize,
        deviceRuntimeSyncDepth,
        deviceRuntimePendingLaunchCount
    }

    /**
     * Sets the limit selected by the template argument `what`.
     *
     * Params:
     *   lim = new value, forwarded to `cuCtxSetLimit`
     */
    static @property void limit(Limit what)(size_t lim)
    {
        status = cast(Status)cuCtxSetLimit(what,lim);
        checkErrors();
    }

    /// Returns the current value of the limit selected by `what`.
    static @property size_t limit(Limit what)()
    {
        size_t ret;
        // Query, not set: cuCtxGetLimit(size_t* pvalue, CUlimit limit).
        status = cast(Status)cuCtxGetLimit(&ret,what);
        checkErrors();
        return ret;
    }

    //CUfunc_cache
    /// Cache preference; enumerator order mirrors the numeric values of `CUfunc_cache`.
    enum CacheConfig
    {
        preferNone,
        preferShared,
        preferL1,
        preferEqual,
    }

    /// Sets the cache preference via `cuCtxSetCacheConfig`.
    static @property void cacheConfig(CacheConfig cc)
    {
        // CacheConfig models CUfunc_cache, so the cache-config entry point
        // is the matching call (not the shared-memory bank-size one).
        status = cast(Status)cuCtxSetCacheConfig(cc);
        checkErrors();
    }

    /// Returns the cache preference reported by `cuCtxGetCacheConfig`.
    static @property CacheConfig cacheConfig()
    {
        CacheConfig ret;
        status = cast(Status)cuCtxGetCacheConfig(&ret);
        checkErrors();
        return ret;
    }

    /// Returns the API version reported by `cuCtxGetApiVersion` for this context.
    @property uint apiVersion()
    {
        uint ret;
        // cuCtxGetApiVersion(CUcontext ctx, unsigned int* version)
        status = cast(Status)cuCtxGetApiVersion(raw, &ret);
        checkErrors();
        return ret;
    }

    /**
     * Queries the stream priority range.
     *
     * Params:
     *   lo = receives the driver's "least priority" value
     *   hi = receives the driver's "greatest priority" value
     *
     * Per the driver documentation, numerically lower values mean greater
     * priority, so `lo` is not necessarily the smaller number.
     */
    static void getQueuePriorityRange(out int lo, out int hi)
    {
        status = cast(Status)cuCtxGetStreamPriorityRange(&lo,&hi);
        checkErrors();
    }

    /// Passes this context's handle to `cuCtxDetach`.
    void detach()
    {
        status = cast(Status)cuCtxDetach(raw);
        checkErrors();
    }
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
module dcompute.driver.cuda650.device;
2+
3+
/// Wrapper around a CUDA device ordinal (`CUdevice`).
struct Device
{
    /// Raw device handle as used by the driver API.
    int raw;

    //struct CUdevprop
    /// Per-device attributes. The UDA on each field is the numeric value of
    /// the corresponding `CUdevice_attribute` enumerator; values 27-29 appear
    /// twice because the "Array" names are the older aliases of the
    /// "Layered" ones.
    ///
    /// NOTE(review): several field names carry typos (`canMapHostMemeory`,
    /// `maxTextrue2DGather*`, `PICDomainID`, `maxTaxture*`,
    /// `maxRegistorsPerMultiprocessor`). They are left unchanged here so
    /// existing references keep compiling.
    static struct Info
    {
        @(1) int maxThreadsPerBlock;
        @(2) int maxThreadsDimX;
        @(3) int maxThreadsDimY;
        @(4) int maxThreadsDimZ;
        @(5) int maxGridSizeX;
        @(6) int maxGridSizeY;
        @(7) int maxGridSizeZ;
        @(8) int sharedMemPerBlock;
        @(9) int totalConstantMemory;
        @(10) int SIMDWidth; // warp size
        @(11) int maxPitch;
        @(12) int regsPerBlock;
        @(13) int clockRate;
        @(14) int textureAlign;
        @(15) int GPUOverlap;
        @(16) int multiprocessorCount;
        @(17) int kernelExecTimeout;
        @(18) int integrated;
        @(19) int canMapHostMemeory;
        @(20) int computeMode;
        @(21) int maxTexture1DWidth;
        @(22) int maxTexture2DWidth;
        @(23) int maxTexture2DHeight;
        @(24) int maxTexture3DWidth;
        @(25) int maxTexture3DHeight;
        @(26) int maxTexture3DDepth;
        @(27) int maxTexture2DLayeredWidth;
        @(28) int maxTexture2DLayeredHeight;
        @(29) int maxTexture2DLayeredLayers;
        @(27) int maxTexture2DArrayWidth;
        @(28) int maxTexture2DArrayHeight;
        @(29) int maxTexture2DArrayNumSlices;
        @(30) int surfaceAlignment;
        @(31) int concurrentKernels;
        @(32) int eccEnabled;
        @(33) int PCIBusID;
        @(34) int PCIDeviceID;
        @(35) int tccDriver;
        @(36) int memoryClockRate;
        @(37) int globalMemoryBusWidth;
        @(38) int L2CacheSize;
        @(39) int maxThreadPerMultiProcessor;
        @(40) int asyncEngineCount;
        @(41) int unifiedAddressing;
        @(42) int maxTexture1DLayeredWidth;
        @(43) int maxTexture1DLayeredLayers;
        @(44) int canTex2DGather;
        @(45) int maxTextrue2DGatherWidth;
        @(46) int maxTextrue2DGatherHeight;
        @(47) int maxTexture3DWidthAlternative;
        @(48) int maxTexture3DHeightAlternative;
        @(49) int maxTexture3DDepthAlternative;
        @(50) int PICDomainID;
        @(51) int texturePitchAlignment;
        @(52) int textureCubemapWidth;
        @(53) int textureCubemapLayeredWidths;
        @(54) int textureCubemapLayeredLayers;
        @(55) int maxSurface1DWidth;
        @(56) int maxSurface2DWidth;
        @(57) int maxSurface2DHeight;
        @(58) int maxSurface3DWidth;
        @(59) int maxSurface3DHeight;
        @(60) int maxSurface3DDepth;
        @(61) int maxSurface1DLayeredWidth;
        @(62) int maxSurface1DLayeredLayers;
        @(63) int maxSurface2DLayeredWidth;
        @(64) int maxSurface2DLayeredHeight;
        @(65) int maxSurface2DLayeredLayers;
        @(66) int maxSurfaceCubemapWidth;
        @(67) int maxSurfaceCubemapLayeredWidth;
        @(68) int maxSurfaceCubemapLayeredLayers;
        @(69) int maxTaxture1DLinearWidth;
        @(70) int maxTaxture2DLinearWidth;
        @(71) int maxTaxture2DLinearHeight;
        @(72) int maxTaxture2DLinearPitch;
        @(73) int maxTaxture2DMipmappedWidth;
        @(74) int maxTaxture2DMipmappedHeight;
        @(75) int computeCapabilityMajor;
        @(76) int computeCapabilityMinor;
        @(77) int maxTaxture1DMipmappedWidth;
        @(78) int streamPrioritiesSupported;
        @(79) int globalL1CacheSupported;
        @(80) int localL1CacheSupported;
        @(81) int maxSharedMemoryPerMultiprocessor;
        @(82) int maxRegistorsPerMultiprocessor;
        @(83) int managedMemory;
        @(84) int multiGPUBoard;
        @(85) int multiGPUBoardGroupID;
    }

    /// Returns the value reported by `cuDeviceTotalMem` for this device.
    @property size_t totalMemory()
    {
        size_t ret;
        status = cast(Status)cuDeviceTotalMem(&ret,raw);
        checkErrors();
        return ret;
    }

    //char[] name : cuDeviceGetName
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
module dcompute.driver.cuda650.kernel;
2+
3+
/// Handle to a device function, parameterised on its D function type `F`
/// (or `void` when no signature is supplied).
struct Kernel(F)
    if (is(F == function) || is(F == void))
{
    /// Opaque driver handle.
    void* raw;

    /// Per-function attributes; each UDA is the index used to identify the
    /// attribute (numbering follows `CUfunction_attribute`).
    static struct Attributes
    {
        @(0) int maxThreadsPerBlock;

        // The next three are sizes in bytes.
        @(1) int sharedSize;
        @(2) int constSize;
        @(3) int localSize;

        @(4) int numRegs;
        @(5) int ptxVersion;
        @(6) int binaryVersion;
        @(7) int cacheModeCa;
    }

    /// Shared-memory bank width selection.
    enum MemoryBankConfig : int
    {
        default_   = 0,
        fourBytes  = 1,
        eightBytes = 2,
    }
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
module dcompute.driver.cuda650.memory;
2+
3+
import dcompute.driver.error;
4+
5+
// void pointer like
6+
/// Untyped device address (the device-side analogue of `void*`).
struct MemoryPointer
{
    /// Device address as used by the driver API.
    size_t raw;

    /**
     * Allocates `nbytes` bytes with `cuMemAlloc`.
     *
     * Params:
     *   nbytes = requested size in bytes
     * Returns: a pointer whose `raw` holds the address written by the driver.
     */
    static MemoryPointer allocate(size_t nbytes)
    {
        // The result must be a MemoryPointer: it is what owns `raw` and it
        // is the declared return type.
        MemoryPointer ret;
        status = cast(Status)cuMemAlloc(&ret.raw,nbytes);
        checkErrors();
        return ret;
    }
    //static MemoryPointer allocatePitch(T)(size_t nbytes)

    /**
     * Looks up the allocation containing this address with
     * `cuMemGetAddressRange`.
     *
     * Returns: a `Memory` whose `ptr` is the base address and whose
     *          `length` is the size reported by the driver.
     */
    Memory addressRange()
    {
        Memory ret;
        // Base address goes into the embedded pointer, size into `length`.
        status = cast(Status)cuMemGetAddressRange(&ret.ptr.raw,&ret.length,raw);
        checkErrors();
        return ret;
    }
}
26+
27+
// void[] like
28+
/// A device address paired with a length (the device-side analogue of `void[]`).
struct Memory
{
    // `Flag` / `No` are needed by the `copy` template's parameter list.
    import std.typecons : Flag, No;

    /// Start of the region.
    MemoryPointer ptr;
    /// Size passed to the driver as the byte count when copying.
    size_t length;

    /// Kinds of endpoint a copy can involve.
    enum CopySource
    {
        Host,
        Device,
        Array
    }

    // cuMemcpy and friends
    // TODO: implement this properly
    /**
     * Copies this region into `to`.
     *
     * The template parameters (`T`, `from`, `to`, `dimentions`, `_peer`) are
     * currently ignored: every instantiation issues a plain `cuMemcpy` of
     * `length` bytes from `ptr` to `to.ptr`.
     *
     * NOTE(review): the function parameter `to` shadows the template
     * parameter of the same name.
     */
    template copy(T, CopySource from, CopySource to, int dimentions = 1,
                  Flag!"peer" _peer = No.peer)
    {
        auto copy(Memory to)
        {
            status = cast(Status)cuMemcpy(to.ptr.raw,ptr.raw,length);
            checkErrors();
        }
    }

    // TODO: cuMemset & friends
}

source/dcompute/driver/cuda650/package.d

Whitespace-only changes.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
module dcompute.driver.cuda650.platform;
2+
3+
import dcompute.driver.error;
4+
import std.allocator.typed;
5+
6+
/// Entry points for driver initialisation and device enumeration.
struct Platform
{
    /**
     * Initialises the driver through `cuInit`.
     *
     * Params:
     *   flags = forwarded unchanged to `cuInit`; defaults to 0
     */
    static void initialise(uint flags =0)
    {
        status = cast(Status)cuInit(flags);
        checkErrors();
    }

    /**
     * Builds an array with one `Device` per ordinal reported by
     * `cuDeviceGetCount`.
     *
     * Params:
     *   a = allocator instance.
     *       NOTE(review): `a` is not used; the array comes from a
     *       default-initialised `TypedAllocator!(A)` — confirm intent.
     * Returns: the array filled in by `cuDeviceGet`.
     */
    Device[] devices(A)(A a)
    {
        TypedAllocator!(A) allocator;

        int count;
        status = cast(Status)cuDeviceGetCount(&count);
        checkErrors();

        Device[] found = allocator.makeArray!(Device)(count);
        for (int ordinal = 0; ordinal < count; ++ordinal)
        {
            status = cast(Status)cuDeviceGet(&found[ordinal].raw, ordinal);
            checkErrors();
        }
        return found;
    }
}

0 commit comments

Comments
 (0)