@@ -27,6 +27,39 @@ public static DeviceMemory<float> AllocateDevice([NotNull] this Gpu gpu, in Tens
27
27
: throw new InvalidOperationException ( $ "Failed to copy the source data on the target GPU device, [CUDA ERROR] { result } ") ;
28
28
}
29
29
30
/// <summary>
/// Allocates a memory area on device memory, reading the target values at a given offset from the input <see cref="Tensor"/>
/// </summary>
/// <param name="gpu">The <see cref="Gpu"/> device to use</param>
/// <param name="source">The source <see cref="Tensor"/> with the data to copy</param>
/// <param name="offset">The column offset for the data to read from each row</param>
/// <param name="length">The number of values to copy from each row, starting at <paramref name="offset"/></param>
/// <returns>A device memory area with <paramref name="length"/> values for each entity in <paramref name="source"/></returns>
/// <exception cref="ArgumentOutOfRangeException">The <paramref name="offset"/> or <paramref name="length"/> values don't fit in a row of <paramref name="source"/></exception>
/// <exception cref="InvalidOperationException">The CUDA 2D memory copy failed</exception>
[MustUseReturnValue, NotNull]
public static unsafe DeviceMemory<float> AllocateDevice([NotNull] this Gpu gpu, in Tensor source, int offset, int length)
{
    // Checks: a negative offset would otherwise pass the range test below and move the host pointer before the buffer
    if (offset < 0) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid");
    if (length <= 0) throw new ArgumentOutOfRangeException(nameof(length), "The input length isn't valid");
    if (source.Length - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid");

    // Memory copy: each source row is source.Length values wide, each destination row is tightly packed with length values
    DeviceMemory<float> result_gpu = gpu.AllocateDevice<float>(source.Entities * length);
    CUDAInterop.CUDA_MEMCPY2D_st* ptSt = stackalloc CUDAInterop.CUDA_MEMCPY2D_st[1];
    ptSt[0] = new CUDAInterop.CUDA_MEMCPY2D_st
    {
        srcMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_HOST,
        srcHost = source.Ptr + sizeof(float) * offset,
        srcPitch = new IntPtr(sizeof(float) * source.Length),
        dstMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE,
        dstDevice = result_gpu.Handle,
        dstPitch = new IntPtr(sizeof(float) * length),
        WidthInBytes = new IntPtr(sizeof(float) * length),
        Height = new IntPtr(source.Entities)
    };
    CUDAInterop.cudaError_enum result = CUDAInterop.cuMemcpy2D(ptSt);
    if (result == CUDAInterop.cudaError_enum.CUDA_SUCCESS) return result_gpu;

    // The copy failed: release the device buffer before throwing, as the caller never receives it
    result_gpu.Dispose();
    throw new InvalidOperationException($"Failed to copy the source data on the target GPU device, [CUDA ERROR] {result}");
}
62
+
30
63
/// <summary>
31
64
/// Copies the contents of the input <see cref="DeviceMemory{T}"/> instance to the target host memory area
32
65
/// </summary>
@@ -40,6 +73,37 @@ public static void CopyTo([NotNull] this DeviceMemory<float> source, in Tensor d
40
73
throw new InvalidOperationException ( $ "Failed to copy the source data on the given destination, [CUDA ERROR] { result } ") ;
41
74
}
42
75
76
/// <summary>
/// Copies the source data into the target <see cref="Tensor"/>, splitting each individual entry into its own row
/// </summary>
/// <param name="source">The source memory area with the concatenated data for each entry</param>
/// <param name="destination">The destination <see cref="Tensor"/> that will store the data</param>
/// <param name="offset">The column offset for the data for each entry</param>
/// <param name="length">The number of values to copy for each entry</param>
/// <exception cref="ArgumentOutOfRangeException">The <paramref name="offset"/> or <paramref name="length"/> values don't match the size of the given arguments</exception>
/// <exception cref="InvalidOperationException">The CUDA 2D memory copy failed</exception>
public static unsafe void CopyTo([NotNull] this DeviceMemory<float> source, in Tensor destination, int offset, int length)
{
    // Checks: the length is validated first, as it is used as a divisor right below
    if (length <= 0) throw new ArgumentOutOfRangeException(nameof(length), "The input length doesn't match the given arguments");
    if (offset < 0) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid");
    if (source.Length / length != destination.Entities) throw new ArgumentOutOfRangeException(nameof(length), "The input length doesn't match the given arguments");
    if (destination.Length - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid");

    // Memory copy: source rows are tightly packed with length values, destination rows are destination.Length values wide
    CUDAInterop.CUDA_MEMCPY2D_st* ptSt = stackalloc CUDAInterop.CUDA_MEMCPY2D_st[1];
    ptSt[0] = new CUDAInterop.CUDA_MEMCPY2D_st
    {
        srcMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE,
        srcDevice = source.Handle,
        srcPitch = new IntPtr(sizeof(float) * length),
        dstMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_HOST,
        dstHost = destination.Ptr + sizeof(float) * offset,
        dstPitch = new IntPtr(sizeof(float) * destination.Length),
        WidthInBytes = new IntPtr(sizeof(float) * length),
        Height = new IntPtr(destination.Entities)
    };
    CUDAInterop.cudaError_enum result = CUDAInterop.cuMemcpy2D(ptSt);
    if (result != CUDAInterop.cudaError_enum.CUDA_SUCCESS)
        throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}");
}
106
+
43
107
/// <summary>
44
108
/// Copies the contents of the input <see cref="DeviceMemory{T}"/> to a new memory area on the unmanaged heap
45
109
/// </summary>
0 commit comments