@@ -16,7 +16,7 @@ public:
16
16
const std::string DriverVersion = Device.get_info <device::driver_version>();
17
17
18
18
if (Device.is_gpu () && (DriverVersion.find (" CUDA" ) != std::string::npos)) {
19
- std::cout << " CUDA device found " << std::endl ;
19
+ std::cout << " CUDA device found \n " ;
20
20
return 1 ;
21
21
};
22
22
return -1 ;
@@ -41,23 +41,23 @@ int main(int argc, char *argv[]) {
41
41
// Create a SYCL context for interoperability with CUDA Runtime API
42
42
// This is temporary until the property extension is implemented
43
43
const bool UsePrimaryContext = true ;
44
- sycl:: device dev{CUDASelector ().select_device ()};
45
- sycl:: context myContext{dev, {}, UsePrimaryContext};
46
- sycl:: queue myQueue{myContext, dev};
44
+ device dev{CUDASelector ().select_device ()};
45
+ context myContext{dev, {}, UsePrimaryContext};
46
+ queue myQueue{myContext, dev};
47
47
48
48
{
49
49
buffer<double > bA{range<1 >(n)};
50
50
buffer<double > bB{range<1 >(n)};
51
51
buffer<double > bC{range<1 >(n)};
52
52
53
53
{
54
- auto h_a = bA.get_access <access::mode::write>();
55
- auto h_b = bB.get_access <access::mode::write>();
54
+ auto hA = bA.get_access <access::mode::write>();
55
+ auto hB = bB.get_access <access::mode::write>();
56
56
57
57
// Initialize vectors on host
58
58
for (int i = 0 ; i < n; i++) {
59
- h_a [i] = sin (i) * sin (i);
60
- h_b [i] = cos (i) * cos (i);
59
+ hA [i] = sin (i) * sin (i);
60
+ hB [i] = cos (i) * cos (i);
61
61
}
62
62
}
63
63
@@ -68,28 +68,29 @@ int main(int argc, char *argv[]) {
68
68
auto accC = bC.get_access <access::mode::write>(h);
69
69
70
70
h.interop_task ([=](interop_handler ih) {
71
- auto d_a = reinterpret_cast <double *>(ih.get_mem <backend::cuda>(accA));
72
- auto d_b = reinterpret_cast <double *>(ih.get_mem <backend::cuda>(accB));
73
- auto d_c = reinterpret_cast <double *>(ih.get_mem <backend::cuda>(accC));
71
+ auto dA = reinterpret_cast <double *>(ih.get_mem <backend::cuda>(accA));
72
+ auto dB = reinterpret_cast <double *>(ih.get_mem <backend::cuda>(accB));
73
+ auto dC = reinterpret_cast <double *>(ih.get_mem <backend::cuda>(accC));
74
74
75
75
int blockSize, gridSize;
76
76
// Number of threads in each thread block
77
77
blockSize = 1024 ;
78
78
// Number of thread blocks in grid
79
- gridSize = ( int ) ceil (( float )n / blockSize);
79
+ gridSize = static_cast < int >( ceil (static_cast < float >(n) / blockSize) );
80
80
// Call the CUDA kernel directly from SYCL
81
- vecAdd<<<gridSize, blockSize>>> (d_a, d_b, d_c , n);
81
+ vecAdd<<<gridSize, blockSize>>> (dA, dB, dC , n);
82
82
});
83
83
});
84
84
85
85
{
86
- auto h_c = bC.get_access <access::mode::read>();
86
+ auto hC = bC.get_access <access::mode::read>();
87
87
// Sum up vector c and print result divided by n, this should equal 1 within
88
88
// error
89
89
double sum = 0 ;
90
- for (int i = 0 ; i < n; i++)
91
- sum += h_c[i];
92
- printf (" final result: %f\n " , sum / n);
90
+ for (int i = 0 ; i < n; i++) {
91
+ sum += hC[i];
92
+ }
93
+ std::cout << " Final result " << sum / n << std::endl;
93
94
}
94
95
}
95
96
0 commit comments