11#include < cstdio>
22#include < cuda_runtime.h>
3- #include < random >
3+ #include < vector >
44
55__global__ void saxpy (int n, float a, float *x, float *y){
66 // threadIdx.x: thread index within the block
@@ -31,7 +31,6 @@ int main() {
3131 // Set up data
3232 const int N = 100 ;
3333 float alpha = 3 .14f ;
34- float *h_x, *h_y;
3534 float *d_x, *d_y;
3635 size_t size = N * sizeof (float );
3736
@@ -40,17 +39,15 @@ int main() {
4039 cudaMalloc (&d_y, size);
4140
4241 // Initialize host data
43- h_x = (float *)malloc (size);
44- h_y = (float *)malloc (size);
45-
46- for (int i = 0 ; i < N; i++) {
47- h_x[i] = rand () / (float )RAND_MAX;
48- h_y[i] = rand () / (float )RAND_MAX;
42+ std::vector<float > h_x (N), h_y (N);
43+ for (int i = 0 ; i < N; ++i) {
44+ h_x[i] = std::rand () / (float )RAND_MAX;
45+ h_y[i] = std::rand () / (float )RAND_MAX;
4946 }
5047
5148 // Copy data to device
52- cudaMemcpy (d_x, h_x, size, cudaMemcpyHostToDevice);
53- cudaMemcpy (d_y, h_y, size, cudaMemcpyHostToDevice);
49+ cudaMemcpy (d_x, h_x. data () , size, cudaMemcpyHostToDevice);
50+ cudaMemcpy (d_y, h_y. data () , size, cudaMemcpyHostToDevice);
5451
5552 // Define block size (number of threads per block)
5653 int blockSize = 4 ;
@@ -63,14 +60,12 @@ int main() {
6360 cudaDeviceSynchronize ();
6461
6562 // Copy result back to host
66- cudaMemcpy (h_y, d_y, size, cudaMemcpyDeviceToHost);
63+ cudaMemcpy (h_y. data () , d_y, size, cudaMemcpyDeviceToHost);
6764 for (int i = 0 ; i < N; i++) {
6865 printf (" h_y[%d] = %f\n " , i, h_y[i]);
6966 }
7067
7168 // Clean up
72- free (h_x);
73- free (h_y);
7469 cudaFree (d_x);
7570 cudaFree (d_y);
7671
0 commit comments