Skip to content

Commit 79e48c4

Browse files
committed
a few tweaks
1 parent: f159d87 · commit: 79e48c4

File tree

1 file changed

+8
-13
lines changed

1 file changed

+8
-13
lines changed

crates/cuda_hello/saxpy.cu

Lines changed: 8 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
#include <cstdio>
22
#include <cuda_runtime.h>
3-
#include <random>
3+
#include <vector>
44

55
__global__ void saxpy(int n, float a, float *x, float *y){
66
// threadIdx.x: thread index within the block
@@ -31,7 +31,6 @@ int main() {
3131
// Set up data
3232
const int N = 100;
3333
float alpha = 3.14f;
34-
float *h_x, *h_y;
3534
float *d_x, *d_y;
3635
size_t size = N * sizeof(float);
3736

@@ -40,17 +39,15 @@ int main() {
4039
cudaMalloc(&d_y, size);
4140

4241
// Initialize host data
43-
h_x = (float*)malloc(size);
44-
h_y = (float*)malloc(size);
45-
46-
for (int i = 0; i < N; i++) {
47-
h_x[i] = rand() / (float)RAND_MAX;
48-
h_y[i] = rand() / (float)RAND_MAX;
42+
std::vector<float> h_x(N), h_y(N);
43+
for (int i = 0; i < N; ++i) {
44+
h_x[i] = std::rand() / (float)RAND_MAX;
45+
h_y[i] = std::rand() / (float)RAND_MAX;
4946
}
5047

5148
// Copy data to device
52-
cudaMemcpy(d_x, h_x, size, cudaMemcpyHostToDevice);
53-
cudaMemcpy(d_y, h_y, size, cudaMemcpyHostToDevice);
49+
cudaMemcpy(d_x, h_x.data(), size, cudaMemcpyHostToDevice);
50+
cudaMemcpy(d_y, h_y.data(), size, cudaMemcpyHostToDevice);
5451

5552
// Define block size (number of threads per block)
5653
int blockSize = 4;
@@ -63,14 +60,12 @@ int main() {
6360
cudaDeviceSynchronize();
6461

6562
// Copy result back to host
66-
cudaMemcpy(h_y, d_y, size, cudaMemcpyDeviceToHost);
63+
cudaMemcpy(h_y.data(), d_y, size, cudaMemcpyDeviceToHost);
6764
for (int i = 0; i < N; i++) {
6865
printf("h_y[%d] = %f\n", i, h_y[i]);
6966
}
7067

7168
// Clean up
72-
free(h_x);
73-
free(h_y);
7469
cudaFree(d_x);
7570
cudaFree(d_y);
7671

0 commit comments

Comments
 (0)