diff --git a/svm/CUDA/cuSVM/cuSVMSolver.cu b/svm/CUDA/cuSVM/cuSVMSolver.cu index 3a59438b..d0185db9 100644 --- a/svm/CUDA/cuSVM/cuSVMSolver.cu +++ b/svm/CUDA/cuSVM/cuSVMSolver.cu @@ -549,25 +549,23 @@ void SVMTrain(float *mexalpha,float* beta,float*y,float *x ,float _C, float _ker printf("_C %f\n", _C); - std::chrono::time_point start_ct1; - std::chrono::time_point stop_ct1; + std::chrono::time_point start_clock_init; + std::chrono::time_point start_clock_exec; + std::chrono::time_point stop_clock; - - start_ct1 = std::chrono::high_resolution_clock::now(); + start_clock_init = std::chrono::steady_clock::now(); + // Creating and recording an event implicitly initialises device queue cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); + cudaEventRecord(start,0); - - //cudaSetDevice(0); + start_clock_exec = std::chrono::steady_clock::now(); mxArray *mexelapsed =mxCreateNumericMatrix(1, 1,mxSINGLE_CLASS, mxREAL); float * elapsed=(float *)mxGetData(mexelapsed); - - cudaEventRecord(start,0); - int numBlocks=64; dim3 ReduceGrid(numBlocks, 1, 1); dim3 ReduceBlock(256, 1, 1); @@ -831,11 +829,12 @@ void SVMTrain(float *mexalpha,float* beta,float*y,float *x ,float _C, float _ker cudaEventElapsedTime(elapsed, start, stop); - stop_ct1 = std::chrono::high_resolution_clock::now(); + stop_clock = std::chrono::steady_clock::now(); - //stop.wait_and_throw(); - float duration = std::chrono::duration(stop_ct1 - start_ct1).count(); - printf("Total run time: %f seconds\n", duration/1000.00); + float duration_compute = std::chrono::duration(stop_clock - start_clock_exec).count(); + float duration_total = std::chrono::duration(stop_clock - start_clock_init).count(); + printf("Compute time: %f seconds\n", duration_compute/1000.00); + printf("Total run time: %f seconds\n", duration_total/1000.00); printf("Iter:%i\n", iter); printf("M:%i\n", m); diff --git a/svm/HIP/cuSVM/cuSVMSolver.cpp b/svm/HIP/cuSVM/cuSVMSolver.cpp index c2acacf8..48a95630 100644 --- a/svm/HIP/cuSVM/cuSVMSolver.cpp +++ b/svm/HIP/cuSVM/cuSVMSolver.cpp @@ -522,21 +522,23 @@ void SVMTrain(float *mexalpha,float* beta,float*y,float *x ,float _C, float _ker printf("_C %f\n", _C); + std::chrono::time_point start_clock_init; + std::chrono::time_point start_clock_exec; + std::chrono::time_point stop_clock; + + start_clock_init = std::chrono::steady_clock::now(); + + // Creating and recording an event implicitly initialises device queue hipEvent_t start, stop; hipEventCreate(&start); hipEventCreate(&stop); + hipEventRecord(start,0); - std::chrono::time_point start_ct1; - std::chrono::time_point stop_ct1; - - start_ct1 = std::chrono::high_resolution_clock::now(); + start_clock_exec = std::chrono::steady_clock::now(); mxArray *mexelapsed =mxCreateNumericMatrix(1, 1,mxSINGLE_CLASS, mxREAL); float * elapsed=(float *)mxGetData(mexelapsed); - - hipEventRecord(start,0); - int numBlocks=64; dim3 ReduceGrid(numBlocks, 1, 1); dim3 ReduceBlock(256, 1, 1); @@ -800,11 +802,12 @@ void SVMTrain(float *mexalpha,float* beta,float*y,float *x ,float _C, float _ker hipEventElapsedTime(elapsed, start, stop); - stop_ct1 = std::chrono::high_resolution_clock::now(); + stop_clock = std::chrono::steady_clock::now(); - //stop.wait_and_throw(); - float duration = std::chrono::duration(stop_ct1 - start_ct1).count(); - printf("Total run time: %f seconds\n", duration/1000.00); + float duration_compute = std::chrono::duration(stop_clock - start_clock_exec).count(); + float duration_total = std::chrono::duration(stop_clock - start_clock_init).count(); + printf("Compute time: %f seconds\n", duration_compute/1000.00); + printf("Total run time: %f seconds\n", duration_total/1000.00); printf("Iter:%i\n", iter); printf("M:%i\n", m); diff --git a/svm/SYCL/cuSVM/cuSVMSolver.dp.cpp b/svm/SYCL/cuSVM/cuSVMSolver.dp.cpp index 57232293..cba09ed5 100644 --- a/svm/SYCL/cuSVM/cuSVMSolver.dp.cpp +++ b/svm/SYCL/cuSVM/cuSVMSolver.dp.cpp @@ -794,18 +794,16 @@ extern "C" void SVMTrain(float *mexalpha, float *beta, float *y, float *x, sycl::event queue_event; sycl::event start, stop; - std::chrono::time_point start_ct1; - std::chrono::time_point stop_ct1; - - start_ct1 = std::chrono::high_resolution_clock::now(); + std::chrono::time_point start_clock_init; + std::chrono::time_point start_clock_exec; + std::chrono::time_point stop_clock; + start_clock_init = std::chrono::steady_clock::now(); + // Select device and initialise the queue sycl::device selected_device = sycl::device(sycl::default_selector()); sycl::context context({selected_device}); - auto max_wgroup_size = selected_device.get_info(); - printf("Workgroup Size: %lu\n", max_wgroup_size); - #if KERNEL_USE_PROFILE auto propList = sycl::property_list{sycl::property::queue::enable_profiling()}; sycl::queue q_ct1(context, selected_device, propList); @@ -813,6 +811,7 @@ extern "C" void SVMTrain(float *mexalpha, float *beta, float *y, float *x, sycl::queue q_ct1(context, selected_device); #endif + start_clock_exec = std::chrono::steady_clock::now(); mxArray *mexelapsed =mxCreateNumericMatrix(1, 1,mxSINGLE_CLASS, mxREAL); float * elapsed=(float *)mxGetData(mexelapsed); @@ -1280,12 +1279,12 @@ _kernelwidth*=-1; q_ct1.memcpy(mexalpha, d_alpha, m * sizeof(float)).wait(); - stop_ct1 = std::chrono::high_resolution_clock::now(); + stop_clock = std::chrono::steady_clock::now(); - //stop.wait_and_throw(); - float duration = std::chrono::duration(stop_ct1 - start_ct1).count(); - printf("Total run time: %f seconds\n", duration/1000.00); - + float duration_compute = std::chrono::duration(stop_clock - start_clock_exec).count(); + float duration_total = std::chrono::duration(stop_clock - start_clock_init).count(); + printf("Compute time: %f seconds\n", duration_compute/1000.00); + printf("Total run time: %f seconds\n", duration_total/1000.00); printf("Iter:%i\n", iter); printf("M:%i\n", m);