Skip to content

Commit 0755044

Browse files
author
Y
committed
cudasift code updates for SYCL HIP and CUDA versions
1 parent b2aff3f commit 0755044

File tree

7 files changed

+278
-284
lines changed

7 files changed

+278
-284
lines changed

cudaSift/CUDA/cudaSiftH.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double init
222222
printf("Total time for sift extraction = %.2f us\n\n", totTime);
223223
#endif
224224
}
225-
printf("Number of Points after sift extraction = %d\n\n", siftData.numPts);
225+
// printf("Number of Points after sift extraction = %d\n\n", siftData.numPts);
226226
}
227227

228228
int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, float lowestScale,

cudaSift/CUDA/cudautils.h

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -65,32 +65,6 @@ inline bool deviceInit(int dev)
6565
return true;
6666
}
6767

68-
class TimerGPU
69-
{
70-
public:
71-
cudaEvent_t start, stop;
72-
cudaStream_t stream;
73-
TimerGPU(cudaStream_t stream_ = 0) : stream(stream_)
74-
{
75-
cudaEventCreate(&start);
76-
cudaEventCreate(&stop);
77-
cudaEventRecord(start, stream);
78-
}
79-
~TimerGPU()
80-
{
81-
cudaEventDestroy(start);
82-
cudaEventDestroy(stop);
83-
}
84-
float read()
85-
{
86-
cudaEventRecord(stop, stream);
87-
cudaEventSynchronize(stop);
88-
float time;
89-
cudaEventElapsedTime(&time, start, stop);
90-
return time;
91-
}
92-
};
93-
9468
class TimerCPU
9569
{
9670
static const int bits = 10;

cudaSift/CUDA/mainSift.cpp

Lines changed: 87 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -62,103 +62,112 @@ int main(int argc, char **argv)
6262
float matchingTime = 0.0;
6363
float ioReadTime = 0.0;
6464
float dataVerificationTime = 0.0;
65+
int data_verification_flag = 0;
66+
int iterations = 50; //// Running for 50 iterations to average out the initial heavy loading time in CUDA
6567

66-
// Read images using OpenCV
67-
cv::Mat limg, rimg;
68-
auto ioRead_start = std::chrono::steady_clock::now();
69-
if (imgSet)
70-
{
71-
cv::imread("../../inputData/left.pgm", 0).convertTo(limg, CV_32FC1);
72-
cv::imread("../../inputData/righ.pgm", 0).convertTo(rimg, CV_32FC1);
73-
}
74-
else
68+
auto setDevice_start = std::chrono::steady_clock::now();
69+
cudaSetDevice(0);
70+
auto setDevice_stop = std::chrono::steady_clock::now();
71+
std::cout << "cudaSetDevice Time is " << std::chrono::duration<float, std::milli>(setDevice_stop - setDevice_start).count() << " ms" << std::endl;
72+
73+
for (int i = 0; i < iterations; ++i)
7574
{
76-
cv::imread("../../inputData/img1.png", 0).convertTo(limg, CV_32FC1);
77-
cv::imread("../../inputData/img2.png", 0).convertTo(rimg, CV_32FC1);
78-
}
79-
auto ioRead_stop = std::chrono::steady_clock::now();
80-
ioReadTime = std::chrono::duration<float, std::micro>(ioRead_stop - ioRead_start).count();
75+
// Read images using OpenCV
76+
cv::Mat limg, rimg;
77+
auto ioRead_start = std::chrono::steady_clock::now();
78+
if (imgSet)
79+
{
80+
cv::imread("../../inputData/left.pgm", 0).convertTo(limg, CV_32FC1);
81+
cv::imread("../../inputData/righ.pgm", 0).convertTo(rimg, CV_32FC1);
82+
}
83+
else
84+
{
85+
cv::imread("../../inputData/img1.png", 0).convertTo(limg, CV_32FC1);
86+
cv::imread("../../inputData/img2.png", 0).convertTo(rimg, CV_32FC1);
87+
}
88+
auto ioRead_stop = std::chrono::steady_clock::now();
89+
ioReadTime = std::chrono::duration<float, std::micro>(ioRead_stop - ioRead_start).count();
8190

82-
unsigned int w = limg.cols;
83-
unsigned int h = limg.rows;
84-
std::cout << "Image size = (" << w << "," << h << ")" << std::endl;
91+
unsigned int w = limg.cols;
92+
unsigned int h = limg.rows;
93+
std::cout << "Image size = (" << w << "," << h << ")" << std::endl;
8594

86-
// Initial Cuda images and download images to device
87-
std::cout << "Initializing data..." << std::endl;
88-
cudaSetDevice(0);
89-
CudaImage img1, img2;
95+
// Initial Cuda images and download images to device
96+
std::cout << "Initializing data..." << std::endl;
97+
// cudaSetDevice(0);
98+
CudaImage img1, img2;
9099

91-
img1.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)limg.data);
92-
img2.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)rimg.data);
93-
img1.Download(imageInitTime);
94-
img2.Download(imageInitTime);
100+
img1.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)limg.data);
101+
img2.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)rimg.data);
102+
img1.Download(imageInitTime);
103+
img2.Download(imageInitTime);
95104

96-
// Extract Sift features from images
97-
SiftData siftData1, siftData2;
98-
float initBlur = 1.0f;
99-
float thresh = (imgSet ? 4.5f : 2.0f);
105+
// Extract Sift features from images
106+
SiftData siftData1, siftData2;
107+
float initBlur = 1.0f;
108+
float thresh = (imgSet ? 4.5f : 2.0f);
100109

101-
InitSiftData(siftData1, imageInitTime, 32768, true, true);
102-
InitSiftData(siftData2, imageInitTime, 32768, true, true);
110+
InitSiftData(siftData1, imageInitTime, 32768, true, true);
111+
InitSiftData(siftData2, imageInitTime, 32768, true, true);
103112

104-
// A bit of benchmarking
105-
// for (int thresh1=1.00f;thresh1<=4.01f;thresh1+=0.50f) {
106-
float *memoryTmp = AllocSiftTempMemory(w, h, 5, imageInitTime, false);
107-
for (int i = 0; i < 50; i++)
108-
{
109-
float time = 0.0f; // set total time to init time
110-
ExtractSift(siftData1, img1, 5, initBlur, thresh, time, 0.0f, false, memoryTmp);
111-
extractSiftTime += time;
112-
time = 0.0f;
113-
ExtractSift(siftData2, img2, 5, initBlur, thresh, time, 0.0f, false, memoryTmp);
114-
extractSiftTime += time;
115-
}
116-
FreeSiftTempMemory(memoryTmp);
113+
// A bit of benchmarking
114+
// for (int thresh1=1.00f;thresh1<=4.01f;thresh1+=0.50f) {
115+
float *memoryTmp = AllocSiftTempMemory(w, h, 5, imageInitTime, false);
116+
for (int i = 0; i < 50; i++)
117+
{
118+
float time = 0.0f; // set total time to init time
119+
ExtractSift(siftData1, img1, 5, initBlur, thresh, time, 0.0f, false, memoryTmp);
120+
extractSiftTime += time;
121+
time = 0.0f;
122+
ExtractSift(siftData2, img2, 5, initBlur, thresh, time, 0.0f, false, memoryTmp);
123+
extractSiftTime += time;
124+
}
125+
FreeSiftTempMemory(memoryTmp);
117126

118-
// Match Sift features and find a homography
119-
for (int i = 0; i < 1; i++)
120-
MatchSiftData(siftData1, siftData2, matchingTime);
121-
float homography[9];
122-
int numMatches;
123-
FindHomography(siftData1, homography, &numMatches, matchingTime, 10000, 0.00f, 0.80f, 5.0);
124-
int numFit = ImproveHomography(siftData1, homography, 5, 0.00f, 0.80f, 3.0);
125-
float matchPercentage = 100.0f * numFit / std::min(siftData1.numPts, siftData2.numPts);
127+
// Match Sift features and find a homography
128+
for (int i = 0; i < 1; i++)
129+
MatchSiftData(siftData1, siftData2, matchingTime);
130+
float homography[9];
131+
int numMatches;
132+
FindHomography(siftData1, homography, &numMatches, matchingTime, 10000, 0.00f, 0.80f, 5.0);
133+
int numFit = ImproveHomography(siftData1, homography, 5, 0.00f, 0.80f, 3.0);
134+
float matchPercentage = 100.0f * numFit / std::min(siftData1.numPts, siftData2.numPts);
126135

127-
std::cout << "Number of original features: " << siftData1.numPts << " " << siftData2.numPts << std::endl;
128-
std::cout << "Number of matching features: " << numFit << " " << numMatches << " " << matchPercentage << "% " << initBlur << " " << thresh << "\n"
129-
<< std::endl;
136+
std::cout << "Number of original features: " << siftData1.numPts << " " << siftData2.numPts << std::endl;
137+
std::cout << "Number of matching features: " << numFit << " " << numMatches << " " << matchPercentage << "% " << initBlur << " " << thresh << "\n"
138+
<< std::endl;
130139

131140
#ifdef DEVICE_TIMER
132-
totTime = imageInitTime + extractSiftTime + matchingTime;
141+
totTime = imageInitTime + extractSiftTime + matchingTime;
133142

134-
std::cout << "Images initialization time = " << imageInitTime / 1000 << " ms" << std::endl;
135-
std::cout << "Feature extraction time = " << extractSiftTime / 1000 << " ms" << std::endl;
136-
std::cout << "Matching time = " << matchingTime / 1000 << " ms"
137-
<< "\n"
138-
<< std::endl;
139-
std::cout << "Total Deivce Time = " << totTime / 1000 << " ms"
140-
<< "\n"
141-
<< std::endl;
143+
std::cout << "Images initialization time = " << imageInitTime / 1000 << " ms" << std::endl;
144+
std::cout << "Feature extraction time = " << extractSiftTime / 1000 << " ms" << std::endl;
145+
std::cout << "Matching time = " << matchingTime / 1000 << " ms"
146+
<< "\n"
147+
<< std::endl;
148+
std::cout << "Total Deivce Time = " << totTime / 1000 << " ms"
149+
<< "\n"
150+
<< std::endl;
142151
#endif
143152

144-
// data validation
145-
auto dataVerficationTimer_start = std::chrono::steady_clock::now();
146-
int data_verification_flag = Utility::RunDataVerification(thresh, matchPercentage);
147-
auto dataVerficationTimer_stop = std::chrono::steady_clock::now();
148-
dataVerificationTime = std::chrono::duration<float, std::micro>(dataVerficationTimer_stop - dataVerficationTimer_start).count();
149-
// // Print out and store summary data
150-
// // PrintMatchData(siftData1, siftData2, img1);
151-
// cv::imwrite("data/limg_pts.pgm", limg);
153+
// data validation
154+
auto dataVerficationTimer_start = std::chrono::steady_clock::now();
155+
data_verification_flag = Utility::RunDataVerification(thresh, matchPercentage);
156+
auto dataVerficationTimer_stop = std::chrono::steady_clock::now();
157+
dataVerificationTime += std::chrono::duration<float, std::micro>(dataVerficationTimer_stop - dataVerficationTimer_start).count();
158+
// // Print out and store summary data
159+
// // PrintMatchData(siftData1, siftData2, img1);
160+
// cv::imwrite("data/limg_pts.pgm", limg);
152161

153-
// MatchAll(siftData1, siftData2, homography);
154-
155-
// Free Sift data from device
156-
FreeSiftData(siftData1);
157-
FreeSiftData(siftData2);
162+
// MatchAll(siftData1, siftData2, homography);
158163

164+
// Free Sift data from device
165+
FreeSiftData(siftData1);
166+
FreeSiftData(siftData2);
167+
}
159168
auto totalProgTimer_end = std::chrono::steady_clock::now();
160169
float totalProgramTime = std::chrono::duration<float, std::micro>(totalProgTimer_end - totalProgTimer_start).count() - ioReadTime - dataVerificationTime;
161-
std::cout << "Total workload time = " << totalProgramTime / 1000 << " ms"
170+
std::cout << "Avg workload time = " << totalProgramTime / (1000 * iterations) << " ms"
162171
<< "\n"
163172
<< std::endl;
164173
return data_verification_flag;

cudaSift/HIP/cudaSiftH.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double init
244244
#endif
245245
}
246246
#endif
247-
printf("Number of Points after sift extraction = %d\n\n", siftData.numPts);
247+
// printf("Number of Points after sift extraction = %d\n\n", siftData.numPts);
248248
}
249249

250250
int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, float lowestScale,

0 commit comments

Comments
 (0)