Skip to content

Commit 5c4cd52

Browse files
authored
Merge pull request #96 from rafbiels/dl-cifar-fix-host-overhead
[dl-cifar] Reduce the impact of host computations on benchmark results
2 parents 6555657 + 39e398a commit 5c4cd52

File tree

4 files changed

+25
-10
lines changed

4 files changed

+25
-10
lines changed

dl-cifar/CUDA/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ option(USE_SM "Specifies which streaming multiprocessor archite
 34 |  34 |   option(DEVICE_TIMER "Build using Device Timer" OFF)
 35 |  35 |
 36 |  36 |   set(DEF_WL_CXX_FLAGS " ")
 37 |     | - set(DEF_GENERAL_CXX_FLAGS " -O2 ")
    |  37 | + set(DEF_GENERAL_CXX_FLAGS " -O3 -ffast-math ")
 38 |  38 |   set(DEF_COMBINED_CXX_FLAGS "${DEF_GENERAL_CXX_FLAGS} ${DEF_WL_CXX_FLAGS}")
 39 |  39 |
 40 |  40 |   set(SOURCES
@@ -114,4 +114,4 @@ set(CUDA_SEPARABLE_COMPILATION ON)
114 | 114 |   message(STATUS "CXX Compilation flags to: ${CMAKE_CXX_FLAGS}")
115 | 115 |   link_libraries(stdc++fs cublas cudnn)
116 | 116 |   cuda_add_executable(${PROJECT_NAME} ${SOURCES})
117 |     | - target_link_libraries(${PROJECT_NAME} ${CUDA_LIBRARIES})
    | 117 | + target_link_libraries(${PROJECT_NAME} ${CUDA_LIBRARIES})

dl-cifar/HIP/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ option(DEVICE_TIMER "Build using Device Timer" OFF)
 33 |  33 |
 34 |  34 |
 35 |  35 |   set(DEF_WL_CXX_FLAGS " -D__HIP_PLATFORM_AMD__ ")
 36 |     | - set(DEF_GENERAL_CXX_FLAGS " -Wall -O3 -Wextra ")
    |  36 | + set(DEF_GENERAL_CXX_FLAGS " -Wall -O3 -ffast-math -Wextra ")
 37 |  37 |   set(DEF_COMBINED_CXX_FLAGS "${DEF_GENERAL_CXX_FLAGS} ${DEF_WL_CXX_FLAGS}")
 38 |  38 |
 39 |  39 |

dl-cifar/common/image_processing.h

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
 28 |  28 |   #include <cassert>
 29 |  29 |   #include <vector>
 30 |  30 |   #include <exception>
    |  31 | + #include <memory>
 31 |  32 |   #include "tracing.h"
 32 |  33 |   #include"handle.h"
 33 |  34 |   #include "upsample.h"
@@ -102,10 +103,24 @@ namespace dl_cifar::common {
102 | 103 |   static void initImage(float* image, int imageSize) {
103 | 104 |   Tracer::func_begin("ImageProcessor::initImage");
104 | 105 |
105 |     | - unsigned seed = 123456789;
106 |     | - for (int index = 0; index < imageSize; index++) {
    | 106 | + static size_t cacheSize{0};
    | 107 | + static std::unique_ptr<float[]> cacheImage{};
    | 108 | + static unsigned seed = 123456789;
    | 109 | +
    | 110 | + // grow the cache allocation to image size
    | 111 | + if (imageSize > cacheSize) {
    | 112 | + auto newCacheImage = std::make_unique<float[]>(imageSize);
    | 113 | + std::memcpy(newCacheImage.get(), cacheImage.get(), cacheSize*sizeof(float));
    | 114 | + cacheImage.reset(newCacheImage.release());
    | 115 | + }
    | 116 | +
    | 117 | + // fill image with cached data and compute the remaining part
    | 118 | + std::memcpy(image, cacheImage.get(), std::min(cacheSize,static_cast<size_t>(imageSize))*sizeof(float));
    | 119 | + while (cacheSize < imageSize) {
    | 120 | + ++cacheSize;
107 | 121 |   seed = (1103515245 * seed + 12345) & 0xffffffff;
108 |     | - image[index] = float(seed) * 2.3283064e-10; // 2^-32
    | 122 | + cacheImage[cacheSize-1] = float(seed) * 2.3283064e-10; // 2^-32
    | 123 | + image[cacheSize-1] = cacheImage[cacheSize-1];
109 | 124 |   }
110 | 125 |   Tracer::func_end("ImageProcessor::initImage");
111 | 126 |

dl-cifar/common/vit/vit.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -181,10 +181,10 @@ namespace dl_cifar::common {
181 | 181 |   ImageProcessor::resize(langHandle, d_cifarRawImgs, d_resizedImgs, selectedVitParams.batchSize,
182 | 182 |   VitConfigs::cifarNoOfChannels, VitConfigs::cifarImgWidth, VitConfigs::cifarImgWidth,
183 | 183 |   selectedVitParams.imgWidth, selectedVitParams.imgHeight);
184 |     | - ImageProcessor::resizeInHost(langHandle, h_cifarRawImgs, h_resizedImgs, selectedVitParams.batchSize,
185 |     | - VitConfigs::cifarNoOfChannels, VitConfigs::cifarImgWidth, VitConfigs::cifarImgWidth,
186 |     | - selectedVitParams.imgWidth, selectedVitParams.imgHeight);
187 |     | - langHandle->memCpyH2D(d_resizedImgs, h_resizedImgs, sizeof(float) * resizedSize, true);
    | 184 | + // ImageProcessor::resizeInHost(langHandle, h_cifarRawImgs, h_resizedImgs, selectedVitParams.batchSize,
    | 185 | + // VitConfigs::cifarNoOfChannels, VitConfigs::cifarImgWidth, VitConfigs::cifarImgWidth,
    | 186 | + // selectedVitParams.imgWidth, selectedVitParams.imgHeight);
    | 187 | + // langHandle->memCpyH2D(d_resizedImgs, h_resizedImgs, sizeof(float) * resizedSize, true);
188 | 188 |
189 | 189 |
190 | 190 |

0 commit comments

Comments
 (0)