Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 5194ea4

Browse files
Extract CudaProfiler from test_harness and graduate to cuda.h
1 parent ff658b9 commit 5194ea4

File tree

3 files changed

+25
-23
lines changed

3 files changed

+25
-23
lines changed

include/tc/core/cuda/cuda.h

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
#include <stdexcept>
2828

2929
#include <cuda.h>
30+
#include <cuda_profiler_api.h>
3031
#include <cuda_runtime.h>
3132

3233
#include <glog/logging.h>
@@ -70,6 +71,8 @@
7071

7172
namespace tc {
7273

74+
DECLARE_bool(use_nvprof);
75+
7376
struct WithDevice {
7477
WithDevice(size_t g) : newGpu(g) {
7578
int dev;
@@ -111,4 +114,17 @@ class CudaGPUInfo {
111114
std::vector<size_t> sharedMemSizes_;
112115
};
113116

117+
// Scoped guard around the CUDA profiler: constructing an instance calls
// cudaProfilerStart() and destroying it calls cudaProfilerStop(), each only
// when FLAGS_use_nvprof is true at that moment.
// NOTE(review): the flag is read independently in the constructor and in the
// destructor, so a change in between would leave start/stop unpaired, and the
// return values of both CUDA calls are discarded -- confirm this is intended.
struct CudaProfiler {
118+
CudaProfiler() {
119+
// Begin profiling for the lifetime of this object if the flag is set.
if (FLAGS_use_nvprof) {
120+
cudaProfilerStart();
121+
}
122+
}
123+
~CudaProfiler() {
124+
// End profiling at scope exit if the flag is set.
if (FLAGS_use_nvprof) {
125+
cudaProfilerStop();
126+
}
127+
}
128+
};
129+
114130
} // namespace tc

src/core/cuda/cuda.cc

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,8 @@
2626
#include "tc/core/flags.h"
2727

2828
namespace tc {
29+
DEFINE_bool(use_nvprof, false, "Start / stop nvprof");
30+
2931
namespace {
3032

3133
std::tuple<std::vector<std::string>, std::vector<size_t>> init() {

test/test_harness.h

Lines changed: 7 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515
*/
1616
#pragma once
1717

18-
#include <cuda_profiler_api.h>
1918
#include <gtest/gtest.h>
2019
#include <mutex>
2120
#include <string>
@@ -32,23 +31,8 @@
3231
#include "tc/c2/tc_op.h"
3332
#include "tc/core/cuda/cuda.h"
3433

35-
DEFINE_bool(use_nvprof, false, "Start / stop nvprof");
36-
3734
namespace caffe2 {
3835

39-
struct CudaProfiler {
40-
CudaProfiler() {
41-
if (FLAGS_use_nvprof) {
42-
cudaProfilerStart();
43-
}
44-
}
45-
~CudaProfiler() {
46-
if (FLAGS_use_nvprof) {
47-
cudaProfilerStop();
48-
}
49-
}
50-
};
51-
5236
// Overload for a CPU context: returns a default-constructed caffe2::TensorCPU.
// The ctx argument is not used in the body.
caffe2::TensorCPU context2tensor(caffe2::CPUContext& ctx) {
5337
return caffe2::TensorCPU();
5438
}
@@ -315,7 +299,7 @@ struct TestHarness {
315299

316300
void RunReference() {
317301
ASSERT_TRUE(net_ref.get());
318-
CudaProfiler p;
302+
tc::CudaProfiler p;
319303
ASSERT_TRUE(net_ref->Run());
320304
}
321305

@@ -326,7 +310,7 @@ struct TestHarness {
326310

327311
void Run() {
328312
ASSERT_TRUE(op_test.get());
329-
CudaProfiler p;
313+
tc::CudaProfiler p;
330314
ASSERT_TRUE(op_test->Run());
331315
}
332316

@@ -406,7 +390,7 @@ struct TestHarness {
406390
unique_ptr<OperatorBase> op_g(CreateOperator(g_op, &w));
407391
ASSERT_TRUE(op_g.get());
408392
{
409-
CudaProfiler p;
393+
tc::CudaProfiler p;
410394
ASSERT_TRUE(op_g->Run());
411395
}
412396
}
@@ -424,7 +408,7 @@ struct TestHarness {
424408
unique_ptr<NetBase> ref_net(CreateNet(ref_net_def, &w1));
425409
ASSERT_TRUE(ref_net.get());
426410
{
427-
CudaProfiler p;
411+
tc::CudaProfiler p;
428412
ASSERT_TRUE(ref_net->Run());
429413
}
430414

@@ -433,7 +417,7 @@ struct TestHarness {
433417
unique_ptr<NetBase> net(CreateNet(net_def, &w2));
434418
ASSERT_TRUE(net.get());
435419
{
436-
CudaProfiler p;
420+
tc::CudaProfiler p;
437421
ASSERT_TRUE(net->Run());
438422
}
439423

@@ -467,7 +451,7 @@ struct TestHarness {
467451
unique_ptr<NetBase> net(CreateNet(net_def, &w1));
468452
ASSERT_TRUE(net.get());
469453
{
470-
CudaProfiler p;
454+
tc::CudaProfiler p;
471455
ASSERT_TRUE(net->Run());
472456
}
473457
RunGradient(w1, *net_def.mutable_op()->Mutable(0));
@@ -477,7 +461,7 @@ struct TestHarness {
477461
unique_ptr<OperatorBase> op(CreateOperator(op_def, &w2));
478462
ASSERT_TRUE(op.get());
479463
{
480-
CudaProfiler p;
464+
tc::CudaProfiler p;
481465
ASSERT_TRUE(op->Run());
482466
}
483467
OperatorDef def = op_def;

0 commit comments

Comments
 (0)