Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 67b5056

Browse files
Merge pull request #317 from facebookresearch/pr/warning
Silence some "comparison between signed and unsigned integer expressions" warnings
2 parents 20f696b + 68ac216 commit 67b5056

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

48 files changed

+284
-272
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ if (WITH_CAFFE2)
176176
include_directories(third-party/caffe2/third_party/eigen)
177177

178178
find_path(CAFFE2_INCLUDE_DIR NAMES caffe2)
179-
include_directories(${CAFFE2_INCLUDE_DIR})
179+
include_directories(SYSTEM ${CAFFE2_INCLUDE_DIR})
180180
find_library(CAFFE2_CPU_LIBRARIES NAMES caffe2 PATHS ${CMAKE_INSTALL_PREFIX} PATH_SUFFIXES lib lib64)
181181
message(STATUS "Found Caffe2_CPU: ${CAFFE2_CPU_LIBRARIES}")
182182
if (WITH_CUDA)

tc/aten/aten_compiler-inl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ void prepareOutputs(
4949
throw lang::ErrorReport(func) << "expected " << tensorInfo.size()
5050
<< " outputs but found " << outputs.size();
5151
}
52-
for (int i = 0; i < tensorInfo.size(); ++i) {
52+
for (size_t i = 0; i < tensorInfo.size(); ++i) {
5353
auto info = tensorInfo[i];
5454
auto stype = at::toScalarType(info->dtype);
5555
if (outputs.size() < tensorInfo.size()) {
@@ -121,7 +121,7 @@ void ATenCompilationUnit<ExecutorType>::uncheckedRun(
121121
constexpr auto kReservedSize = 8;
122122
std::vector<const void*> I(kReservedSize, nullptr);
123123
std::vector<void*> O(kReservedSize, nullptr);
124-
int i;
124+
size_t i;
125125
for (i = 0; i < inputs.size(); ++i) {
126126
if (i < kReservedSize) {
127127
I[i] = inputs[i].data_ptr();

tc/autotuner/genetic_autotuner.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,11 @@ llvm::Optional<CudaMappingOptions> GeneticAutotuner::tune(
9898
CudaMappingOptions baseMapping,
9999
std::vector<CudaMappingOptions> startingPoints,
100100
const TuningParameterFixer& fixedParams) {
101-
CHECK_EQ(1, tcNameMap_.count(tcName)) << "Error looking up " << tcName;
101+
CHECK_EQ(1u, tcNameMap_.count(tcName)) << "Error looking up " << tcName;
102102
enableOrLoadCache(cacheFileName);
103103

104104
if (FLAGS_tuner_gen_restore_from_proto && !(cacheFileName.empty())) {
105-
CHECK_GT(inputs.size(), 0);
105+
CHECK_GT(inputs.size(), 0u);
106106

107107
auto restoredCandidates = load(
108108
cacheFileName,
@@ -185,7 +185,7 @@ llvm::Optional<CudaMappingOptions> GeneticAutotuner::tune(
185185
ee.define(tc_);
186186
auto outputPtrs = ee.inferOutputTensorInfo(tcName, inputs.begin()->second);
187187

188-
CHECK_GT(inputs.size(), 0);
188+
CHECK_GT(inputs.size(), 0u);
189189
return tc::autotune::getBestOptions(
190190
canonicalTc(tcNameMap_.at(tcName)), inputs.begin()->second, outputPtrs);
191191
}

tc/autotuner/genetic_search.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ template <typename RNG>
4949
void mutate(
5050
CandidateConfiguration& candidate,
5151
double rate,
52-
int iterations,
52+
size_t iterations,
5353
RNG& rng) {
5454
auto shouldMutate = [&]() -> bool {
5555
return std::discrete_distribution<int>{static_cast<double>(100 - rate),
@@ -179,7 +179,7 @@ GeneticSearch::GeneticSearch(
179179
}
180180
if (kMaxPopulationSize - population.size() > 0) {
181181
auto oldSize = population.size();
182-
for (int i = oldSize; i < kMaxPopulationSize; ++i) {
182+
for (size_t i = oldSize; i < kMaxPopulationSize; ++i) {
183183
population.emplace_back(
184184
make_unique<CandidateConfiguration>(*population.front()));
185185
}
@@ -202,7 +202,7 @@ GeneticSearch::GeneticSearch(
202202
rng{std::random_device{}()} {
203203
restoreRngState(rng);
204204
VALIDATE();
205-
for (int i = 0; i < kMaxPopulationSize; ++i) {
205+
for (size_t i = 0; i < kMaxPopulationSize; ++i) {
206206
population.emplace_back(make_unique<CandidateConfiguration>(conf));
207207
}
208208
randomizePopulation(population.begin(), population.end(), rng);
@@ -320,18 +320,18 @@ void GeneticSearch::updateParameters() {
320320
"when autotuning a TC operating on small tensors. The next "
321321
"generation will be randomly initialized.";
322322
population.resize(0);
323-
for (int i = 0; i < kMaxPopulationSize; ++i) {
323+
for (size_t i = 0; i < kMaxPopulationSize; ++i) {
324324
population.emplace_back(
325325
make_unique<CandidateConfiguration>(lastBestConf));
326326
}
327327
// Don't lose the first one which was the best from before
328-
CHECK_LT(0, population.size());
328+
CHECK_LT(0u, population.size());
329329
randomizePopulation(population.begin() + 1, population.end(), rng);
330330
return;
331331
}
332332

333333
breed();
334-
for (int i = kNumberElites; i < population.size(); ++i) {
334+
for (size_t i = kNumberElites; i < population.size(); ++i) {
335335
mutate(*population[i], kMutationRate, kMutateIterations, rng);
336336
}
337337
}

tc/autotuner/genetic_tuning_harness.cc

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ std::vector<size_t> inputDivisorsAndPowers2(
146146
}
147147

148148
size_t largestDim(const std::vector<const DLTensor*>& inputs) {
149-
CHECK_GE(inputs.size(), 0);
149+
CHECK_GE(inputs.size(), 0u);
150150
auto maxElement = std::max_element(
151151
inputs.begin(), inputs.end(), [](const DLTensor* a, const DLTensor* b) {
152152
return a->ndim < b->ndim;
@@ -157,7 +157,7 @@ size_t largestDim(const std::vector<const DLTensor*>& inputs) {
157157
} // namespace
158158

159159
void GeneticTunerHarness::setupTuningParameters() {
160-
CHECK_GT(kInputs_.size(), 0);
160+
CHECK_GT(kInputs_.size(), 0u);
161161
auto range = inputDivisorsAndPowers2(kInputs_.begin()->second);
162162
auto rangeUpTo64 = filterHigherThan(range, 64);
163163

@@ -208,6 +208,9 @@ std::vector<size_t> parseGpus() {
208208
LOG(GSTREAM) << line; \
209209
}
210210

211+
constexpr size_t GeneticTunerHarness::kEarlyPruneFactor;
212+
constexpr size_t GeneticTunerHarness::kCatastrophicPerfFactor;
213+
211214
// This function is run on a single pre-determined GPU, in a single thread
212215
// It takes the input/output DLTensor objects that reside on that GPU
213216
//
@@ -222,7 +225,7 @@ bool GeneticTunerHarness::warmupOrPrune(
222225
const std::vector<DLTensor*>& outputs,
223226
const std::vector<const DLTensor*>& inputs,
224227
size_t handle,
225-
size_t bestTimeSoFar) {
228+
Duration bestTimeSoFar) {
226229
// Pruning based on number of threads: if you don't hit at least k warps
227230
// (default k = 8; 256 total threads, controlled by
228231
// FLAGS_tuner_min_launch_total_threads) then it's likely the kernel is not
@@ -276,10 +279,8 @@ bool GeneticTunerHarness::warmupOrPrune(
276279
}
277280

278281
// 1.b.
279-
constexpr size_t kCatastrophicPerfFactor = 100;
280-
if (bestTimeSoFar < std::numeric_limits<size_t>::max() and
281-
prof >= std::chrono::microseconds(
282-
(kCatastrophicPerfFactor * bestTimeSoFar))) {
282+
if (bestTimeSoFar < Duration::max() and
283+
prof >= kCatastrophicPerfFactor * bestTimeSoFar) {
283284
return true;
284285
}
285286

@@ -291,8 +292,8 @@ bool GeneticTunerHarness::warmupOrPrune(
291292
// 2. After reasonable warmup, look at the performance and prune with
292293
// kEarlyPruneFactor
293294
prof = engine.run(handle, inputs, outputs, true);
294-
if (bestTimeSoFar < std::numeric_limits<size_t>::max() and
295-
prof >= std::chrono::microseconds((kEarlyPruneFactor * bestTimeSoFar))) {
295+
if (bestTimeSoFar < Duration::max() and
296+
prof >= kEarlyPruneFactor * bestTimeSoFar) {
296297
return true;
297298
}
298299

@@ -346,9 +347,9 @@ void GeneticTunerHarness::doGpuWork(
346347
ExecutorType& engine,
347348
Printer& printer) {
348349
WithDevice wd(gpu);
349-
CHECK_EQ(1, kInputs_.count(gpu));
350+
CHECK_EQ(1u, kInputs_.count(gpu));
350351
auto& inputs = kInputs_.at(gpu);
351-
CHECK_EQ(1, outputs_.count(gpu));
352+
CHECK_EQ(1u, outputs_.count(gpu));
352353
auto& outputs = outputs_.at(gpu);
353354

354355
while (true) {
@@ -394,7 +395,7 @@ void GeneticTunerHarness::doGpuWork(
394395

395396
std::vector<Duration> runtimes;
396397
try {
397-
size_t bestTimeSoFar;
398+
Duration bestTimeSoFar;
398399
{
399400
std::lock_guard<std::mutex> lock(bestTimeMtx_);
400401
bestTimeSoFar = bestTime_;
@@ -451,8 +452,8 @@ void GeneticTunerHarness::doGpuWork(
451452
// Save best time under lock
452453
{
453454
std::lock_guard<std::mutex> lock(bestTimeMtx_);
454-
if (prof_us < bestTime_) {
455-
bestTime_ = prof_us;
455+
if (prof < bestTime_) {
456+
bestTime_ = prof;
456457
bestCudaMappingOptions_ = options;
457458
}
458459
}
@@ -484,7 +485,7 @@ void GeneticTunerHarness::runOneGeneration(size_t generation) {
484485
currentCompilationJob_.store(0);
485486
numEvaluations_.store(0);
486487
readyToEvaluate_.resize(0);
487-
for (int i = 0; i < kMaxPopulationSize; ++i) {
488+
for (size_t i = 0; i < kMaxPopulationSize; ++i) {
488489
readyToEvaluate_.emplace_back();
489490
readyToEvaluate_[i].store(false);
490491
}
@@ -509,7 +510,7 @@ void GeneticTunerHarness::runOneGeneration(size_t generation) {
509510
cpuCompilationThread.join();
510511
}
511512
});
512-
for (int i = 0; i < FLAGS_tuner_threads; ++i) {
513+
for (size_t i = 0; i < FLAGS_tuner_threads; ++i) {
513514
cpuCompilationThreads.emplace_back(
514515
[this, &engine]() { this->doCompile(engine); });
515516
}

tc/autotuner/genetic_tuning_harness.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class GeneticTunerHarness {
6363
const std::vector<DLTensor*>& outputs,
6464
const std::vector<const DLTensor*>& inputs,
6565
size_t handle,
66-
size_t bestTimeSoFar);
66+
Duration bestTimeSoFar);
6767

6868
/// Helper function to delegate compiling on the cpu to different threads
6969
template <typename ExecutorType>
@@ -85,7 +85,8 @@ class GeneticTunerHarness {
8585
public:
8686
static constexpr int kReducedWarmupIterations = 2;
8787
static constexpr int kReducedBenchmarkIterations = 10;
88-
static constexpr int kEarlyPruneFactor = 5;
88+
static constexpr size_t kEarlyPruneFactor = 5;
89+
static constexpr size_t kCatastrophicPerfFactor = 100;
8990

9091
const size_t kMaxPopulationSize;
9192
const uint8_t kCrossOverRate;
@@ -96,7 +97,7 @@ class GeneticTunerHarness {
9697

9798
private:
9899
std::mutex bestTimeMtx_;
99-
size_t bestTime_ = std::numeric_limits<size_t>::max();
100+
Duration bestTime_ = Duration::max();
100101
CudaMappingOptions bestCudaMappingOptions_;
101102

102103
const lang::TreeRef kTc_;

tc/autotuner/parameters.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ RangeParameter& RangeParameter::operator=(const RangeParameter& other) {
9797
}
9898

9999
void BoolParameter::selectOption(size_t idx) {
100-
CHECK_LE(idx, 1);
100+
CHECK_LE(idx, 1u);
101101
selectValue(idx);
102102
}
103103

tc/benchmarks/benchmark_fixture.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -121,14 +121,14 @@ struct Benchmark : public ::testing::Test {
121121
auto handle = atCompl.compile(name, inputs, mappingOptions);
122122
atCompl.run(name, inputs, outputs, handle);
123123
EXPECT_TRUE(checkFun(inputs, outputs));
124-
for (int i = 1; i < tc::FLAGS_benchmark_warmup; ++i) {
124+
for (size_t i = 1; i < tc::FLAGS_benchmark_warmup; ++i) {
125125
atCompl.run(name, inputs, outputs, handle);
126126
}
127127
std::vector<tc::Duration> kernelTimes;
128128
kernelTimes.reserve(tc::FLAGS_benchmark_iterations);
129129
std::vector<tc::Duration> totalTimes;
130130
totalTimes.reserve(tc::FLAGS_benchmark_iterations);
131-
for (int i = 0; i < tc::FLAGS_benchmark_iterations; ++i) {
131+
for (size_t i = 0; i < tc::FLAGS_benchmark_iterations; ++i) {
132132
kernelTimes.push_back(atCompl.run(name, inputs, outputs, handle, true));
133133
TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());
134134
auto time(std::chrono::system_clock::now());
@@ -201,12 +201,12 @@ struct Benchmark : public ::testing::Test {
201201
template <typename InitFunction, typename InplaceFunction>
202202
void Reference(InitFunction init, InplaceFunction compute) {
203203
auto res = init();
204-
for (int i = 1; i < tc::FLAGS_benchmark_warmup; ++i) {
204+
for (size_t i = 1; i < tc::FLAGS_benchmark_warmup; ++i) {
205205
compute(res);
206206
}
207207
std::vector<tc::Duration> times;
208208
times.reserve(tc::FLAGS_benchmark_iterations);
209-
for (int i = 0; i < tc::FLAGS_benchmark_iterations; ++i) {
209+
for (size_t i = 0; i < tc::FLAGS_benchmark_iterations; ++i) {
210210
auto time(std::chrono::system_clock::now());
211211
compute(res);
212212
TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());
@@ -278,14 +278,14 @@ struct Benchmark : public ::testing::Test {
278278
std::vector<at::Tensor> outputs;
279279
atCompl.run(name, inputs, outputs, handle);
280280
EXPECT_TRUE(checkFun(inputs, outputs));
281-
for (int i = 1; i < tc::FLAGS_benchmark_warmup; ++i) {
281+
for (size_t i = 1; i < tc::FLAGS_benchmark_warmup; ++i) {
282282
atCompl.run(name, inputs, outputs, handle);
283283
}
284284
std::vector<tc::Duration> kernelTimes;
285285
kernelTimes.reserve(tc::FLAGS_benchmark_iterations);
286286
std::vector<tc::Duration> totalTimes;
287287
totalTimes.reserve(tc::FLAGS_benchmark_iterations);
288-
for (int i = 0; i < tc::FLAGS_benchmark_iterations; ++i) {
288+
for (size_t i = 0; i < tc::FLAGS_benchmark_iterations; ++i) {
289289
kernelTimes.push_back(atCompl.run(name, inputs, outputs, handle, true));
290290
TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());
291291
auto time(std::chrono::system_clock::now());
@@ -389,14 +389,14 @@ struct Benchmark : public ::testing::Test {
389389
std::vector<at::Tensor> outputs;
390390
atCompl.run(kernelName, inputs, outputs, handle);
391391
EXPECT_TRUE(checkFun(inputs, outputs));
392-
for (int i = 1; i < tc::FLAGS_benchmark_warmup; ++i) {
392+
for (size_t i = 1; i < tc::FLAGS_benchmark_warmup; ++i) {
393393
atCompl.run(kernelName, inputs, outputs, handle);
394394
}
395395
std::vector<tc::Duration> kernelTimes;
396396
kernelTimes.reserve(tc::FLAGS_benchmark_iterations);
397397
std::vector<tc::Duration> totalTimes;
398398
totalTimes.reserve(tc::FLAGS_benchmark_iterations);
399-
for (int i = 0; i < tc::FLAGS_benchmark_iterations; ++i) {
399+
for (size_t i = 0; i < tc::FLAGS_benchmark_iterations; ++i) {
400400
kernelTimes.push_back(
401401
atCompl.run(kernelName, inputs, outputs, handle, true));
402402
TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());

tc/c2/tc_op.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ class TcOp : public Operator<Context> {
8585
virtual void setupDefaultGradCudaMappingOptions() {}
8686

8787
void prepareOutputs(const std::vector<const DLTensor*> tensorInfo) {
88-
for (int i = 0; i < tensorInfo.size(); ++i) {
88+
for (size_t i = 0; i < tensorInfo.size(); ++i) {
8989
auto info = tensorInfo[i];
9090
std::vector<int64_t> shape(info->shape, info->shape + info->ndim);
9191
Output(i)->Resize(shape);

tc/core/cuda/cuda_mapping_options-inl.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ namespace tc {
2020
// CudaDimView & CudaDim
2121
//
2222
CudaDim::CudaDim(std::vector<uint64_t> il) : ownedProto_(), view(ownedProto_) {
23-
CHECK_GT(il.size(), 0) << "list of values in CudaDimView must be non-empty";
24-
CHECK_LE(il.size(), 3) << "at most 3 values allowed in CudaDimView";
23+
CHECK_GT(il.size(), 0u) << "list of values in CudaDimView must be non-empty";
24+
CHECK_LE(il.size(), 3u) << "at most 3 values allowed in CudaDimView";
2525

2626
switch (il.size()) {
2727
case 3:
@@ -80,13 +80,13 @@ std::array<uint64_t, 3> CudaDimView::extractDefaultedArray() const {
8080
CudaDimView::defaultDim,
8181
CudaDimView::defaultDim};
8282
auto v = extractVector();
83-
CHECK_LE(v.size(), 3);
83+
CHECK_LE(v.size(), 3u);
8484
std::copy(v.begin(), v.end(), arr.begin());
8585
return arr;
8686
}
8787

8888
ValueAccessor<uint64_t> CudaDimView::operator[](size_t i) {
89-
CHECK_LT(i, 3) << "index overflow";
89+
CHECK_LT(i, 3u) << "index overflow";
9090
if (i == 0) {
9191
return ValueAccessor<uint64_t>(
9292
[this](uint64_t u) { this->proto.set_x(u); },
@@ -109,7 +109,7 @@ ValueAccessor<uint64_t> CudaDimView::operator[](size_t i) {
109109
}
110110

111111
uint64_t CudaDimView::operator[](size_t i) const {
112-
CHECK_LT(i, 3) << "index overflow";
112+
CHECK_LT(i, 3u) << "index overflow";
113113
if (i == 0) {
114114
return proto.x();
115115
} else if (i == 1) {
@@ -192,8 +192,8 @@ CudaMappingOptions::mapToThreads(uint64_t x, uint64_t y, uint64_t z) {
192192

193193
CudaMappingOptions& CudaMappingOptions::mapToThreads(
194194
const std::vector<uint64_t>& threads) {
195-
CHECK_GT(threads.size(), 0) << "expected at least one thread size";
196-
CHECK_LE(threads.size(), 3) << "expected at most three thread sizes";
195+
CHECK_GT(threads.size(), 0u) << "expected at least one thread size";
196+
CHECK_LE(threads.size(), 3u) << "expected at most three thread sizes";
197197

198198
uint64_t x = threads[0];
199199
uint64_t y = threads.size() > 1 ? threads[1] : CudaDimView::defaultDim;
@@ -216,8 +216,8 @@ CudaMappingOptions::mapToBlocks(uint64_t x, uint64_t y, uint64_t z) {
216216

217217
CudaMappingOptions& CudaMappingOptions::mapToBlocks(
218218
const std::vector<uint64_t>& blocks) {
219-
CHECK_GT(blocks.size(), 0) << "expected at least one thread size";
220-
CHECK_LE(blocks.size(), 3) << "expected at most three thread sizes";
219+
CHECK_GT(blocks.size(), 0u) << "expected at least one thread size";
220+
CHECK_LE(blocks.size(), 3u) << "expected at most three thread sizes";
221221

222222
uint64_t x = blocks[0];
223223
uint64_t y = blocks.size() > 1 ? blocks[1] : CudaDimView::defaultDim;

0 commit comments

Comments
 (0)