@@ -248,52 +248,68 @@ void TuningHarness<Backend>::runOneIteration(
248
248
CHECK (executors_.empty ());
249
249
CHECK (configurations_.empty ());
250
250
auto & candidates = searchStrategy.candidatesOfStep (step);
251
- // Initialize for this round
252
- currentCompilationJob_.store (0 );
253
- numEvaluations_.store (0 );
254
- Printer printer (
255
- iteration,
256
- step,
257
- candidates.size (),
258
- currentCompilationJob_,
259
- numEvaluations_);
260
- auto logIterations = FLAGS_tuner_gen_log_generations;
261
- ScopeGuard sgPrinter ([logIterations, &printer]() {
262
- printer.stop ();
263
- if (logIterations) {
264
- printer.printAll ();
265
- }
251
+ auto firstNew = std::partition (
252
+ candidates.begin (),
253
+ candidates.end (),
254
+ [](const std::unique_ptr<CandidateConfiguration>& c) {
255
+ return c->runtime != Duration::zero ();
256
+ });
257
+ GeneticSearch::Population newCandidates (
258
+ std::distance (firstNew, candidates.end ()));
259
+ std::move (firstNew, candidates.end (), newCandidates.begin ());
260
+ ScopeGuard candidatesSG ([&]() {
261
+ std::move (newCandidates.begin (), newCandidates.end (), firstNew);
266
262
});
267
263
268
- // Just spawn and join new threads for each iteration
269
- std::vector<std::thread> cpuCompilationThreads;
270
- cpuCompilationThreads.reserve (FLAGS_tuner_threads);
271
- ScopeGuard sgCompilationThreads ([&cpuCompilationThreads]() {
272
- for (auto & cpuCompilationThread : cpuCompilationThreads) {
273
- cpuCompilationThread.join ();
274
- }
275
- });
276
- for (size_t i = 0 ; i < FLAGS_tuner_threads; ++i) {
277
- cpuCompilationThreads.emplace_back (
278
- [this , &candidates]() { this ->doCompile (candidates); });
279
- }
264
+ if (not newCandidates.empty ()) {
265
+ auto populationSize = newCandidates.size ();
266
+ // Initialize for this round
267
+ currentCompilationJob_.store (0 );
268
+ numEvaluations_.store (0 );
269
+ Printer printer (
270
+ iteration,
271
+ step,
272
+ populationSize,
273
+ currentCompilationJob_,
274
+ numEvaluations_);
275
+ auto logIterations = FLAGS_tuner_gen_log_generations;
276
+ ScopeGuard sgPrinter ([logIterations, &printer]() {
277
+ printer.stop ();
278
+ if (logIterations) {
279
+ printer.printAll ();
280
+ }
281
+ });
280
282
281
- // Just spawn and join new threads for each device
282
- std::vector<std::thread> workerThreads;
283
- workerThreads.reserve (devices.size ());
284
- LOG_IF (INFO, tc::FLAGS_debug_tuner)
285
- << " Start evaluation: " << devices.size () << " " << executors_.size ()
286
- << " " << configurations_.size ();
287
- ScopeGuard sgDeviceWorkerThreads ([&workerThreads]() {
288
- for (auto & workerThread : workerThreads) {
289
- workerThread.join ();
283
+ // Just spawn and join new threads for each iteration
284
+ std::vector<std::thread> cpuCompilationThreads;
285
+ cpuCompilationThreads.reserve (FLAGS_tuner_threads);
286
+ ScopeGuard sgCompilationThreads ([&cpuCompilationThreads]() {
287
+ for (auto & cpuCompilationThread : cpuCompilationThreads) {
288
+ cpuCompilationThread.join ();
289
+ }
290
+ });
291
+ for (size_t i = 0 ; i < FLAGS_tuner_threads; ++i) {
292
+ cpuCompilationThreads.emplace_back (
293
+ [this , &newCandidates]() { this ->doCompile (newCandidates); });
290
294
}
291
- });
292
- auto populationSize = candidates.size ();
293
- for (auto device : devices) {
294
- workerThreads.emplace_back ([this , device, populationSize, &printer]() {
295
- this ->doEvaluate (device, populationSize, printer);
295
+
296
+ // Just spawn and join new threads for each device
297
+ std::vector<std::thread> workerThreads;
298
+ workerThreads.reserve (devices.size ());
299
+ LOG_IF (INFO, tc::FLAGS_debug_tuner)
300
+ << " Start evaluation: " << devices.size () << " "
301
+ << executors_.size () << " " << configurations_.size ();
302
+ ScopeGuard sgDeviceWorkerThreads ([&workerThreads]() {
303
+ for (auto & workerThread : workerThreads) {
304
+ workerThread.join ();
305
+ }
296
306
});
307
+ for (auto device : devices) {
308
+ workerThreads.emplace_back (
309
+ [this , device, populationSize, &printer]() {
310
+ this ->doEvaluate (device, populationSize, printer);
311
+ });
312
+ }
297
313
}
298
314
}
299
315
searchStrategy.finishStep (step);
0 commit comments