facebookresearch
diff --git a/‎tc/aten/aten_autotuner-inl.h
Lines changed: 2 additions & 4 deletions b/‎tc/aten/aten_autotuner-inl.h
Lines changed: 2 additions & 4 deletions
diff --git a/‎tc/aten/aten_autotuner.h
Lines changed: 1 addition & 2 deletions b/‎tc/aten/aten_autotuner.h
Lines changed: 1 addition & 2 deletions
diff --git a/‎tc/autotuner/autotuner-inl.h
Lines changed: 8 additions & 71 deletions b/‎tc/autotuner/autotuner-inl.h
Lines changed: 8 additions & 71 deletions
diff --git a/‎tc/autotuner/autotuner.h
Lines changed: 6 additions & 4 deletions b/‎tc/autotuner/autotuner.h
Lines changed: 6 additions & 4 deletions
diff --git a/‎tc/autotuner/options_cache-inl.h
Lines changed: 36 additions & 9 deletions b/‎tc/autotuner/options_cache-inl.h
Lines changed: 36 additions & 9 deletions
diff --git a/‎tc/autotuner/options_cache.h
Lines changed: 41 additions & 3 deletions b/‎tc/autotuner/options_cache.h
Lines changed: 41 additions & 3 deletions
diff --git a/‎tc/benchmarks/MLP_model.cc
Lines changed: 5 additions & 40 deletions b/‎tc/benchmarks/MLP_model.cc
Lines changed: 5 additions & 40 deletions
@@ -52,8 +52,7 @@ std::vector<typename Backend::MappingOptionsType>
 ATenAutotuner<Backend, Search>::tune(
     const std::string& tcName,
     const std::vector<at::Tensor>& inputs,
-    const typename Backend::MappingOptionsType& baseMapping,
-    const std::string& cacheFileName,
+    const std::vector<typename Backend::MappingOptionsType>& baseMappings,
     const tc::autotune::TuningParameterFixer& fixedParams) {
   // TODO: some checks that inputs memory lives on the proper Backend device
 
@@ -91,8 +90,7 @@ ATenAutotuner<Backend, Search>::tune(
       tcName,
       rawInputsPerDevice,
       rawOutputsPerDevice,
-      baseMapping,
-      cacheFileName,
+      baseMappings,
       fixedParams);
 }
 } // namespace aten
 
@@ -79,8 +79,7 @@ class ATenAutotuner : public tc::autotune::Autotuner<Backend, SearchStrategy> {
   std::vector<MappingOptionsType> tune(
       const std::string& tcEntryPoint,
       const std::vector<at::Tensor>& inputs,
-      const MappingOptionsType& baseMapping,
-      const std::string& cacheFileName = "",
+      const std::vector<MappingOptionsType>& baseMappings,
       const tc::autotune::TuningParameterFixer& fixedParams = {});
 
  protected:
 
@@ -321,45 +321,6 @@ namespace {
 volatile std::sig_atomic_t sigint_ = 0;
 volatile std::sig_atomic_t sigterm_ = 0;
 
-template <typename Backend>
-std::vector<typename Backend::MappingOptionsType> loadThroughCache(
-    lang::TreeRef tree,
-    std::shared_ptr<OptionsCache<Backend>> optionsCache,
-    const std::string& cacheFileName,
-    const std::vector<const DLConstTensor*>& inputs,
-    const size_t numCandidates) {
-  LOG_IF(INFO, FLAGS_debug_tuner)
-      << "Loading proto from: " << tc::makeOptionsFilename(cacheFileName)
-      << std::endl;
-  if (!cacheFileName.empty()) {
-    optionsCache->loadCacheFromFile(tc::makeOptionsFilename(cacheFileName));
-  }
-  auto outputs = tc::detail::inferOutputTensorInfo(tree, inputs);
-  return optionsCache->getTopKOptions(
-      canonicalTc(tree),
-      makeTensorInfoVector(inputs),
-      outputs,
-      Backend::backendString(),
-      numCandidates);
-}
-
-template <typename Backend>
-void storeTopKInCache(
-    const std::shared_ptr<OptionsCache<Backend>>& optionsCache,
-    const std::string& cacheFilename) {
-  if (cacheFilename.empty()) {
-    LOG_IF(INFO, FLAGS_debug_tuner)
-        << "No filepath provided, not saving cache" << std::endl;
-  } else {
-    LOG_IF(INFO, FLAGS_debug_tuner)
-        << "Dumping cache to " << tc::makeOptionsFilename(cacheFilename)
-        << std::endl;
-    OptionsCache<Backend> cache(*optionsCache);
-    cache.pruneKeepTopK(tc::FLAGS_tuner_save_best_candidates_count);
-    cache.storeCacheToFile(tc::makeOptionsFilename(cacheFilename));
-  }
-}
-
 void removeDuplicates(std::vector<size_t>& v) {
   std::sort(v.begin(), v.end());
   v.erase(std::unique(v.begin(), v.end()), v.end());
@@ -416,7 +377,7 @@ void setupTuningParameters(
 
 template <typename Backend, typename SearchStrategy>
 Autotuner<Backend, SearchStrategy>::Autotuner()
-    : optionsCache_(new OptionsCache<Backend>()) {}
+    : optionsCache(new OptionsCache<Backend>()) {}
 
 template <typename Backend, typename SearchStrategy>
 std::vector<typename Backend::MappingOptionsType>
@@ -425,8 +386,7 @@ Autotuner<Backend, SearchStrategy>::tune(
     const std::string& tcEntryPoint,
     const std::unordered_map<size_t, std::vector<const DLConstTensor*>>& inputs,
     std::unordered_map<size_t, std::vector<const DLTensor*>>& outputs,
-    const typename Backend::MappingOptionsType& baseMapping,
-    const std::string& cacheFileName,
+    const std::vector<typename Backend::MappingOptionsType>& baseMappings,
     const TuningParameterFixer& fixedParams) {
   std::map<std::string, lang::TreeRef> tcEntryPointMap(tc::detail::parse(tc));
   TC_CHECK_EQ(tcEntryPointMap.count(tcEntryPoint), 1u)
@@ -438,28 +398,13 @@ Autotuner<Backend, SearchStrategy>::tune(
   setupTuningParameters(inputs.begin()->second, modelConfiguration);
   modelConfiguration.fixParameters(fixedParams);
 
-  // Build starting points from baseMapping + whatever we recover from cache
-  std::vector<typename Backend::MappingOptionsType> startingPoints{baseMapping};
-  auto restoredCandidates = loadThroughCache<Backend>(
-      tcEntryPointMap.at(tcEntryPoint),
-      optionsCache_,
-      cacheFileName,
-      inputs.begin()->second,
-      FLAGS_tuner_gen_restore_number);
-  if (restoredCandidates.size() > 0) {
-    startingPoints.reserve(1 + restoredCandidates.size());
-    std::move(
-        restoredCandidates.begin(),
-        restoredCandidates.end(),
-        std::back_inserter(startingPoints));
-  }
-
   // Create initial configs based on options + model configuration
+  const std::vector<typename Backend::MappingOptionsType> options{baseMappings};
   std::vector<TuningConfiguration> configs;
-  configs.reserve(startingPoints.size());
+  configs.reserve(options.size());
   std::transform(
-      startingPoints.begin(),
-      startingPoints.end(),
+      options.begin(),
+      options.end(),
       std::back_inserter(configs),
       [this, &fixedParams, &modelConfiguration](
           const typename Backend::MappingOptionsType& options) {
@@ -484,9 +429,9 @@ Autotuner<Backend, SearchStrategy>::tune(
       tcEntryPointMap.at(tcEntryPoint),
       inputs,
       outputs,
-      baseMapping,
+      options[0],
       fixedParams,
-      optionsCache_);
+      optionsCache);
 
   // Setup handlers
   sigterm_ = 0;
@@ -505,10 +450,6 @@ Autotuner<Backend, SearchStrategy>::tune(
     try {
       tuningHarness.run(searchStrategy);
     } catch (const std::exception& e) {
-      std::cerr << "Exception during autotuning: " << e.what()
-                << "\n dumping cache to "
-                << tc::makeOptionsFilename(cacheFileName) << std::endl;
-      storeTopKInCache<Backend>(optionsCache_, cacheFileName);
       tuningHarnessThreadEx = std::current_exception();
     }
     tuningHarnessFinished = true;
@@ -517,11 +458,9 @@ Autotuner<Backend, SearchStrategy>::tune(
     std::this_thread::sleep_for(std::chrono::milliseconds(100));
     if (sigint_) {
       tuningHarness.stopAfterCurrentIteration();
-      storeTopKInCache<Backend>(optionsCache_, cacheFileName);
     }
     if (sigterm_) {
       std::cerr << "Autotuning aborted." << std::endl;
-      storeTopKInCache<Backend>(optionsCache_, cacheFileName);
       std::abort();
     }
   }
@@ -532,8 +471,6 @@ Autotuner<Backend, SearchStrategy>::tune(
     std::rethrow_exception(tuningHarnessThreadEx);
   }
 
-  storeTopKInCache<Backend>(optionsCache_, cacheFileName);
-
   return {tuningHarness.bestMappingOptions()};
 }
 } // namespace autotune
 
@@ -164,12 +164,14 @@ class Autotuner {
       const std::unordered_map<size_t, std::vector<const DLConstTensor*>>&
           inputs,
       std::unordered_map<size_t, std::vector<const DLTensor*>>& outputs,
-      const MappingOptionsType& baseMapping,
-      const std::string& cacheFileName = "",
+      const std::vector<MappingOptionsType>& baseMapping,
       const TuningParameterFixer& fixedParams = TuningParameterFixer());
 
- private:
-  std::shared_ptr<OptionsCache<Backend>> optionsCache_;
+ public:
+  /// This is accessed by multiple threads in the tuning harness.
+  /// Even though manipulations are threadsafe, you want to be sure tuning
+  /// has finished before accessing the optionsCache.
+  std::shared_ptr<OptionsCache<Backend>> optionsCache;
 };
 
 /// Helper functions that need specializing for various backends.
 
@@ -27,6 +27,7 @@
 #include <llvm/ADT/Optional.h>
 
 #include "tc/core/check.h"
+#include "tc/core/compiler.h"
 #include "tc/core/tensor.h"
 #include "tc/core/utils/math.h"
 #include "tc/core/utils/time.h"
@@ -163,12 +164,10 @@ void OptionsCache<Backend>::storeCacheToFile(
     std::lock_guard<std::mutex> lock(mutex);
     std::fstream serialized(
         filename, std::ios::binary | std::ios::trunc | std::ios::out);
-    if (!serialized.is_open()) {
-      LOG(ERROR) << "Failed to open the output stream for dumping protobuf: "
-                 << filename;
-    } else {
-      proto.SerializePartialToOstream(&serialized);
-    }
+    TC_CHECK(serialized.is_open(), std::invalid_argument)
+        << "Failed to open the output stream for dumping protobuf: "
+        << filename;
+    proto.SerializePartialToOstream(&serialized);
   }
 }
 
@@ -317,9 +316,37 @@ void OptionsCache<Backend>::fromProtobuf(
   }
 }
 
-} // namespace autotune
+template <typename Backend>
+std::vector<typename Backend::MappingOptionsType> loadTopKFromCacheFile(
+    const std::string& tc,
+    const std::string& entryPoint,
+    const std::string& cacheFilename,
+    const std::vector<const DLConstTensor*>& inputs,
+    size_t count) {
+  OptionsCache<Backend> optionsCache;
+  optionsCache.loadCacheFromFile(cacheFilename);
+  auto outputs = tc::inferOutputTensorInfo(tc, entryPoint, inputs);
+  return optionsCache.getTopKOptions(
+      lang::canonicalTc(tc::detail::parse(tc).at(entryPoint)),
+      tc::makeTensorInfoVector(inputs),
+      outputs,
+      Backend::backendString(),
+      count);
+}
 
-inline std::string makeOptionsFilename(const std::string& fn) {
-  return fn + ".options";
+template <typename Backend>
+void appendTopKToCacheFile(
+    const std::shared_ptr<OptionsCache<Backend>>& cache,
+    const std::string& cacheFilename,
+    uint32_t count) {
+  OptionsCache<Backend> copy(*cache);
+  copy.pruneKeepTopK(count);
+  auto proto = copy.toProtobuf();
+  OptionsCache<Backend> optionsCache;
+  optionsCache.loadCacheFromFile(cacheFilename);
+  optionsCache.fromProtobuf(proto);
+  optionsCache.storeCacheToFile(cacheFilename);
 }
+
+} // namespace autotune
 } // namespace tc
@@ -140,7 +140,10 @@ struct OptionsCache {
       const std::string& backendStr,
       size_t K) const;
 
-  /// Drops the (N - K) worst performing options
+  /// Drops the (N - K) worst performing options for each key in the cache.
+  /// That is, for each unique tuple(
+  ///    CanonicalTcString, input TensorInfo, output TensorInfo, backend string)
+  /// this function keeps the best K.
   void pruneKeepTopK(size_t K);
 
  protected:
@@ -162,10 +165,45 @@ struct OptionsCache {
       OptionsCacheValue<Backend>,
       OptionsCacheKeyHash>
       store_;
+
+  template <typename BackendType>
+  friend void appendTopKToCacheFile(
+      const std::shared_ptr<OptionsCache<BackendType>>& cache,
+      const std::string& cacheFilename,
+      uint32_t count);
 };
-} // namespace autotune
 
-std::string makeOptionsFilename(const std::string& fn);
+/// Loads at most `count' best entries from the file `cacheFilename', for the
+/// TC definition corresponding to the entryPoint.
+///
+/// Note that the file manipulation is threadsafe but not IPC-safe.
+/// TODO: implement using a filesystem lock (e.g. flock) if more safety is
+/// needed.
+template <typename Backend>
+std::vector<typename Backend::MappingOptionsType> loadTopKFromCacheFile(
+    const std::string& tc,
+    const std::string& entryPoint,
+    const std::string& cacheFilename,
+    const std::vector<const DLConstTensor*>& inputs,
+    size_t count);
+
+/// Stores at most `count' best entries from the cache into the file
+/// `cacheFilename', if that filename can be written to; otherwise throws.
+/// To avoid spuriously overwriting previous results, this ***appends*** at
+/// most `count' best entries from the cache to the cache loaded from
+/// cacheFilename. If the file is empty or does not exist then this function
+/// just writes instead of appending.
+///
+/// Note that the file manipulation is threadsafe but not IPC-safe.
+/// TODO: implement using a filesystem lock (e.g. flock) if more safety is
+/// needed.
+template <typename Backend>
+void appendTopKToCacheFile(
+    const std::shared_ptr<OptionsCache<Backend>>& cache,
+    const std::string& cacheFilename,
+    uint32_t count);
+
+} // namespace autotune
 } // namespace tc
 
 #include "tc/autotuner/options_cache-inl.h"
@@ -257,14 +257,7 @@ def _1LUT(float(E1, D) LUT1, int32(B, L1) I1) -> (O1) {
         std::to_string(FLAGS_E1);
     std::vector<tc::CudaMappingOptions> bestOptions{options};
     if (FLAGS_autotune) {
-      bestOptions = autotune(
-          FLAGS_save_tuner_proto_prefix + std::string("/1LUT_cache") + suffix,
-          FLAGS_save_tuner_proto_prefix + std::string("/1LUT_best") + suffix,
-          tc,
-          "_1LUT",
-          inputs,
-          options,
-          check_fun);
+      bestOptions = autotune(tc, "_1LUT", inputs, options, check_fun);
       TC_CHECK_GE(bestOptions.size(), 1u);
     }
     Check(tc, "_1LUT", options, inputs, check_fun);
@@ -375,14 +368,7 @@ def _2LUT(float(E1, D) LUT1, int32(B, L1) I1, float(E2, D) LUT2, int32(B, L2) I2
         std::to_string(FLAGS_E2);
     std::vector<tc::CudaMappingOptions> bestOptions{options};
     if (FLAGS_autotune) {
-      bestOptions = autotune(
-          FLAGS_save_tuner_proto_prefix + std::string("/2LUT_cache") + suffix,
-          FLAGS_save_tuner_proto_prefix + std::string("/2LUT_best") + suffix,
-          tc,
-          "_2LUT",
-          inputs,
-          options,
-          check_fun);
+      bestOptions = autotune(tc, "_2LUT", inputs, options, check_fun);
       TC_CHECK_GE(bestOptions.size(), 1u);
     }
     Check(tc, "_2LUT", bestOptions[0], inputs, check_fun);
@@ -444,14 +430,7 @@ def _C3(float(B,WX) I, float(WY, WX) W) -> (C3) {
       std::to_string(FLAGS_WY);
   std::vector<tc::CudaMappingOptions> bestOptions{options};
   if (FLAGS_autotune) {
-    bestOptions = autotune(
-        FLAGS_save_tuner_proto_prefix + std::string("/_C3_cache") + suffix,
-        FLAGS_save_tuner_proto_prefix + std::string("/_C3_best") + suffix,
-        tc,
-        "_C3",
-        inputs,
-        options,
-        check_fun);
+    bestOptions = autotune(tc, "_C3", inputs, options, check_fun);
     TC_CHECK_GE(bestOptions.size(), 1u);
   }
   Check(tc, "_C3", bestOptions[0], inputs, check_fun);
@@ -509,14 +488,7 @@ def mlp1(float(B,M) I, float(M, N) W1, float(N) B1) -> (O1) {
       std::to_string(FLAGS_N);
   std::vector<tc::CudaMappingOptions> bestOptions{options};
   if (FLAGS_autotune) {
-    bestOptions = autotune(
-        FLAGS_save_tuner_proto_prefix + std::string("/mlp1_cache") + suffix,
-        FLAGS_save_tuner_proto_prefix + std::string("/mlp1_best") + suffix,
-        tc,
-        "mlp1",
-        inputs,
-        options,
-        check_fun);
+    bestOptions = autotune(tc, "mlp1", inputs, options, check_fun);
     TC_CHECK_GE(bestOptions.size(), 1u);
   }
   Check(tc, "mlp1", bestOptions[0], inputs, check_fun);
@@ -592,14 +564,7 @@ def mlp3(float(B,N) I, float(O,N) W2, float(O) B2, float(P,O) W3, float(P) B3,
       std::to_string(FLAGS_N);
   std::vector<tc::CudaMappingOptions> bestOptions{options};
   if (FLAGS_autotune) {
-    bestOptions = autotune(
-        FLAGS_save_tuner_proto_prefix + std::string("/mlp3_cache") + suffix,
-        FLAGS_save_tuner_proto_prefix + std::string("/mlp3_best") + suffix,
-        tc,
-        "mlp3",
-        inputs,
-        options,
-        check_fun);
+    bestOptions = autotune(tc, "mlp3", inputs, options, check_fun);
     TC_CHECK_GE(bestOptions.size(), 1u);
   }
   Check(tc, "mlp3", bestOptions[0], inputs, check_fun);