Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 4f90354

Browse files
nicolasvasilache authored and Theodoros Theodoridis committed
Split backend compilation cache
This is the first step in simplifying compilation and reusing caching across backends
1 parent 49bfc2d commit 4f90354

File tree

5 files changed

+235
-179
lines changed

5 files changed

+235
-179
lines changed

tc/core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ add_library(
99

1010
SHARED
1111

12+
compilation_cache.cc
1213
flags.cc
1314
mapping_options.cc
1415
mapping_options_cpp_printer.cc

tc/core/compilation_cache.cc

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
/**
2+
* Copyright (c) 2017-present, Facebook, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#include "tc/core/cuda/cuda_compilation_cache.h"
17+
18+
#include <version.h>
19+
20+
#include <cstdint>
21+
#include <fstream>
22+
#include <numeric>
23+
#include <tuple>
24+
25+
#include "tc/core/cuda/cuda_mapping_options.h"
26+
#include "tc/core/utils/math.h"
27+
28+
namespace tc {
29+
30+
namespace {
31+
uint64_t GetDLTensorAlignment(const DLTensor* t) {
32+
return (reinterpret_cast<std::uintptr_t>(t->data) + t->byte_offset) % 256;
33+
}
34+
} // namespace
35+
36+
// Snapshot the cache-relevant properties of a DLTensor: shape, strides,
// alignment bucket, and element dtype. When the tensor is contiguous
// (t->strides == nullptr) the strides vector is left empty.
detail::TensorInfo::TensorInfo(const DLTensor* t)
    : alignment{GetDLTensorAlignment(t)}, dType(t->dtype) {
  shape.assign(t->shape, t->shape + t->ndim);
  if (t->strides != nullptr) {
    strides.assign(t->strides, t->strides + t->ndim);
  }
}
46+
47+
detail::TensorInfo::TensorInfo(const TensorInfoProto& buf)
48+
: shape{buf.shape().begin(), buf.shape().end()},
49+
strides{buf.strides().begin(), buf.strides().end()},
50+
alignment{buf.alignment()},
51+
dType{static_cast<uint8_t>(buf.dtype().code()),
52+
static_cast<uint8_t>(buf.dtype().bits()),
53+
static_cast<uint16_t>(buf.dtype().lanes())} {}
54+
55+
// Serialize this TensorInfo into a TensorInfoProto. Round-trips with the
// TensorInfoProto constructor above.
TensorInfoProto detail::TensorInfo::toProtobuf() const {
  TensorInfoProto buf;
  buf.mutable_shape()->Reserve(shape.size());
  for (const auto extent : shape) {
    buf.add_shape(extent);
  }
  buf.mutable_strides()->Reserve(strides.size());
  for (const auto stride : strides) {
    buf.add_strides(stride);
  }
  buf.set_alignment(alignment);
  auto* dtype = buf.mutable_dtype();
  dtype->set_code(dType.code);
  dtype->set_bits(dType.bits);
  dtype->set_lanes(dType.lanes);
  return buf;
}
73+
74+
// Compare this cached entry against a live DLTensor: rank, shape, strides
// (presence and values) and dtype must all match. Alignment is deliberately
// not compared yet — see the block comment below.
bool detail::TensorInfo::operator==(const DLTensor* t) const {
  if (t->ndim != static_cast<int>(shape.size())) {
    return false;
  }
  // Ranks match, so a single equal-range check suffices for the shape.
  if (!std::equal(shape.begin(), shape.end(), t->shape)) {
    return false;
  }

  if (t->strides == nullptr) {
    // Contiguous tensor: only matches an entry recorded without strides.
    if (!strides.empty()) {
      return false;
    }
  } else {
    if (t->ndim != static_cast<int>(strides.size())) {
      return false;
    }
    if (!std::equal(strides.begin(), strides.end(), t->strides)) {
      return false;
    }
  }

  /*This should be enabled when/if tc starts using alignment information
   *if (GetDLTensorAlignment(t) != alignment) {
   * return false;
   *}
   */
  return std::tie(t->dtype.code, t->dtype.bits, t->dtype.lanes) ==
      std::tie(dType.code, dType.bits, dType.lanes);
}
107+
108+
// Two dtypes are equal iff all three components (code, bits, lanes) agree.
bool operator==(const DLDataType& a, const DLDataType& b) {
  return std::tie(a.code, a.bits, a.lanes) == std::tie(b.code, b.bits, b.lanes);
}
111+
112+
// Strict weak ordering on dtypes: lexicographic on (code, bits, lanes).
//
// The previous implementation returned the conjunction of component-wise
// '<' (a.code < b.code and a.bits < b.bits and a.lanes < b.lanes), which is
// NOT a strict weak ordering: two distinct dtypes such as {1,32,1} and
// {2,16,1} compare "not less" in both directions while also comparing
// unequal. Using such a comparator with ordered containers or sorting
// algorithms is undefined behavior, so it is replaced with a proper
// lexicographic comparison.
bool operator<(const DLDataType& a, const DLDataType& b) {
  return std::tie(a.code, a.bits, a.lanes) < std::tie(b.code, b.bits, b.lanes);
}
115+
116+
// Full field-wise equality of two cached tensor descriptions.
bool detail::TensorInfo::operator==(const TensorInfo& t) const {
  return alignment == t.alignment && dType == t.dType && shape == t.shape &&
      strides == t.strides;
}
120+
121+
// Strict weak ordering over TensorInfo, lexicographic on
// (alignment, dtype components, shape, strides).
//
// The previous implementation and-ed together component-wise '<'
// (alignment < t.alignment and dType < t.dType and ...), which is not a
// strict weak ordering: distinct values could be mutually "not less than"
// each other, which is undefined behavior when this operator is used as a
// comparator for ordered containers or std::sort. The dtype is compared by
// its raw (code, bits, lanes) fields here so this function does not depend
// on DLDataType's own operator<.
bool detail::TensorInfo::operator<(const TensorInfo& t) const {
  return std::tie(
             alignment, dType.code, dType.bits, dType.lanes, shape, strides) <
      std::tie(
             t.alignment,
             t.dType.code,
             t.dType.bits,
             t.dType.lanes,
             t.shape,
             t.strides);
}
125+
126+
} // namespace tc

tc/core/compilation_cache.h

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/**
2+
* Copyright (c) 2017-present, Facebook, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#pragma once
17+
18+
#include <cstdint>
19+
#include <memory>
20+
#include <mutex>
21+
#include <stdexcept>
22+
#include <string>
23+
#include <vector>
24+
25+
#include <dlpack/dlpack.h>
26+
27+
#include <compcache.pb.h>
28+
29+
#include "tc/core/cuda/cuda.h"
30+
#include "tc/core/cuda/cuda_mapping_options.h"
31+
#include "tc/core/cuda/cuda_rtc.h"
32+
#include "tc/core/utils/time.h"
33+
34+
namespace tc {
35+
36+
namespace detail {
37+
/**
38+
* TensorInfo wraps the necessary bits of DLTensor that are used as part of the
39+
* CompilationCache's entry keys.
40+
*
41+
* It is serializable to protobuf and stored directly in the cache.
42+
*/
43+
struct TensorInfo {
44+
std::vector<int64_t> shape;
45+
std::vector<int64_t> strides;
46+
uint64_t alignment;
47+
DLDataType dType;
48+
49+
TensorInfo(const DLTensor* t);
50+
TensorInfo(const TensorInfoProto& buf);
51+
52+
bool operator==(const DLTensor* t) const;
53+
bool operator==(const TensorInfo& t) const;
54+
bool operator<(const TensorInfo& t) const;
55+
TensorInfoProto toProtobuf() const;
56+
};
57+
} // namespace detail
58+
59+
/**
 * Cache is the backend-agnostic base of the compilation caches.
 *
 * CC is the concrete cache type. A process-wide instance of each concrete
 * cache is managed through the static enable/disable/get functions and can
 * be dumped to / loaded from a protobuf file.
 */
template <typename CC>
class Cache {
 public:
  static void enableCache();
  static void disableCache();
  static void dumpCacheToProtobuf(const std::string& filename);
  static void loadCacheFromProtobuf(const std::string& filename);
  template <typename Protobuf>
  static void loadCacheFromProtobuf(const Protobuf& buf);
  static std::shared_ptr<CC> getCache();
  static bool cacheEnabled();

  size_t size() const;
  void clear();

  // Retrieval/insertion statistics; mutable so const lookup paths can
  // update them.
  mutable int numberAttemptedRetrievals = 0;
  mutable int numberSuccessfulRetrievals = 0;
  // NOTE(review): "Attemps" is misspelled, but this is a public field —
  // renaming it would break existing callers.
  mutable int numberCacheAttemps = 0;

 protected:
  // XXX:this should be a std or boost shared_mutex
  mutable std::mutex mtx_;
};
82+
83+
/**
 * Thrown when an insertion would store a different value under a key that
 * is already present in a cache.
 */
class CacheEntrySameKeyDifferentValue : public std::invalid_argument {
 public:
  // Inherit std::invalid_argument's explicit constructors from
  // const std::string& and const char*.
  using std::invalid_argument::invalid_argument;
};
90+
91+
} // namespace tc

tc/core/cuda/cuda_compilation_cache.cc

Lines changed: 3 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -27,102 +27,6 @@
2727

2828
namespace tc {
2929

30-
namespace {
31-
uint64_t GetDLTensorAlignment(const DLTensor* t) {
32-
return (reinterpret_cast<std::uintptr_t>(t->data) + t->byte_offset) % 256;
33-
}
34-
} // namespace
35-
36-
detail::TensorInfo::TensorInfo(const DLTensor* t)
37-
: alignment{GetDLTensorAlignment(t)}, dType(t->dtype) {
38-
shape.reserve(t->ndim);
39-
std::copy(t->shape, t->shape + t->ndim, std::back_inserter(shape));
40-
if (not t->strides) {
41-
return;
42-
}
43-
strides.reserve(t->ndim);
44-
std::copy(t->strides, t->strides + t->ndim, std::back_inserter(strides));
45-
}
46-
47-
detail::TensorInfo::TensorInfo(const TensorInfoProto& buf)
48-
: shape{buf.shape().begin(), buf.shape().end()},
49-
strides{buf.strides().begin(), buf.strides().end()},
50-
alignment{buf.alignment()},
51-
dType{static_cast<uint8_t>(buf.dtype().code()),
52-
static_cast<uint8_t>(buf.dtype().bits()),
53-
static_cast<uint16_t>(buf.dtype().lanes())} {}
54-
55-
TensorInfoProto detail::TensorInfo::toProtobuf() const {
56-
TensorInfoProto buf;
57-
buf.mutable_shape()->Reserve(shape.size());
58-
std::copy(
59-
shape.begin(),
60-
shape.end(),
61-
google::protobuf::RepeatedFieldBackInserter(buf.mutable_shape()));
62-
buf.mutable_strides()->Reserve(strides.size());
63-
std::copy(
64-
strides.begin(),
65-
strides.end(),
66-
google::protobuf::RepeatedFieldBackInserter(buf.mutable_strides()));
67-
buf.set_alignment(alignment);
68-
buf.mutable_dtype()->set_code(dType.code);
69-
buf.mutable_dtype()->set_bits(dType.bits);
70-
buf.mutable_dtype()->set_lanes(dType.lanes);
71-
return buf;
72-
}
73-
74-
bool detail::TensorInfo::operator==(const DLTensor* t) const {
75-
if (t->ndim != static_cast<int>(shape.size())) {
76-
return false;
77-
}
78-
79-
auto res = std::mismatch(shape.begin(), shape.end(), t->shape);
80-
if (res.first != shape.end() || res.second != t->shape + t->ndim) {
81-
return false;
82-
}
83-
84-
if (t->strides == nullptr) {
85-
if (strides.size() > 0) {
86-
return false;
87-
}
88-
} else {
89-
if (t->ndim != static_cast<int>(strides.size())) {
90-
return false;
91-
}
92-
93-
res = std::mismatch(strides.begin(), strides.end(), t->strides);
94-
if (res.first != strides.end() || res.second != t->strides + t->ndim) {
95-
return false;
96-
}
97-
}
98-
99-
/*This should be enabled when/if tc starts using alignment information
100-
*if (GetDLTensorAlignment(t) != alignment) {
101-
* return false;
102-
*}
103-
*/
104-
return std::tie(t->dtype.code, t->dtype.bits, t->dtype.lanes) ==
105-
std::tie(dType.code, dType.bits, dType.lanes);
106-
}
107-
108-
bool operator==(const DLDataType& a, const DLDataType& b) {
109-
return a.code == b.code and a.bits == b.bits and a.lanes == b.lanes;
110-
}
111-
112-
bool operator<(const DLDataType& a, const DLDataType& b) {
113-
return a.code < b.code and a.bits < b.bits and a.lanes < b.lanes;
114-
}
115-
116-
bool detail::TensorInfo::operator==(const TensorInfo& t) const {
117-
return alignment == t.alignment and dType == t.dType and shape == t.shape and
118-
strides == t.strides;
119-
}
120-
121-
bool detail::TensorInfo::operator<(const TensorInfo& t) const {
122-
return alignment < t.alignment and dType < t.dType and shape < t.shape and
123-
strides < t.strides;
124-
}
125-
12630
namespace {
12731
std::vector<detail::TensorInfo> DLTensorToTensorInfoVector(
12832
const std::vector<const DLTensor*>& ts) {
@@ -134,9 +38,6 @@ std::vector<detail::TensorInfo> DLTensorToTensorInfoVector(
13438
});
13539
return iis;
13640
}
137-
} // namespace
138-
139-
namespace {
14041
std::vector<detail::TensorInfo> ProtoToTensorInfoVector(
14142
const google::protobuf::RepeatedPtrField<TensorInfoProto>& buf) {
14243
std::vector<detail::TensorInfo> iis;
@@ -148,9 +49,6 @@ std::vector<detail::TensorInfo> ProtoToTensorInfoVector(
14849
[](const TensorInfoProto& iip) { return detail::TensorInfo{iip}; });
14950
return iis;
15051
}
151-
} // namespace
152-
153-
namespace {
15452
template <typename Array, typename Buf>
15553
void WriteProtobufArray(const Array& arr, Buf* buf) {
15654
google::protobuf::RepeatedField<typename Array::value_type> data(
@@ -174,21 +72,18 @@ bool operator==(
17472
return true;
17573
}
17674

177-
namespace {
178-
std::shared_ptr<CudaCache> cudaCache_;
179-
std::shared_ptr<OptionsCache> optionsCache_;
180-
std::shared_ptr<ManualCudaCache> manualCudaCache_;
181-
} // namespace
182-
18375
// Process-wide CudaCache handle. Function-local static (Meyers singleton):
// initialized on first use, avoiding static-initialization-order issues of
// a namespace-scope global.
std::shared_ptr<CudaCache>& CudaCache::getGlobalSharedCache() {
  static std::shared_ptr<CudaCache> instance;
  return instance;
}
18679

18780
// Process-wide OptionsCache handle, lazily initialized on first use.
std::shared_ptr<OptionsCache>& OptionsCache::getGlobalSharedCache() {
  static std::shared_ptr<OptionsCache> instance;
  return instance;
}
19084

19185
// Process-wide ManualCudaCache handle, lazily initialized on first use.
std::shared_ptr<ManualCudaCache>& ManualCudaCache::getGlobalSharedCache() {
  static std::shared_ptr<ManualCudaCache> instance;
  return instance;
}
19489

0 commit comments

Comments (0)