Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 9499c50

Browse files
nicolasvasilache and Theodoros Theodoridis
authored and committed
Refactor cacheKernel call
This changeset makes the cacheKernel methods take a CacheEntry&&. This simplifies the API and gets rid of the discrepancy in the parameter ordering between the constructor of a CacheEntry and the passing of the same parameters to cacheKernel.
1 parent 68b01ed commit 9499c50

File tree

5 files changed

+191
-201
lines changed

5 files changed

+191
-201
lines changed

tc/core/cuda/cuda_compilation_cache.cc

Lines changed: 33 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -111,39 +111,26 @@ CudaCachedEntry::CudaCachedEntry(const CudaCacheEntryProto& buf)
111111
Grid(buf.grid_dims()),
112112
Block(buf.block_dims())} {}
113113

114-
void CudaCache::cacheKernel(
115-
const std::string& id,
116-
const CudaMappingOptions& options,
117-
const std::vector<const DLTensor*>& inputs,
118-
const std::vector<const DLTensor*>& outputs,
119-
const std::string& kernelSpecializedName,
120-
const std::vector<int>& kernelParameters,
121-
const std::string& cudaSource,
122-
const Grid& grid,
123-
const Block& block) {
114+
void CudaCache::cacheKernel(CudaCachedEntry&& entry) {
124115
std::lock_guard<std::mutex> lock(mtx_);
125116
++numberCacheAttemps;
126-
auto entry = searchKernel(id, options, inputs, outputs);
127-
if (entry) {
128-
if (entry->values.cudaSource == cudaSource or entry->values.grid == grid or
129-
entry->values.block == block) {
117+
auto retrievedEntry = searchKernel(
118+
entry.key.id,
119+
entry.key.mappingOptions,
120+
entry.key.inputs,
121+
entry.key.outputs);
122+
if (retrievedEntry) {
123+
if (retrievedEntry->values.cudaSource == entry.values.cudaSource or
124+
retrievedEntry->values.grid == entry.values.grid or
125+
retrievedEntry->values.block == entry.values.block) {
130126
throw CacheEntrySameKeyDifferentValue(
131-
"CudaCache::CacheKernel: a kernel matching the id, options and inputs was previously cached with different cuda source or block or grid dimensions.");
127+
"CudaCache::CacheKernel: a kernel matching the id, options and "
128+
"inputs was previously cached with different cuda source or block "
129+
"or grid dimensions.");
132130
}
133131
return;
134132
}
135-
136-
entries_.emplace_back(
137-
id,
138-
kernelSpecializedName,
139-
kernelParameters,
140-
grid,
141-
block,
142-
options,
143-
inputs,
144-
outputs,
145-
cudaSource,
146-
CudaGPUInfo::GPUInfo().GetCudaDeviceStr());
133+
entries_.emplace_back(entry);
147134
}
148135

149136
CudaCachedEntry* CudaCache::searchKernel(
@@ -552,6 +539,13 @@ std::unique_ptr<CudaCacheRetrievalResult> ManualCudaCache::retrieveKernel(
552539
entry->values.block});
553540
}
554541

542+
ManualCudaCachedEntry* ManualCudaCache::searchKernel(
543+
const std::string& id,
544+
const std::vector<detail::TensorInfo>& inputs,
545+
const std::vector<detail::TensorInfo>& outputs) {
546+
return searchKernelImpl(*this, id, inputs, outputs);
547+
}
548+
555549
ManualCudaCachedEntry* ManualCudaCache::searchKernel(
556550
const std::string& id,
557551
const std::vector<const DLTensor*>& inputs,
@@ -566,38 +560,23 @@ const ManualCudaCachedEntry* ManualCudaCache::searchKernel(
566560
return searchKernelImpl(*this, id, inputs, outputs);
567561
}
568562

569-
void ManualCudaCache::cacheKernel(
570-
const std::string& id,
571-
const std::vector<const DLTensor*>& inputs,
572-
const std::vector<const DLTensor*>& outputs,
573-
const std::string& kernelSpecializedName,
574-
const std::vector<int>& kernelParameters,
575-
const std::string& cudaSource,
576-
const Grid& grid,
577-
const Block& block) {
563+
void ManualCudaCache::cacheKernel(ManualCudaCachedEntry&& entry) {
578564
std::lock_guard<std::mutex> lock(mtx_);
579565
++numberCacheAttemps;
580-
auto entry = searchKernel(id, inputs, outputs);
581-
if (entry) {
582-
entry->values.grid = grid;
583-
entry->values.block = block;
584-
entry->values.cudaSource = cudaSource;
585-
entry->values.kernelSpecializedName = kernelSpecializedName;
586-
entry->values.kernelParameters = kernelParameters;
566+
auto retrievedEntry =
567+
searchKernel(entry.key.id, entry.key.inputs, entry.key.outputs);
568+
if (retrievedEntry) {
569+
retrievedEntry->values.grid = entry.values.grid;
570+
retrievedEntry->values.block = entry.values.block;
571+
retrievedEntry->values.cudaSource = entry.values.cudaSource;
572+
retrievedEntry->values.kernelSpecializedName =
573+
entry.values.kernelSpecializedName;
574+
retrievedEntry->values.kernelParameters = entry.values.kernelParameters;
587575
return;
588576
}
589-
590-
entries_.emplace_back(
591-
id,
592-
kernelSpecializedName,
593-
kernelParameters,
594-
grid,
595-
block,
596-
inputs,
597-
outputs,
598-
cudaSource,
599-
CudaGPUInfo::GPUInfo().GetCudaDeviceStr());
577+
entries_.emplace_back(entry);
600578
}
579+
601580
ManualCudaCachedEntry::ManualCudaCachedEntry(
602581
const std::string& id,
603582
const std::string& kernelSpecializedName,

tc/core/cuda/cuda_compilation_cache.h

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -147,16 +147,7 @@ class CudaCache : public Cache<CudaCache, CudaCachedEntry> {
147147
* target device are the same then this is a noop
148148
* Else (cudaSource, grid, block) is stored in the cache
149149
*/
150-
void cacheKernel(
151-
const std::string& id,
152-
const CudaMappingOptions& options,
153-
const std::vector<const DLTensor*>& inputs,
154-
const std::vector<const DLTensor*>& outputs,
155-
const std::string& kernelSpecializedName,
156-
const std::vector<int>& kernelParameters,
157-
const std::string& cudaSource,
158-
const Grid& grid,
159-
const Block& block);
150+
void cacheKernel(CudaCachedEntry&& entry);
160151

161152
/**
162153
* Returns the cache entry that matches op (id, isl options, target device)
@@ -393,15 +384,7 @@ class ManualCudaCache : public Cache<ManualCudaCache, ManualCudaCachedEntry> {
393384
*target device). If the key already exist in the cache,
394385
*the values are replaced.
395386
*/
396-
void cacheKernel(
397-
const std::string& id,
398-
const std::vector<const DLTensor*>& inputs,
399-
const std::vector<const DLTensor*>& outputs,
400-
const std::string& kernelSpecializedName,
401-
const std::vector<int>& kernelParameters,
402-
const std::string& cudaSource,
403-
const Grid& grid,
404-
const Block& block);
387+
void cacheKernel(ManualCudaCachedEntry&& entry);
405388

406389
/*
407390
*Returns the cache entry that matches

tc/core/cuda/cuda_tc_executor.cc

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,16 +92,17 @@ void CudaTcExecutor::compile(const tc::CudaMappingOptions& options) {
9292
if (CudaCache::cacheEnabled()) {
9393
LOG_IF(INFO, FLAGS_debug_tc_mapper) << "original grid: " << grid;
9494
LOG_IF(INFO, FLAGS_debug_tc_mapper) << "original block: " << block;
95-
CudaCache::getCache()->cacheKernel(
95+
CudaCache::getCache()->cacheKernel(CudaCachedEntry(
9696
cacheKeyId_,
97+
kernelSpecializedName,
98+
executionInfo_.kernelParams,
99+
grid,
100+
block,
97101
options,
98102
extractRawPtrs(executionInfo_.inputsInfo),
99103
extractRawPtrs(executionInfo_.outputsInfo),
100-
kernelSpecializedName,
101-
executionInfo_.kernelParams,
102104
cudaSource,
103-
grid,
104-
block);
105+
CudaGPUInfo::GPUInfo().GetCudaDeviceStr()));
105106
}
106107
}
107108

tensor_comprehensions/pybinds/pybind_engine.cc

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
#include "pybind_utils.h"
2727
#include "tc/aten/aten_compiler.h"
28+
#include "tc/core/cuda/cuda.h"
2829
#include "tc/core/cuda/cuda_compilation_cache.h"
2930
#include "tc/core/cuda/cuda_mapping_options.h"
3031
#include "tc/core/cuda/cuda_tc_executor.h"
@@ -126,14 +127,16 @@ PYBIND11_MODULE(tc, m) {
126127
[&]() { tc::deleteDlmTensors(tensorsPair.second); });
127128
auto outTensorInfo = instance.inferOutputTensorInfo(name, atInputs);
128129
tc::ManualCudaCache::getCache()->cacheKernel(
129-
name,
130-
tensorsPair.first,
131-
outTensorInfo,
132-
injectedKernelName,
133-
{},
134-
cudaSource,
135-
tc::Grid(grid),
136-
tc::Block(block));
130+
tc::ManualCudaCachedEntry(
131+
name,
132+
injectedKernelName,
133+
{},
134+
tc::Grid(grid),
135+
tc::Block(block),
136+
tensorsPair.first,
137+
outTensorInfo,
138+
cudaSource,
139+
tc::CudaGPUInfo::GPUInfo().GetCudaDeviceStr()));
137140
});
138141
}
139142

0 commit comments

Comments (0)