@@ -141,152 +141,6 @@ CachedEntryType* searchKernel(
141
141
}
142
142
} // namespace
143
143
144
- // //////////////////////////////////////////////////////////////////////////////
145
- // CudaCache
146
- // //////////////////////////////////////////////////////////////////////////////
147
- std::shared_ptr<CudaCache>& CudaCache::getGlobalSharedCache () {
148
- static std::shared_ptr<CudaCache> cudaCache_;
149
- return cudaCache_;
150
- }
151
-
152
- CudaCachedEntry::CudaCachedEntry (
153
- const std::string& id,
154
- const std::string& kernelSpecializedName,
155
- const std::vector<int >& kernelParameters,
156
- const Grid& grid,
157
- const Block& block,
158
- const CudaMappingOptions& mappingOptions,
159
- const std::vector<const DLTensor*>& inputs,
160
- const std::vector<const DLTensor*>& outputs,
161
- const std::string& cudaSource,
162
- const std::string& deviceStr)
163
- : key{id,
164
- mappingOptions,
165
- DLTensorToTensorInfoVector (inputs),
166
- DLTensorToTensorInfoVector (outputs),
167
- deviceStr,
168
- git_version},
169
- values{cudaSource, kernelSpecializedName, kernelParameters, grid, block} {
170
- }
171
-
172
- CudaCachedEntry::CudaCachedEntry (const CudaCacheEntryProto& buf)
173
- : key{buf.id (),
174
- CudaMappingOptions{buf.kernel_options ()},
175
- ProtoToTensorInfoVector (buf.inputs ()),
176
- ProtoToTensorInfoVector (buf.outputs ()),
177
- buf.device_str (),
178
- buf.git_version ()},
179
- values{buf.cuda_source (),
180
- buf.specialized_name (),
181
- std::vector<int >{buf.parameters ().begin (), buf.parameters ().end ()},
182
- Grid (buf.grid_dims ()),
183
- Block (buf.block_dims ())} {}
184
-
185
- CudaCache::CudaCache (const CudaCacheProto& buf) {
186
- entries_.reserve (buf.entries_size ());
187
- for (const auto & entry_buf : buf.entries ())
188
- entries_.emplace_back (entry_buf);
189
- }
190
-
191
- void CudaCache::cacheKernel (CudaCachedEntry&& entry) {
192
- std::lock_guard<std::mutex> lock (mtx_);
193
- ++numberCacheAttemps;
194
- auto retrievedEntry = searchKernel (
195
- entries_,
196
- entry.key .id ,
197
- entry.key .mappingOptions ,
198
- entry.key .inputs ,
199
- entry.key .outputs );
200
- if (retrievedEntry) {
201
- if (retrievedEntry->values .cudaSource != entry.values .cudaSource or
202
- retrievedEntry->values .grid != entry.values .grid or
203
- retrievedEntry->values .block != entry.values .block ) {
204
- throw CacheEntrySameKeyDifferentValue (
205
- " CudaCache::CacheKernel: a kernel matching the id, options and "
206
- " inputs was previously cached with different cuda source or block "
207
- " or grid dimensions." );
208
- }
209
- return ;
210
- }
211
- entries_.emplace_back (std::move (entry));
212
- }
213
-
214
- std::unique_ptr<CudaCacheRetrievalResult> CudaCache::retrieveKernel (
215
- const std::string& id,
216
- const CudaMappingOptions& options,
217
- const std::vector<const DLTensor*>& inputs,
218
- const std::vector<const DLTensor*>& outputs) const {
219
- std::lock_guard<std::mutex> lock (mtx_);
220
- ++numberAttemptedRetrievals;
221
- auto entry = searchKernel (entries_, id, options, inputs, outputs);
222
- if (not entry) {
223
- return nullptr ;
224
- }
225
- ++numberSuccessfulRetrievals;
226
- return std::unique_ptr<CudaCacheRetrievalResult>(
227
- new CudaCacheRetrievalResult{entry->values .cudaSource ,
228
- entry->values .kernelSpecializedName ,
229
- entry->values .kernelParameters ,
230
- entry->values .grid ,
231
- entry->values .block });
232
- }
233
-
234
- void CudaCache::removeEntriesNotInOptionsCache (const OptionsCache& oc) {
235
- std::vector<CudaCachedEntry> newEntries;
236
- for (const auto & entry : oc) {
237
- for (const auto & options : entry.values ) {
238
- auto cudaEntry = searchKernel (
239
- entries_,
240
- entry.key .id ,
241
- options.mappingOptions ,
242
- entry.key .inputs ,
243
- entry.key .outputs );
244
- if (cudaEntry) {
245
- newEntries.push_back (std::move (*cudaEntry));
246
- }
247
- }
248
- }
249
- entries_ = std::move (newEntries);
250
- }
251
-
252
- CudaCacheProto CudaCache::toProtobuf () const {
253
- CudaCacheProto buf;
254
- auto * entriesBuf = buf.mutable_entries ();
255
- entriesBuf->Reserve (entries_.size ());
256
- std::transform (
257
- entries_.begin (),
258
- entries_.end (),
259
- google::protobuf::RepeatedPtrFieldBackInserter (entriesBuf),
260
- [](const CudaCachedEntry& entry) { return entry.toProtobuf (); });
261
- return buf;
262
- }
263
-
264
- CudaCacheEntryProto CudaCachedEntry::toProtobuf () const {
265
- CudaCacheEntryProto buf;
266
- buf.set_id (key.id );
267
- *buf.mutable_kernel_options () = key.mappingOptions .proto ();
268
- std::transform (
269
- key.inputs .begin (),
270
- key.inputs .end (),
271
- google::protobuf::RepeatedPtrFieldBackInserter (buf.mutable_inputs ()),
272
- [](const detail::TensorInfo& input) { return input.toProtobuf (); });
273
- std::transform (
274
- key.outputs .begin (),
275
- key.outputs .end (),
276
- google::protobuf::RepeatedPtrFieldBackInserter (buf.mutable_outputs ()),
277
- [](const detail::TensorInfo& output) { return output.toProtobuf (); });
278
- buf.set_device_str (key.deviceStr );
279
- buf.set_git_version (key.gitVersion );
280
-
281
- buf.set_cuda_source (values.cudaSource );
282
- *buf.mutable_grid_dims () = values.grid .view .proto ;
283
- *buf.mutable_block_dims () = values.block .view .proto ;
284
- buf.set_specialized_name (values.kernelSpecializedName );
285
- WriteProtobufArray (values.kernelParameters , buf.mutable_parameters ());
286
-
287
- return buf;
288
- }
289
-
290
144
// //////////////////////////////////////////////////////////////////////////////
291
145
// OptionsCache
292
146
// //////////////////////////////////////////////////////////////////////////////
@@ -566,66 +420,4 @@ void OptionsCache::keepOnlyBestCandidates(size_t numberToKeep) {
566
420
}
567
421
}
568
422
}
569
-
570
- // //////////////////////////////////////////////////////////////////////////////
571
- // ManualCudaCache
572
- // //////////////////////////////////////////////////////////////////////////////
573
- std::shared_ptr<ManualCudaCache>& ManualCudaCache::getGlobalSharedCache () {
574
- static std::shared_ptr<ManualCudaCache> manualCudaCache_;
575
- return manualCudaCache_;
576
- }
577
-
578
- ManualCudaCachedEntry::ManualCudaCachedEntry (
579
- const std::string& id,
580
- const std::string& kernelSpecializedName,
581
- const std::vector<int >& kernelParameters,
582
- const Grid& grid,
583
- const Block& block,
584
- const std::vector<const DLTensor*>& inputs,
585
- const std::vector<const DLTensor*>& outputs,
586
- const std::string& cudaSource,
587
- const std::string& deviceStr)
588
- : key{id,
589
- DLTensorToTensorInfoVector (inputs),
590
- DLTensorToTensorInfoVector (outputs),
591
- deviceStr,
592
- git_version},
593
- values{cudaSource, kernelSpecializedName, kernelParameters, grid, block} {
594
- }
595
-
596
- void ManualCudaCache::cacheKernel (ManualCudaCachedEntry&& entry) {
597
- std::lock_guard<std::mutex> lock (mtx_);
598
- ++numberCacheAttemps;
599
- auto retrievedEntry =
600
- searchKernel (entries_, entry.key .id , entry.key .inputs , entry.key .outputs );
601
- if (retrievedEntry) {
602
- retrievedEntry->values .grid = entry.values .grid ;
603
- retrievedEntry->values .block = entry.values .block ;
604
- retrievedEntry->values .cudaSource = entry.values .cudaSource ;
605
- retrievedEntry->values .kernelSpecializedName =
606
- entry.values .kernelSpecializedName ;
607
- retrievedEntry->values .kernelParameters = entry.values .kernelParameters ;
608
- return ;
609
- }
610
- entries_.emplace_back (std::move (entry));
611
- }
612
-
613
- std::unique_ptr<ManualCudaCacheRetrievalResult> ManualCudaCache::retrieveKernel (
614
- const std::string& id,
615
- const std::vector<const DLTensor*>& inputs,
616
- const std::vector<const DLTensor*>& outputs) const {
617
- std::lock_guard<std::mutex> lock (mtx_);
618
- ++numberAttemptedRetrievals;
619
- auto entry = searchKernel (entries_, id, inputs, outputs);
620
- if (not entry) {
621
- return nullptr ;
622
- }
623
- ++numberSuccessfulRetrievals;
624
- return std::unique_ptr<ManualCudaCacheRetrievalResult>(
625
- new ManualCudaCacheRetrievalResult{entry->values .cudaSource ,
626
- entry->values .kernelSpecializedName ,
627
- entry->values .kernelParameters ,
628
- entry->values .grid ,
629
- entry->values .block });
630
- }
631
423
} // namespace tc
0 commit comments