/**
 * Copyright (c) 2017-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

#include <map>
#include <memory>
#include <string>
#include <vector>

#include "tc/core/mapping_options.h"
#include "tc/core/tensor.h"
#include "tc/lang/tree.h"
/**
 * This provides a simple functional-style C++ API with multi-backend
 * capabilities to:
 *   1. compile a TC function and return an Executor for the specified Backend
 *      on which the run method can be called;
 *   2. infer actual tmp/output tensor shapes given input tensor shapes;
 *   3. parse a TC definition and retrieve the map of TC function to parsed TC
 *      trees.
 *
 * Compilation is backed by a compilation cache with the correspondence:
 *   1 TcExecutor <-> 1 compiled tuple<TC function, input shapes, MappingOptions>
 *
 * The compile function is templated by the Backend type.
 * For each backend, the specific Backend type lives in
 * backendname/backendname_backend.h and declares all the required dependent
 * **derived** types.
 * For example, CudaBackend is declared in core/cuda/cuda_backend.h:
 *
 *   struct CudaBackend {
 *     using ExecutorType = CudaTcExecutor;
 *     using MappingOptionsType = CudaMappingOptions;
 *     using CompilationResultType = CudaCompilationResult;
 *     using RTCFunctionType = CudaRTCFunction;
 *   };
 *
 * Typical usage resembles:
 *   std::string someTc = "...";
 *   auto pExecutor = tc::compile<CudaBackend>(
 *       someTc, tcFunctionName, inputs, mappingOptions);
 *   auto profilingInfo = pExecutor->profile(handle, inputs, outputs, true);
 *   // alternatively:
 *   // auto kernelTiming = pExecutor->uncheckedRun(inputs, outputs, true);
 */
namespace tc {
/// Given a TC string containing multiple functions and a TC function name
/// "entryPoint", this function compiles a new TcExecutor for the specified
/// Backend. For now, contiguous output sizes are inferred given input sizes.
/// If you need another kernel for another entryPoint, other inputs, or other
/// options, just compile another TcExecutor; at the moment we fully JIT
/// specialize on all sizes.
/// \returns a new TcExecutor on which the run method can be called to run
/// entryPoint
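///
/// For illustration, a minimal sketch (assuming the CUDA backend from
/// core/cuda/cuda_backend.h, inputs already wrapped as DLConstTensor, and
/// naive mapping options; the TC function name "relu" is an example):
/// \code
///   std::string tc = R"TC(
///     def relu(float(N) I) -> (O) { O(n) = fmax(I(n), 0) }
///   )TC";
///   std::vector<const DLConstTensor*> inputs = {/* DLPack-wrapped tensors */};
///   auto pExecutor = tc::compile<tc::CudaBackend>(
///       tc, "relu", inputs, tc::CudaMappingOptions::makeNaiveMappingOptions());
/// \endcode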
template <typename Backend>
std::unique_ptr<typename Backend::ExecutorType> compile(
    const std::string& tc,
    const std::string& entryPoint,
    const std::vector<const DLConstTensor*>& inputs,
    /* TODO: in the future also pass outputs for stride and alignment info */
    const typename Backend::MappingOptionsType& options);

/// Given a TC representation as a TC string + TC function name entryPoint and
/// a list of input tensors that match the signature of the TC function
/// definition (in positional order), this generates the output TensorInfo
/// resulting from running shape inference.
/// The typical flow is to infer output sizes, allocate/resize them within
/// your favorite ML framework/tensor library and then call compile and run.
/// \returns a vector of TensorInfo which can be used for allocating and
/// performing output shape validation.
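///
/// A sketch of that flow (the allocation step depends on your framework and
/// is elided here; "relu" stands in for any entryPoint):
/// \code
///   auto outputInfos = tc::inferOutputTensorInfo(tc, "relu", inputs);
///   for (const auto& info : outputInfos) {
///     // allocate a tensor with info's shape/dtype in your framework
///   }
///   // ... then tc::compile<Backend>(...) and run on the allocated outputs
/// \endcode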
std::vector<TensorInfo> inferOutputTensorInfo(
    const std::string& tc,
    const std::string& entryPoint,
    const std::vector<const DLConstTensor*> inputs);

namespace detail {
/// Given a TC representation, this parses the TC functions into a map of
/// TreeRef indexed by TC function names.
/// \returns an ordered map of TC function name to parsed TC tree
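///
/// For example (a sketch; "relu" stands in for any function name defined in
/// the TC string):
/// \code
///   std::map<std::string, lang::TreeRef> trees = tc::detail::parse(tc);
///   lang::TreeRef reluTree = trees.at("relu");
/// \endcode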
std::map<std::string, lang::TreeRef> parse(const std::string& tc);

/// Given a TC representation as a TreeRef, this function compiles a new
/// TcExecutor for the specified Backend.
/// For now, contiguous output sizes are inferred given input sizes.
/// If you need another kernel for another TC, other inputs, or other options,
/// just compile another TcExecutor; at the moment we fully JIT specialize on
/// all sizes.
/// \returns a new TcExecutor on which the run method can be called
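///
/// A sketch combining parse and compile (assumes the CUDA backend and a TC
/// string defining a function named "relu"):
/// \code
///   auto trees = tc::detail::parse(tc);
///   auto pExecutor = tc::detail::compile<tc::CudaBackend>(
///       trees.at("relu"), inputs, options);
/// \endcode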
template <typename Backend>
std::unique_ptr<typename Backend::ExecutorType> compile(
    lang::TreeRef tcDefinition,
    const std::vector<const DLConstTensor*>& inputs,
    /* TODO: in the future also pass outputs for stride and alignment info */
    const typename Backend::MappingOptionsType& options);

/// Given a TC representation as a TreeRef and a list of input tensors that
/// match the signature of the TC function definition (in positional order),
/// this generates the output TensorInfo resulting from running shape
/// inference.
/// The typical flow is to infer output sizes, allocate/resize them within
/// your favorite ML framework/tensor library and then call compile and run.
/// \returns a vector of TensorInfo which can be used for allocating and
/// performing output shape validation.
std::vector<TensorInfo> inferOutputTensorInfo(
    lang::TreeRef tcDefinition,
    const std::vector<const DLConstTensor*> inputs);
} // namespace detail
} // namespace tc

#include "tc/core/compiler-inl.h"