Skip to content

Commit b1fa5ac

Browse files
committed
[mlgo] Factor out TensorSpec
This is a simple datatype with a few JSON utilities, and is independent of the underlying executor. The main motivation is to allow taking a dependency on it on the AOT side, and to allow us to build a correctly-sized buffer in the cases when the requested feature isn't supported by the model. This, in turn, allows us to grow the feature set supported by the compiler in a backward-compatible way; and also collect traces exposing the new features, but starting from the older model, and continue training from those new traces. Differential Revision: https://reviews.llvm.org/D124417
1 parent 5cbf516 commit b1fa5ac

File tree

8 files changed

+338
-267
lines changed

8 files changed

+338
-267
lines changed
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
//===- TensorSpec.h - type descriptor for a tensor --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
#ifndef LLVM_ANALYSIS_TENSORSPEC_H
#define LLVM_ANALYSIS_TENSORSPEC_H

#include "llvm/Config/llvm-config.h"

#include "llvm/ADT/StringMap.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/JSON.h"

#include <memory>
#include <vector>

namespace llvm {
/// TensorSpec encapsulates the specification of a tensor: its dimensions, or
/// "shape" (row-major), its type (see TensorSpec::getDataType specializations
/// for supported types), its name and port (see "TensorFlow: Large-Scale
/// Machine Learning on Heterogeneous Distributed Systems", section 4.2, para 2:
/// https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45166.pdf)
///
/// Known tensor types. The left part is the C type, the right is a name we
/// can use to identify the type (to implement TensorSpec equality checks), and
/// to use, if needed, when mapping to an underlying evaluator's type system.
/// The main requirement is that the C type we use has the same size and
/// encoding (e.g. endian-ness) as the one used by the evaluator.
#define SUPPORTED_TENSOR_TYPES(M)                                              \
  M(float, Float)                                                              \
  M(double, Double)                                                            \
  M(int8_t, Int8)                                                              \
  M(uint8_t, UInt8)                                                            \
  M(int16_t, Int16)                                                            \
  M(uint16_t, UInt16)                                                          \
  M(int32_t, Int32)                                                            \
  M(uint32_t, UInt32)                                                          \
  M(int64_t, Int64)                                                            \
  M(uint64_t, UInt64)

enum class TensorType {
  Invalid,
#define _TENSOR_TYPE_ENUM_MEMBERS(_, Name) Name,
  SUPPORTED_TENSOR_TYPES(_TENSOR_TYPE_ENUM_MEMBERS)
#undef _TENSOR_TYPE_ENUM_MEMBERS
};

class TensorSpec final {
public:
  /// Build a TensorSpec for element type T; T must be one of the types in
  /// SUPPORTED_TENSOR_TYPES (enforced by the explicit getDataType<T>
  /// specializations below - any other T fails to link).
  template <typename T>
  static TensorSpec createSpec(const std::string &Name,
                               const std::vector<int64_t> &Shape,
                               int Port = 0) {
    return TensorSpec(Name, Port, getDataType<T>(), sizeof(T), Shape);
  }

  const std::string &name() const { return Name; }
  int port() const { return Port; }
  TensorType type() const { return Type; }
  const std::vector<int64_t> &shape() const { return Shape; }

  bool operator==(const TensorSpec &Other) const {
    return Name == Other.Name && Port == Other.Port && Type == Other.Type &&
           Shape == Other.Shape;
  }

  bool operator!=(const TensorSpec &Other) const { return !(*this == Other); }

  /// Get the number of elements in a tensor with this shape.
  size_t getElementCount() const { return ElementCount; }
  /// Get the size, in bytes, of one element.
  size_t getElementByteSize() const { return ElementSize; }

  template <typename T> bool isElementType() const {
    return getDataType<T>() == Type;
  }

private:
  TensorSpec(const std::string &Name, int Port, TensorType Type,
             size_t ElementSize, const std::vector<int64_t> &Shape);

  template <typename T> static TensorType getDataType();

  std::string Name;
  int Port = 0;
  TensorType Type = TensorType::Invalid;
  std::vector<int64_t> Shape;
  // Cached product of Shape's dimensions, computed once in the constructor.
  size_t ElementCount = 0;
  // sizeof() one element of the C type corresponding to Type.
  size_t ElementSize = 0;
};

/// Construct a TensorSpec from a JSON dictionary of the form:
/// { "name": <string>,
///   "port": <int>,
///   "type": <string. Use LLVM's types, e.g. float, double, int64_t>,
///   "shape": <array of ints> }
/// For the "type" field, see the C++ primitive types used in
/// SUPPORTED_TENSOR_TYPES.
Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
                                           const json::Value &Value);

struct LoggedFeatureSpec {
  TensorSpec Spec;
  Optional<std::string> LoggingName;
  const std::string &getLoggingName() const {
    return LoggingName ? *LoggingName : Spec.name();
  }
};

/// Load the output specs. If SpecFileOverride is not empty, that path is used.
/// Otherwise, the file is assumed to be called 'output_spec.json' and be found
/// under ModelPath (the model directory).
/// The first output tensor name must match ExpectedDecisionName.
/// In case of error, the return is None and the error is logged.
Optional<std::vector<LoggedFeatureSpec>>
loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
                StringRef ModelPath, StringRef SpecFileOverride = StringRef());

#define TFUTILS_GETDATATYPE_DEF(T, Name)                                       \
  template <> TensorType TensorSpec::getDataType<T>();
SUPPORTED_TENSOR_TYPES(TFUTILS_GETDATATYPE_DEF)

#undef TFUTILS_GETDATATYPE_DEF
} // namespace llvm

#endif // LLVM_ANALYSIS_TENSORSPEC_H

llvm/include/llvm/Analysis/Utils/TFUtils.h

Lines changed: 1 addition & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#ifdef LLVM_HAVE_TF_API
1515
#include "llvm/ADT/StringMap.h"
16+
#include "llvm/Analysis/TensorSpec.h"
1617
#include "llvm/IR/LLVMContext.h"
1718
#include "llvm/Support/JSON.h"
1819

@@ -38,110 +39,6 @@ namespace llvm {
3839
class TFModelEvaluatorImpl;
3940
class EvaluationResultImpl;
4041

41-
/// TensorSpec encapsulates the specification of a tensor: its dimensions, or
42-
/// "shape" (row-major), its type (see TensorSpec::getDataType specializations
43-
/// for supported types), its name and port (see "TensorFlow: Large-Scale
44-
/// Machine Learning on Heterogeneous Distributed Systems", section 4.2, para 2:
45-
/// https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45166.pdf)
46-
///
47-
/// TensorSpec is used to set up a TFModelEvaluator by describing the expected
48-
/// inputs and outputs.
49-
50-
/// Known tensor types. The left part is the C type, the right is a name we
51-
/// can use to identify the type (to implement TensorSpec equality checks), and
52-
/// to use, if needed, when mapping to an underlying evaluator's type system.
53-
/// The main requirement is that the C type we use has the same size and
54-
/// encoding (e.g. endian-ness) as the one used by the evaluator.
55-
#define SUPPORTED_TENSOR_TYPES(M) \
56-
M(float, Float) \
57-
M(double, Double) \
58-
M(int8_t, Int8) \
59-
M(uint8_t, UInt8) \
60-
M(int16_t, Int16) \
61-
M(uint16_t, UInt16) \
62-
M(int32_t, Int32) \
63-
M(uint32_t, UInt32) \
64-
M(int64_t, Int64) \
65-
M(uint64_t, UInt64)
66-
67-
enum class TensorType {
68-
Invalid,
69-
#define _TENSOR_TYPE_ENUM_MEMBERS(_, Name) Name,
70-
SUPPORTED_TENSOR_TYPES(_TENSOR_TYPE_ENUM_MEMBERS)
71-
#undef _TENSOR_TYPE_ENUM_MEMBERS
72-
};
73-
74-
class TensorSpec final {
75-
public:
76-
template <typename T>
77-
static TensorSpec createSpec(const std::string &Name,
78-
const std::vector<int64_t> &Shape,
79-
int Port = 0) {
80-
return TensorSpec(Name, Port, getDataType<T>(), sizeof(T), Shape);
81-
}
82-
83-
const std::string &name() const { return Name; }
84-
int port() const { return Port; }
85-
TensorType type() const { return Type; }
86-
const std::vector<int64_t> &shape() const { return Shape; }
87-
88-
bool operator==(const TensorSpec &Other) const {
89-
return Name == Other.Name && Port == Other.Port && Type == Other.Type &&
90-
Shape == Other.Shape;
91-
}
92-
93-
bool operator!=(const TensorSpec &Other) const { return !(*this == Other); }
94-
95-
/// Get the number of elements in a tensor with this shape.
96-
size_t getElementCount() const { return ElementCount; }
97-
/// Get the size, in bytes, of one element.
98-
size_t getElementByteSize() const { return ElementSize; }
99-
100-
template <typename T> bool isElementType() const {
101-
return getDataType<T>() == Type;
102-
}
103-
104-
private:
105-
TensorSpec(const std::string &Name, int Port, TensorType Type,
106-
size_t ElementSize, const std::vector<int64_t> &Shape);
107-
108-
template <typename T> static TensorType getDataType();
109-
110-
std::string Name;
111-
int Port = 0;
112-
TensorType Type = TensorType::Invalid;
113-
std::vector<int64_t> Shape;
114-
size_t ElementCount = 0;
115-
size_t ElementSize = 0;
116-
};
117-
118-
/// Construct a TensorSpec from a JSON dictionary of the form:
119-
/// { "name": <string>,
120-
/// "port": <int>,
121-
/// "type": <string. Use LLVM's types, e.g. float, double, int64_t>,
122-
/// "shape": <array of ints> }
123-
/// For the "type" field, see the C++ primitive types used in
124-
/// TFUTILS_SUPPORTED_TYPES.
125-
Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
126-
const json::Value &Value);
127-
128-
struct LoggedFeatureSpec {
129-
TensorSpec Spec;
130-
Optional<std::string> LoggingName;
131-
const std::string &getLoggingName() const {
132-
return LoggingName ? *LoggingName : Spec.name();
133-
}
134-
};
135-
136-
/// Load the output specs. If SpecFileOverride is not empty, that path is used.
137-
/// Otherwise, the file is assumed to be called 'output_spec.json' and be found
138-
/// under ModelPath (the model directory).
139-
/// The first output tensor name must match ExpectedDecisionName.
140-
/// In case of error, the return is None and the error is logged.
141-
Optional<std::vector<LoggedFeatureSpec>>
142-
loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
143-
StringRef ModelPath, StringRef SpecFileOverride = StringRef());
144-
14542
/// Logging utility - given an ordered specification of features, and assuming
14643
/// a scalar reward, allow logging feature values and rewards, and then print
14744
/// as tf.train.SequenceExample text protobuf.
@@ -286,11 +183,6 @@ class TFModelEvaluator final {
286183
std::unique_ptr<TFModelEvaluatorImpl> Impl;
287184
};
288185

289-
#define TFUTILS_GETDATATYPE_DEF(T, Name) \
290-
template <> TensorType TensorSpec::getDataType<T>();
291-
SUPPORTED_TENSOR_TYPES(TFUTILS_GETDATATYPE_DEF)
292-
293-
#undef TFUTILS_GETDATATYPE_DEF
294186
} // namespace llvm
295187

296188
#endif // LLVM_HAVE_TF_API

llvm/lib/Analysis/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ add_llvm_component_library(LLVMAnalysis
131131
SyncDependenceAnalysis.cpp
132132
SyntheticCountsUtils.cpp
133133
TFUtils.cpp
134+
TensorSpec.cpp
134135
TargetLibraryInfo.cpp
135136
TargetTransformInfo.cpp
136137
Trace.cpp

llvm/lib/Analysis/TFUtils.cpp

Lines changed: 0 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -132,113 +132,6 @@ class EvaluationResultImpl {
132132
std::vector<TF_Tensor *> Output;
133133
};
134134

135-
TensorSpec::TensorSpec(const std::string &Name, int Port, TensorType Type,
136-
size_t ElementSize, const std::vector<int64_t> &Shape)
137-
: Name(Name), Port(Port), Type(Type), Shape(Shape),
138-
ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1,
139-
std::multiplies<int64_t>())),
140-
ElementSize(ElementSize) {}
141-
142-
/// Parse a TensorSpec from a JSON dict of the form
/// { "name": <string>, "port": <int>, "type": <string>, "shape": <int array> }.
/// On malformed input, emits an error via \p Ctx (including the offending
/// JSON value) and returns None. An unrecognized "type" string also yields
/// None, but without an error - callers distinguish it by the silent return.
Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
                                           const json::Value &Value) {
  auto EmitError = [&](const llvm::Twine &Message) -> Optional<TensorSpec> {
    std::string S;
    llvm::raw_string_ostream OS(S);
    OS << Value;
    Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S);
    return None;
  };
  // FIXME: accept a Path as a parameter, and use it for error reporting.
  json::Path::Root Root("tensor_spec");
  json::ObjectMapper Mapper(Value, Root);
  if (!Mapper)
    return EmitError("Value is not a dict");

  std::string TensorName;
  int TensorPort = -1;
  // Named TensorTypeName (not TensorType) to avoid shadowing the
  // llvm::TensorType enum declared in this namespace.
  std::string TensorTypeName;
  std::vector<int64_t> TensorShape;

  if (!Mapper.map<std::string>("name", TensorName))
    return EmitError("'name' property not present or not a string");
  if (!Mapper.map<std::string>("type", TensorTypeName))
    return EmitError("'type' property not present or not a string");
  if (!Mapper.map<int>("port", TensorPort))
    return EmitError("'port' property not present or not an int");
  if (!Mapper.map<std::vector<int64_t>>("shape", TensorShape))
    return EmitError("'shape' property not present or not an int array");

// Try each supported C type name in turn; #T stringizes the type for the
// comparison against the JSON "type" field.
#define PARSE_TYPE(T, E)                                                       \
  if (TensorTypeName == #T)                                                    \
    return TensorSpec::createSpec<T>(TensorName, TensorShape, TensorPort);
  SUPPORTED_TENSOR_TYPES(PARSE_TYPE)
#undef PARSE_TYPE
  return None;
}
178-
179-
/// Load {tensor_spec, logging_name} pairs from a JSON output-spec file.
/// If \p SpecFileOverride is empty, the file is <ModelPath>/output_spec.json.
/// Every array entry must carry a valid 'tensor_spec' and a 'logging_name',
/// the element type must be int64/int32/float, and the first entry's
/// logging_name must equal \p ExpectedDecisionName. On any failure, an error
/// is emitted via \p Ctx and None is returned.
Optional<std::vector<LoggedFeatureSpec>>
loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
                StringRef ModelPath, StringRef SpecFileOverride) {
  SmallVector<char, 128> OutputSpecsPath;
  StringRef FileName = SpecFileOverride;
  if (FileName.empty()) {
    llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json");
    FileName = {OutputSpecsPath.data(), OutputSpecsPath.size()};
  }

  auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
  if (!BufferOrError) {
    Ctx.emitError("Error opening output specs file: " + FileName + " : " +
                  BufferOrError.getError().message());
    return None;
  }
  auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
  if (!ParsedJSONValues) {
    // Consume the Expected's error with takeError(): destroying an unchecked
    // failed Expected aborts assertion-enabled builds, and the parser's
    // message is the useful diagnostic anyway.
    Ctx.emitError("Could not parse specs file: " + FileName + ": " +
                  toString(ParsedJSONValues.takeError()));
    return None;
  }
  auto ValuesArray = ParsedJSONValues->getAsArray();
  if (!ValuesArray) {
    Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
                  "logging_name:<name>} dictionaries");
    return None;
  }
  std::vector<LoggedFeatureSpec> Ret;
  // Well-formed entries are collected; malformed ones are silently skipped
  // here and diagnosed collectively by the size check below.
  for (const auto &Value : *ValuesArray)
    if (const auto *Obj = Value.getAsObject())
      if (const auto *SpecPart = Obj->get("tensor_spec"))
        if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart))
          if (auto LoggingName = Obj->getString("logging_name")) {
            if (!TensorSpec->isElementType<int64_t>() &&
                !TensorSpec->isElementType<int32_t>() &&
                !TensorSpec->isElementType<float>()) {
              Ctx.emitError(
                  "Only int64, int32, and float tensors are supported. "
                  "Found unsupported type for tensor named " +
                  TensorSpec->name());
              return None;
            }
            Ret.push_back({*TensorSpec, LoggingName->str()});
          }

  // Any entry that failed a filter above makes Ret shorter than the array.
  if (ValuesArray->size() != Ret.size()) {
    Ctx.emitError(
        "Unable to parse output spec. It should be a json file containing an "
        "array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
        "with a json object describing a TensorSpec; and a 'logging_name' key, "
        "which is a string to use as name when logging this tensor in the "
        "training log.");
    return None;
  }
  if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) {
    Ctx.emitError("The first output spec must describe the decision tensor, "
                  "and must have the logging_name " +
                  StringRef(ExpectedDecisionName));
    return None;
  }
  return Ret;
}
241-
242135
class TFModelEvaluatorImpl {
243136
public:
244137
TFModelEvaluatorImpl(StringRef SavedModelPath,
@@ -519,13 +412,6 @@ TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const {
519412
return TF_TensorData(Impl->getOutput()[Index]);
520413
}
521414

522-
#define TFUTILS_GETDATATYPE_IMPL(T, E) \
523-
template <> TensorType TensorSpec::getDataType<T>() { return TensorType::E; }
524-
525-
SUPPORTED_TENSOR_TYPES(TFUTILS_GETDATATYPE_IMPL)
526-
527-
#undef TFUTILS_GETDATATYPE_IMPL
528-
529415
TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
530416
TFModelEvaluator::~TFModelEvaluator() {}
531417

0 commit comments

Comments
 (0)