Skip to content

Commit a9bf307

Browse files
refactor: Intern strings during merging (#345)
1 parent 343ecb0 commit a9bf307

File tree

3 files changed

+61
-33
lines changed

3 files changed

+61
-33
lines changed

indexer/Driver.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "spdlog/spdlog.h"
3535

3636
#include "llvm/ADT/StringMap.h"
37+
#include "llvm/Support/StringSaver.h"
3738

3839
#include "scip/scip.pb.h"
3940

@@ -1046,7 +1047,10 @@ class Driver {
10461047

10471048
absl::flat_hash_set<uint32_t> badJobIds{};
10481049

1049-
scip::IndexBuilder builder{};
1050+
llvm::BumpPtrAllocator allocator;
1051+
llvm::UniqueStringSaver stringSaver{allocator};
1052+
scip::SymbolNameInterner interner{stringSaver};
1053+
scip::IndexBuilder builder{interner};
10501054
// TODO: Measure how much time this is taking and parallelize if too slow.
10511055
for (auto &paths : this->shardPaths) {
10521056
scip::Index indexShard;

indexer/ScipExtras.cc

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "perfetto/perfetto.h"
1212

1313
#include "llvm/Support/Path.h"
14+
#include "llvm/Support/StringSaver.h"
1415

1516
#include "scip/scip.pb.h"
1617

@@ -97,9 +98,15 @@ void SymbolInformationBuilder::finish(bool deterministic,
9798
}));
9899
}
99100

100-
DocumentBuilder::DocumentBuilder(scip::Document &&first)
101-
: soFar(), _bomb(BOMB_INIT(fmt::format("DocumentBuilder for '{}",
102-
first.relative_path()))) {
101+
SymbolNameRef SymbolNameInterner::intern(std::string &&s) {
102+
return SymbolNameRef{std::string_view(this->impl.save(s))};
103+
}
104+
105+
DocumentBuilder::DocumentBuilder(scip::Document &&first,
106+
SymbolNameInterner interner)
107+
: soFar(), interner(interner),
108+
_bomb(BOMB_INIT(
109+
fmt::format("DocumentBuilder for '{}", first.relative_path()))) {
103110
auto &language = *first.mutable_language();
104111
this->soFar.set_language(std::move(language));
105112
auto &relativePath = *first.mutable_relative_path();
@@ -112,17 +119,17 @@ void DocumentBuilder::merge(scip::Document &&doc) {
112119
this->occurrences.insert({std::move(occ)});
113120
}
114121
for (auto &symbolInfo : *doc.mutable_symbols()) {
115-
SymbolName name{std::move(*symbolInfo.mutable_symbol())};
122+
auto name = this->interner.intern(std::move(*symbolInfo.mutable_symbol()));
116123
auto it = this->symbolInfos.find(name);
117124
if (it == this->symbolInfos.end()) {
118125
// SAFETY: Don't inline this initializer call since lack of
119126
// guarantees around subexpression evaluation order mean that
120127
// the std::move(name) may happen before passing name to
121128
// the initializer.
122129
SymbolInformationBuilder builder{
123-
name.asStringRef(), std::move(*symbolInfo.mutable_documentation()),
130+
name, std::move(*symbolInfo.mutable_documentation()),
124131
std::move(*symbolInfo.mutable_relationships())};
125-
this->symbolInfos.emplace(std::move(name), std::move(builder));
132+
this->symbolInfos.emplace(name, std::move(builder));
126133
continue;
127134
}
128135
auto &symbolInfoBuilder = it->second;
@@ -138,7 +145,7 @@ void DocumentBuilder::merge(scip::Document &&doc) {
138145
void DocumentBuilder::populateSymbolToInfoMap(
139146
SymbolToInfoMap &symbolToInfoMap) {
140147
for (auto &[symbolName, symbolInfoBuilder] : this->symbolInfos) {
141-
symbolToInfoMap.emplace(std::string_view(symbolName.asStringRef()),
148+
symbolToInfoMap.emplace(symbolName,
142149
SymbolToInfoMap::mapped_type(&symbolInfoBuilder));
143150
}
144151
}
@@ -157,10 +164,10 @@ void DocumentBuilder::finish(bool deterministic, scip::Document &out) {
157164

158165
scip_clang::extractTransform(
159166
std::move(this->symbolInfos), deterministic,
160-
absl::FunctionRef<void(SymbolName &&, SymbolInformationBuilder &&)>(
167+
absl::FunctionRef<void(SymbolNameRef &&, SymbolInformationBuilder &&)>(
161168
[&](auto &&name, auto &&builder) {
162169
scip::SymbolInformation symbolInfo{};
163-
symbolInfo.set_symbol(std::move(name.asStringRefMut()));
170+
symbolInfo.set_symbol(name.value.data(), name.value.size());
164171
builder.finish(deterministic, symbolInfo);
165172
*this->soFar.add_symbols() = std::move(symbolInfo);
166173
}));
@@ -173,8 +180,9 @@ RootRelativePath::RootRelativePath(std::string &&value)
173180
ENFORCE(llvm::sys::path::is_relative(this->value));
174181
}
175182

176-
IndexBuilder::IndexBuilder()
177-
: multiplyIndexed(), externalSymbols(), _bomb(BOMB_INIT("IndexBuilder")) {}
183+
IndexBuilder::IndexBuilder(SymbolNameInterner interner)
184+
: multiplyIndexed(), externalSymbols(), interner(interner),
185+
_bomb(BOMB_INIT("IndexBuilder")) {}
178186

179187
void IndexBuilder::addDocument(scip::Document &&doc, bool isMultiplyIndexed) {
180188
ENFORCE(!doc.relative_path().empty());
@@ -184,7 +192,7 @@ void IndexBuilder::addDocument(scip::Document &&doc, bool isMultiplyIndexed) {
184192
if (it == this->multiplyIndexed.end()) {
185193
this->multiplyIndexed.insert(
186194
{std::move(docPath),
187-
std::make_unique<DocumentBuilder>(std::move(doc))});
195+
std::make_unique<DocumentBuilder>(std::move(doc), this->interner)});
188196
} else {
189197
auto &docBuilder = it->second;
190198
docBuilder->merge(std::move(doc));
@@ -200,7 +208,7 @@ void IndexBuilder::addDocument(scip::Document &&doc, bool isMultiplyIndexed) {
200208
}
201209

202210
void IndexBuilder::addExternalSymbolUnchecked(
203-
SymbolName &&name, scip::SymbolInformation &&extSym) {
211+
SymbolNameRef name, scip::SymbolInformation &&extSym) {
204212
std::vector<std::string> docs{};
205213
absl::c_move(*extSym.mutable_documentation(), std::back_inserter(docs));
206214
absl::flat_hash_set<RelationshipExt> rels{};
@@ -211,16 +219,16 @@ void IndexBuilder::addExternalSymbolUnchecked(
211219
// guarantees around subexpression evaluation order mean that
212220
// the std::move(name) may happen before name.asStringRef() is called.
213221
auto builder = std::make_unique<SymbolInformationBuilder>(
214-
name.asStringRef(), std::move(docs), std::move(rels));
215-
this->externalSymbols.emplace(std::move(name), std::move(builder));
222+
name, std::move(docs), std::move(rels));
223+
this->externalSymbols.emplace(name, std::move(builder));
216224
return;
217225
}
218226

219227
void IndexBuilder::addExternalSymbol(scip::SymbolInformation &&extSym) {
220-
SymbolName name{std::move(*extSym.mutable_symbol())};
228+
auto name = this->interner.intern(std::move(*extSym.mutable_symbol()));
221229
auto it = this->externalSymbols.find(name);
222230
if (it == this->externalSymbols.end()) {
223-
this->addExternalSymbolUnchecked(std::move(name), std::move(extSym));
231+
this->addExternalSymbolUnchecked(name, std::move(extSym));
224232
return;
225233
}
226234
// NOTE(def: precondition-deterministic-ext-symbol-docs)
@@ -241,8 +249,7 @@ std::unique_ptr<SymbolToInfoMap> IndexBuilder::populateSymbolToInfoMap() {
241249
SymbolToInfoMap symbolToInfoMap;
242250
for (auto &document : this->documents) {
243251
for (auto &symbolInfo : *document.mutable_symbols()) {
244-
auto symbolName = std::string_view(symbolInfo.symbol());
245-
symbolToInfoMap.emplace(symbolName,
252+
symbolToInfoMap.emplace(SymbolNameRef{symbolInfo.symbol()},
246253
SymbolToInfoMap::mapped_type(&symbolInfo));
247254
}
248255
}
@@ -255,8 +262,9 @@ std::unique_ptr<SymbolToInfoMap> IndexBuilder::populateSymbolToInfoMap() {
255262
void IndexBuilder::addForwardDeclaration(
256263
const SymbolToInfoMap &symbolToInfoMap,
257264
scip::SymbolInformation &&forwardDeclSym) {
258-
SymbolName name{std::move(*forwardDeclSym.mutable_symbol())};
259-
auto it = symbolToInfoMap.find(name.asStringRef());
265+
auto name =
266+
this->interner.intern(std::move(*forwardDeclSym.mutable_symbol()));
267+
auto it = symbolToInfoMap.find(name);
260268
if (it == symbolToInfoMap.end()) {
261269
if (!this->externalSymbols.contains(name)) {
262270
this->addExternalSymbolUnchecked(std::move(name),
@@ -353,11 +361,11 @@ void IndexBuilder::finish(bool deterministic, std::ostream &outputStream) {
353361
}));
354362
scip_clang::extractTransform(
355363
std::move(this->externalSymbols), deterministic,
356-
absl::FunctionRef<void(SymbolName &&,
364+
absl::FunctionRef<void(SymbolNameRef &&,
357365
std::unique_ptr<SymbolInformationBuilder> &&)>(
358366
[&](auto &&name, auto &&builder) -> void {
359367
scip::SymbolInformation extSym{};
360-
extSym.set_symbol(std::move(name.asStringRefMut()));
368+
extSym.set_symbol(name.value.data(), name.value.size());
361369
builder->finish(deterministic, extSym);
362370
writer.writeExternalSymbol(std::move(extSym));
363371
}));

indexer/ScipExtras.h

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
#include "indexer/RAII.h"
2323
#include "indexer/SymbolName.h"
2424

25+
namespace llvm {
26+
class UniqueStringSaver;
27+
} // namespace llvm
28+
2529
namespace scip {
2630

2731
std::strong_ordering compareRelationships(const scip::Relationship &lhs,
@@ -88,10 +92,10 @@ class SymbolInformationBuilder final {
8892

8993
public:
9094
template <typename C1, typename C2>
91-
SymbolInformationBuilder(std::string_view name, C1 &&docs, C2 &&rels)
95+
SymbolInformationBuilder(SymbolNameRef name, C1 &&docs, C2 &&rels)
9296
: documentation(), relationships(),
93-
_bomb(
94-
BOMB_INIT(fmt::format("SymbolInformationBuilder for '{}'", name))) {
97+
_bomb(BOMB_INIT(
98+
fmt::format("SymbolInformationBuilder for '{}'", name.value))) {
9599
(void)name;
96100
this->setDocumentation(std::move(docs));
97101
this->mergeRelationships(std::move(rels));
@@ -116,21 +120,31 @@ class SymbolInformationBuilder final {
116120
};
117121

118122
using SymbolToInfoMap = absl::flat_hash_map<
119-
std::string_view,
123+
SymbolNameRef,
120124
llvm::PointerUnion<SymbolInformation *, SymbolInformationBuilder *>>;
121125

126+
class SymbolNameInterner {
127+
llvm::UniqueStringSaver &impl;
128+
129+
public:
130+
SymbolNameInterner(llvm::UniqueStringSaver &impl) : impl(impl) {}
131+
132+
SymbolNameRef intern(std::string &&);
133+
};
134+
122135
class DocumentBuilder final {
123136
scip::Document soFar;
137+
SymbolNameInterner interner;
124138
scip_clang::Bomb _bomb;
125139

126140
absl::flat_hash_set<OccurrenceExt> occurrences;
127141

128142
// Keyed by the symbol name. The SymbolInformationBuilder value
129143
// doesn't carry the name to avoid redundant allocations.
130-
absl::flat_hash_map<SymbolName, SymbolInformationBuilder> symbolInfos;
144+
absl::flat_hash_map<SymbolNameRef, SymbolInformationBuilder> symbolInfos;
131145

132146
public:
133-
DocumentBuilder(scip::Document &&document);
147+
DocumentBuilder(scip::Document &&document, SymbolNameInterner interner);
134148
void merge(scip::Document &&doc);
135149
void populateSymbolToInfoMap(SymbolToInfoMap &);
136150
void finish(bool deterministic, scip::Document &out);
@@ -153,13 +167,15 @@ class IndexBuilder final {
153167
// aggregate information across different hashes into a single Document.
154168
absl::flat_hash_map<RootRelativePath, std::unique_ptr<DocumentBuilder>>
155169
multiplyIndexed;
156-
absl::flat_hash_map<SymbolName, std::unique_ptr<SymbolInformationBuilder>>
170+
absl::flat_hash_map<SymbolNameRef, std::unique_ptr<SymbolInformationBuilder>>
157171
externalSymbols;
158172

173+
SymbolNameInterner interner;
174+
159175
scip_clang::Bomb _bomb;
160176

161177
public:
162-
IndexBuilder();
178+
IndexBuilder(SymbolNameInterner interner);
163179
void addDocument(scip::Document &&doc, bool isMultiplyIndexed);
164180
void addExternalSymbol(scip::SymbolInformation &&extSym);
165181

@@ -171,7 +187,7 @@ class IndexBuilder final {
171187
void finish(bool deterministic, std::ostream &);
172188

173189
private:
174-
void addExternalSymbolUnchecked(SymbolName &&,
190+
void addExternalSymbolUnchecked(SymbolNameRef,
175191
scip::SymbolInformation &&symWithoutName);
176192
};
177193

0 commit comments

Comments
 (0)