Skip to content

Commit bbc6242

Browse files
committed
Provide SCIP external_symbols + fix symbols provided with Document
Before this change, the `SymbolInformation` provided by a document was the info for all encountered symbols that had not yet been emitted. As a result, the symbol information on a `Document` was a mishmash of symbols defined in that document, symbols from other documents, and external symbols. After this change, the `SymbolInformation` on a document is just the locals and the symbols defined in that document. All symbols that are referenced but not defined in any emitted document are included in `external_symbols`.
1 parent b1c091c commit bbc6242

File tree

1 file changed

+146
-58
lines changed
  • src/tools/rust-analyzer/crates/rust-analyzer/src/cli

1 file changed

+146
-58
lines changed

src/tools/rust-analyzer/crates/rust-analyzer/src/cli/scip.rs

Lines changed: 146 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,15 @@ use std::{path::PathBuf, time::Instant};
44

55
use ide::{
66
AnalysisHost, LineCol, Moniker, MonikerDescriptorKind, MonikerIdentifier, MonikerResult,
7-
StaticIndex, StaticIndexedFile, SymbolInformationKind, TextRange, TokenId, TokenStaticData,
8-
VendoredLibrariesConfig,
7+
RootDatabase, StaticIndex, StaticIndexedFile, SymbolInformationKind, TextRange, TokenId,
8+
TokenStaticData, VendoredLibrariesConfig,
99
};
1010
use ide_db::LineIndexDatabase;
1111
use load_cargo::{load_workspace_at, LoadCargoConfig, ProcMacroServerChoice};
1212
use rustc_hash::{FxHashMap, FxHashSet};
13-
use scip::types as scip_types;
13+
use scip::types::{self as scip_types, SymbolInformation};
1414
use tracing::error;
15+
use vfs::FileId;
1516

1617
use crate::{
1718
cli::flags,
@@ -84,26 +85,40 @@ impl flags::Scip {
8485
text_document_encoding: scip_types::TextEncoding::UTF8.into(),
8586
special_fields: Default::default(),
8687
};
88+
8789
let mut documents = Vec::new();
8890

91+
// All TokenIds where an Occurrence has been emitted that references a symbol.
92+
let mut token_ids_referenced: FxHashSet<TokenId> = FxHashSet::default();
93+
// All TokenIds where the SymbolInformation has been written to the document.
8994
let mut token_ids_emitted: FxHashSet<TokenId> = FxHashSet::default();
90-
let mut global_symbols_emitted: FxHashSet<String> = FxHashSet::default();
91-
let mut duplicate_symbols: Vec<(String, String)> = Vec::new();
95+
// All FileIds emitted as documents.
96+
let mut file_ids_emitted: FxHashSet<FileId> = FxHashSet::default();
97+
98+
// All non-local symbols encountered, for detecting duplicate symbol errors.
99+
let mut nonlocal_symbols_emitted: FxHashSet<String> = FxHashSet::default();
100+
// List of (source_location, symbol) for duplicate symbol errors to report.
101+
let mut duplicate_symbol_errors: Vec<(String, String)> = Vec::new();
102+
// This is called after definitions have been deduplicated by token_ids_emitted. The purpose
103+
// is to detect reuse of symbol names because this causes ambiguity about their meaning.
104+
let mut record_error_if_symbol_already_used =
105+
|symbol: String, relative_path: &str, line_index: &LineIndex, text_range: TextRange| {
106+
let is_local = symbol.starts_with("local ");
107+
if !is_local && !nonlocal_symbols_emitted.insert(symbol.clone()) {
108+
let source_location =
109+
text_range_to_string(relative_path, line_index, text_range);
110+
duplicate_symbol_errors.push((source_location, symbol));
111+
}
112+
};
113+
114+
// Generates symbols from token monikers.
92115
let mut symbol_generator = SymbolGenerator::new();
93116

94117
for StaticIndexedFile { file_id, tokens, .. } in si.files {
95118
symbol_generator.clear_document_local_state();
96119

97-
let relative_path = match get_relative_filepath(&vfs, &root, file_id) {
98-
Some(relative_path) => relative_path,
99-
None => continue,
100-
};
101-
102-
let line_index = LineIndex {
103-
index: db.line_index(file_id),
104-
encoding: PositionEncoding::Utf8,
105-
endings: LineEndings::Unix,
106-
};
120+
let Some(relative_path) = get_relative_filepath(&vfs, &root, file_id) else { continue };
121+
let line_index = get_line_index(db, file_id);
107122

108123
let mut occurrences = Vec::new();
109124
let mut symbols = Vec::new();
@@ -120,54 +135,45 @@ impl flags::Scip {
120135
("".to_owned(), None)
121136
};
122137

123-
if !symbol.is_empty() && token_ids_emitted.insert(id) {
124-
if !symbol.starts_with("local ")
125-
&& !global_symbols_emitted.insert(symbol.clone())
126-
{
127-
let source_location =
128-
text_range_to_string(relative_path.as_str(), &line_index, text_range);
129-
duplicate_symbols.push((source_location, symbol.clone()));
138+
if !symbol.is_empty() {
139+
let is_defined_in_this_document = match token.definition {
140+
Some(def) => def.file_id == file_id,
141+
_ => false,
142+
};
143+
if is_defined_in_this_document {
144+
if token_ids_emitted.insert(id) {
145+
// token_ids_emitted does deduplication. This checks that this results
146+
// in unique emitted symbols, as otherwise references are ambiguous.
147+
record_error_if_symbol_already_used(
148+
symbol.clone(),
149+
relative_path.as_str(),
150+
&line_index,
151+
text_range,
152+
);
153+
symbols.push(compute_symbol_info(
154+
relative_path.clone(),
155+
symbol.clone(),
156+
enclosing_symbol,
157+
token,
158+
));
159+
}
130160
} else {
131-
let documentation = match &token.documentation {
132-
Some(doc) => vec![doc.as_str().to_owned()],
133-
None => vec![],
134-
};
135-
136-
let position_encoding =
137-
scip_types::PositionEncoding::UTF8CodeUnitOffsetFromLineStart.into();
138-
let signature_documentation =
139-
token.signature.clone().map(|text| scip_types::Document {
140-
relative_path: relative_path.clone(),
141-
language: "rust".to_owned(),
142-
text,
143-
position_encoding,
144-
..Default::default()
145-
});
146-
let symbol_info = scip_types::SymbolInformation {
147-
symbol: symbol.clone(),
148-
documentation,
149-
relationships: Vec::new(),
150-
special_fields: Default::default(),
151-
kind: symbol_kind(token.kind).into(),
152-
display_name: token.display_name.clone().unwrap_or_default(),
153-
signature_documentation: signature_documentation.into(),
154-
enclosing_symbol: enclosing_symbol.unwrap_or_default(),
155-
};
156-
157-
symbols.push(symbol_info)
161+
token_ids_referenced.insert(id);
158162
}
159163
}
160164

161165
// If the range of the def and the range of the token are the same, this must be the definition.
162166
// they also must be in the same file. See https://github.com/rust-lang/rust-analyzer/pull/17988
163-
let mut symbol_roles = Default::default();
164-
match token.definition {
165-
Some(def) if def.file_id == file_id && def.range == text_range => {
166-
symbol_roles |= scip_types::SymbolRole::Definition as i32;
167-
}
168-
_ => {}
167+
let is_definition = match token.definition {
168+
Some(def) => def.file_id == file_id && def.range == text_range,
169+
_ => false,
169170
};
170171

172+
let mut symbol_roles = Default::default();
173+
if is_definition {
174+
symbol_roles |= scip_types::SymbolRole::Definition as i32;
175+
}
176+
171177
occurrences.push(scip_types::Occurrence {
172178
range: text_range_to_scip_range(&line_index, text_range),
173179
symbol,
@@ -195,18 +201,61 @@ impl flags::Scip {
195201
position_encoding,
196202
special_fields: Default::default(),
197203
});
204+
if !file_ids_emitted.insert(file_id) {
205+
panic!("Invariant violation: file emitted multiple times.");
206+
}
207+
}
208+
209+
// Collect all symbols referenced by the files but not defined within them.
210+
let mut external_symbols = Vec::new();
211+
for id in token_ids_referenced.difference(&token_ids_emitted) {
212+
let id = *id;
213+
let token = si.tokens.get(id).unwrap();
214+
215+
let Some(definition) = token.definition else {
216+
break;
217+
};
218+
219+
let file_id = definition.file_id;
220+
let Some(relative_path) = get_relative_filepath(&vfs, &root, file_id) else { continue };
221+
let line_index = get_line_index(db, file_id);
222+
let text_range = definition.range;
223+
if file_ids_emitted.contains(&file_id) {
224+
tracing::error!(
225+
"Bug: definition at {} should have been in an SCIP document but was not.",
226+
text_range_to_string(relative_path.as_str(), &line_index, text_range)
227+
);
228+
continue;
229+
}
230+
231+
let TokenSymbols { symbol, enclosing_symbol } = symbol_generator
232+
.token_symbols(id, token)
233+
.expect("To have been referenced, the symbol must be in the cache.");
234+
235+
record_error_if_symbol_already_used(
236+
symbol.clone(),
237+
relative_path.as_str(),
238+
&line_index,
239+
text_range,
240+
);
241+
external_symbols.push(compute_symbol_info(
242+
relative_path.clone(),
243+
symbol.clone(),
244+
enclosing_symbol,
245+
token,
246+
));
198247
}
199248

200249
let index = scip_types::Index {
201250
metadata: Some(metadata).into(),
202251
documents,
203-
external_symbols: Vec::new(),
252+
external_symbols,
204253
special_fields: Default::default(),
205254
};
206255

207-
if !duplicate_symbols.is_empty() {
256+
if !duplicate_symbol_errors.is_empty() {
208257
eprintln!("{}", DUPLICATE_SYMBOLS_MESSAGE);
209-
for (source_location, symbol) in duplicate_symbols {
258+
for (source_location, symbol) in duplicate_symbol_errors {
210259
eprintln!("{}", source_location);
211260
eprintln!(" Duplicate symbol: {}", symbol);
212261
eprintln!();
@@ -239,6 +288,37 @@ Known cases that can cause this:
239288
Duplicate symbols encountered:
240289
";
241290

291+
fn compute_symbol_info(
292+
relative_path: String,
293+
symbol: String,
294+
enclosing_symbol: Option<String>,
295+
token: &TokenStaticData,
296+
) -> SymbolInformation {
297+
let documentation = match &token.documentation {
298+
Some(doc) => vec![doc.as_str().to_owned()],
299+
None => vec![],
300+
};
301+
302+
let position_encoding = scip_types::PositionEncoding::UTF8CodeUnitOffsetFromLineStart.into();
303+
let signature_documentation = token.signature.clone().map(|text| scip_types::Document {
304+
relative_path,
305+
language: "rust".to_owned(),
306+
text,
307+
position_encoding,
308+
..Default::default()
309+
});
310+
scip_types::SymbolInformation {
311+
symbol,
312+
documentation,
313+
relationships: Vec::new(),
314+
special_fields: Default::default(),
315+
kind: symbol_kind(token.kind).into(),
316+
display_name: token.display_name.clone().unwrap_or_default(),
317+
signature_documentation: signature_documentation.into(),
318+
enclosing_symbol: enclosing_symbol.unwrap_or_default(),
319+
}
320+
}
321+
242322
fn get_relative_filepath(
243323
vfs: &vfs::Vfs,
244324
rootpath: &vfs::AbsPathBuf,
@@ -247,6 +327,14 @@ fn get_relative_filepath(
247327
Some(vfs.file_path(file_id).as_path()?.strip_prefix(rootpath)?.as_str().to_owned())
248328
}
249329

330+
fn get_line_index(db: &RootDatabase, file_id: FileId) -> LineIndex {
331+
LineIndex {
332+
index: db.line_index(file_id),
333+
encoding: PositionEncoding::Utf8,
334+
endings: LineEndings::Unix,
335+
}
336+
}
337+
250338
// SCIP Ranges have a (very large) optimization: ranges that are on a single line
251339
// are encoded only as a vector of [start_line, start_col, end_col].
252340
//

0 commit comments

Comments
 (0)