@@ -4,14 +4,15 @@ use std::{path::PathBuf, time::Instant};
4
4
5
5
use ide:: {
6
6
AnalysisHost , LineCol , Moniker , MonikerDescriptorKind , MonikerIdentifier , MonikerResult ,
7
- StaticIndex , StaticIndexedFile , SymbolInformationKind , TextRange , TokenId , TokenStaticData ,
8
- VendoredLibrariesConfig ,
7
+ RootDatabase , StaticIndex , StaticIndexedFile , SymbolInformationKind , TextRange , TokenId ,
8
+ TokenStaticData , VendoredLibrariesConfig ,
9
9
} ;
10
10
use ide_db:: LineIndexDatabase ;
11
11
use load_cargo:: { load_workspace_at, LoadCargoConfig , ProcMacroServerChoice } ;
12
12
use rustc_hash:: { FxHashMap , FxHashSet } ;
13
- use scip:: types as scip_types;
13
+ use scip:: types:: { self as scip_types, SymbolInformation } ;
14
14
use tracing:: error;
15
+ use vfs:: FileId ;
15
16
16
17
use crate :: {
17
18
cli:: flags,
@@ -84,26 +85,40 @@ impl flags::Scip {
84
85
text_document_encoding : scip_types:: TextEncoding :: UTF8 . into ( ) ,
85
86
special_fields : Default :: default ( ) ,
86
87
} ;
88
+
87
89
let mut documents = Vec :: new ( ) ;
88
90
91
+ // All TokenIds where an Occurrence has been emitted that references a symbol.
92
+ let mut token_ids_referenced: FxHashSet < TokenId > = FxHashSet :: default ( ) ;
93
+ // All TokenIds where the SymbolInformation has been written to the document.
89
94
let mut token_ids_emitted: FxHashSet < TokenId > = FxHashSet :: default ( ) ;
90
- let mut global_symbols_emitted: FxHashSet < String > = FxHashSet :: default ( ) ;
91
- let mut duplicate_symbols: Vec < ( String , String ) > = Vec :: new ( ) ;
95
+ // All FileIds emitted as documents.
96
+ let mut file_ids_emitted: FxHashSet < FileId > = FxHashSet :: default ( ) ;
97
+
98
+ // All non-local symbols encountered, for detecting duplicate symbol errors.
99
+ let mut nonlocal_symbols_emitted: FxHashSet < String > = FxHashSet :: default ( ) ;
100
+ // List of (source_location, symbol) for duplicate symbol errors to report.
101
+ let mut duplicate_symbol_errors: Vec < ( String , String ) > = Vec :: new ( ) ;
102
+ // This is called after definitions have been deduplicated by token_ids_emitted. The purpose
103
+ // is to detect reuse of symbol names because this causes ambiguity about their meaning.
104
+ let mut record_error_if_symbol_already_used =
105
+ |symbol : String , relative_path : & str , line_index : & LineIndex , text_range : TextRange | {
106
+ let is_local = symbol. starts_with ( "local " ) ;
107
+ if !is_local && !nonlocal_symbols_emitted. insert ( symbol. clone ( ) ) {
108
+ let source_location =
109
+ text_range_to_string ( relative_path, line_index, text_range) ;
110
+ duplicate_symbol_errors. push ( ( source_location, symbol) ) ;
111
+ }
112
+ } ;
113
+
114
+ // Generates symbols from token monikers.
92
115
let mut symbol_generator = SymbolGenerator :: new ( ) ;
93
116
94
117
for StaticIndexedFile { file_id, tokens, .. } in si. files {
95
118
symbol_generator. clear_document_local_state ( ) ;
96
119
97
- let relative_path = match get_relative_filepath ( & vfs, & root, file_id) {
98
- Some ( relative_path) => relative_path,
99
- None => continue ,
100
- } ;
101
-
102
- let line_index = LineIndex {
103
- index : db. line_index ( file_id) ,
104
- encoding : PositionEncoding :: Utf8 ,
105
- endings : LineEndings :: Unix ,
106
- } ;
120
+ let Some ( relative_path) = get_relative_filepath ( & vfs, & root, file_id) else { continue } ;
121
+ let line_index = get_line_index ( db, file_id) ;
107
122
108
123
let mut occurrences = Vec :: new ( ) ;
109
124
let mut symbols = Vec :: new ( ) ;
@@ -120,54 +135,45 @@ impl flags::Scip {
120
135
( "" . to_owned ( ) , None )
121
136
} ;
122
137
123
- if !symbol. is_empty ( ) && token_ids_emitted. insert ( id) {
124
- if !symbol. starts_with ( "local " )
125
- && !global_symbols_emitted. insert ( symbol. clone ( ) )
126
- {
127
- let source_location =
128
- text_range_to_string ( relative_path. as_str ( ) , & line_index, text_range) ;
129
- duplicate_symbols. push ( ( source_location, symbol. clone ( ) ) ) ;
138
+ if !symbol. is_empty ( ) {
139
+ let is_defined_in_this_document = match token. definition {
140
+ Some ( def) => def. file_id == file_id,
141
+ _ => false ,
142
+ } ;
143
+ if is_defined_in_this_document {
144
+ if token_ids_emitted. insert ( id) {
145
+ // token_ids_emitted does deduplication. This checks that this results
146
+ // in unique emitted symbols, as otherwise references are ambiguous.
147
+ record_error_if_symbol_already_used (
148
+ symbol. clone ( ) ,
149
+ relative_path. as_str ( ) ,
150
+ & line_index,
151
+ text_range,
152
+ ) ;
153
+ symbols. push ( compute_symbol_info (
154
+ relative_path. clone ( ) ,
155
+ symbol. clone ( ) ,
156
+ enclosing_symbol,
157
+ token,
158
+ ) ) ;
159
+ }
130
160
} else {
131
- let documentation = match & token. documentation {
132
- Some ( doc) => vec ! [ doc. as_str( ) . to_owned( ) ] ,
133
- None => vec ! [ ] ,
134
- } ;
135
-
136
- let position_encoding =
137
- scip_types:: PositionEncoding :: UTF8CodeUnitOffsetFromLineStart . into ( ) ;
138
- let signature_documentation =
139
- token. signature . clone ( ) . map ( |text| scip_types:: Document {
140
- relative_path : relative_path. clone ( ) ,
141
- language : "rust" . to_owned ( ) ,
142
- text,
143
- position_encoding,
144
- ..Default :: default ( )
145
- } ) ;
146
- let symbol_info = scip_types:: SymbolInformation {
147
- symbol : symbol. clone ( ) ,
148
- documentation,
149
- relationships : Vec :: new ( ) ,
150
- special_fields : Default :: default ( ) ,
151
- kind : symbol_kind ( token. kind ) . into ( ) ,
152
- display_name : token. display_name . clone ( ) . unwrap_or_default ( ) ,
153
- signature_documentation : signature_documentation. into ( ) ,
154
- enclosing_symbol : enclosing_symbol. unwrap_or_default ( ) ,
155
- } ;
156
-
157
- symbols. push ( symbol_info)
161
+ token_ids_referenced. insert ( id) ;
158
162
}
159
163
}
160
164
161
165
// If the range of the def and the range of the token are the same, this must be the definition.
162
166
// they also must be in the same file. See https://github.com/rust-lang/rust-analyzer/pull/17988
163
- let mut symbol_roles = Default :: default ( ) ;
164
- match token. definition {
165
- Some ( def) if def. file_id == file_id && def. range == text_range => {
166
- symbol_roles |= scip_types:: SymbolRole :: Definition as i32 ;
167
- }
168
- _ => { }
167
+ let is_definition = match token. definition {
168
+ Some ( def) => def. file_id == file_id && def. range == text_range,
169
+ _ => false ,
169
170
} ;
170
171
172
+ let mut symbol_roles = Default :: default ( ) ;
173
+ if is_definition {
174
+ symbol_roles |= scip_types:: SymbolRole :: Definition as i32 ;
175
+ }
176
+
171
177
occurrences. push ( scip_types:: Occurrence {
172
178
range : text_range_to_scip_range ( & line_index, text_range) ,
173
179
symbol,
@@ -195,18 +201,61 @@ impl flags::Scip {
195
201
position_encoding,
196
202
special_fields : Default :: default ( ) ,
197
203
} ) ;
204
+ if !file_ids_emitted. insert ( file_id) {
205
+ panic ! ( "Invariant violation: file emitted multiple times." ) ;
206
+ }
207
+ }
208
+
209
+ // Collect all symbols referenced by the files but not defined within them.
210
+ let mut external_symbols = Vec :: new ( ) ;
211
+ for id in token_ids_referenced. difference ( & token_ids_emitted) {
212
+ let id = * id;
213
+ let token = si. tokens . get ( id) . unwrap ( ) ;
214
+
215
+ let Some ( definition) = token. definition else {
216
+ break ;
217
+ } ;
218
+
219
+ let file_id = definition. file_id ;
220
+ let Some ( relative_path) = get_relative_filepath ( & vfs, & root, file_id) else { continue } ;
221
+ let line_index = get_line_index ( db, file_id) ;
222
+ let text_range = definition. range ;
223
+ if file_ids_emitted. contains ( & file_id) {
224
+ tracing:: error!(
225
+ "Bug: definition at {} should have been in an SCIP document but was not." ,
226
+ text_range_to_string( relative_path. as_str( ) , & line_index, text_range)
227
+ ) ;
228
+ continue ;
229
+ }
230
+
231
+ let TokenSymbols { symbol, enclosing_symbol } = symbol_generator
232
+ . token_symbols ( id, token)
233
+ . expect ( "To have been referenced, the symbol must be in the cache." ) ;
234
+
235
+ record_error_if_symbol_already_used (
236
+ symbol. clone ( ) ,
237
+ relative_path. as_str ( ) ,
238
+ & line_index,
239
+ text_range,
240
+ ) ;
241
+ external_symbols. push ( compute_symbol_info (
242
+ relative_path. clone ( ) ,
243
+ symbol. clone ( ) ,
244
+ enclosing_symbol,
245
+ token,
246
+ ) ) ;
198
247
}
199
248
200
249
let index = scip_types:: Index {
201
250
metadata : Some ( metadata) . into ( ) ,
202
251
documents,
203
- external_symbols : Vec :: new ( ) ,
252
+ external_symbols,
204
253
special_fields : Default :: default ( ) ,
205
254
} ;
206
255
207
- if !duplicate_symbols . is_empty ( ) {
256
+ if !duplicate_symbol_errors . is_empty ( ) {
208
257
eprintln ! ( "{}" , DUPLICATE_SYMBOLS_MESSAGE ) ;
209
- for ( source_location, symbol) in duplicate_symbols {
258
+ for ( source_location, symbol) in duplicate_symbol_errors {
210
259
eprintln ! ( "{}" , source_location) ;
211
260
eprintln ! ( " Duplicate symbol: {}" , symbol) ;
212
261
eprintln ! ( ) ;
@@ -239,6 +288,37 @@ Known cases that can cause this:
239
288
Duplicate symbols encountered:
240
289
" ;
241
290
291
+ fn compute_symbol_info (
292
+ relative_path : String ,
293
+ symbol : String ,
294
+ enclosing_symbol : Option < String > ,
295
+ token : & TokenStaticData ,
296
+ ) -> SymbolInformation {
297
+ let documentation = match & token. documentation {
298
+ Some ( doc) => vec ! [ doc. as_str( ) . to_owned( ) ] ,
299
+ None => vec ! [ ] ,
300
+ } ;
301
+
302
+ let position_encoding = scip_types:: PositionEncoding :: UTF8CodeUnitOffsetFromLineStart . into ( ) ;
303
+ let signature_documentation = token. signature . clone ( ) . map ( |text| scip_types:: Document {
304
+ relative_path,
305
+ language : "rust" . to_owned ( ) ,
306
+ text,
307
+ position_encoding,
308
+ ..Default :: default ( )
309
+ } ) ;
310
+ scip_types:: SymbolInformation {
311
+ symbol,
312
+ documentation,
313
+ relationships : Vec :: new ( ) ,
314
+ special_fields : Default :: default ( ) ,
315
+ kind : symbol_kind ( token. kind ) . into ( ) ,
316
+ display_name : token. display_name . clone ( ) . unwrap_or_default ( ) ,
317
+ signature_documentation : signature_documentation. into ( ) ,
318
+ enclosing_symbol : enclosing_symbol. unwrap_or_default ( ) ,
319
+ }
320
+ }
321
+
242
322
fn get_relative_filepath (
243
323
vfs : & vfs:: Vfs ,
244
324
rootpath : & vfs:: AbsPathBuf ,
@@ -247,6 +327,14 @@ fn get_relative_filepath(
247
327
Some ( vfs. file_path ( file_id) . as_path ( ) ?. strip_prefix ( rootpath) ?. as_str ( ) . to_owned ( ) )
248
328
}
249
329
330
+ fn get_line_index ( db : & RootDatabase , file_id : FileId ) -> LineIndex {
331
+ LineIndex {
332
+ index : db. line_index ( file_id) ,
333
+ encoding : PositionEncoding :: Utf8 ,
334
+ endings : LineEndings :: Unix ,
335
+ }
336
+ }
337
+
250
338
// SCIP Ranges have a (very large) optimization: ranges that start and end on the same line
251
339
// only encode as a vector of [start_line, start_col, end_col].
252
340
//
0 commit comments