15
15
-- of the license. --
16
16
-- ----------------------------------------------------------------------------
17
17
18
- with Ada.Exceptions ; use Ada.Exceptions;
19
- with Ada.Characters.Handling ;
20
18
with GNAT.Strings ; use GNAT.Strings;
21
- with GNATCOLL.Traces ; use GNATCOLL.Traces;
22
19
20
+ with GNATCOLL.Traces ; use GNATCOLL.Traces;
23
21
with GNATCOLL.VFS ; use GNATCOLL.VFS;
24
- with GNATCOLL.Iconv ; use GNATCOLL.Iconv;
22
+
25
23
with VSS.Strings ; use VSS.Strings;
24
+ pragma Warnings
25
+ (Off, " unit "" VSS.Strings.Character_Iterators"" is not referenced" );
26
+ -- GNAT 20220919 report this package as unused, however it is necessary to
27
+ -- make visible full declaration of Character_Iterator.
28
+ with VSS.Strings.Character_Iterators ;
29
+ with VSS.Strings.Converters.Decoders ;
26
30
with VSS.Strings.Conversions ;
31
+
27
32
with LSP.Ada_Documents ; use LSP.Ada_Documents;
28
33
with Libadalang.Preprocessing ; use Libadalang.Preprocessing;
29
34
with Langkit_Support.File_Readers ; use Langkit_Support.File_Readers;
35
+ with Langkit_Support.Slocs ;
36
+ with Langkit_Support.Text ;
30
37
31
38
package body LSP.Ada_Handlers.File_Readers is
32
39
33
- Me : constant Trace_Handle := Create (" ALS.FILE_READERS" );
34
-
35
- function Read_And_Convert_To_UTF8
36
- (Filename : String; Charset : String)
37
- return GNAT.Strings.String_Access;
38
- -- Read the file content from Filename and convert it from the original
39
- -- Charset to UTF-8.
40
+ use all type VSS.Strings.Converters.Converter_Flag;
40
41
41
- -- ----------------------------
42
- -- Read_And_Convert_To_UTF8 --
43
- -- ----------------------------
42
+ Me : constant Trace_Handle := Create (" ALS.FILE_READERS" );
44
43
45
- function Read_And_Convert_To_UTF8
46
- (Filename : String; Charset : String)
47
- return GNAT.Strings.String_Access
44
+ procedure Read_And_Decode
45
+ (Filename : String;
46
+ Charset : VSS.Strings.Virtual_String;
47
+ Decoded : out VSS.Strings.Virtual_String;
48
+ Error : out VSS.Strings.Virtual_String);
49
+ -- Read the file content from Filename and decode it from the original
50
+ -- Charset.
51
+
52
+ Decoder_Flags : constant VSS.Strings.Converters.Converter_Flags :=
53
+ (Stateless => True,
54
+ -- Data is decoded as single chunk, don't save state but report error
55
+ -- for incomplete byte sequences at the end of data
56
+ Stop_On_Error => False,
57
+ -- Errors should be reported but not to stop decoding of the following
58
+ -- data
59
+ Process_BOM => True);
60
+ -- Byte-Order-Mark at the beginning of the data should be ignored if
61
+ -- present
62
+ -- Default flags for the text decoder.
63
+
64
+ -- -------------------
65
+ -- Read_And_Decode --
66
+ -- -------------------
67
+
68
+ procedure Read_And_Decode
69
+ (Filename : String;
70
+ Charset : VSS.Strings.Virtual_String;
71
+ Decoded : out VSS.Strings.Virtual_String;
72
+ Error : out VSS.Strings.Virtual_String)
48
73
is
49
- Raw : GNAT.Strings.String_Access;
50
- Decoded : GNAT.Strings.String_Access;
74
+ Raw : GNAT.Strings.String_Access;
75
+ Decoder : VSS.Strings.Converters.Decoders.Virtual_String_Decoder;
76
+
51
77
begin
52
78
-- Read the file (this call uses MMAP)
79
+
53
80
Raw := Create_From_UTF8 (Filename).Read_File;
54
81
55
82
if Raw = null then
56
- return null ;
83
+ Decoded.Clear;
84
+ Error := " Unable to read file" ;
85
+
86
+ return ;
57
87
end if ;
58
88
59
- -- Convert the file if it's not already encoded in utf-8
89
+ Decoder.Initialize (Charset, Decoder_Flags);
60
90
61
- if Ada.Characters.Handling.To_Lower (Charset) = " utf-8" then
62
- Decoded := Raw;
63
- else
64
- declare
65
- State : constant Iconv_T := Iconv_Open (UTF8, Charset);
66
- Outbuf : Byte_Sequence (1 .. 4096 );
67
- Input_Index : Positive := Raw'First;
68
- Conv_Result : Iconv_Result := Full_Buffer;
69
- Output_Index : Positive;
70
- begin
71
- while Conv_Result = Full_Buffer loop
72
- Output_Index := 1 ;
73
- Iconv (State => State,
74
- Inbuf => Raw.all ,
75
- Input_Index => Input_Index,
76
- Outbuf => Outbuf,
77
- Output_Index => Output_Index,
78
- Result => Conv_Result);
79
-
80
- -- Append the converted contents
81
- if Decoded /= null then
82
- declare
83
- Tmp : GNAT.Strings.String_Access := Decoded;
84
- begin
85
- Decoded := new String'
86
- (Tmp.all & Outbuf (1 .. Output_Index - 1 ));
87
- GNAT.Strings.Free (Tmp);
88
- end ;
89
- else
90
- Decoded := new String'(Outbuf (1 .. Output_Index - 1 ));
91
- end if ;
92
- end loop ;
93
-
94
- GNAT.Strings.Free (Raw);
95
- Iconv_Close (State);
96
-
97
- case Conv_Result is
98
- when Success =>
99
- -- The conversion was successful
100
- null ;
101
- when others =>
102
- Me.Trace
103
- (" Failed to convert '" & Filename & " ' to UTF-8: "
104
- & Conv_Result'Img);
105
- return null ;
106
- end case ;
107
- exception
108
- when E : others =>
109
-
110
- Me.Trace
111
- (" Exception caught when reading '" & Filename & " ':"
112
- & Exception_Message (E));
113
- return null ;
114
- end ;
91
+ if not Decoder.Is_Valid then
92
+ -- Charset is not supported, fallback to "utf-8".
93
+
94
+ Me.Trace
95
+ (" Encoding '"
96
+ & VSS.Strings.Conversions.To_UTF_8_String (Charset)
97
+ & " ' is not supported by text decoder." );
98
+
99
+ Decoder.Initialize (" utf-8" , Decoder_Flags);
115
100
end if ;
116
101
117
- -- Convert the string to a Virtual_String for easier handling
102
+ pragma Assert (Decoder.Is_Valid);
103
+ -- At this point decoder is initialized to decode ether given encoding
104
+ -- or fallback encoding "utf-8", which is known to be supported.
118
105
119
- return Decoded;
120
- exception
121
- when E : others =>
122
- if Decoded /= null then
123
- GNAT.Strings.Free (Decoded);
124
- end if ;
106
+ declare
107
+ Encoded : constant Ada.Streams.Stream_Element_Array (1 .. Raw'Length)
108
+ with Import, Address => Raw.all 'Address;
125
109
126
- Me.Trace
127
- (" Exception caught when reading '" & Filename & " ':"
128
- & Exception_Message (E));
110
+ begin
111
+ Decoded := Decoder.Decode (Encoded);
112
+ Error := Decoder.Error_Message;
113
+ end ;
129
114
130
- return null ;
131
- end Read_And_Convert_To_UTF8 ;
115
+ GNAT.Strings.Free (Raw) ;
116
+ end Read_And_Decode ;
132
117
133
118
-- --------
134
119
-- Read --
@@ -143,65 +128,100 @@ package body LSP.Ada_Handlers.File_Readers is
143
128
Diagnostics : in out
144
129
Langkit_Support.Diagnostics.Diagnostics_Vectors.Vector)
145
130
is
146
- Doc : Document_Access;
147
- Source : Preprocessed_Source := Preprocessed_Source'
148
- (Buffer => null , Last => 0 ) ;
149
- Buffer : GNAT.Strings.String_Access;
131
+ Doc : Document_Access;
132
+ Text : VSS.Strings.Virtual_String;
133
+ Error : VSS.Strings.Virtual_String ;
134
+
150
135
begin
151
136
-- First check if the file is an open document
137
+
152
138
Doc := Self.Handler.Get_Open_Document
153
139
(URI => LSP.Types.File_To_URI (Filename),
154
140
Force => False);
155
141
156
142
-- Preprocess the document's contents if open, or the file contents if
157
143
-- not.
144
+
158
145
if Doc /= null then
159
- Buffer := new String'
160
- (VSS.Strings.Conversions.To_UTF_8_String (Doc.Text));
161
- else
162
- Buffer := Read_And_Convert_To_UTF8 (Filename, Charset);
146
+ Text := Doc.Text;
163
147
164
- -- Return an empty sring when failing to read the file (i.e: when the
165
- -- file has been deleted).
166
- if Buffer = null then
167
- Buffer := new String'(" " );
148
+ else
149
+ Read_And_Decode
150
+ (Filename => Filename,
151
+ Charset => VSS.Strings.Conversions.To_Virtual_String (Charset),
152
+ Decoded => Text,
153
+ Error => Error);
154
+
155
+ if not Error.Is_Empty then
156
+ Diagnostics.Append
157
+ (Langkit_Support.Diagnostics.Diagnostic'
158
+ (Langkit_Support.Slocs.No_Source_Location_Range,
159
+ VSS.Strings.Conversions.To_Unbounded_Wide_Wide_String
160
+ (Error)));
168
161
end if ;
169
162
end if ;
170
163
171
164
-- If we have preprocessing data, use LAL's API to preoprocess the file.
172
165
-- Otherwise, just decode the contents of the document/file.
173
166
174
167
if Self.Preprocessing_Data /= No_Preprocessor_Data then
175
- Libadalang.Preprocessing.Preprocess
176
- (Data => Self.Preprocessing_Data,
177
- Filename => Filename,
178
- Input => Buffer.all ,
179
- Contents => Source,
180
- Diagnostics => Diagnostics);
181
-
182
- if Source.Buffer = null then
183
- -- Log the diagnostics when processing has failed
184
- for Diag of Diagnostics loop
185
- Me.Trace (Langkit_Support.Diagnostics.To_Pretty_String (Diag));
186
- end loop ;
187
- end if ;
188
- end if ;
168
+ declare
169
+ Buffer : GNAT.Strings.String_Access :=
170
+ new String
171
+ (1 .. Integer (Text.After_Last_Character.First_UTF8_Offset));
172
+ -- Size of the "utf-8" encoded data for text is known, so
173
+ -- allocate necessary space and fill it later. Allocation on the
174
+ -- stack can't be use here due to potential stack overflow.
175
+ Source : Preprocessed_Source := Preprocessed_Source'
176
+ (Buffer => null , Last => 0 );
189
177
190
- -- Decode the preprocessed buffer (or the initial contents when there is
191
- -- no preprocessing needed) in utf-8.
192
-
193
- Decode_Buffer
194
- (Buffer => (if Source.Buffer /= null then
195
- Source.Buffer (1 .. Source.Last)
196
- else
197
- Buffer.all ),
198
- Charset => " utf-8" ,
199
- Read_BOM => Read_BOM,
200
- Contents => Contents,
201
- Diagnostics => Diagnostics);
202
-
203
- Free (Source);
204
- GNAT.Strings.Free (Buffer);
178
+ begin
179
+ VSS.Strings.Conversions.Set_UTF_8_String (Text, Buffer.all );
180
+
181
+ Libadalang.Preprocessing.Preprocess
182
+ (Data => Self.Preprocessing_Data,
183
+ Filename => Filename,
184
+ Input => Buffer.all ,
185
+ Contents => Source,
186
+ Diagnostics => Diagnostics);
187
+
188
+ if Source.Buffer = null then
189
+ -- Log the diagnostics when processing has failed
190
+
191
+ for Diag of Diagnostics loop
192
+ Me.Trace
193
+ (Langkit_Support.Diagnostics.To_Pretty_String (Diag));
194
+ end loop ;
195
+ end if ;
196
+
197
+ -- Decode the preprocessed buffer (or the initial contents when
198
+ -- there is no preprocessing needed) in utf-8.
199
+
200
+ Decode_Buffer
201
+ (Buffer => (if Source.Buffer /= null then
202
+ Source.Buffer (1 .. Source.Last)
203
+ else
204
+ Buffer.all ),
205
+ Charset => " utf-8" ,
206
+ Read_BOM => Read_BOM,
207
+ Contents => Contents,
208
+ Diagnostics => Diagnostics);
209
+
210
+ Free (Source);
211
+ GNAT.Strings.Free (Buffer);
212
+ end ;
213
+
214
+ else
215
+ Contents :=
216
+ (Buffer =>
217
+ new Langkit_Support.Text.Text_Type
218
+ (1 .. Natural (Text.Character_Length)),
219
+ First => 1 ,
220
+ Last => Natural (Text.Character_Length));
221
+
222
+ VSS.Strings.Conversions.Set_Wide_Wide_String
223
+ (Text, Contents.Buffer.all );
224
+ end if ;
205
225
end Read ;
206
226
207
227
end LSP.Ada_Handlers.File_Readers ;
0 commit comments