Skip to content

Commit a92bd66

Browse files
author
mergerepo
committed
Merge remote branch 'origin/master' into edge
(no-precommit-check no-tn-check)
2 parents 2687cb9 + 892e384 commit a92bd66

File tree

1 file changed

+148
-128
lines changed

1 file changed

+148
-128
lines changed

source/ada/lsp-ada_handlers-file_readers.adb

Lines changed: 148 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -15,120 +15,105 @@
1515
-- of the license. --
1616
------------------------------------------------------------------------------
1717

18-
with Ada.Exceptions; use Ada.Exceptions;
19-
with Ada.Characters.Handling;
2018
with GNAT.Strings; use GNAT.Strings;
21-
with GNATCOLL.Traces; use GNATCOLL.Traces;
2219

20+
with GNATCOLL.Traces; use GNATCOLL.Traces;
2321
with GNATCOLL.VFS; use GNATCOLL.VFS;
24-
with GNATCOLL.Iconv; use GNATCOLL.Iconv;
22+
2523
with VSS.Strings; use VSS.Strings;
24+
pragma Warnings
25+
(Off, "unit ""VSS.Strings.Character_Iterators"" is not referenced");
26+
-- GNAT 20220919 report this package as unused, however it is necessary to
27+
-- make visible full declaration of Character_Iterator.
28+
with VSS.Strings.Character_Iterators;
29+
with VSS.Strings.Converters.Decoders;
2630
with VSS.Strings.Conversions;
31+
2732
with LSP.Ada_Documents; use LSP.Ada_Documents;
2833
with Libadalang.Preprocessing; use Libadalang.Preprocessing;
2934
with Langkit_Support.File_Readers; use Langkit_Support.File_Readers;
35+
with Langkit_Support.Slocs;
36+
with Langkit_Support.Text;
3037

3138
package body LSP.Ada_Handlers.File_Readers is
3239

33-
Me : constant Trace_Handle := Create ("ALS.FILE_READERS");
34-
35-
function Read_And_Convert_To_UTF8
36-
(Filename : String; Charset : String)
37-
return GNAT.Strings.String_Access;
38-
-- Read the file content from Filename and convert it from the original
39-
-- Charset to UTF-8.
40+
use all type VSS.Strings.Converters.Converter_Flag;
4041

41-
------------------------------
42-
-- Read_And_Convert_To_UTF8 --
43-
------------------------------
42+
Me : constant Trace_Handle := Create ("ALS.FILE_READERS");
4443

45-
function Read_And_Convert_To_UTF8
46-
(Filename : String; Charset : String)
47-
return GNAT.Strings.String_Access
44+
procedure Read_And_Decode
45+
(Filename : String;
46+
Charset : VSS.Strings.Virtual_String;
47+
Decoded : out VSS.Strings.Virtual_String;
48+
Error : out VSS.Strings.Virtual_String);
49+
-- Read the file content from Filename and decode it from the original
50+
-- Charset.
51+
52+
Decoder_Flags : constant VSS.Strings.Converters.Converter_Flags :=
53+
(Stateless => True,
54+
-- Data is decoded as single chunk, don't save state but report error
55+
-- for incomplete byte sequences at the end of data
56+
Stop_On_Error => False,
57+
-- Errors should be reported but not to stop decoding of the following
58+
-- data
59+
Process_BOM => True);
60+
-- Byte-Order-Mark at the beginning of the data should be ignored if
61+
-- present
62+
-- Default flags for the text decoder.
63+
64+
---------------------
65+
-- Read_And_Decode --
66+
---------------------
67+
68+
procedure Read_And_Decode
69+
(Filename : String;
70+
Charset : VSS.Strings.Virtual_String;
71+
Decoded : out VSS.Strings.Virtual_String;
72+
Error : out VSS.Strings.Virtual_String)
4873
is
49-
Raw : GNAT.Strings.String_Access;
50-
Decoded : GNAT.Strings.String_Access;
74+
Raw : GNAT.Strings.String_Access;
75+
Decoder : VSS.Strings.Converters.Decoders.Virtual_String_Decoder;
76+
5177
begin
5278
-- Read the file (this call uses MMAP)
79+
5380
Raw := Create_From_UTF8 (Filename).Read_File;
5481

5582
if Raw = null then
56-
return null;
83+
Decoded.Clear;
84+
Error := "Unable to read file";
85+
86+
return;
5787
end if;
5888

59-
-- Convert the file if it's not already encoded in utf-8
89+
Decoder.Initialize (Charset, Decoder_Flags);
6090

61-
if Ada.Characters.Handling.To_Lower (Charset) = "utf-8" then
62-
Decoded := Raw;
63-
else
64-
declare
65-
State : constant Iconv_T := Iconv_Open (UTF8, Charset);
66-
Outbuf : Byte_Sequence (1 .. 4096);
67-
Input_Index : Positive := Raw'First;
68-
Conv_Result : Iconv_Result := Full_Buffer;
69-
Output_Index : Positive;
70-
begin
71-
while Conv_Result = Full_Buffer loop
72-
Output_Index := 1;
73-
Iconv (State => State,
74-
Inbuf => Raw.all,
75-
Input_Index => Input_Index,
76-
Outbuf => Outbuf,
77-
Output_Index => Output_Index,
78-
Result => Conv_Result);
79-
80-
-- Append the converted contents
81-
if Decoded /= null then
82-
declare
83-
Tmp : GNAT.Strings.String_Access := Decoded;
84-
begin
85-
Decoded := new String'
86-
(Tmp.all & Outbuf (1 .. Output_Index - 1));
87-
GNAT.Strings.Free (Tmp);
88-
end;
89-
else
90-
Decoded := new String'(Outbuf (1 .. Output_Index - 1));
91-
end if;
92-
end loop;
93-
94-
GNAT.Strings.Free (Raw);
95-
Iconv_Close (State);
96-
97-
case Conv_Result is
98-
when Success =>
99-
-- The conversion was successful
100-
null;
101-
when others =>
102-
Me.Trace
103-
("Failed to convert '" & Filename & "' to UTF-8: "
104-
& Conv_Result'Img);
105-
return null;
106-
end case;
107-
exception
108-
when E : others =>
109-
110-
Me.Trace
111-
("Exception caught when reading '" & Filename & "':"
112-
& Exception_Message (E));
113-
return null;
114-
end;
91+
if not Decoder.Is_Valid then
92+
-- Charset is not supported, fallback to "utf-8".
93+
94+
Me.Trace
95+
("Encoding '"
96+
& VSS.Strings.Conversions.To_UTF_8_String (Charset)
97+
& "' is not supported by text decoder.");
98+
99+
Decoder.Initialize ("utf-8", Decoder_Flags);
115100
end if;
116101

117-
-- Convert the string to a Virtual_String for easier handling
102+
pragma Assert (Decoder.Is_Valid);
103+
-- At this point decoder is initialized to decode ether given encoding
104+
-- or fallback encoding "utf-8", which is known to be supported.
118105

119-
return Decoded;
120-
exception
121-
when E : others =>
122-
if Decoded /= null then
123-
GNAT.Strings.Free (Decoded);
124-
end if;
106+
declare
107+
Encoded : constant Ada.Streams.Stream_Element_Array (1 .. Raw'Length)
108+
with Import, Address => Raw.all'Address;
125109

126-
Me.Trace
127-
("Exception caught when reading '" & Filename & "':"
128-
& Exception_Message (E));
110+
begin
111+
Decoded := Decoder.Decode (Encoded);
112+
Error := Decoder.Error_Message;
113+
end;
129114

130-
return null;
131-
end Read_And_Convert_To_UTF8;
115+
GNAT.Strings.Free (Raw);
116+
end Read_And_Decode;
132117

133118
----------
134119
-- Read --
@@ -143,65 +128,100 @@ package body LSP.Ada_Handlers.File_Readers is
143128
Diagnostics : in out
144129
Langkit_Support.Diagnostics.Diagnostics_Vectors.Vector)
145130
is
146-
Doc : Document_Access;
147-
Source : Preprocessed_Source := Preprocessed_Source'
148-
(Buffer => null, Last => 0);
149-
Buffer : GNAT.Strings.String_Access;
131+
Doc : Document_Access;
132+
Text : VSS.Strings.Virtual_String;
133+
Error : VSS.Strings.Virtual_String;
134+
150135
begin
151136
-- First check if the file is an open document
137+
152138
Doc := Self.Handler.Get_Open_Document
153139
(URI => LSP.Types.File_To_URI (Filename),
154140
Force => False);
155141

156142
-- Preprocess the document's contents if open, or the file contents if
157143
-- not.
144+
158145
if Doc /= null then
159-
Buffer := new String'
160-
(VSS.Strings.Conversions.To_UTF_8_String (Doc.Text));
161-
else
162-
Buffer := Read_And_Convert_To_UTF8 (Filename, Charset);
146+
Text := Doc.Text;
163147

164-
-- Return an empty sring when failing to read the file (i.e: when the
165-
-- file has been deleted).
166-
if Buffer = null then
167-
Buffer := new String'("");
148+
else
149+
Read_And_Decode
150+
(Filename => Filename,
151+
Charset => VSS.Strings.Conversions.To_Virtual_String (Charset),
152+
Decoded => Text,
153+
Error => Error);
154+
155+
if not Error.Is_Empty then
156+
Diagnostics.Append
157+
(Langkit_Support.Diagnostics.Diagnostic'
158+
(Langkit_Support.Slocs.No_Source_Location_Range,
159+
VSS.Strings.Conversions.To_Unbounded_Wide_Wide_String
160+
(Error)));
168161
end if;
169162
end if;
170163

171164
-- If we have preprocessing data, use LAL's API to preoprocess the file.
172165
-- Otherwise, just decode the contents of the document/file.
173166

174167
if Self.Preprocessing_Data /= No_Preprocessor_Data then
175-
Libadalang.Preprocessing.Preprocess
176-
(Data => Self.Preprocessing_Data,
177-
Filename => Filename,
178-
Input => Buffer.all,
179-
Contents => Source,
180-
Diagnostics => Diagnostics);
181-
182-
if Source.Buffer = null then
183-
-- Log the diagnostics when processing has failed
184-
for Diag of Diagnostics loop
185-
Me.Trace (Langkit_Support.Diagnostics.To_Pretty_String (Diag));
186-
end loop;
187-
end if;
188-
end if;
168+
declare
169+
Buffer : GNAT.Strings.String_Access :=
170+
new String
171+
(1 .. Integer (Text.After_Last_Character.First_UTF8_Offset));
172+
-- Size of the "utf-8" encoded data for text is known, so
173+
-- allocate necessary space and fill it later. Allocation on the
174+
-- stack can't be use here due to potential stack overflow.
175+
Source : Preprocessed_Source := Preprocessed_Source'
176+
(Buffer => null, Last => 0);
189177

190-
-- Decode the preprocessed buffer (or the initial contents when there is
191-
-- no preprocessing needed) in utf-8.
192-
193-
Decode_Buffer
194-
(Buffer => (if Source.Buffer /= null then
195-
Source.Buffer (1 .. Source.Last)
196-
else
197-
Buffer.all),
198-
Charset => "utf-8",
199-
Read_BOM => Read_BOM,
200-
Contents => Contents,
201-
Diagnostics => Diagnostics);
202-
203-
Free (Source);
204-
GNAT.Strings.Free (Buffer);
178+
begin
179+
VSS.Strings.Conversions.Set_UTF_8_String (Text, Buffer.all);
180+
181+
Libadalang.Preprocessing.Preprocess
182+
(Data => Self.Preprocessing_Data,
183+
Filename => Filename,
184+
Input => Buffer.all,
185+
Contents => Source,
186+
Diagnostics => Diagnostics);
187+
188+
if Source.Buffer = null then
189+
-- Log the diagnostics when processing has failed
190+
191+
for Diag of Diagnostics loop
192+
Me.Trace
193+
(Langkit_Support.Diagnostics.To_Pretty_String (Diag));
194+
end loop;
195+
end if;
196+
197+
-- Decode the preprocessed buffer (or the initial contents when
198+
-- there is no preprocessing needed) in utf-8.
199+
200+
Decode_Buffer
201+
(Buffer => (if Source.Buffer /= null then
202+
Source.Buffer (1 .. Source.Last)
203+
else
204+
Buffer.all),
205+
Charset => "utf-8",
206+
Read_BOM => Read_BOM,
207+
Contents => Contents,
208+
Diagnostics => Diagnostics);
209+
210+
Free (Source);
211+
GNAT.Strings.Free (Buffer);
212+
end;
213+
214+
else
215+
Contents :=
216+
(Buffer =>
217+
new Langkit_Support.Text.Text_Type
218+
(1 .. Natural (Text.Character_Length)),
219+
First => 1,
220+
Last => Natural (Text.Character_Length));
221+
222+
VSS.Strings.Conversions.Set_Wide_Wide_String
223+
(Text, Contents.Buffer.all);
224+
end if;
205225
end Read;
206226

207227
end LSP.Ada_Handlers.File_Readers;

0 commit comments

Comments
 (0)