Skip to content

Commit 65afc28

Browse files
V315-025: Convert to UTF-8 before preprocessing
This is needed to handle other charsets (e.g: cyrillic). Remove the old ad-hoc preprocessor implementation.
1 parent 0032ccf commit 65afc28

File tree

3 files changed

+104
-309
lines changed

3 files changed

+104
-309
lines changed

source/ada/lsp-ada_handlers-file_readers.adb

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,13 @@
1515
-- of the license. --
1616
------------------------------------------------------------------------------
1717

18+
with Ada.Exceptions; use Ada.Exceptions;
19+
with Ada.Characters.Handling;
1820
with GNAT.Strings; use GNAT.Strings;
1921
with GNATCOLL.Traces; use GNATCOLL.Traces;
22+
23+
with GNATCOLL.VFS; use GNATCOLL.VFS;
24+
with GNATCOLL.Iconv; use GNATCOLL.Iconv;
2025
with VSS.Strings; use VSS.Strings;
2126
with VSS.Strings.Conversions;
2227
with LSP.Ada_Documents; use LSP.Ada_Documents;
@@ -27,6 +32,104 @@ package body LSP.Ada_Handlers.File_Readers is
2732

2833
Me : constant Trace_Handle := Create ("ALS.FILE_READERS");
2934

35+
function Read_And_Convert_To_UTF8
36+
(Filename : String; Charset : String)
37+
return GNAT.Strings.String_Access;
38+
-- Read the file content from Filename and convert it from the original
39+
-- Charset to UTF-8. When Charset is "utf-8" (compared case-insensitively)
-- the bytes read from the file are returned without any conversion.
-- Return null if the file cannot be read, if the conversion does not end
-- with a Success result, or if an exception is raised along the way.
40+
41+
------------------------------
42+
-- Read_And_Convert_To_UTF8 --
43+
------------------------------
44+
45+
function Read_And_Convert_To_UTF8
  (Filename : String; Charset : String)
   return GNAT.Strings.String_Access
is
   --  Read the content of Filename and return it encoded in UTF-8.
   --
   --  Filename : path of the file to read, passed to Create_From_UTF8.
   --  Charset  : name of the charset the file is encoded in. When it is
   --             "utf-8" (case-insensitive) the file content is returned
   --             as read, without going through Iconv.
   --
   --  Return null when the file cannot be read, when the conversion does
   --  not end with a Success result, or when any exception is raised. No
   --  exception propagates to the caller. Every intermediate buffer is
   --  released on all failure paths.

   Raw     : GNAT.Strings.String_Access;
   --  Bytes read from the file, in the original charset

   Decoded : GNAT.Strings.String_Access;
   --  UTF-8 result, built incrementally by the conversion loop
begin
   --  Read the file (this call uses MMAP)
   Raw := Create_From_UTF8 (Filename).Read_File;

   if Raw = null then
      return null;
   end if;

   --  Nothing to convert if the file is already encoded in utf-8: hand
   --  the buffer that was just read over to the caller.

   if Ada.Characters.Handling.To_Lower (Charset) = "utf-8" then
      return Raw;
   end if;

   declare
      State        : constant Iconv_T := Iconv_Open (UTF8, Charset);
      --  An exception raised by Iconv_Open is raised in this declarative
      --  part, so it is handled by the handler at the end of the function
      --  (which releases Raw), not by a handler local to this block.

      Outbuf       : Byte_Sequence (1 .. 4096);
      Input_Index  : Positive := Raw'First;
      Conv_Result  : Iconv_Result := Full_Buffer;
      Output_Index : Positive;
   begin
      begin
         --  Convert one output buffer at a time: Full_Buffer means that
         --  Outbuf was filled and that some input is still pending.

         while Conv_Result = Full_Buffer loop
            Output_Index := 1;
            Iconv (State        => State,
                   Inbuf        => Raw.all,
                   Input_Index  => Input_Index,
                   Outbuf       => Outbuf,
                   Output_Index => Output_Index,
                   Result       => Conv_Result);

            --  Append the converted contents

            if Decoded = null then
               Decoded := new String'(Outbuf (1 .. Output_Index - 1));
            else
               declare
                  Previous : GNAT.Strings.String_Access := Decoded;
               begin
                  Decoded := new String'
                    (Previous.all & Outbuf (1 .. Output_Index - 1));
                  GNAT.Strings.Free (Previous);
               end;
            end if;
         end loop;
      exception
         when others =>
            --  Do not leak the conversion state; the buffers are
            --  released by the handler at the end of the function.
            Iconv_Close (State);
            raise;
      end;

      Iconv_Close (State);
      GNAT.Strings.Free (Raw);

      if Conv_Result /= Success then
         Me.Trace
           ("Failed to convert '" & Filename & "' to UTF-8: "
            & Conv_Result'Img);

         --  Drop the partially converted text instead of leaking it
         GNAT.Strings.Free (Decoded);
         return null;
      end if;
   end;

   return Decoded;
exception
   when E : others =>
      --  Free is a no-op on null, so both buffers can be released
      --  unconditionally whatever point the exception came from.
      GNAT.Strings.Free (Raw);
      GNAT.Strings.Free (Decoded);

      Me.Trace
        ("Exception caught when reading '" & Filename & "':"
         & Exception_Message (E));

      return null;
end Read_And_Convert_To_UTF8;
132+
30133
----------
31134
-- Read --
32135
----------
@@ -56,7 +159,7 @@ package body LSP.Ada_Handlers.File_Readers is
56159
Buffer := new String'
57160
(VSS.Strings.Conversions.To_UTF_8_String (Doc.Text));
58161
else
59-
Buffer := Create_From_UTF8 (Filename).Read_File;
162+
Buffer := Read_And_Convert_To_UTF8 (Filename, Charset);
60163

61164
-- Return an empty string when failing to read the file (i.e. when the
62165
-- file has been deleted).

source/ada/lsp-preprocessor.adb

Lines changed: 0 additions & 265 deletions
This file was deleted.

0 commit comments

Comments
 (0)