
Commit 2957667

Merge branch 'mr/pmderodat/utf8-preparatory' into 'master'

Preparatory work for the transition of source buffers to UTF-8

See merge request eng/libadalang/langkit!1022

2 parents 6f53bc0 + 4048175, commit 2957667

12 files changed: +354, -1 lines

testsuite/tests/grammar/case_rule/expected_concrete_syntax.lkt

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@ lexer foo_lexer {

     char
     dot <- "."
-    id <- p"[a-zA-Z]+"
+    id <- p"[a-zA-Zé🙂]+"
     tick <- "'"
     newline <- p"\n"

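The widened character class above is what lets the new test case below tokenize non-ASCII identifiers. As a rough illustration only (Python's re engine, not the langkit lexer; the input string is the one added to main.py below):

import re

# Pattern copied from the new lexer rule; input copied from the new main.py case.
id_rule = re.compile(r"[a-zA-Zé🙂]+")
print(id_rule.findall("\xe9'\U0001f642'"))  # ['é', '🙂']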

testsuite/tests/grammar/case_rule/main.py

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@
     ('simple-attr', "a'b"),
     ('char-dot', "'a'.b"),
     ('id-char', "a'b'"),
+    ('unicode-id-char', "\xe9'\U0001f642'"),
 ):
     print('== {} =='.format(label))
     u = ctx.get_from_buffer('{}.txt'.format(label), text)
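For context, a minimal sketch (not part of this commit) of how the rest of the loop body plausibly produces the token dumps recorded in test.out below. It assumes the generated libfoolang Python bindings expose AnalysisContext, unit.diagnostics, unit.first_token and Token.next; the actual main.py code past this point is not shown in the diff.

import libfoolang

ctx = libfoolang.AnalysisContext()
u = ctx.get_from_buffer('unicode-id-char.txt', "\xe9'\U0001f642'")

# Diagnostics come first, e.g. "1:5-1:5: Expected Id, got Termination"
for d in u.diagnostics:
    print(d)
print('--')

# Then the token stream; tokens print as <Token Id 'é' at 1:1-1:2>
t = u.first_token
while t is not None:
    print(t)
    t = t.next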

testsuite/tests/grammar/case_rule/test.out

Lines changed: 9 additions & 0 deletions
@@ -24,5 +24,14 @@ main.py: Running...
 <Token Tick "'" at 1:4-1:5>
 <Token Termination at 1:5-1:5>

+== unicode-id-char ==
+1:5-1:5: Expected Id, got Termination
+--
+<Token Id 'é' at 1:1-1:2>
+<Token Tick "'" at 1:2-1:3>
+<Token Id '🙂' at 1:3-1:4>
+<Token Tick "'" at 1:4-1:5>
+<Token Termination at 1:5-1:5>
+
 main.py: Done.
 Done
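A side note on the new expected output (not stated in the diff): 'é' and '🙂' each span a single column (1:1-1:2 and 1:3-1:4), which suggests source locations are counted in Unicode code points rather than in encoded bytes. A quick Python illustration of the difference:

text = "\xe9'\U0001f642'"         # the new test input from main.py
print(len(text))                  # 4 code points, matching the 4 columns above
print(len(text.encode("utf-8")))  # 8 bytes once encoded as UTF-8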

testsuite/tests/misc/unicode/empty.txt

Whitespace-only changes.

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+import lexer_example
+
+@with_lexer(foo_lexer)
+grammar foo_grammar {
+    @main_rule main_rule <- list+(Example(@example StrLit(@string)))
+}
+
+@abstract class FooNode implements Node[FooNode] {
+}
+
+class Example: FooNode {
+    @parse_field f: StrLit
+}
+
+class StrLit: FooNode implements TokenNode {
+}

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+# ��������������������������������������������������������������������������� #
+# ��������������������������������������������������������������������������� #
+
+example "1�"
+
+# ��������������������������������������������������������������������������� #
+
+example "1�2�"
+
+# ��������������������������������������������������������������������������� #
+
+example "1�2�3�"
+
+# ��������������������������������������������������������������������������� #
+
+example "1�2�3�4�"
+
+# ��������������������������������������������������������������������������� #
+
+example "1�2�3�4�5�"
+
+# ��������������������������������������������������������������������������� #
+
+example "1�2�3�4�5�6�"
+
+# ��������������������������������������������������������������������������� #
+
+example "1�2�3�4�5�6�7�"
+
+# ��������������������������������������������������������������������������� #
+
+example "1�2�3�4�5�6�7�8�"
+
+# ��������������������������������������������������������������������������� #
+
+example "1�2�3�4�5�6�7�8�9�"
+
+# ��������������������������������������������������������������������������� #
+
+example "1�2�3�4�5�6�7�8�9�0�"
+
+# ��������������������������������������������������������������������������� #
+# ��������������������������������������������������������������������������� #
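Note (not part of the diff): main.adb below reads this file with the "iso-8859-1" charset, so its non-ASCII bytes are intentional; they only show up as U+FFFD replacement characters because this listing is rendered as UTF-8. A hypothetical byte string illustrates the effect:

# The actual bytes of the test file are not recoverable from this view;
# 0xE9 ('é' in ISO-8859-1) is just an example of a byte that is valid
# Latin-1 but not valid UTF-8 on its own.
data = b'example "1\xe9"'
print(data.decode("iso-8859-1"))        # example "1é"
print(data.decode("utf-8", "replace"))  # example "1�"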

testsuite/tests/misc/unicode/main.adb

Lines changed: 131 additions & 0 deletions
@@ -0,0 +1,131 @@
+with Ada.Text_IO; use Ada.Text_IO;
+
+with GNAT.Strings; use GNAT.Strings;
+with GNATCOLL.Mmap; use GNATCOLL.Mmap;
+
+with Langkit_Support.File_Readers; use Langkit_Support.File_Readers;
+with Langkit_Support.Slocs; use Langkit_Support.Slocs;
+with Libfoolang.Analysis; use Libfoolang.Analysis;
+with Libfoolang.Common; use Libfoolang.Common;
+
+with Support; use Support;
+
+procedure Main is
+
+   Empty_File : constant String := "empty.txt";
+   Empty_Buffer : aliased constant String := "";
+
+   Example_File : constant String := "main-iso-8859-1.txt";
+   Example_Buffer : String_Access := Read_Whole_File (Example_File);
+
+   procedure Check
+     (From_Buffer : Boolean := False;
+      Empty_File : Boolean := False;
+      Wrong_Encoding : Boolean := False;
+      With_File_Reader : Boolean := False);
+
+   -----------
+   -- Check --
+   -----------
+
+   procedure Check
+     (From_Buffer : Boolean := False;
+      Empty_File : Boolean := False;
+      Wrong_Encoding : Boolean := False;
+      With_File_Reader : Boolean := False)
+   is
+      Charset : constant String :=
+        (if Wrong_Encoding then "utf-8" else "iso-8859-1");
+      Filename : constant String :=
+        (if Empty_File then Main.Empty_File else Example_File);
+      Buffer : constant access constant String :=
+        (if Empty_File then Empty_Buffer'Access else Example_Buffer);
+
+      Ctx : Analysis_Context;
+      U : Analysis_Unit;
+   begin
+      --  Put some label for this check
+
+      Put ("== ");
+      Put (if From_Buffer then "buffer" else "file");
+      Put (" | ");
+      Put (if Empty_File then "empty-file" else "example-file");
+      Put (" | ");
+      Put (if Wrong_Encoding then "wrong-encoding" else "correct-encoding");
+      Put (" | ");
+      Put (if With_File_Reader then "file-reader" else "default");
+      Put_Line (" ==");
+      New_Line;
+
+      --  Parse the source according to requested settings
+
+      Ctx := Create_Context
+        (File_Reader => (if With_File_Reader
+                         then Get_File_Reader
+                         else No_File_Reader_Reference));
+      if From_Buffer then
+         U := Ctx.Get_From_Buffer
+           (Filename => Filename,
+            Charset  => Charset,
+            Buffer   => Buffer.all);
+      else
+         U := Ctx.Get_From_File
+           (Filename => Filename, Charset => Charset);
+      end if;
+
+      --  Display parsing errors, if any
+
+      if U.Has_Diagnostics then
+         Put_Line ("Errors:");
+         for D of U.Diagnostics loop
+            Put_Line (" " & U.Format_GNU_Diagnostic (D));
+         end loop;
+         New_Line;
+      end if;
+
+      --  Summarize the content of the parsed unit
+
+      if U.Root.Is_Null then
+         Put_Line ("No root node");
+      else
+         Put_Line ("Root node children:" & U.Root.Children_Count'Image);
+         declare
+            D : constant Token_Data_Type := Data (U.First_Token);
+         begin
+            Put_Line
+              ("First token: "
+               & Kind (D)'Image
+               & " at " & Image (Sloc_Range (D)));
+         end;
+         declare
+            D : constant Token_Data_Type := Data (U.Last_Token);
+         begin
+            Put_Line
+              ("Last token: "
+               & Kind (D)'Image
+               & " at " & Image (Sloc_Range (D)));
+         end;
+      end if;
+      New_Line;
+   end Check;
+
+begin
+   --  Get_From_File
+
+   Check;
+   Check (With_File_Reader => True);
+
+   Check (Empty_File => True);
+   Check (Empty_File => True, With_File_Reader => True);
+
+   Check (Wrong_Encoding => True);
+   Check (Wrong_Encoding => True, With_File_Reader => True);
+
+   --  Get_From_Buffer
+
+   Check (From_Buffer => True);
+   Check (From_Buffer => True, Empty_File => True);
+   Check (From_Buffer => True, Wrong_Encoding => True);
+
+   Free (Example_Buffer);
+end Main;

Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
+with Langkit_Support.Diagnostics; use Langkit_Support.Diagnostics;
+
+package body Support is
+
+   type My_FR is new File_Reader_Interface with null record;
+
+   overriding procedure Read
+     (Self        : My_FR;
+      Filename    : String;
+      Charset     : String;
+      Read_BOM    : Boolean;
+      Contents    : out Decoded_File_Contents;
+      Diagnostics : in out Diagnostics_Vectors.Vector);
+
+   overriding procedure Release (Self : in out My_FR) is null;
+
+   ----------
+   -- Read --
+   ----------
+
+   overriding procedure Read
+     (Self        : My_FR;
+      Filename    : String;
+      Charset     : String;
+      Read_BOM    : Boolean;
+      Contents    : out Decoded_File_Contents;
+      Diagnostics : in out Diagnostics_Vectors.Vector)
+   is
+   begin
+      Direct_Read (Filename, Charset, Read_BOM, Contents, Diagnostics);
+      if Diagnostics.Is_Empty and then Contents.Buffer.all'Length > 79 then
+         Contents.Buffer.all (Contents.First .. Contents.First + 79) :=
+           (1 .. 80 => ' ');
+      end if;
+   end Read;
+
+   ---------------------
+   -- Get_File_Reader --
+   ---------------------
+
+   function Get_File_Reader return File_Reader_Reference is
+   begin
+      return Create_File_Reader_Reference (My_FR'(null record));
+   end Get_File_Reader;
+
+end Support;

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+with Langkit_Support.File_Readers; use Langkit_Support.File_Readers;
+
+package Support is
+   function Get_File_Reader return File_Reader_Reference;
+end Support;

testsuite/tests/misc/unicode/test.out

Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
+== file | example-file | correct-encoding | default ==
+
+Root node children: 10
+First token: FOO_COMMENT at 1:1-1:80
+Last token: FOO_TERMINATION at 44:1-44:1
+
+== file | example-file | correct-encoding | file-reader ==
+
+Root node children: 10
+First token: FOO_WHITESPACE at 1:1-1:81
+Last token: FOO_TERMINATION at 43:1-43:1
+
+== file | empty-file | correct-encoding | default ==
+
+Errors:
+ empty.txt:1:1: Expected 'example', got Termination
+
+Root node children: 0
+First token: FOO_TERMINATION at 1:1-1:1
+Last token: FOO_TERMINATION at 1:1-1:1
+
+== file | empty-file | correct-encoding | file-reader ==
+
+Errors:
+ empty.txt:1:1: Expected 'example', got Termination
+
+Root node children: 0
+First token: FOO_TERMINATION at 1:1-1:1
+Last token: FOO_TERMINATION at 1:1-1:1
+
+== file | example-file | wrong-encoding | default ==
+
+Errors:
+ main-iso-8859-1.txt:1:3: Could not decode source as "utf-8"
+ main-iso-8859-1.txt:1:1: Expected 'example', got Termination
+
+Root node children: 0
+First token: FOO_TERMINATION at 1:1-1:1
+Last token: FOO_TERMINATION at 1:1-1:1
+
+== file | example-file | wrong-encoding | file-reader ==
+
+Errors:
+ main-iso-8859-1.txt:1:3: Could not decode source as "utf-8"
+ main-iso-8859-1.txt:1:1: Expected 'example', got Termination
+
+Root node children: 0
+First token: FOO_TERMINATION at 1:1-1:1
+Last token: FOO_TERMINATION at 1:1-1:1
+
+== buffer | example-file | correct-encoding | default ==
+
+Root node children: 10
+First token: FOO_COMMENT at 1:1-1:80
+Last token: FOO_TERMINATION at 44:1-44:1
+
+== buffer | empty-file | correct-encoding | default ==
+
+Errors:
+ empty.txt:1:1: Expected 'example', got Termination
+
+Root node children: 0
+First token: FOO_TERMINATION at 1:1-1:1
+Last token: FOO_TERMINATION at 1:1-1:1
+
+== buffer | example-file | wrong-encoding | default ==
+
+Errors:
+ main-iso-8859-1.txt:1:3: Could not decode source as "utf-8"
+ main-iso-8859-1.txt:1:1: Expected 'example', got Termination
+
+Root node children: 0
+First token: FOO_TERMINATION at 1:1-1:1
+Last token: FOO_TERMINATION at 1:1-1:1
+
+Done
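One detail worth spelling out (not stated in the diff): the "file-reader" runs start with FOO_WHITESPACE at 1:1-1:81 and end at 43:1 rather than 44:1 because the Read override in package body Support (shown above) blanks the first 80 characters of the decoded buffer, erasing the first line's comment and its newline. A rough sanity check of those numbers, assuming the first line of the example file is a 79-character comment:

# Made-up stand-in for the first two lines of the example file.
buffer = "#" * 79 + "\n" + "# second comment line\n"
blanked = " " * 80 + buffer[80:]                # what the file reader hands back
print(buffer.count("\n"), blanked.count("\n"))  # 2 1  -> one line fewer overall
print(len(blanked) - len(blanked.lstrip(" ")))  # 80   -> whitespace token 1:1-1:81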
