Skip to content

Commit 26bc9e4

Browse files
committed
Merge branch 'main' into tokenization-testing
2 parents ca448c5 + 9f3d91c commit 26bc9e4

File tree

2 files changed

+39
-20
lines changed

2 files changed

+39
-20
lines changed

rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -172,10 +172,10 @@ str encode(list[int] chars, bool withBounds = false)
172172
? "\\b<encode(chars, withBounds = false)>\\b"
173173
: intercalate("", [encode(i) | i <- chars]);
174174
175-
str encode(int char)
176-
= char in alnum
177-
? stringChar(char)
178-
: "\\x{<toHex(char)>}";
175+
str encode(int char) = preEncoded[char] ? "\\x{<toHex(char)>}";
176+
177+
178+
private set[int] charRange(str from, str to) = {*[charAt(from, 0)..charAt(to, 0) + 1]};
179179
180180
private str toHex(int i)
181181
= i < 16
@@ -186,5 +186,24 @@ private list[str] hex
186186
= ["<i>" | i <- [0..10]]
187187
+ ["A", "B", "C", "D", "E", "F"];
188188
189-
private set[int] alnum
190-
= {*[48..58], *[65..91], *[97..123]};
189+
private set[int] printable
190+
= charRange("0", "9")
191+
+ charRange("a", "z")
192+
+ charRange("A", "Z")
193+
;
194+
195+
private map[int, str] escapes = (
196+
0x09: "\\t",
197+
0x0A: "\\n",
198+
0x0D: "\\r",
199+
0x20: "\\x20" // spaces look a bit strange in a regex, although they are valid, people tend to read over them as layout
200+
) + ( c : "\\<stringChar(c)>" | c <- [0x21..0x7F], c notin printable); // regular ascii characters that might have special meaning in a regex
201+
202+
203+
private map[int, str] addFallback(map[int, str] defined)
204+
= ( char : "\\x<right(toHex(char),2, "0")>" | char <- [0..256], char notin defined)
205+
+ defined
206+
;
207+
208+
private map[int, str] preEncoded
209+
= addFallback(escapes + ( c : stringChar(c) | c <- printable));

vscode-extension/syntaxes/rascal.tmLanguage.json

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"repository": {
33
"prod(lex(\"Comment\"),[lit(\"//\"),conditional(\\iter-star(\\char-class([range(1,9),range(11,1114111)])),{\\not-follow(\\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\\end-of-line()})],{tag(\"category\"(\"comment\"))})": {
4-
"match": "((?:\\x{2F}\\x{2F})(?:(?:[\\x{1}-\\x{9}]|[\\x{B}-\\x{10FFFF}])*?(?!(?:\\x{9}|\\x{D}|\\x{20}|\\x{A0}|\\x{1680}|[\\x{2000}-\\x{200A}]|\\x{202F}|\\x{205F}|\\x{3000}))(?:$)))",
4+
"match": "((?:\\/\\/)(?:(?:[\\x01-\\t]|[\\x0B-\\x{10FFFF}])*?(?!(?:[\\t-\\t]|[\\r-\\r]|[\\x20-\\x20]|[\\xA0-\\xA0]|[\\x{1680}-\\x{1680}]|[\\x{2000}-\\x{200A}]|[\\x{202F}-\\x{202F}]|[\\x{205F}-\\x{205F}]|[\\x{3000}-\\x{3000}]))(?:$)))",
55
"name": "prod(lex(\"Comment\"),[lit(\"//\"),conditional(\\iter-star(\\char-class([range(1,9),range(11,1114111)])),{\\not-follow(\\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\\end-of-line()})],{tag(\"category\"(\"comment\"))})",
66
"captures": {
77
"1": {
@@ -19,16 +19,16 @@
1919
}
2020
},
2121
"prod(lex(\"delimiters\"),[alt({lit(\"bottom-up-break\"),lit(\")\"),lit(\"\"),lit(\"\\n\"),lit(\"!:=\"),lit(\"\\'\"),lit(\"!=\"),lit(\"\\>=\"),lit(\"://\"),lit(\"non-assoc\"),lit(\"&=\"),lit(\"\\<-\"),lit(\"*=\"),lit(\"+=\"),lit(\"top-down-break\"),lit(\",\"),lit(\"...\"),lit(\"/=\"),lit(\"\"),lit(\"(\"),lit(\"*/\"),lit(\"%\"),lit(\"!\\<\\<\"),lit(\"=\\>\"),lit(\"!\\>\\>\"),lit(\"||\"),lit(\"\\>\\>\"),lit(\"::\"),lit(\"\"),lit(\"&&\"),lit(\"@\"),lit(\":=\"),lit(\"#\"),lit(\"\\<==\\>\"),lit(\"\\\"\"),lit(\"\\<\\<=\"),lit(\"}\"),lit(\"?=\"),lit(\"\\<:\"),lit(\"==\\>\"),lit(\"^\"),lit(\"/*\"),lit(\";\"),lit(\"{\"),lit(\"-=\"),lit(\"$T\")})],{})": {
22-
"match": "(?:(?:bottom\\x{2D}up\\x{2D}break)|(?:\\x{29})|(?:\\x{226B})|(?:\\x{A})|(?:\\x{21}\\x{3A}\\x{3D})|(?:\\x{27})|(?:\\x{21}\\x{3D})|(?:\\x{3E}\\x{3D})|(?:\\x{3A}\\x{2F}\\x{2F})|(?:non\\x{2D}assoc)|(?:\\x{26}\\x{3D})|(?:\\x{3C}\\x{2D})|(?:\\x{2A}\\x{3D})|(?:\\x{2B}\\x{3D})|(?:top\\x{2D}down\\x{2D}break)|(?:\\x{2C})|(?:\\x{2E}\\x{2E}\\x{2E})|(?:\\x{2F}\\x{3D})|(?:\\x{21E8})|(?:\\x{28})|(?:\\x{2A}\\x{2F})|(?:\\x{25})|(?:\\x{21}\\x{3C}\\x{3C})|(?:\\x{3D}\\x{3E})|(?:\\x{21}\\x{3E}\\x{3E})|(?:\\x{7C}\\x{7C})|(?:\\x{3E}\\x{3E})|(?:\\x{3A}\\x{3A})|(?:\\x{26A0})|(?:\\x{26}\\x{26})|(?:\\x{40})|(?:\\x{3A}\\x{3D})|(?:\\x{23})|(?:\\x{3C}\\x{3D}\\x{3D}\\x{3E})|(?:\\x{22})|(?:\\x{3C}\\x{3C}\\x{3D})|(?:\\x{7D})|(?:\\x{3F}\\x{3D})|(?:\\x{3C}\\x{3A})|(?:\\x{3D}\\x{3D}\\x{3E})|(?:\\x{5E})|(?:\\x{2F}\\x{2A})|(?:\\x{3B})|(?:\\x{7B})|(?:\\x{2D}\\x{3D})|(?:\\x{24}T))",
22+
"match": "(?:(?:bottom\\-up\\-break)|(?:\\))|(?:\\x{226B})|(?:\\n)|(?:\\!\\:\\=)|(?:\\')|(?:\\!\\=)|(?:\\>\\=)|(?:\\:\\/\\/)|(?:non\\-assoc)|(?:\\&\\=)|(?:\\<\\-)|(?:\\*\\=)|(?:\\+\\=)|(?:top\\-down\\-break)|(?:\\,)|(?:\\.\\.\\.)|(?:\\/\\=)|(?:\\x{21E8})|(?:\\()|(?:\\*\\/)|(?:\\%)|(?:\\!\\<\\<)|(?:\\=\\>)|(?:\\!\\>\\>)|(?:\\|\\|)|(?:\\>\\>)|(?:\\:\\:)|(?:\\x{26A0})|(?:\\&\\&)|(?:\\@)|(?:\\:\\=)|(?:\\#)|(?:\\<\\=\\=\\>)|(?:\\\")|(?:\\<\\<\\=)|(?:\\})|(?:\\?\\=)|(?:\\<\\:)|(?:\\=\\=\\>)|(?:\\^)|(?:\\/\\*)|(?:\\;)|(?:\\{)|(?:\\-\\=)|(?:\\$T))",
2323
"name": "prod(lex(\"delimiters\"),[alt({lit(\"bottom-up-break\"),lit(\")\"),lit(\"\"),lit(\"\\n\"),lit(\"!:=\"),lit(\"\\'\"),lit(\"!=\"),lit(\"\\>=\"),lit(\"://\"),lit(\"non-assoc\"),lit(\"&=\"),lit(\"\\<-\"),lit(\"*=\"),lit(\"+=\"),lit(\"top-down-break\"),lit(\",\"),lit(\"...\"),lit(\"/=\"),lit(\"\"),lit(\"(\"),lit(\"*/\"),lit(\"%\"),lit(\"!\\<\\<\"),lit(\"=\\>\"),lit(\"!\\>\\>\"),lit(\"||\"),lit(\"\\>\\>\"),lit(\"::\"),lit(\"\"),lit(\"&&\"),lit(\"@\"),lit(\":=\"),lit(\"#\"),lit(\"\\<==\\>\"),lit(\"\\\"\"),lit(\"\\<\\<=\"),lit(\"}\"),lit(\"?=\"),lit(\"\\<:\"),lit(\"==\\>\"),lit(\"^\"),lit(\"/*\"),lit(\";\"),lit(\"{\"),lit(\"-=\"),lit(\"$T\")})],{})",
24-
"captures": []
24+
"captures": {}
2525
},
2626
"lit(\"`\"):lit(\"`\")": {
27-
"begin": "(?:\\x{60})",
28-
"end": "(?:\\x{60})",
27+
"begin": "(?:\\`)",
28+
"end": "(?:\\`)",
2929
"patterns": [
3030
{
31-
"match": "(\\x{5C}\\x{5C})",
31+
"match": "(\\\\\\\\)",
3232
"name": "prod(label(\"bs\",lex(\"ConcretePart\")),[lit(\"\\\\\\\\\")],{tag(\"category\"(\"string\"))})",
3333
"captures": {
3434
"1": {
@@ -37,7 +37,7 @@
3737
}
3838
},
3939
{
40-
"match": "(\\x{5C}\\x{3E})",
40+
"match": "(\\\\\\>)",
4141
"name": "prod(label(\"gt\",lex(\"ConcretePart\")),[lit(\"\\\\\\>\")],{tag(\"category\"(\"string\"))})",
4242
"captures": {
4343
"1": {
@@ -46,7 +46,7 @@
4646
}
4747
},
4848
{
49-
"match": "((?:[\\x{1}-\\x{9}]|[\\x{B}-\\x{3B}]|\\x{3D}|[\\x{3F}-\\x{5B}]|[\\x{5D}-\\x{5F}]|[a-\\x{10FFFF}])+?(?!(?:[\\x{1}-\\x{9}]|[\\x{B}-\\x{3B}]|\\x{3D}|[\\x{3F}-\\x{5B}]|[\\x{5D}-\\x{5F}]|[a-\\x{10FFFF}])))",
49+
"match": "((?:[\\x01-\\t]|[\\x0B-\\;]|[\\=-\\=]|[\\?-\\[]|[\\]-\\_]|[a-\\x{10FFFF}])+?(?!(?:[\\x01-\\t]|[\\x0B-\\;]|[\\=-\\=]|[\\?-\\[]|[\\]-\\_]|[a-\\x{10FFFF}])))",
5050
"name": "prod(label(\"text\",lex(\"ConcretePart\")),[conditional(iter(\\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)])),{\\not-follow(\\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)]))})],{tag(\"category\"(\"string\"))})",
5151
"captures": {
5252
"1": {
@@ -55,7 +55,7 @@
5555
}
5656
},
5757
{
58-
"match": "(\\x{5C}\\x{3C})",
58+
"match": "(\\\\\\<)",
5959
"name": "prod(label(\"lt\",lex(\"ConcretePart\")),[lit(\"\\\\\\<\")],{tag(\"category\"(\"string\"))})",
6060
"captures": {
6161
"1": {
@@ -64,7 +64,7 @@
6464
}
6565
},
6666
{
67-
"match": "(\\x{5C}\\x{60})",
67+
"match": "(\\\\\\`)",
6868
"name": "prod(label(\"bq\",lex(\"ConcretePart\")),[lit(\"\\\\`\")],{tag(\"category\"(\"string\"))})",
6969
"captures": {
7070
"1": {
@@ -76,11 +76,11 @@
7676
"name": "lit(\"`\"):lit(\"`\")"
7777
},
7878
"lit(\"[\"):lit(\"]\")": {
79-
"begin": "(?:\\x{5B})",
80-
"end": "(?:\\x{5D})",
79+
"begin": "(?:\\[)",
80+
"end": "(?:\\])",
8181
"patterns": [
8282
{
83-
"match": "((?:\\x{5C})(?:\\x{20}|\\x{22}|\\x{27}|\\x{2D}|\\x{3C}|\\x{3E}|[\\x{5B}-\\x{5D}]|b|f|n|r|t))",
83+
"match": "((?:\\\\)(?:[\\x20-\\x20]|[\\\"-\\\"]|[\\'-\\']|[\\--\\-]|[\\<-\\<]|[\\>-\\>]|[\\[-\\]]|[b-b]|[f-f]|[n-n]|[r-r]|[t-t]))",
8484
"name": "prod(lex(\"Char\"),[lit(\"\\\\\"),\\char-class([range(32,32),range(34,34),range(39,39),range(45,45),range(60,60),range(62,62),range(91,93),range(98,98),range(102,102),range(110,110),range(114,114),range(116,116)])],{tag(\"category\"(\"constant\"))})",
8585
"captures": {
8686
"1": {
@@ -89,7 +89,7 @@
8989
}
9090
},
9191
{
92-
"match": "((?:(?:\\x{5C})U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)(?:[0-9]|[A-F]|[a-f])))(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f]))|(?:(?:\\x{5C})u(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f]))|(?:(?:\\x{5C})a[0-7](?:[0-9]|[A-F]|[a-f])))",
92+
"match": "((?:(?:\\\\)[U-U](?:(?:\\b10\\b)|(?:(?:\\b0\\b)(?:[0-9]|[A-F]|[a-f])))(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f]))|(?:(?:\\\\)[u-u](?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f]))|(?:(?:\\\\)[a-a][0-7](?:[0-9]|[A-F]|[a-f])))",
9393
"name": "prod(lex(\"Char\"),[lex(\"UnicodeEscape\")],{tag(\"category\"(\"constant\"))})",
9494
"captures": {
9595
"1": {

0 commit comments

Comments
 (0)