Skip to content

Commit 15f4e49

Browse files
committed
Refactoring: special characters and escape sequences
Removed the escape sequences for rarely used special characters (\a, \b, \f) and for custom character codes (\xhh). Added an escape sequence for the null character (\0). Improved the display of special characters in nested structures. Improved error handling for invalid escape sequences.
1 parent 550b12e commit 15f4e49

27 files changed

+85
-28
lines changed

docs/.vuepress/utils/pyxell-syntax.js

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,5 +5,5 @@ module.exports = {
55
variable_name: /\b[a-z_][\w']*/, // so that apostrophes in variable names are handled properly
66
type_name: /\b[A-Z][\w']*/,
77
number: /(?:0b[01_]+|0o[0-7_]+|0x[\da-fA-F_]+|\d[\d_]*(?:r|(?:\.[\d_]+)?(?:[eE][-+]?[\d_]+|f)?)?)\b/,
8-
string: /"(?:[^\\"]|\\(?:["\\abfnrt]|x[0-9a-fA-F]+))*(?:"|$)|'(?:[^\\']|\\(?:['\\abfnrt]|x[0-9a-fA-F]+))?(?:'|$)/,
8+
string: /"(?:[^\\"]|\\.)*(?:"|$)|'(?:[^\\']|\\.)?(?:'|$)/,
99
};

docs/specification.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ This section describes all data types available in Pyxell, together with their p
4646
| `Rat` | arbitrary-precision rational number | `1.5` | `0r` |
4747
| `Float` | double-precision floating-point number | `3.14f` | `0f` |
4848
| `Bool` | boolean value | `true` | `false` |
49-
| `Char` | single-byte character | `'A'` | `'\x0'` |
49+
| `Char` | single-byte character | `'A'` | `'\0'` |
5050
| `String` | arbitrary-length string of characters | `"example"` | `""` |
5151

5252
### Compound types

lib/base.hpp

Lines changed: 32 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1054,14 +1054,44 @@ String toString(Bool x)
10541054
return make_string(x ? "true" : "false");
10551055
}
10561056

1057+
std::string transform_character_to_display(char c)
1058+
{
1059+
switch (c) {
1060+
case '\\': return "\\\\";
1061+
case '\n': return "\\n";
1062+
case '\r': return "\\r";
1063+
case '\t': return "\\t";
1064+
case '\0': return "\\0";
1065+
}
1066+
return std::string(1, c);
1067+
}
1068+
10571069
String toString(Char x)
10581070
{
1059-
return make_string("'" + std::string(1, x) + "'");
1071+
auto r = make_string();
1072+
r->append("'");
1073+
if (x == '\'') {
1074+
r->append("\\'");
1075+
} else {
1076+
r->append(transform_character_to_display(x));
1077+
}
1078+
r->append("'");
1079+
return r;
10601080
}
10611081

10621082
String toString(const String& x)
10631083
{
1064-
return make_string('"' + *x + '"');
1084+
auto r = make_string();
1085+
r->append("\"");
1086+
for (auto c: *x) {
1087+
if (c == '"') {
1088+
r->append("\\\"");
1089+
} else {
1090+
r->append(transform_character_to_display(c));
1091+
}
1092+
}
1093+
r->append("\"");
1094+
return r;
10651095
}
10661096

10671097
template <typename T>

src/errors.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11

22
class PyxellError(Exception):
33

4+
InvalidEscapeSequence = lambda s: f"Invalid escape sequence `{s}`"
45
InvalidSyntax = lambda: f"Syntax error"
56

67
AbstractClass = lambda t: f"Cannot instantiate an abstract class `{t.show()}`"

src/lexer.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,8 @@
1212

1313
ID_REGEX = r'''[a-zA-Z_][\w']*'''
1414
NUMBER_REGEX = r'''0b[01_]+|0o[0-7_]+|0x[\da-fA-F_]+|\d[\d_]*(?:r|(?:\.[\d_]+)?(?:[eE][-+]?[\d_]+|f)?)?'''
15-
ESCAPE_SEQ_REGEX = r'''[\\abfnrt]|x[0-9a-fA-F]+'''
16-
CHAR_REGEX = rf'''\'(?:[^\\']|\\(?:'|{ESCAPE_SEQ_REGEX}))\''''
17-
STRING_REGEX = rf'''\"(?:[^\\"]|\\(?:"|{ESCAPE_SEQ_REGEX}))*\"'''
15+
CHAR_REGEX = rf'''\'(?:[^\\']|\\.)\''''
16+
STRING_REGEX = rf'''\"(?:[^\\"]|\\.)*\"'''
1817

1918

2019
class Token:

src/parser.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -412,9 +412,17 @@ def parse_expr_prefix_op(self, token):
412412
'value': token.text == 'true',
413413
}
414414
if token.type in {Token.CHAR, Token.STRING}:
415+
value = token.text[1:-1]
416+
i = 0
417+
while i < len(value):
418+
if value[i] == '\\':
419+
i += 1
420+
if value[i] not in {'\\', '\'', '"', 'n', 'r', 't', '0'}:
421+
raise err(self.filepath, (token.position[0], token.position[1] + i), err.InvalidEscapeSequence(value[i-1:i+1]))
422+
i += 1
415423
return {
416424
**self.expr_node(f'Atom{token.type.capitalize()}', token),
417-
'value': token.text[1:-1],
425+
'value': value,
418426
}
419427
if token.text in {'null', 'super', 'this'}:
420428
return {

test/bad/strings/escape01.err

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
:2:2: Invalid escape sequence `\x`.
File renamed without changes.

test/bad/strings/escape02.err

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
:2:8: Invalid escape sequence `\T`.

test/bad/strings/escape02.px

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
2+
print "\T"

test/bad/strings/escape03.err

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
:2:4: Invalid escape sequence `\.`.

test/bad/strings/escape03.px

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
2+
"\"\.\\"
File renamed without changes.

test/bad/strings/escape04.px

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
2+
'\'

test/bad/strings/escape05.err

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
:2:1: Syntax error.

test/bad/strings/escape05.px

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
2+
"\"\\\"

test/good/strings/char06.out

Lines changed: 0 additions & 3 deletions
This file was deleted.

test/good/strings/char06.px

Lines changed: 0 additions & 6 deletions
This file was deleted.

test/good/strings/escape01.out

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
escape"\
2+
test

test/good/strings/escape01.px

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
2+
s = "escape\"\\\n\ttest"
3+
print s

test/good/strings/escape02.out

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
7
2+
true
3+
\'"
4+
\
5+
("\\'\"\n\r\t\0", 1)
6+
['\\', '\'', '"', '\n', '\r', '\t', '\0']
7+
[92, 39, 34, 10, 13, 9, 0]

test/good/strings/escape02.px

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
2+
s = "\\\'\"\n\r\t\0"
3+
c = ['\\', '\'', '\"', '\n', '\r', '\t', '\0']
4+
print s.length
5+
print s == c.join()
6+
print s[:4], c[0]
7+
print (s, 1)
8+
print c
9+
print c.map(_.code)

test/good/strings/string02.out

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
1-
escape"\
2-
test
1+
true
2+
6

test/good/strings/string02.px

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
11

2-
s = "escape\"\\\n\ttest"
3-
print s
2+
s = ""
3+
print s.empty
4+
print "string".length + s.length

test/good/strings/string06.out

Lines changed: 0 additions & 2 deletions
This file was deleted.

test/good/strings/string06.px

Lines changed: 0 additions & 4 deletions
This file was deleted.

version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.13.1.dev
1+
0.14.0

0 commit comments

Comments
 (0)