Skip to content

Commit 05d8f53

Browse files
Merge pull request #50 from 20urc3/stable
Update f1_c_gen.py
2 parents 95a6857 + 56f0eba commit 05d8f53

File tree

1 file changed

+7
-14
lines changed

1 file changed

+7
-14
lines changed

grammars/f1_c_gen.py

Lines changed: 7 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -78,22 +78,13 @@ def to_bytes(self):
7878
# subnode_count
7979
subnode_count = len(self)
8080
ret += subnode_count.to_bytes(4, byteorder='little', signed=False)
81-
# val_len
82-
val_len = len(self.val)
81+
82+
# Encode the value as UTF-8
83+
val_bytes = self.val.encode('utf-8')
84+
# val_len (now stores the byte length of the UTF-8 encoded string)
85+
val_len = len(val_bytes)
8386
ret += val_len.to_bytes(4, byteorder='little', signed=False)
8487
# val
85-
# Latin-1 is an 8-bit character set. The first 128 characters of its
86-
# set are identical to the US ASCII standard. By encoding the string as
87-
# Latin-1, we can handle all hex characters from \u0000 to \u00ff
88-
# Refs:
89-
# - https://stackoverflow.com/questions/66601743/python3-str-to-bytes-convertation-problem
90-
# - https://kb.iu.edu/d/aepu
91-
val_bytes = bytes(self.val, 'latin-1')
92-
if val_len != len(val_bytes):
93-
print(f'The length of `val` should be {val_len}, but found {len(val_bytes)}.')
94-
print(f'`val` bytes in UTF-8 encoding: {val_bytes}')
95-
print('Please check your grammar file!')
96-
sys.exit(1)
9788
ret += val_bytes
9889

9990
# subnodes
@@ -103,6 +94,7 @@ def to_bytes(self):
10394
return ret
10495

10596
@staticmethod
97+
10698
def from_bytes(data: bytes):
10799
node = TreeNode()
108100
consumed = 0
@@ -133,6 +125,7 @@ def from_bytes(data: bytes):
133125

134126
return node, consumed
135127

128+
136129
def __str__(self):
137130
ret = ''
138131
if len(self) == 0:

0 commit comments

Comments (0)