Skip to content

Commit 7abd71b

Browse files
committed
Add tests for escaped string delimiters
1 parent d2d39b3 commit 7abd71b

File tree

2 files changed

+57
-13
lines changed

2 files changed

+57
-13
lines changed

jsonpath_rw/lexer.py

Lines changed: 48 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ def tokenize(self, string):
2626

2727
new_lexer = ply.lex.lex(module=self, debug=self.debug, errorlog=logger)
2828
new_lexer.latest_newline = 0
29+
new_lexer.string_value = None
2930
new_lexer.input(string)
3031

3132
while True:
@@ -34,6 +35,9 @@ def tokenize(self, string):
3435
t.col = t.lexpos - new_lexer.latest_newline
3536
yield t
3637

38+
if new_lexer.string_value is not None:
39+
raise JsonPathLexerError('Unexpected EOF in string literal or identifier')
40+
3741
# ============== PLY Lexer specification ==================
3842
#
3943
# This probably should be private but:
@@ -66,17 +70,28 @@ def t_NUMBER(self, t):
6670
t.value = int(t.value)
6771
return t
6872

73+
6974
# Single-quoted strings
7075
t_singlequote_ignore = ''
71-
def t_SINGLEQUOTE(self, t):
72-
r'\''
76+
def t_singlequote(self, t):
77+
r"'"
7378
t.lexer.string_start = t.lexer.lexpos
79+
t.lexer.string_value = ''
7480
t.lexer.push_state('singlequote')
7581

76-
def t_singlequote_SINGLEQUOTE(self, t):
77-
r"([^']|\\')*'"
78-
t.value = t.value[:-1]
82+
def t_singlequote_content(self, t):
83+
r"[^'\\]+"
84+
t.lexer.string_value += t.value
85+
86+
def t_singlequote_escape(self, t):
87+
r'\\.'
88+
t.lexer.string_value += t.value[1]
89+
90+
def t_singlequote_end(self, t):
91+
r"'"
92+
t.value = t.lexer.string_value
7993
t.type = 'ID'
94+
t.lexer.string_value = None
8095
t.lexer.pop_state()
8196
return t
8297

@@ -86,15 +101,25 @@ def t_singlequote_error(self, t):
86101

87102
# Double-quoted strings
88103
t_doublequote_ignore = ''
89-
def t_DOUBLEQUOTE(self, t):
104+
def t_doublequote(self, t):
90105
r'"'
91106
t.lexer.string_start = t.lexer.lexpos
107+
t.lexer.string_value = ''
92108
t.lexer.push_state('doublequote')
93109

94-
def t_doublequote_DOUBLEQUOTE(self, t):
95-
r'([^"]|\\")*"'
96-
t.value = t.value[:-1]
110+
def t_doublequote_content(self, t):
111+
r'[^"\\]+'
112+
t.lexer.string_value += t.value
113+
114+
def t_doublequote_escape(self, t):
115+
r'\\.'
116+
t.lexer.string_value += t.value[1]
117+
118+
def t_doublequote_end(self, t):
119+
r'"'
120+
t.value = t.lexer.string_value
97121
t.type = 'ID'
122+
t.lexer.string_value = None
98123
t.lexer.pop_state()
99124
return t
100125

@@ -104,15 +129,25 @@ def t_doublequote_error(self, t):
104129

105130
# Back-quoted "magic" operators
106131
t_backquote_ignore = ''
107-
def t_BACKQUOTE(self, t):
132+
def t_backquote(self, t):
108133
r'`'
109134
t.lexer.string_start = t.lexer.lexpos
135+
t.lexer.string_value = ''
110136
t.lexer.push_state('backquote')
111137

112-
def t_backquote_BACKQUOTE(self, t):
113-
r'([^`]|\\`)*`'
114-
t.value = t.value[:-1]
138+
def t_backquote_escape(self, t):
139+
r'\\.'
140+
t.lexer.string_value += t.value[1]
141+
142+
def t_backquote_content(self, t):
143+
r"[^`\\]+"
144+
t.lexer.string_value += t.value
145+
146+
def t_backquote_end(self, t):
147+
r'`'
148+
t.value = t.lexer.string_value
115149
t.type = 'NAMED_OPERATOR'
150+
t.lexer.string_value = None
116151
t.lexer.pop_state()
117152
return t
118153

tests/test_lexer.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ def test_simple_inputs(self):
3535
self.assert_lex_equiv('$', [self.token('$', '$')])
3636
self.assert_lex_equiv('"hello"', [self.token('hello', 'ID')])
3737
self.assert_lex_equiv("'goodbye'", [self.token('goodbye', 'ID')])
38+
self.assert_lex_equiv("'doublequote\"'", [self.token('doublequote"', 'ID')])
39+
self.assert_lex_equiv(r'"doublequote\""', [self.token('doublequote"', 'ID')])
40+
self.assert_lex_equiv(r"'singlequote\''", [self.token("singlequote'", 'ID')])
41+
self.assert_lex_equiv('"singlequote\'"', [self.token("singlequote'", 'ID')])
3842
self.assert_lex_equiv('fuzz', [self.token('fuzz', 'ID')])
3943
self.assert_lex_equiv('1', [self.token(1, 'NUMBER')])
4044
self.assert_lex_equiv('45', [self.token(45, 'NUMBER')])
@@ -54,7 +58,12 @@ def test_basic_errors(self):
5458
def tokenize(s):
5559
l = JsonPathLexer(debug=True)
5660
return list(l.tokenize(s))
61+
5762
self.assertRaises(JsonPathLexerError, tokenize, "'\"")
5863
self.assertRaises(JsonPathLexerError, tokenize, '"\'')
64+
self.assertRaises(JsonPathLexerError, tokenize, '`"')
65+
self.assertRaises(JsonPathLexerError, tokenize, "`'")
66+
self.assertRaises(JsonPathLexerError, tokenize, '"`')
67+
self.assertRaises(JsonPathLexerError, tokenize, "'`")
5968
self.assertRaises(JsonPathLexerError, tokenize, '?')
6069
self.assertRaises(JsonPathLexerError, tokenize, '$.foo.bar.#')

0 commit comments

Comments
 (0)