1
1
from __future__ import annotations
2
2
3
+ import weakref
4
+ from asyncio import CancelledError , Event , Task , create_task , sleep
3
5
from contextlib import contextmanager
4
- from typing import ContextManager
6
+ from functools import partial
7
+ from typing import Callable , ContextManager , NamedTuple
5
8
6
9
try :
7
10
from tree_sitter import Language , Node , Parser , Query , Tree
@@ -43,6 +46,17 @@ def temporary_query_point_range(
43
46
query .set_point_range (default_point_range )
44
47
45
48
49
class SyntaxTreeEdit(NamedTuple):
    """Details of a tree-sitter syntax tree edit operation.

    The field names match the keyword arguments accepted by the syntax
    tree's ``edit`` method, so a recorded edit can be replayed later with
    ``tree.edit(**edit._asdict())``.
    """

    start_byte: int
    old_end_byte: int
    new_end_byte: int
    # Points are (row index, column *byte* offset) pairs, not plain integers.
    start_point: tuple[int, int]
    old_end_point: tuple[int, int]
    new_end_point: tuple[int, int]
58
+
59
+
46
60
class SyntaxAwareDocumentError (Exception ):
47
61
"""General error raised when SyntaxAwareDocument is used incorrectly."""
48
62
@@ -76,9 +90,37 @@ def __init__(
76
90
self ._parser = Parser (self .language )
77
91
"""The tree-sitter Parser or None if tree-sitter is unavailable."""
78
92
79
- self ._syntax_tree : Tree = self ._parser .parse (self ._read_callable ) # type: ignore
93
+ self ._syntax_tree : Tree = self ._parser .parse (
94
+ partial (self ._read_callable , lines = self .lines )
95
+ ) # type: ignore
80
96
"""The tree-sitter Tree (syntax tree) built from the document."""
81
97
98
+ self ._syntax_tree_update_callback : Callable [[], None ] | None = None
99
+ self ._background_parser = BackgroundSyntaxParser (self )
100
+ self ._pending_syntax_edits : list [SyntaxTreeEdit ] = []
101
+
102
def clean_up(self) -> None:
    """Release background resources before the document is discarded.

    This stops the document's background syntax parser.
    """
    background_parser = self._background_parser
    background_parser.stop()
105
+
106
def copy_of_lines(self) -> list[str]:
    """Provide a copy of the document's lines.

    Returns:
        A new list holding the document's current lines. Changes made to
        the returned list do not affect the document's own line list.
    """
    return list(self._lines)
109
+
110
def apply_pending_syntax_edits(self) -> bool:
    """Replay every queued edit onto the syntax tree, then empty the queue.

    Returns:
        True if at least one edit was applied, False if nothing was queued.
    """
    queued_edits = self._pending_syntax_edits
    if not queued_edits:
        return False

    for queued_edit in queued_edits:
        self._syntax_tree.edit(**queued_edit._asdict())
    # Empty the existing list in place rather than rebinding the attribute.
    queued_edits.clear()
    return True
123
+
82
124
def prepare_query (self , query : str ) -> Query | None :
83
125
"""Prepare a tree-sitter tree query.
84
126
@@ -117,6 +159,26 @@ def query_syntax_tree(
117
159
with temporary_query_point_range (query , start_point , end_point ):
118
160
return query .captures (self ._syntax_tree .root_node )
119
161
162
def set_syntax_tree_update_callback(
    self, callback: Callable[[], None]
) -> None:
    """Register the function to call when the syntax tree has been rebuilt.

    Any previously registered callback is replaced.

    Args:
        callback: A function that takes no arguments and returns None.
    """
    self._syntax_tree_update_callback = callback
172
+
173
def trigger_syntax_tree_update(self, force_update: bool = False) -> None:
    """Ask the background parser to schedule a syntax tree update.

    Args:
        force_update: When set, ensure that the syntax tree is regenerated
            unconditionally.
    """
    background_parser = self._background_parser
    background_parser.trigger_syntax_tree_update(force_update)
181
+
120
182
def replace_range (self , start : Location , end : Location , text : str ) -> EditResult :
121
183
"""Replace text at the given range.
122
184
@@ -143,22 +205,47 @@ def replace_range(self, start: Location, end: Location, text: str) -> EditResult
143
205
end_location = replace_result .end_location
144
206
assert self ._syntax_tree is not None
145
207
assert self ._parser is not None
146
- self ._syntax_tree .edit (
147
- start_byte = start_byte ,
148
- old_end_byte = old_end_byte ,
149
- new_end_byte = start_byte + text_byte_length ,
150
- start_point = start_point ,
151
- old_end_point = old_end_point ,
152
- new_end_point = self ._location_to_point (end_location ),
153
- )
154
- # Incrementally parse the document.
155
- self ._syntax_tree = self ._parser .parse (
156
- self ._read_callable ,
157
- self ._syntax_tree , # type: ignore[arg-type]
208
+ self ._pending_syntax_edits .append (
209
+ SyntaxTreeEdit (
210
+ start_byte = start_byte ,
211
+ old_end_byte = old_end_byte ,
212
+ new_end_byte = start_byte + text_byte_length ,
213
+ start_point = start_point ,
214
+ old_end_point = old_end_point ,
215
+ new_end_point = self ._location_to_point (end_location ),
216
+ )
158
217
)
159
-
160
218
return replace_result
161
219
220
def reparse(self, timeout_us: int, lines: list[str], syntax_tree=None) -> bool:
    """Reparse the document.

    The parser's timeout is changed only for the duration of this call and
    is restored afterwards, whether or not parsing succeeds.

    Args:
        timeout_us: The parser timeout in microseconds. Must be positive.
        lines: A list of the lines being parsed.
        syntax_tree: Currently unused; the document's own syntax tree is
            always the one passed to the parser.

    Returns:
        True if parsing succeeded and False if a timeout occurred.

    Raises:
        ValueError: If `timeout_us` is not greater than zero.
    """
    # Validate explicitly: an `assert` would vanish under `python -O`.
    if timeout_us <= 0:
        raise ValueError(f"timeout_us must be positive, got {timeout_us!r}")

    parser = self._parser
    read_source = partial(self._read_callable, lines=lines)
    saved_timeout = parser.timeout_micros
    parser.timeout_micros = timeout_us
    try:
        new_tree = parser.parse(read_source, self._syntax_tree)  # type: ignore[arg-type]
    except ValueError:
        # The only known cause is a timeout.
        return False
    else:
        self._syntax_tree = new_tree
        if self._syntax_tree_update_callback is not None:
            self._syntax_tree_update_callback()
        return True
    finally:
        # Runs on every exit path, including the early return above.
        parser.timeout_micros = saved_timeout
248
+
162
249
def get_line (self , index : int ) -> str :
163
250
"""Return the string representing the line, not including new line characters.
164
251
@@ -214,7 +301,12 @@ def _location_to_point(self, location: Location) -> tuple[int, int]:
214
301
bytes_on_left = 0
215
302
return row , bytes_on_left
216
303
217
- def _read_callable (self , byte_offset : int , point : tuple [int , int ]) -> bytes :
304
+ def _read_callable (
305
+ self ,
306
+ byte_offset : int ,
307
+ point : tuple [int , int ],
308
+ lines : list [str ],
309
+ ) -> bytes :
218
310
"""A callable which informs tree-sitter about the document content.
219
311
220
312
This is passed to tree-sitter which will call it frequently to retrieve
@@ -224,14 +316,14 @@ def _read_callable(self, byte_offset: int, point: tuple[int, int]) -> bytes:
224
316
byte_offset: The number of (utf-8) bytes from the start of the document.
225
317
point: A tuple (row index, column *byte* offset). Note that this differs
226
318
from our Location tuple which is (row_index, column codepoint offset).
319
+ lines: The lines of the document being parsed.
227
320
228
321
Returns:
229
322
All the utf-8 bytes between the byte_offset/point and the end of the current
230
323
line _including_ the line separator character(s). Returns None if the
231
324
offset/point requested by tree-sitter doesn't correspond to a byte.
232
325
"""
233
326
row , column = point
234
- lines = self ._lines
235
327
newline = self .newline
236
328
237
329
row_out_of_bounds = row >= len (lines )
@@ -252,3 +344,75 @@ def _read_callable(self, byte_offset: int, point: tuple[int, int]) -> bytes:
252
344
return b"\n "
253
345
254
346
return b""
347
+
348
+
349
class BackgroundSyntaxParser:
    """A provider of incremental background parsing for syntax highlighting.

    This runs tree-sitter parsing as a parallel, background asyncio task. This
    prevents occasional, relatively long parsing times from making `TextArea`
    editing become unresponsive.
    """

    PARSE_TIME_SLICE = 0.005
    """The longest a single parse step may run, in seconds."""
    PARSE_TIMEOUT_MICROSECONDS = int(PARSE_TIME_SLICE * 1_000_000)
    """`PARSE_TIME_SLICE` expressed in microseconds, as passed to `reparse`."""

    def __init__(self, document: "SyntaxAwareDocument") -> None:
        """Start the background reparsing task for a document.

        The task is created with `asyncio.create_task`, which needs a
        running event loop.

        Args:
            document: The document to reparse. Only a weak reference is
                kept, so this object does not keep the document alive.
        """
        self._document_ref = weakref.ref(document)
        self._event = Event()
        self._task: Task = create_task(self._execute_reparsing())
        self._force_update = False

    def stop(self) -> None:
        """Stop running as a background task."""
        self._task.cancel()

    def trigger_syntax_tree_update(self, force_update: bool) -> None:
        """Trigger a new syntax tree update to run in the background.

        Args:
            force_update: When set, ensure that the syntax tree is regenerated
                unconditionally.
        """
        # A pending forced update is never downgraded by a later unforced one.
        if force_update:
            self._force_update = True
        self._event.set()

    async def _execute_reparsing(self) -> None:
        """Run, as a task, tree-sitter reparse operations on demand."""
        try:
            while True:
                try:
                    await self._event.wait()
                except Exception:
                    # Best effort: if waiting itself fails, end the task
                    # quietly instead of surfacing an error.
                    return
                self._event.clear()
                # Consume the force flag so it applies to one reparse only.
                force_update, self._force_update = self._force_update, False
                await self._perform_a_single_reparse(force_update)
        except CancelledError:
            # Raised by `stop()`; finish the task without an error.
            return

    async def _perform_a_single_reparse(self, force_update: bool) -> None:
        """Bring the document's syntax tree up to date, yielding between steps.

        Nothing happens if the document no longer exists, or if there are no
        pending edits and the update is not forced.

        Args:
            force_update: Reparse even when no edits were pending.
        """
        document = self._document_ref()
        if document is None:
            return
        # Pending edits are always applied first, so a forced update still
        # parses against an up-to-date tree.
        had_pending_edits = document.apply_pending_syntax_edits()
        if not (had_pending_edits or force_update):
            return

        # In order to allow the user to continue editing without interruption,
        # we reparse a snapshot of the TextArea's document.
        snapshot_lines = document.copy_of_lines()

        # Use tree-sitter's parser timeout mechanism to break, when necessary,
        # the full reparse into multiple steps. Most of the time, tree-sitter
        # is so fast that no looping occurs.
        while not document.reparse(
            self.PARSE_TIMEOUT_MICROSECONDS, lines=snapshot_lines
        ):
            # Sleeping for zero seconds allows other tasks, I/O, *etc.* to
            # execute, keeping the TextArea and other widgets responsive.
            await sleep(0.0)
0 commit comments