Skip to content

Commit 9baa643

Browse files
committed
Run tree-sitter parsing as a background task.
Tree-sitter's incremental parsing is not always fast enough to run on every editing keystroke without making the user experience very poor. For example:

```python
"""Docstring with closing quotes not yet added <cursor here>

import textual
...
```

Typing into the above docstring can become very slow. For a 25,000 line Python file on my laptop, each change causes a reparse time of about 0.2 - 0.3 seconds: editing is painful.

This change decouples incremental parsing from TextArea edits, using tree-sitter's timeout mechanism and a task to effectively run parsing in the background, on a snapshot of the TextArea's contents. While parsing is in progress, editing of the TextArea text continues in a responsive manner. Edits to the tree-sitter parse tree are buffered until the background parser is able to process them, while edits to the displayed text are applied as they occur.
1 parent 59a7793 commit 9baa643

File tree

5 files changed

+261
-40
lines changed

5 files changed

+261
-40
lines changed

CHANGELOG.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
6868
single colour. Now the 'px' appears in a different colour to the rest of the
6969
text.
7070

71-
- Fixed a cause of slow editing for syntax highlighted TextArea widgets with
72-
large documents.
71+
- Fixed some situations where editing for syntax highlighted TextArea widgets with
72+
large documents was very unresponsive.
7373

7474

7575
## [2.1.2] - 2025-02-26

src/textual/document/_document.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from abc import ABC, abstractmethod
44
from dataclasses import dataclass
55
from functools import lru_cache
6-
from typing import TYPE_CHECKING, NamedTuple, Tuple, overload
6+
from typing import TYPE_CHECKING, Callable, NamedTuple, Tuple, overload
77

88
from typing_extensions import Literal, get_args
99

@@ -140,6 +140,12 @@ def get_size(self, indent_width: int) -> Size:
140140
The Size of the document bounding box.
141141
"""
142142

143+
def clean_up(self) -> None:
144+
"""Perform any pre-deletion clean up.
145+
146+
The default implementation does nothing.
147+
"""
148+
143149
def query_syntax_tree(
144150
self,
145151
query: "Query",
@@ -162,6 +168,27 @@ def query_syntax_tree(
162168
"""
163169
return {}
164170

171+
def set_syntax_tree_update_callback(
172+
callback: Callable[[], None],
173+
) -> None:
174+
"""Set a callback function for signalling a rebuild of the syntax tree.
175+
176+
The default implementation does nothing.
177+
178+
Args:
179+
callback: A function that takes no arguments and returns None.
180+
"""
181+
182+
def trigger_syntax_tree_update(self, force_update: bool = False) -> None:
183+
"""Trigger a new syntax tree update to run in the background.
184+
185+
The default implementation does nothing.
186+
187+
Args:
188+
force_update: When set, ensure that the syntax tree is regenerated
189+
unconditionally.
190+
"""
191+
165192
def prepare_query(self, query: str) -> "Query | None":
166193
return None
167194

src/textual/document/_syntax_aware_document.py

Lines changed: 181 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
from __future__ import annotations
22

3+
import weakref
4+
from asyncio import CancelledError, Event, Task, create_task, sleep
35
from contextlib import contextmanager
4-
from typing import ContextManager
6+
from functools import partial
7+
from typing import Callable, ContextManager, NamedTuple
58

69
try:
710
from tree_sitter import Language, Node, Parser, Query, Tree
@@ -43,6 +46,17 @@ def temporary_query_point_range(
4346
query.set_point_range(default_point_range)
4447

4548

49+
class SyntaxTreeEdit(NamedTuple):
50+
"""Details of a tree-sitter syntax tree edit operation."""
51+
52+
start_byte: int
53+
old_end_byte: int
54+
new_end_byte: int
55+
start_point: int
56+
old_end_point: int
57+
new_end_point: int
58+
59+
4660
class SyntaxAwareDocumentError(Exception):
4761
"""General error raised when SyntaxAwareDocument is used incorrectly."""
4862

@@ -76,9 +90,37 @@ def __init__(
7690
self._parser = Parser(self.language)
7791
"""The tree-sitter Parser or None if tree-sitter is unavailable."""
7892

79-
self._syntax_tree: Tree = self._parser.parse(self._read_callable) # type: ignore
93+
self._syntax_tree: Tree = self._parser.parse(
94+
partial(self._read_callable, lines=self.lines)
95+
) # type: ignore
8096
"""The tree-sitter Tree (syntax tree) built from the document."""
8197

98+
self._syntax_tree_update_callback: Callable[[], None] | None = None
99+
self._background_parser = BackgroundSyntaxParser(self)
100+
self._pending_syntax_edits: list[SyntaxTreeEdit] = []
101+
102+
def clean_up(self) -> None:
103+
"""Perform any pre-deletion clean up."""
104+
self._background_parser.stop()
105+
106+
def copy_of_lines(self):
107+
"""Provide a copy of the document's lines."""
108+
return list(self._lines)
109+
110+
def apply_pending_syntax_edits(self) -> bool:
111+
"""Apply any pending edits to the syntax tree.
112+
113+
Returns:
114+
True if any edits were applied.
115+
"""
116+
if self._pending_syntax_edits:
117+
for edit in self._pending_syntax_edits:
118+
self._syntax_tree.edit(**edit._asdict())
119+
self._pending_syntax_edits[:] = []
120+
return True
121+
else:
122+
return False
123+
82124
def prepare_query(self, query: str) -> Query | None:
83125
"""Prepare a tree-sitter tree query.
84126
@@ -117,6 +159,26 @@ def query_syntax_tree(
117159
with temporary_query_point_range(query, start_point, end_point):
118160
return query.captures(self._syntax_tree.root_node)
119161

162+
def set_syntax_tree_update_callback(
163+
self,
164+
callback: Callable[[], None],
165+
) -> None:
166+
"""Set a callback function for signalling a rebuild of the syntax tree.
167+
168+
Args:
169+
callback: A function that takes no arguments and returns None.
170+
"""
171+
self._syntax_tree_update_callback = callback
172+
173+
def trigger_syntax_tree_update(self, force_update: bool = False) -> None:
174+
"""Trigger a new syntax tree update to run in the background.
175+
176+
Args:
177+
force_update: When set, ensure that the syntax tree is regenerated
178+
unconditionally.
179+
"""
180+
self._background_parser.trigger_syntax_tree_update(force_update)
181+
120182
def replace_range(self, start: Location, end: Location, text: str) -> EditResult:
121183
"""Replace text at the given range.
122184
@@ -143,22 +205,47 @@ def replace_range(self, start: Location, end: Location, text: str) -> EditResult
143205
end_location = replace_result.end_location
144206
assert self._syntax_tree is not None
145207
assert self._parser is not None
146-
self._syntax_tree.edit(
147-
start_byte=start_byte,
148-
old_end_byte=old_end_byte,
149-
new_end_byte=start_byte + text_byte_length,
150-
start_point=start_point,
151-
old_end_point=old_end_point,
152-
new_end_point=self._location_to_point(end_location),
153-
)
154-
# Incrementally parse the document.
155-
self._syntax_tree = self._parser.parse(
156-
self._read_callable,
157-
self._syntax_tree, # type: ignore[arg-type]
208+
self._pending_syntax_edits.append(
209+
SyntaxTreeEdit(
210+
start_byte=start_byte,
211+
old_end_byte=old_end_byte,
212+
new_end_byte=start_byte + text_byte_length,
213+
start_point=start_point,
214+
old_end_point=old_end_point,
215+
new_end_point=self._location_to_point(end_location),
216+
)
158217
)
159-
160218
return replace_result
161219

220+
def reparse(self, timeout_us: int, lines: list[str], syntax_tree=None) -> bool:
221+
"""Reparse the document.
222+
223+
Args:
224+
timeout_us: The parser timeout in microseconds.
225+
lines: A list of the lines being parsed.
226+
227+
Returns:
228+
True if parsing succeeded and False if a timeout occurred.
229+
"""
230+
assert timeout_us > 0
231+
read_source = partial(self._read_callable, lines=lines)
232+
tree = self._syntax_tree
233+
saved_timeout = self._parser.timeout_micros
234+
try:
235+
self._parser.timeout_micros = timeout_us
236+
try:
237+
tree = self._parser.parse(read_source, tree) # type: ignore[arg-type]
238+
except ValueError:
239+
# The only known cause is a timeout.
240+
return False
241+
else:
242+
self._syntax_tree = tree
243+
if self._syntax_tree_update_callback is not None:
244+
self._syntax_tree_update_callback()
245+
return True
246+
finally:
247+
self._parser.timeout_micros = saved_timeout
248+
162249
def get_line(self, index: int) -> str:
163250
"""Return the string representing the line, not including new line characters.
164251
@@ -214,7 +301,12 @@ def _location_to_point(self, location: Location) -> tuple[int, int]:
214301
bytes_on_left = 0
215302
return row, bytes_on_left
216303

217-
def _read_callable(self, byte_offset: int, point: tuple[int, int]) -> bytes:
304+
def _read_callable(
305+
self,
306+
byte_offset: int,
307+
point: tuple[int, int],
308+
lines: list[str],
309+
) -> bytes:
218310
"""A callable which informs tree-sitter about the document content.
219311
220312
This is passed to tree-sitter which will call it frequently to retrieve
@@ -224,14 +316,14 @@ def _read_callable(self, byte_offset: int, point: tuple[int, int]) -> bytes:
224316
byte_offset: The number of (utf-8) bytes from the start of the document.
225317
point: A tuple (row index, column *byte* offset). Note that this differs
226318
from our Location tuple which is (row_index, column codepoint offset).
319+
lines: The lines of the document being parsed.
227320
228321
Returns:
229322
All the utf-8 bytes between the byte_offset/point and the end of the current
230323
line _including_ the line separator character(s). Returns None if the
231324
offset/point requested by tree-sitter doesn't correspond to a byte.
232325
"""
233326
row, column = point
234-
lines = self._lines
235327
newline = self.newline
236328

237329
row_out_of_bounds = row >= len(lines)
@@ -252,3 +344,75 @@ def _read_callable(self, byte_offset: int, point: tuple[int, int]) -> bytes:
252344
return b"\n"
253345

254346
return b""
347+
348+
349+
class BackgroundSyntaxParser:
350+
"""A provider of incremental background parsing for syntax highlighting.
351+
352+
This runs tree-sitter parsing as a parallel, background asyncio task. This
353+
prevents occasional, relatively long parsing times from making `TextArea`
354+
editing become unresponsive.
355+
"""
356+
357+
PARSE_TIME_SLICE = 0.005
358+
PARSE_TIMEOUT_MICROSECONDS = int(PARSE_TIME_SLICE * 1_000_000)
359+
360+
def __init__(self, document: SyntaxAwareDocument):
361+
self._document_ref = weakref.ref(document)
362+
self._event = Event()
363+
self._task: Task = create_task(self._execute_reparsing())
364+
self._force_update = False
365+
366+
def stop(self):
367+
"""Stop running as a background task."""
368+
self._task.cancel()
369+
370+
def trigger_syntax_tree_update(self, force_update: bool) -> None:
371+
"""Trigger a new syntax tree update to run in the background.
372+
373+
Args:
374+
force_update: When set, ensure that the syntax tree is regenerated
375+
unconditionally.
376+
"""
377+
if force_update:
378+
self._force_update = True
379+
self._event.set()
380+
381+
async def _execute_reparsing(self) -> None:
382+
"""Run, as a task, tree-sitter reparse operations on demand."""
383+
while True:
384+
try:
385+
try:
386+
await self._event.wait()
387+
except Exception as e:
388+
return
389+
self._event.clear()
390+
force_update = self._force_update
391+
self._force_update = False
392+
await self._perform_a_single_reparse(force_update)
393+
except CancelledError:
394+
return
395+
396+
async def _perform_a_single_reparse(self, force_update: bool) -> None:
397+
document = self._document_ref()
398+
if document is None:
399+
return
400+
if not (document.apply_pending_syntax_edits() or force_update):
401+
return
402+
403+
# In order to allow the user to continue editing without interruption, we reparse
404+
# a snapshot of the TextArea's document.
405+
copy_of_text_for_parsing = document.copy_of_lines()
406+
407+
# Use tree-sitter's parser timeout mechanism, when necessary, break the
408+
# full reparse into multiple steps. Most of the time, tree-sitter is so
409+
# fast that no looping occurs.
410+
parsed_ok = False
411+
while not parsed_ok:
412+
parsed_ok = document.reparse(
413+
self.PARSE_TIMEOUT_MICROSECONDS, lines=copy_of_text_for_parsing
414+
)
415+
if not parsed_ok:
416+
# Sleeping for zero seconds allows other tasks, I/O, *etc.* to execute,
417+
# keeping the TextArea and other widgets responsive.
418+
await sleep(0.0)

0 commit comments

Comments
 (0)