Skip to content

Commit 57de067

Browse files
committed
Make syntax highlight generation scale better.
The TextArea widget code queries the entire syntax tree for each edit, using the tree-sitter Query.captures method. This has potential scaling issues, but such issues are exacerbated by the fact that the Query.captures method scales very poorly with the number of lines it is asked to generate captures for. It appears to be quadratic or something similar, I think - I strongly suspect a bug in tree-sitter or its Python bindings. On my laptop, this makes editing a 25,000-line Python file painfully unresponsive. A 25,000-line Python file is large, but not entirely unreasonable. I actually became aware of this behaviour while developing a LaTeX editor, which showed symptoms after about 1,000 lines. This change replaces the plain TextArea._highlights dictionary with a dictionary-like class that lazily performs item access to build highlight information for small blocks of (50) lines at a time. As well as keeping the TextArea very much more responsive, it will reduce the average memory requirements for larger documents. During regression testing, I discovered that the per-line highlights are not necessarily in the correct (best) order for syntax highlighting. For example, highlighting within string expressions can be lost. So I added suitable sorting. This required the snapshots for some tests to be updated.
1 parent 3d82280 commit 57de067

9 files changed

+894
-784
lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
6161
- Added `Style.has_transparent_foreground` property https://github.com/Textualize/textual/pull/5657
6262

6363

64+
### Fixed
65+
66+
- Fixed TextArea's syntax highlighting. Some highlighting details were not being
67+
applied. For example, in CSS, the text 'padding: 10px 0;' was shown in a
68+
single colour. Now the 'px' appears in a different colour to the rest of the
69+
text.
70+
71+
- Fixed a cause of slow editing for syntax highlighted TextArea widgets with
72+
large documents.
73+
74+
6475
## [2.1.2] - 2025-02-26
6576

6677
### Fixed

src/textual/document/_syntax_aware_document.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
from __future__ import annotations
22

3+
from contextlib import contextmanager
from typing import ContextManager, Iterator
5+
36
try:
47
from tree_sitter import Language, Node, Parser, Query, Tree
58

@@ -11,6 +14,35 @@
1114
from textual.document._document import Document, EditResult, Location, _utf8_encode
1215

1316

17+
@contextmanager
def temporary_query_point_range(
    query: Query,
    start_point: tuple[int, int] | None,
    end_point: tuple[int, int] | None,
) -> Iterator[None]:
    """Temporarily change the start and/or end point for a tree-sitter Query.

    The requested range is applied on entry. On exit, including when the
    body of the ``with`` block raises, the query's point range is set back to
    the default range, so the restriction never outlives the block.

    Args:
        query: The tree-sitter Query.
        start_point: The (row, column byte) to start the query at, or `None`
            to leave the start at its default.
        end_point: The (row, column byte) to end the query at, or `None` to
            leave the end at its default.

    Yields:
        `None`; the context manager exists only for its side effect on `query`.
    """
    # Note: Although not documented for the tree-sitter Python API, an
    #       end-point of (0, 0) means 'end of document'.
    default_point_range = [(0, 0), (0, 0)]
    default_start, default_end = default_point_range

    point_range = [
        default_start if start_point is None else start_point,
        default_end if end_point is None else end_point,
    ]
    query.set_point_range(point_range)
    try:
        yield
    finally:
        # Always restore the default, otherwise later queries made with the
        # same Query object would stay restricted to this range.
        query.set_point_range(default_point_range)
44+
45+
1446
class SyntaxAwareDocumentError(Exception):
1547
"""General error raised when SyntaxAwareDocument is used incorrectly."""
1648

@@ -82,14 +114,8 @@ def query_syntax_tree(
82114
Returns:
83115
A tuple containing the nodes and text captured by the query.
84116
"""
85-
captures_kwargs = {}
86-
if start_point is not None:
87-
captures_kwargs["start_point"] = start_point
88-
if end_point is not None:
89-
captures_kwargs["end_point"] = end_point
90-
91-
captures = query.captures(self._syntax_tree.root_node, **captures_kwargs)
92-
return captures
117+
with temporary_query_point_range(query, start_point, end_point):
118+
return query.captures(self._syntax_tree.root_node)
93119

94120
def replace_range(self, start: Location, end: Location, text: str) -> EditResult:
95121
"""Replace text at the given range.

src/textual/widgets/_text_area.py

Lines changed: 105 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,105 @@ class LanguageDoesNotExist(Exception):
8989
"""
9090

9191

92+
class HighlightMap:
    """Lazy evaluated pseudo dictionary mapping lines to highlight information.

    This allows TextArea syntax highlighting to scale. The document is split
    into blocks of `BLOCK_SIZE` lines, and the highlights for a block are only
    computed the first time one of its lines is looked up.

    Args:
        text_area_widget: The associated `TextArea` widget.
    """

    BLOCK_SIZE = 50

    def __init__(self, text_area_widget: widgets.TextArea):
        self.text_area_widget: widgets.TextArea = text_area_widget

        # Maps every line that has not been highlighted yet to the range of
        # lines (its block) that is built when that line is first accessed.
        self.uncovered_lines: dict[int, range] = {}

        # A mapping from line index to a list of Highlight instances.
        self._highlights: LineToHighlightsMap = defaultdict(list)
        self.reset()

    def reset(self) -> None:
        """Reset so that future lookups rebuild the highlight map."""
        self._highlights.clear()
        uncovered_lines = self.uncovered_lines
        uncovered_lines.clear()
        line_count = self.document.line_count
        block_size = self.BLOCK_SIZE
        for block_start in range(0, line_count, block_size):
            # The final block is clipped to the end of the document, so the
            # blocks always cover every line exactly once.
            block = range(block_start, min(block_start + block_size, line_count))
            uncovered_lines.update(dict.fromkeys(block, block))

    @property
    def document(self) -> DocumentBase:
        """The text document being highlighted."""
        return self.text_area_widget.document

    def __getitem__(self, idx: int) -> list[text_area.Highlight]:
        """Return the highlights for a line, building its block if needed.

        Args:
            idx: The index of the line.

        Returns:
            The sorted list of highlights recorded for the line.
        """
        block = self.uncovered_lines.get(idx)
        if block is not None:
            self._build_part_of_highlight_map(block)
        return self._highlights[idx]

    @staticmethod
    def _highlight_sort_key(highlight) -> tuple[int, int, int]:
        """Produce the sort key for a single `(a, b, highlight-name)` tuple.

        Highlights sort in ascending order of ``a``. When two highlights have
        the same value of ``a``, the one with the larger a--b range comes
        first, with ``None`` being considered larger than any number.
        """
        start, end, _ = highlight
        if end is None:
            return start, 0, 0
        return start, 1, start - end

    def _build_part_of_highlight_map(self, line_range: range) -> None:
        """Build part of the highlight map.

        Only rows inside `line_range` are updated. A node that spans several
        blocks is captured again by the query for each of those blocks, so
        writing its rows outside `line_range` here would record the same
        highlight twice on those rows.

        Args:
            line_range: The block of lines to build highlights for.
        """
        for line_index in line_range:
            self.uncovered_lines.pop(line_index, None)

        query = self.text_area_widget._highlight_query
        if query is None or len(line_range) == 0:
            # Nothing to query with; the lines simply have no highlights.
            return

        highlights = self._highlights
        first_row = line_range.start
        stop_row = line_range.stop
        captures = self.document.query_syntax_tree(
            query,
            start_point=(first_row, 0),
            end_point=(stop_row, 0),
        )
        for highlight_name, nodes in captures.items():
            for node in nodes:
                node_start_row, node_start_column = node.start_point
                node_end_row, node_end_column = node.end_point
                if node_start_row == node_end_row:
                    if node_start_row in line_range:
                        highlights[node_start_row].append(
                            (node_start_column, node_end_column, highlight_name)
                        )
                    continue

                # Add the first line of the node range
                if node_start_row in line_range:
                    highlights[node_start_row].append(
                        (node_start_column, None, highlight_name)
                    )

                # Add the middle lines - entire row of this node is highlighted.
                # Clipping to the block also keeps the work proportional to
                # the block size rather than to the size of the node.
                middle_start = max(node_start_row + 1, first_row)
                middle_stop = min(node_end_row, stop_row)
                for node_row in range(middle_start, middle_stop):
                    highlights[node_row].append((0, None, highlight_name))

                # Add the last line of the node range
                if node_end_row in line_range:
                    highlights[node_end_row].append(
                        (0, node_end_column, highlight_name)
                    )

        # The highlights for each line need to be sorted; see
        # `_highlight_sort_key` for the ordering. `list.sort` works in place.
        sort_key = self._highlight_sort_key
        for line_index in line_range:
            line_highlights = highlights.get(line_index)
            if line_highlights:
                line_highlights.sort(key=sort_key)
189+
190+
92191
@dataclass
93192
class TextAreaLanguage:
94193
"""A container for a language which has been registered with the TextArea."""
@@ -472,15 +571,15 @@ def __init__(
472571
cursor is currently at. If the cursor is at a bracket, or there's no matching
473572
bracket, this will be `None`."""
474573

475-
self._highlights: dict[int, list[Highlight]] = defaultdict(list)
476-
"""Mapping line numbers to the set of highlights for that line."""
477-
478574
self._highlight_query: "Query | None" = None
479575
"""The query that's currently being used for highlighting."""
480576

481577
self.document: DocumentBase = Document(text)
482578
"""The document this widget is currently editing."""
483579

580+
self._highlights: HighlightMap = HighlightMap(self)
581+
"""Mapping line numbers to the set of highlights for that line."""
582+
484583
self.wrapped_document: WrappedDocument = WrappedDocument(self.document)
485584
"""The wrapped view of the document."""
486585

@@ -609,35 +708,10 @@ def check_consume_key(self, key: str, character: str | None = None) -> bool:
609708
return character is not None and character.isprintable()
610709

611710
def _build_highlight_map(self) -> None:
612-
"""Query the tree for ranges to highlights, and update the internal highlights mapping."""
613-
highlights = self._highlights
614-
highlights.clear()
615-
if not self._highlight_query:
616-
return
617-
618-
captures = self.document.query_syntax_tree(self._highlight_query)
619-
for highlight_name, nodes in captures.items():
620-
for node in nodes:
621-
node_start_row, node_start_column = node.start_point
622-
node_end_row, node_end_column = node.end_point
623-
624-
if node_start_row == node_end_row:
625-
highlight = (node_start_column, node_end_column, highlight_name)
626-
highlights[node_start_row].append(highlight)
627-
else:
628-
# Add the first line of the node range
629-
highlights[node_start_row].append(
630-
(node_start_column, None, highlight_name)
631-
)
711+
"""Reset the lazily evaluated highlight map."""
632712

633-
# Add the middle lines - entire row of this node is highlighted
634-
for node_row in range(node_start_row + 1, node_end_row):
635-
highlights[node_row].append((0, None, highlight_name))
636-
637-
# Add the last line of the node range
638-
highlights[node_end_row].append(
639-
(0, node_end_column, highlight_name)
640-
)
713+
if self._highlight_query:
714+
self._highlights.reset()
641715

642716
def _watch_has_focus(self, focus: bool) -> None:
643717
self._cursor_visible = focus

0 commit comments

Comments
 (0)