Skip to content

Commit 64cba2f

Browse files
authored
Merge pull request #5639 from Textualize/tree-sitter-improvements
TextArea improvements - lazy language import and allow installation of only required languages
2 parents 77e354c + 0216abc commit 64cba2f

File tree

6 files changed

+125
-154
lines changed

6 files changed

+125
-154
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
3232
- Tabs now accept Content or content markup https://github.com/Textualize/textual/pull/5657
3333
- Buttons will now use Textual markup rather than console markup
3434

35+
### Changed
36+
37+
- tree-sitter languages are now loaded lazily, improving cold-start time https://github.com/Textualize/textual/pull/5639
38+
3539
## [2.1.2] - 2025-02-26
3640

3741
### Fixed

src/textual/_tree_sitter.py

Lines changed: 31 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,44 @@
11
from __future__ import annotations
2+
from importlib import import_module
3+
4+
from textual import log
5+
26

37
try:
4-
import tree_sitter_bash
5-
import tree_sitter_css
6-
import tree_sitter_go
7-
import tree_sitter_html
8-
import tree_sitter_java
9-
import tree_sitter_javascript
10-
import tree_sitter_json
11-
import tree_sitter_markdown
12-
import tree_sitter_python
13-
import tree_sitter_regex
14-
import tree_sitter_rust
15-
import tree_sitter_sql
16-
import tree_sitter_toml
17-
import tree_sitter_xml
18-
import tree_sitter_yaml
198
from tree_sitter import Language
209

21-
_tree_sitter = True
10+
_LANGUAGE_CACHE: dict[str, Language] = {}
2211

23-
_languages = {
24-
"python": Language(tree_sitter_python.language()),
25-
"json": Language(tree_sitter_json.language()),
26-
"markdown": Language(tree_sitter_markdown.language()),
27-
"yaml": Language(tree_sitter_yaml.language()),
28-
"toml": Language(tree_sitter_toml.language()),
29-
"rust": Language(tree_sitter_rust.language()),
30-
"html": Language(tree_sitter_html.language()),
31-
"css": Language(tree_sitter_css.language()),
32-
"xml": Language(tree_sitter_xml.language_xml()),
33-
"regex": Language(tree_sitter_regex.language()),
34-
"sql": Language(tree_sitter_sql.language()),
35-
"javascript": Language(tree_sitter_javascript.language()),
36-
"java": Language(tree_sitter_java.language()),
37-
"bash": Language(tree_sitter_bash.language()),
38-
"go": Language(tree_sitter_go.language()),
39-
}
12+
_tree_sitter = True
4013

4114
def get_language(language_name: str) -> Language | None:
42-
return _languages.get(language_name)
15+
if language_name in _LANGUAGE_CACHE:
16+
return _LANGUAGE_CACHE[language_name]
17+
18+
try:
19+
module = import_module(f"tree_sitter_{language_name}")
20+
except ImportError:
21+
return None
22+
else:
23+
try:
24+
if language_name == "xml":
25+
# xml uses language_xml() instead of language()
26+
# it's the only outlier amongst the languages in the `textual[syntax]` extra
27+
language = Language(module.language_xml(), name=language_name)
28+
else:
29+
language = Language(module.language(), name=language_name)
30+
except (OSError, AttributeError):
31+
log.warning(f"Could not load language {language_name!r}.")
32+
return None
33+
else:
34+
_LANGUAGE_CACHE[language_name] = language
35+
return language
4336

4437
except ImportError:
4538
_tree_sitter = False
46-
_languages = {}
39+
40+
def get_language(language_name: str) -> Language | None:
41+
return None
42+
4743

4844
TREE_SITTER = _tree_sitter
49-
BUILTIN_LANGUAGES: dict[str, "Language"] = _languages

src/textual/document/_syntax_aware_document.py

Lines changed: 8 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
TREE_SITTER = False
99

1010

11-
from textual._tree_sitter import BUILTIN_LANGUAGES
1211
from textual.document._document import Document, EditResult, Location, _utf8_encode
1312

1413

@@ -17,59 +16,30 @@ class SyntaxAwareDocumentError(Exception):
1716

1817

1918
class SyntaxAwareDocument(Document):
20-
"""A wrapper around a Document which also maintains a tree-sitter syntax
19+
"""A subclass of Document which also maintains a tree-sitter syntax
2120
tree when the document is edited.
22-
23-
The primary reason for this split is actually to keep tree-sitter stuff separate,
24-
since it isn't supported in Python 3.7. By having the tree-sitter code
25-
isolated in this subclass, it makes it easier to conditionally import. However,
26-
it does come with other design flaws (e.g. Document is required to have methods
27-
which only really make sense on SyntaxAwareDocument).
28-
29-
If you're reading this and Python 3.7 is no longer supported by Textual,
30-
consider merging this subclass into the `Document` superclass.
3121
"""
3222

3323
def __init__(
3424
self,
3525
text: str,
36-
language: str | Language,
26+
language: Language,
3727
):
3828
"""Construct a SyntaxAwareDocument.
3929
4030
Args:
4131
text: The initial text contained in the document.
42-
language: The language to use. You can pass a string to use a supported
43-
language, or pass in your own tree-sitter `Language` object.
32+
language: The tree-sitter language to use.
4433
"""
4534

4635
if not TREE_SITTER:
47-
raise RuntimeError("SyntaxAwareDocument unavailable.")
36+
raise RuntimeError(
37+
"SyntaxAwareDocument unavailable - tree-sitter is not installed."
38+
)
4839

4940
super().__init__(text)
50-
self.language: Language | None = None
51-
"""The tree-sitter Language or None if tree-sitter is unavailable."""
52-
53-
self._parser: Parser | None = None
54-
55-
from textual._tree_sitter import get_language
56-
57-
# If the language is `None`, then avoid doing any parsing related stuff.
58-
if isinstance(language, str):
59-
if language not in BUILTIN_LANGUAGES:
60-
raise SyntaxAwareDocumentError(f"Invalid language {language!r}")
61-
# If tree-sitter-languages is not installed properly, get_language
62-
# and get_parser may raise an OSError when unable to load their
63-
# resources
64-
65-
try:
66-
self.language = get_language(language)
67-
except OSError as e:
68-
raise SyntaxAwareDocumentError(
69-
f"Could not find binaries for {language!r}"
70-
) from e
71-
else:
72-
self.language = language
41+
self.language: Language = language
42+
"""The tree-sitter Language."""
7343

7444
self._parser = Parser(self.language)
7545
"""The tree-sitter Parser for this document's language."""
@@ -90,16 +60,6 @@ def prepare_query(self, query: str) -> Query | None:
9060
Returns:
9161
The prepared query.
9262
"""
93-
if not TREE_SITTER:
94-
raise SyntaxAwareDocumentError(
95-
"Couldn't prepare query - tree-sitter is not available on this architecture."
96-
)
97-
98-
if self.language is None:
99-
raise SyntaxAwareDocumentError(
100-
"Couldn't prepare query - no language assigned."
101-
)
102-
10363
return self.language.query(query)
10464

10565
def query_syntax_tree(
@@ -122,12 +82,6 @@ def query_syntax_tree(
12282
Returns:
12383
A tuple containing the nodes and text captured by the query.
12484
"""
125-
126-
if not TREE_SITTER:
127-
raise SyntaxAwareDocumentError(
128-
"tree-sitter is not available on this architecture."
129-
)
130-
13185
captures_kwargs = {}
13286
if start_point is not None:
13387
captures_kwargs["start_point"] = start_point

0 commit comments

Comments
 (0)