Skip to content

Commit 9c365f7

Browse files
committed
* Fix .text() parameter names - rename seperator to separator (#2, thanks to @cmdlineluser).
* Make `Parser` compatible with `BytesIO` and `TextIO` by adding new methods `.write()` and `.writable()`.
* Accept "html" and "xml" values as `options` in parsing.
* Set default value for `options` as "html".
* Update the documentation examples.
1 parent 87957cb commit 9c365f7

File tree

7 files changed

+110
-14
lines changed

7 files changed

+110
-14
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ crate-type = ["cdylib"]
1717
members = ["treedom", "matching"]
1818

1919
[workspace.package]
20-
version = "0.2.1"
20+
version = "0.3.0"
2121
edition = "2021"
2222
readme = "README.md"
2323
license = "MIT"

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
![image](https://img.shields.io/pypi/l/markupever.svg)
1414
![image](https://img.shields.io/pypi/pyversions/markupever.svg)
1515
![python-test](https://github.com/awolverp/markupever/actions/workflows/test.yml/badge.svg)
16+
![download](https://img.shields.io/pypi/dm/markupever?style=flat-square&color=%23314bb5)
1617

1718
------
1819

@@ -66,5 +67,8 @@ body = html.create_element("body")
6667
body.create_text("Hello Everyone ...")
6768

6869
print(root.serialize())
69-
# <!DOCTYPE html><html lang="en"><body>Hello Everyone ...</body></html>
70+
# <!DOCTYPE html>
71+
# <html lang="en">
72+
# <body>Hello Everyone ...</body>
73+
# </html>
7074
```

docs/docs/more-examples.md

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,38 @@ There's a collection of examples for markupever library.
1212
**This documentation is incomplete**. Documenting everything takes a while.
1313

1414

15-
### Using markupever alongside HTTP clients
16-
How to use markupever alongside HTTP clients such as `httpx`, `requests` and `aiohttp`.
15+
### Using markupever alongside HTTPX
16+
How to use markupever alongside `httpx` library.
1717

1818
=== "httpx (traditional)"
1919

2020
```python
21+
import markupever
22+
import httpx
23+
24+
# Create a Client instance
2125
with httpx.Client() as client:
26+
# Send a GET request to example.com
2227
response = client.get("https://www.example.com/")
28+
29+
# Parse the result using markupever
2330
dom = markupever.parse(response.content, markupever.HtmlOptions())
2431
```
2532

2633
=== "httpx (recommended)"
2734

2835
```python
36+
import markupever
37+
import httpx
38+
39+
# Create a Client instance
2940
with httpx.Client() as client:
41+
# Stream a GET request to example.com
3042
with client.stream(
3143
"GET",
3244
"https://www.example.com/",
3345
) as stream:
46+
# Parse the result using markupever
3447
parser = markupever.Parser(markupever.HtmlOptions())
3548

3649
for content in stream.iter_bytes():
@@ -39,17 +52,63 @@ How to use markupever alongside HTTP clients such as `httpx`, `requests` and `ai
3952
dom = parser.finish().into_dom()
4053
```
4154

55+
### Using markupever alongside Requests
56+
How to use markupever alongside `requests` library.
57+
4258
=== "requests"
4359

4460
```python
61+
import markupever
62+
import requests
63+
64+
# Send a GET request to example.com
4565
response = requests.get("https://www.example.com/")
66+
67+
# Parse the result using markupever
4668
dom = markupever.parse(response.content, markupever.HtmlOptions())
4769
```
4870

71+
### Using markupever alongside AIOHttp
72+
How to use markupever alongside `aiohttp` library.
73+
4974
=== "aiohttp"
5075

5176
```python
77+
# Create a ClientSession instance
5278
async with aiohttp.ClientSession() as session:
79+
# Send a GET request to google
5380
async with session.get('https://www.google.com/') as resp:
81+
# Parse the result using markupever
5482
dom = markupever.parse(await resp.read(), markupever.HtmlOptions())
5583
```
84+
85+
### Using markupever alongside PycURL
86+
How to use markupever alongside `PycURL` library.
87+
88+
=== "pycurl (recommended & easy)"
89+
90+
```python
91+
import pycurl
92+
import certifi
93+
from io import BytesIO
94+
95+
# Create a PycURL instance
96+
c = pycurl.Curl()
97+
98+
# Define Options ...
99+
c.setopt(c.URL, 'https://www.google.com/')
100+
c.setopt(c.CAINFO, certifi.where())
101+
102+
# Set up markupever to receive the response
103+
parser = markupever.Parser()
104+
c.setopt(c.WRITEDATA, parser)
105+
106+
# Send Request
107+
c.perform()
108+
109+
# Close Connection
110+
c.close()
111+
112+
# Use the parsed DOM
113+
dom = parser.finish().into_dom()
114+
```

python/markupever/dom.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -340,14 +340,14 @@ def strings(self, strip: bool = False):
340340
else:
341341
yield descendant.content
342342

343-
def text(self, seperator: str = "", strip: bool = False) -> str:
343+
def text(self, separator: str = "", strip: bool = False) -> str:
344344
"""
345345
Concatenates text from all descendant text nodes into a single string.
346346
347-
- seperator (str, optional): String used to join text nodes. Defaults to an empty string.
347+
- separator (str, optional): String used to join text nodes. Defaults to an empty string.
348348
- strip (bool, optional): Whether to strip whitespace from text nodes. Defaults to False.
349349
"""
350-
return seperator.join(self.strings(strip=strip))
350+
return separator.join(self.strings(strip=strip))
351351

352352
def serialize_bytes(
353353
self, indent: int = 4, is_html: typing.Optional[bool] = None, include_self: bool = True
@@ -854,11 +854,11 @@ def __getitem__(self, index: int) -> typing.Tuple[_rustlib.QualName, str]: ...
854854

855855
def __getitem__(self, index):
856856
if not isinstance(index, int):
857-
_, index = self._find_by_key(index)
858-
if index == -1:
857+
_, index_i = self._find_by_key(index)
858+
if index_i == -1:
859859
raise KeyError(index)
860860

861-
_, val = self.__raw.get_by_index(index)
861+
_, val = self.__raw.get_by_index(index_i)
862862
return val
863863

864864
return self.__raw.get_by_index(index)

python/markupever/parser.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
class Parser:
77
__slots__ = ("__raw", "__state")
88

9-
def __init__(self, options: typing.Union[_rustlib.HtmlOptions, _rustlib.XmlOptions]):
9+
def __init__(self, options: typing.Union[_rustlib.HtmlOptions, _rustlib.XmlOptions, typing.Literal["html"], typing.Literal["xml"]] = "html"):
1010
"""
1111
An HTML/XML parser, ready to receive unicode input.
1212
@@ -16,13 +16,40 @@ def __init__(self, options: typing.Union[_rustlib.HtmlOptions, _rustlib.XmlOptio
1616
For `options`: if your input is an HTML document, pass `HtmlOptions`;
1717
if your input is an XML document, pass `XmlOptions`.
1818
"""
19+
if isinstance(options, str):
20+
if options == "html":
21+
options = _rustlib.HtmlOptions()
22+
elif options == "xml":
23+
options = _rustlib.XmlOptions()
24+
else:
25+
raise ValueError(f"invalid parser options: {options!r}")
26+
1927
self.__raw = _rustlib.Parser(options)
2028

2129
# 0 - processing
2230
# 1 - finished
2331
# 2 - converted
2432
self.__state = 0
2533

34+
def writable(self) -> bool:
35+
"""
36+
Same as `Parser.is_finished`.
37+
38+
This function exists to make `Parser` like a `BytesIO` and `StringIO`.
39+
You can pass the `Parser` to any function that needs a writable buffer or IO object.
40+
"""
41+
return self.is_finished
42+
43+
def write(self, content: typing.Union[str, bytes]) -> int:
44+
"""
45+
Same as `Parser.process`.
46+
47+
This function exists to make `Parser` like a `BytesIO` and `StringIO`.
48+
You can pass the `Parser` to any function that needs a writable buffer or IO object.
49+
"""
50+
self.__raw.process(content)
51+
return len(content)
52+
2653
def process(self, content: typing.Union[str, bytes]) -> "Parser":
2754
"""
2855
Processes an input.
@@ -86,7 +113,7 @@ def __repr__(self) -> str:
86113

87114
def parse(
88115
content: typing.Union[str, bytes],
89-
options: typing.Union[_rustlib.HtmlOptions, _rustlib.XmlOptions],
116+
options: typing.Union[_rustlib.HtmlOptions, _rustlib.XmlOptions, typing.Literal["html"], typing.Literal["xml"]] = "html",
90117
) -> TreeDom:
91118
"""
92119
Parses HTML or XML content and returns the parsed document tree.
@@ -105,7 +132,7 @@ def parse(
105132

106133
def parse_file(
107134
path: typing.Union[str, typing.TextIO, typing.BinaryIO],
108-
options: typing.Union[_rustlib.HtmlOptions, _rustlib.XmlOptions],
135+
options: typing.Union[_rustlib.HtmlOptions, _rustlib.XmlOptions, typing.Literal["html"], typing.Literal["xml"]] = "html",
109136
*,
110137
chunk_size: int = 10240,
111138
) -> TreeDom:

python/tests/test_dom.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def test_connect_node():
149149

150150
assert p.text() == "\ncontent 1\ncontent 2"
151151
assert p.text(strip=True) == "content 1content 2"
152-
assert p.text(seperator="\t", strip=True) == "content 1\tcontent 2"
152+
assert p.text(separator="\t", strip=True) == "content 1\tcontent 2"
153153

154154
assert text.has_siblings
155155
assert p.has_children

python/tests/test_parser.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ def test_parser(): # this is a copy of test_rustlib.test_parser for markupever.
5454
with pytest.raises(RuntimeError):
5555
parser.errors()
5656

57+
_ = markupever.Parser("html")
58+
_ = markupever.Parser("xml")
59+
60+
with pytest.raises(ValueError):
61+
_ = markupever.Parser("invalid")
62+
5763

5864
def test_parse_function():
5965
assert isinstance(

0 commit comments

Comments
 (0)