Skip to content

Commit ba36031

Browse files
authored
TST: Add xfail test for #2336 (#2365)
1 parent: 3ab1581 · commit: ba36031

File tree

2 files changed

+15
-0
lines changed

2 files changed

+15
-0
lines changed

tests/example_files.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -110,3 +110,5 @@
110110
url: https://github.com/py-pdf/pypdf/assets/4083478/56c93021-33cd-4387-ae13-5cbe7e673f42
111111
- local_filename: paid.pdf
112112
url: https://github.com/py-pdf/pypdf/files/12050253/tt.pdf
113+
- local_filename: Pesquisa-de-Precos-Combustiveis-novembro-2023.pdf
114+
url: https://www.joinville.sc.gov.br/wp-content/uploads/2023/11/Pesquisa-de-Precos-Combustiveis-novembro-2023.pdf

tests/test_text_extraction.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,16 @@
33
44
The tested code might be in _page.py.
55
"""
6+
from io import BytesIO
67
from pathlib import Path
78

89
import pytest
910

1011
from pypdf import PdfReader, mult
1112
from pypdf._text_extraction import set_custom_rtl
1213

14+
from . import get_data_from_url
15+
1316
TESTS_ROOT = Path(__file__).parent.resolve()
1417
PROJECT_ROOT = TESTS_ROOT.parent
1518
RESOURCE_ROOT = PROJECT_ROOT / "resources"
@@ -99,3 +102,13 @@ def visitor_text(text, cm, tm, font_dict, font_size) -> None:
99102
x = matches[0]["x"]
100103
y = matches[0]["y"]
101104
assert constraint(x, y), f'Line "{text}" is wrong at x:{x}, y:{y}'
105+
106+
107+
@pytest.mark.xfail(reason="known whitespace issue #2336")
108+
@pytest.mark.enable_socket()
109+
def test_issue_2336():
110+
name = "Pesquisa-de-Precos-Combustiveis-novembro-2023.pdf"
111+
reader = PdfReader(BytesIO(get_data_from_url(name=name)))
112+
page = reader.pages[0]
113+
actual_text = page.extract_text()
114+
assert "Beira Rio" in actual_text

0 commit comments

Comments
 (0)