Skip to content

Commit d1c2b9d

Browse files
committed
v0.5.5: fix mupdf parser
1 parent: 404dc9a · commit: d1c2b9d

File tree

6 files changed

+119
-390
lines changed

6 files changed

+119
-390
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,8 @@
11
# Changelog
22

3+
## 0.5.5 - 2022-08-06
4+
- bug fix in MuPDF parser
5+
36
## 0.5.4 - 2022-02-01
47
- bug fix in CAS summary statement parser
58

casparser/parsers/mupdf.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -95,10 +95,10 @@ def parse_file_type(blocks):
9595
return FileType.UNKNOWN
9696

9797

98-
def parse_investor_info(page_dict) -> InvestorInfo:
98+
def parse_investor_info(page_dict, page_rect: fitz.Rect) -> InvestorInfo:
9999
"""Parse investor info."""
100-
width = max(page_dict["width"], 600)
101-
height = max(page_dict["height"], 800)
100+
width = max(page_rect.width, 600)
101+
height = max(page_rect.height, 800)
102102

103103
blocks = sorted(
104104
[x for x in page_dict["blocks"] if x["bbox"][1] < height / 2], key=lambda x: x["bbox"][1]
@@ -190,7 +190,7 @@ def cas_pdf_to_text(filename: Union[str, io.IOBase], password) -> PartialCASData
190190

191191
with fp:
192192
try:
193-
doc = fitz.open(stream=fp.read(), filetype="pdf")
193+
doc = fitz.Document(stream=fp.read(), filetype="pdf")
194194
except Exception as e:
195195
raise CASParseError("Unhandled error while opening file :: %s" % (str(e)))
196196

@@ -210,7 +210,7 @@ def cas_pdf_to_text(filename: Union[str, io.IOBase], password) -> PartialCASData
210210
file_type = parse_file_type(blocks)
211211
sorted_blocks = sorted(blocks, key=itemgetter(1, 0))
212212
if investor_info is None:
213-
investor_info = parse_investor_info(page_dict)
213+
investor_info = parse_investor_info(page_dict, page.rect)
214214
pages.append(sorted_blocks)
215215
lines = group_similar_rows(pages)
216216
return PartialCASData(file_type=file_type, investor_info=investor_info, lines=lines)

casparser/process/utils.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
from decimal import Decimal
21
from typing import Optional, Tuple
32

43
from casparser_isin import MFISINDb

0 commit comments

Comments
 (0)