Skip to content

[ENG-8223] Fix mfr unittests #384

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 28 commits into
base: feature/buff-worms
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
44e3f0f
ENG-7603 upgrade infra deps and another updates that is needed to be …
mkovalua May 30, 2025
e243c1f
[tool:pytest] is a way to work with python3.13
mkovalua Jun 3, 2025
3bab7d9
update requirements.txt to make it possible to run unittest tests , a…
mkovalua Jun 4, 2025
caaf0d2
fix bad for py3.13 async with await aiohttp.request -> to async with …
mkovalua Jun 4, 2025
e660d25
ContentEncodingError is removed for python3.13 -> replace with excep…
mkovalua Jun 4, 2025
d2f831e
fix for py3.13
mkovalua Jun 5, 2025
2b7f97e
Todo: for possible agent lib removal
mkovalua Jun 5, 2025
18c0522
fix wrong fixture call usage
mkovalua Jun 12, 2025
9586566
return markdown version with __version__ (it is string, maybe it is n…
mkovalua Jun 12, 2025
fd89244
return Image version with __version__ (it is string, maybe it is need…
mkovalua Jun 12, 2025
d572ebd
fix 0 index access error
mkovalua Jun 13, 2025
fb0d901
fix test_render_iso_not_utf16 body structure changings
mkovalua Jun 13, 2025
c8ebb09
fix AttributeError: '_PyDocXHTMLExporter' object has no attribute 'pa…
mkovalua Jun 13, 2025
e5ab0ab
fix AttributeError: module 'PIL.Image' has no attribute 'ANTIALIAS'
mkovalua Jun 13, 2025
79bb027
fix jinja2.exceptions.TemplateNotFound: basic (template_file looks t…
mkovalua Jun 13, 2025
af2f5c1
fix EscapeHtml.extendMarkdown() missing 1 required positional argume…
mkovalua Jun 13, 2025
3e50ae3
fix module 'PIL.Image' has no attribute 'VERSION
mkovalua Jun 13, 2025
abc3692
fix AttributeError: 'Dataset' object has no attribute 'value'
mkovalua Jun 13, 2025
7348c14
fix AttributeError: 'Series' object has no attribute 'iteritems'
mkovalua Jun 13, 2025
b7fd773
fix numpy.asscalar deprecation
mkovalua Jun 16, 2025
83f46b2
resolve RuntimeError: There is no current event loop in thread 'Main…
mkovalua Jun 18, 2025
45aaf82
fix test_render test, it looks html body was updated with another <tr…
mkovalua Jun 18, 2025
b66bb3e
fix xrld issue
sh-andriy Jun 18, 2025
6b05768
merge feature/buff-worms into fix-mfr-unittests
mkovalua Jun 18, 2025
b3b9bd6
Merge remote-tracking branch 'mkovalua/fix-mfr-unittests' into fix-mf…
sh-andriy Jun 18, 2025
5864da7
fix xrld issue
sh-andriy Jun 18, 2025
5d599bb
fix xrld issue
sh-andriy Jun 18, 2025
d4683eb
remain old test.docx file
mkovalua Jun 19, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions mfr/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,14 +114,15 @@ def get_renderer_name(name: str) -> str:
# `ep` holds the entry points selected for this name. Convert it to a `list` before checking its length or indexing into it.
# Call `list()` only once: if `ep` is a one-shot iterator, it is exhausted after the conversion.
ep = entry_points().select(group='mfr.renderers', name=name.lower())
ep_list = list(ep)

# Empty list indicates unsupported file type. Return '' and let `make_renderer()` handle it.
if len(ep) == 0:
if len(ep_list) == 0:
return ''

# If the file type is supported, there must be only one element in the list.
assert len(ep) == 1
return ep[0].value.split(":")[1].split('.')[0]
assert len(ep_list) == 1
return ep_list[0].value.split(":")[-1]


def get_exporter_name(name: str) -> str:
Expand All @@ -135,14 +136,15 @@ def get_exporter_name(name: str) -> str:
# `ep` holds the entry points selected for this name. Convert it to a `list` before checking its length or indexing into it.
# Call `list()` only once: if `ep` is a one-shot iterator, it is exhausted after the conversion.
ep = entry_points().select(group='mfr.exporters', name=name.lower())
ep_list = list(ep)

# Empty list indicates unsupported export type. Return '' and let `make_exporter()` handle it.
if len(ep) == 0:
if len(ep_list) == 0:
return ''

# If the export type is supported, there must be only one element in the list.
assert len(ep) == 1
return ep[0].value.split(":")[1].split('.')[0]
assert len(ep_list) == 1
return ep_list[0].value.split(":")[-1]


def sizeof_fmt(num, suffix='B'):
Expand Down
1 change: 1 addition & 0 deletions mfr/extensions/docx/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .render import DocxRenderer # noqa
40 changes: 40 additions & 0 deletions mfr/extensions/docx/render.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import os

import pydocx.export
from mako.lookup import TemplateLookup

from mfr.core import extension


class DocxRenderer(extension.BaseRenderer):
    """Render a ``.docx`` file as HTML inside the ``viewer.mako`` template.

    The document at ``self.file_path`` is converted with pydocx and the
    resulting markup is handed to the template as ``body``.
    """

    # The template is resolved once, at class-definition time, from the
    # ``templates`` directory that sits next to this module.
    TEMPLATE = TemplateLookup(
        directories=[
            os.path.join(os.path.dirname(__file__), 'templates')
        ]).get_template('viewer.mako')

    # Workaround to remove default stylesheet and inlined styles
    # see: https://github.com/CenterForOpenScience/pydocx/issues/102
    class _PyDocXHTMLExporter(pydocx.export.PyDocXHTMLExporter):
        """pydocx HTML exporter whose style and indent hooks are no-ops."""

        def style(self):
            # Emit no stylesheet content.
            return ''

        def indent(self, text, *args, **kwargs):
            # Return the text untouched; extra arguments are ignored.
            return text

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Record the pydocx version alongside the other renderer metrics.
        self.metrics.add('pydocx_version', pydocx.__version__)

    def render(self):
        """Return the viewer template rendered with the converted document.

        :return: output of ``TEMPLATE.render`` with ``base`` set to
            ``self.assets_url`` and ``body`` set to the exported HTML
        """
        # The template interpolates ``${body}``, so it must receive the
        # exported HTML string. Passing the exporter object itself would
        # render that object's ``str()`` instead of the document.
        body = self._PyDocXHTMLExporter(self.file_path).export()
        return self.TEMPLATE.render(base=self.assets_url, body=body)

    @property
    def file_required(self):
        # Always True for this renderer.
        # NOTE(review): presumably tells the framework a local copy of the
        # file is needed before ``render()`` - confirm against BaseRenderer.
        return True

    @property
    def cache_result(self):
        # Always True for this renderer.
        # NOTE(review): presumably allows the rendered output to be cached -
        # confirm against BaseRenderer.
        return True
6 changes: 6 additions & 0 deletions mfr/extensions/docx/templates/viewer.mako
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<div style="word-wrap: break-word;" class="mfrViewer">
${body}
</div>

<script src="/static/js/mfr.js"></script>
<script src="/static/js/mfr.child.js"></script>
1 change: 1 addition & 0 deletions mfr/extensions/ipynb/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def render(self):
'enabled': False,
},
}))

(body, _) = exporter.from_notebook_node(notebook)
return self.TEMPLATE.render(base=self.assets_url, body=body)

Expand Down
7 changes: 3 additions & 4 deletions mfr/extensions/md/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@


class EscapeHtml(Extension):
def extendMarkdown(self, md, md_globals):
# Todo: do not see extendMarkdown explicit call and what is passed as the method args, maybe it is ok
del md.preprocessors['html_block']
del md.inlinePatterns['html']
def extendMarkdown(self, md):
md.preprocessors.deregister('html_block')
md.inlinePatterns.deregister('html')


class MdRenderer(extension.BaseRenderer):
Expand Down
2 changes: 1 addition & 1 deletion mfr/extensions/tabular/libs/panda_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def data_from_dataframe(dataframe):
data_row = {}
for name, value in frame_row.items():
try:
data_row[name] = numpy.asscalar(value)
data_row[name] = value.item()
except AttributeError:
data_row[name] = value
data.append(data_row)
Expand Down
111 changes: 77 additions & 34 deletions mfr/extensions/tabular/libs/xlrd_tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import xlrd
import zipfile
from collections import OrderedDict
from ..exceptions import TableTooBigError
from ..exceptions import TableTooBigError, MissingRequirementsError

from ..utilities import header_population
from mfr.extensions.tabular.compat import range, basestring
Expand All @@ -11,42 +12,84 @@ def xlsx_xlrd(fp):
:param fp: File pointer object
:return: tuple of table headers and data
"""
max_size = 10000
MAX_SIZE = 10000

wb = xlrd.open_workbook(fp.name)
try:
wb = xlrd.open_workbook(fp.name)
using_xlrd = True
except xlrd.biffh.XLRDError:
using_xlrd = False
try:
from openpyxl import load_workbook
except ImportError:
raise MissingRequirementsError(
'openpyxl is required to read .xlsx files',
function_preference='openpyxl'
)
try:
wb = load_workbook(fp.name, data_only=True)
except zipfile.BadZipFile:
raise xlrd.biffh.XLRDError("Excel xlsx file; not supported")

sheets = OrderedDict()

for sheet in wb.sheets():
if sheet.ncols > max_size or sheet.nrows > max_size:
raise TableTooBigError('Table is too large to render.', '.xlsx',
nbr_cols=sheet.ncols, nbr_rows=sheet.nrows)

if sheet.ncols < 1 or sheet.nrows < 1:
sheets[sheet.name] = ([], [])
continue

fields = sheet.row_values(0) if sheet.nrows else []

fields = [
str(value)
if not isinstance(value, basestring) and value is not None
else value or f'Unnamed: {index + 1}'
for index, value in enumerate(fields)
]

data = []
for i in range(1, sheet.nrows):
row = []
for cell in sheet.row(i):
if cell.ctype == xlrd.XL_CELL_DATE:
value = xlrd.xldate.xldate_as_datetime(cell.value, wb.datemode).isoformat()
else:
value = cell.value
row.append(value)
data.append(dict(zip(fields, row)))

header = header_population(fields)
sheets[sheet.name] = (header, data)
if using_xlrd:
for sheet in wb.sheets():
if sheet.ncols > MAX_SIZE or sheet.nrows > MAX_SIZE:
raise TableTooBigError('Table is too large to render.', '.xlsx',
nbr_cols=sheet.ncols, nbr_rows=sheet.nrows)

if sheet.ncols < 1 or sheet.nrows < 1:
sheets[sheet.name] = ([], [])
continue

fields = sheet.row_values(0) if sheet.nrows else []

fields = [
str(value)
if not isinstance(value, basestring) and value is not None
else value or f'Unnamed: {index + 1}'
for index, value in enumerate(fields)
]

data = []
for i in range(1, sheet.nrows):
row = []
for cell in sheet.row(i):
if cell.ctype == xlrd.XL_CELL_DATE:
value = xlrd.xldate.xldate_as_datetime(cell.value, wb.datemode).isoformat()
else:
value = cell.value
row.append(value)
data.append(dict(zip(fields, row)))

header = header_population(fields)
sheets[sheet.name] = (header, data)

else:
for name in wb.sheetnames:
ws = wb[name]
nrows = ws.max_row
ncols = ws.max_column
if ncols > MAX_SIZE or nrows > MAX_SIZE:
raise TableTooBigError('Table is too large to render.', '.xlsx',
nbr_cols=ncols, nbr_rows=nrows)

if nrows < 1 or ncols < 1:
sheets[name] = ([], [])
continue

header_row = next(ws.iter_rows(min_row=1, max_row=1, values_only=True))
fields = [
str(val) if val is not None else f'Unnamed: {i+1}'
for i, val in enumerate(header_row)
]

data = []
for row in ws.iter_rows(min_row=2, max_row=nrows, max_col=ncols, values_only=True):
data.append(dict(zip(fields, row)))

header = header_population(fields)
sheets[name] = (header, data)

return sheets
Loading
Loading