Skip to content

Commit ac01e80

Browse files
committed
[fdiff.diff] major refactor, add new external_diff function
Adds library support for external executables as the source of diff output with new --external command line flag
1 parent 792f5a8 commit ac01e80

File tree

1 file changed

+195
-90
lines changed

1 file changed

+195
-90
lines changed

lib/fdiff/diff.py

Lines changed: 195 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import asyncio
22
import os
33
from multiprocessing import Pool, cpu_count
4+
import shlex
5+
import subprocess
46
import tempfile
57

68
from fontTools.ttLib import TTFont
@@ -14,12 +16,149 @@
1416
from fdiff.utils import get_file_modtime
1517

1618

19+
#
20+
#
21+
# Private functions
22+
#
23+
#
24+
25+
26+
def _async_fetch_files(dirpath, urls):
    """Fetch every URL in ``urls`` and raise if any request did not succeed.

    Runs ``create_async_get_request_session_and_run(urls, dirpath)`` to
    completion on the asyncio event loop, then inspects each returned task.

    :param dirpath: directory path handed through to the download coroutine
    :param urls: list of URL strings to request
    :raises: fdiff.exceptions.AIOError if a task completed with an exception
    :raises: fdiff.exceptions.AIOError if a task result reports a non-200
        HTTP status code
    """
    loop = asyncio.get_event_loop()
    tasks = loop.run_until_complete(
        create_async_get_request_session_and_run(urls, dirpath)
    )
    for task in tasks:
        error = task.exception()
        if error:
            # raise exception here to notify calling code that something
            # did not work; chain the original error so that its traceback
            # is not lost
            raise AIOError(f"{error}") from error
        response = task.result()
        if response.http_status != 200:
            # handle non-200 HTTP response status codes + file write fails
            raise AIOError(
                f"failed to pull '{response.url}' with HTTP status code {response.http_status}"
            )
41+
42+
43+
def _get_fonts_and_save_xml(
    filepath_a, filepath_b, tmpdirpath, include_tables, exclude_tables, use_multiprocess
):
    """Resolve both font paths, validate table filters, and dump TTX XML files.

    The two fonts are written as ``left.ttx`` and ``right.ttx`` inside
    ``tmpdirpath``.

    :param filepath_a: path or URL of the pre/left font
    :param filepath_b: path or URL of the post/right font
    :param tmpdirpath: directory that receives the TTX dumps (and is passed to
        the path resolution step for remote file requests)
    :param include_tables: iterable of table tags to include, or None
    :param exclude_tables: iterable of table tags to exclude, or None
    :param use_multiprocess: bool, request parallel XML dumps when True
    :returns: tuple of (left_ttxpath, right_ttxpath, pre_pathname, prepath,
        post_pathname, postpath)
    :raises: KeyError if an include/exclude table tag is in neither font
    """
    post_pathname, postpath, pre_pathname, prepath = _get_pre_post_paths(
        filepath_a, filepath_b, tmpdirpath
    )
    # instantiate left and right fontTools.ttLib.TTFont objects and make sure
    # that they are closed explicitly, including when validation or the XML
    # dump raises, instead of leaving the open readers to garbage collection
    tt_left = TTFont(prepath)
    try:
        tt_right = TTFont(postpath)
        try:
            _validate_table_includes(include_tables, tt_left, tt_right)
            _validate_table_excludes(exclude_tables, tt_left, tt_right)
            left_ttxpath = os.path.join(tmpdirpath, "left.ttx")
            right_ttxpath = os.path.join(tmpdirpath, "right.ttx")
            _mp_save_ttx_xml(
                tt_left,
                tt_right,
                left_ttxpath,
                right_ttxpath,
                exclude_tables,
                include_tables,
                use_multiprocess,
            )
        finally:
            tt_right.close()
    finally:
        tt_left.close()
    return left_ttxpath, right_ttxpath, pre_pathname, prepath, post_pathname, postpath
66+
67+
68+
def _get_pre_post_paths(filepath_a, filepath_b, dirpath):
69+
urls = []
70+
if filepath_a.startswith("http"):
71+
urls.append(filepath_a)
72+
prepath = _get_filepath_from_url(filepath_a, dirpath)
73+
# keep URL as path name for remote file requests
74+
pre_pathname = filepath_a
75+
else:
76+
prepath = filepath_a
77+
pre_pathname = filepath_a
78+
if filepath_b.startswith("http"):
79+
urls.append(filepath_b)
80+
postpath = _get_filepath_from_url(filepath_b, dirpath)
81+
# keep URL as path name for remote file requests
82+
post_pathname = filepath_b
83+
else:
84+
postpath = filepath_b
85+
post_pathname = filepath_b
86+
# Async IO fetch and write of any remote file requests
87+
if len(urls) > 0:
88+
_async_fetch_files(dirpath, urls)
89+
return post_pathname, postpath, pre_pathname, prepath
90+
91+
92+
def _mp_save_ttx_xml(
    tt_left,
    tt_right,
    left_ttxpath,
    right_ttxpath,
    exclude_tables,
    include_tables,
    use_multiprocess,
):
    """Dump both fonts to TTX XML files, in parallel when requested and possible.

    :param tt_left: left font object, written to ``left_ttxpath``
    :param tt_right: right font object, written to ``right_ttxpath``
    :param left_ttxpath: out file path for the left font XML
    :param right_ttxpath: out file path for the right font XML
    :param exclude_tables: table tags to skip, or None
    :param include_tables: table tags to include, or None
    :param use_multiprocess: bool, use a two process pool when True and more
        than one CPU is reported
    """
    save_jobs = [
        (tt_left, left_ttxpath, include_tables, exclude_tables),
        (tt_right, right_ttxpath, include_tables, exclude_tables),
    ]

    if not use_multiprocess or cpu_count() <= 1:
        # sequential fontTools.ttLib.TTFont.saveXML dumps when
        # multiprocessing was not requested or a single CPU system
        # is detected
        for job in save_jobs:
            _ttfont_save_xml(*job)
        return

    # Parallel fontTools.ttLib.TTFont.saveXML dump by default on multi CPU
    # systems. This is a performance optimization. Profiling demonstrates
    # that this can reduce execution time by up to 30% for some fonts
    with Pool(processes=2) as pool:
        pool.starmap(_ttfont_save_xml, save_jobs)
118+
119+
17120
def _ttfont_save_xml(ttf, filepath, include_tables, exclude_tables):
18121
"""Writes TTX specification formatted XML to disk on filepath."""
19122
ttf.saveXML(filepath, tables=include_tables, skipTables=exclude_tables)
20123
return True
21124

22125

126+
def _validate_table_excludes(exclude_tables, tt_left, tt_right):
127+
# Validation: exclude_tables request should be for tables that are in one of
128+
# the two fonts. Mis-specified OT table definitions could otherwise result
129+
# in the presence of a table in the diff when the request was to exclude it.
130+
# For example, when an "OS/2" table request is entered as "OS2".
131+
if exclude_tables is not None:
132+
for table in exclude_tables:
133+
if table not in tt_left and table not in tt_right:
134+
raise KeyError(
135+
f"'{table}' table was not identified for exclusion in either font"
136+
)
137+
138+
139+
def _validate_table_includes(include_tables, tt_left, tt_right):
140+
# Validation: include_tables request should be for tables that are in one of
141+
# the two fonts. This otherwise silently passes with exit status code 0 which
142+
# could lead to the interpretation of no diff between two files when the table
143+
# entry is incorrectly defined or is a typo. Let's be conservative and consider
144+
# this an error, force user to use explicit definitions that include tables in
145+
# one of the two files, and understand that the diff request was for one or more
146+
# tables that are not present.
147+
if include_tables is not None:
148+
for table in include_tables:
149+
if table not in tt_left and table not in tt_right:
150+
raise KeyError(
151+
f"'{table}' table was not identified for inclusion in either font"
152+
)
153+
154+
155+
#
156+
#
157+
# Public functions
158+
#
159+
#
160+
161+
23162
def u_diff(
24163
filepath_a,
25164
filepath_b,
@@ -46,103 +185,26 @@ def u_diff(
46185
:raises: fdiff.exceptions.AIOError if exception raised during execution of async I/O
47186
GET request for URL or file write
48187
:raises: fdiff.exceptions.AIOError if GET request to URL returned non-200 response status code"""
49-
with tempfile.TemporaryDirectory() as tmpdirname:
188+
with tempfile.TemporaryDirectory() as tmpdirpath:
50189
# define the file paths with either local file requests
51-
# or pulls of remote files based on the command line request
52-
urls = []
53-
if filepath_a.startswith("http"):
54-
urls.append(filepath_a)
55-
prepath = _get_filepath_from_url(filepath_a, tmpdirname)
56-
# keep URL as path name for remote file requests
57-
pre_pathname = filepath_a
58-
else:
59-
prepath = filepath_a
60-
pre_pathname = filepath_a
61-
62-
if filepath_b.startswith("http"):
63-
urls.append(filepath_b)
64-
postpath = _get_filepath_from_url(filepath_b, tmpdirname)
65-
# keep URL as path name for remote file requests
66-
post_pathname = filepath_b
67-
else:
68-
postpath = filepath_b
69-
post_pathname = filepath_b
70-
71-
# Async IO fetch and write of any remote file requests
72-
if len(urls) > 0:
73-
loop = asyncio.get_event_loop()
74-
tasks = loop.run_until_complete(
75-
create_async_get_request_session_and_run(urls, tmpdirname)
76-
)
77-
for task in tasks:
78-
if task.exception():
79-
# raise exception here to notify calling code that something
80-
# did not work
81-
raise AIOError(f"{task.exception()}")
82-
elif task.result().http_status != 200:
83-
# handle non-200 HTTP response status codes + file write fails
84-
raise AIOError(
85-
f"failed to pull '{task.result().url}' with HTTP status code {task.result().http_status}"
86-
)
87-
88-
# instantiate left and right fontTools.ttLib.TTFont objects
89-
tt_left = TTFont(prepath)
90-
tt_right = TTFont(postpath)
91-
92-
# Validation: include_tables request should be for tables that are in one of
93-
# the two fonts. This otherwise silently passes with exit status code 0 which
94-
# could lead to the interpretation of no diff between two files when the table
95-
# entry is incorrectly defined or is a typo. Let's be conservative and consider
96-
# this an error, force user to use explicit definitions that include tables in
97-
# one of the two files, and understand that the diff request was for one or more
98-
# tables that are not present.
99-
if include_tables is not None:
100-
for table in include_tables:
101-
if table not in tt_left and table not in tt_right:
102-
raise KeyError(
103-
f"'{table}' table was not identified for inclusion in either font"
104-
)
105-
106-
# Validation: exclude_tables request should be for tables that are in one of
107-
# the two fonts. Mis-specified OT table definitions could otherwise result
108-
# in the presence of a table in the diff when the request was to exclude it.
109-
# For example, when an "OS/2" table request is entered as "OS2".
110-
if exclude_tables is not None:
111-
for table in exclude_tables:
112-
if table not in tt_left and table not in tt_right:
113-
raise KeyError(
114-
f"'{table}' table was not identified for exclusion in either font"
115-
)
116-
117-
fromdate = get_file_modtime(prepath)
118-
todate = get_file_modtime(postpath)
119-
120-
left_ttxpath = os.path.join(tmpdirname, "left.ttx")
121-
right_ttxpath = os.path.join(tmpdirname, "right.ttx")
122-
123-
if use_multiprocess and cpu_count() > 1:
124-
# Use parallel fontTools.ttLib.TTFont.saveXML dump
125-
# by default on multi CPU systems. This is a performance
126-
# optimization. Profiling demonstrates that this can reduce
127-
# execution time by up to 30% for some fonts
128-
mp_args_list = [
129-
(tt_left, left_ttxpath, include_tables, exclude_tables),
130-
(tt_right, right_ttxpath, include_tables, exclude_tables),
131-
]
132-
with Pool(processes=2) as pool:
133-
pool.starmap(_ttfont_save_xml, mp_args_list)
134-
else:
135-
# use sequential fontTools.ttLib.TTFont.saveXML dumps
136-
# when use_multiprocess is False or single CPU system
137-
# detected
138-
_ttfont_save_xml(tt_left, left_ttxpath, include_tables, exclude_tables)
139-
_ttfont_save_xml(tt_right, right_ttxpath, include_tables, exclude_tables)
190+
# or HTTP GET requests of remote files based on the command line request
191+
left_ttxpath, right_ttxpath, pre_pathname, prepath, post_pathname, postpath = _get_fonts_and_save_xml(
192+
filepath_a,
193+
filepath_b,
194+
tmpdirpath,
195+
include_tables,
196+
exclude_tables,
197+
use_multiprocess,
198+
)
140199

141200
with open(left_ttxpath) as ff:
142201
fromlines = ff.readlines()
143202
with open(right_ttxpath) as tf:
144203
tolines = tf.readlines()
145204

205+
fromdate = get_file_modtime(prepath)
206+
todate = get_file_modtime(postpath)
207+
146208
return unified_diff(
147209
fromlines,
148210
tolines,
@@ -152,3 +214,46 @@ def u_diff(
152214
todate,
153215
n=context_lines,
154216
)
217+
218+
219+
def external_diff(
    command,
    filepath_a,
    filepath_b,
    include_tables=None,
    exclude_tables=None,
    use_multiprocess=True,
):
    """Generator that streams the output of an external diff executable.

    Both fonts are dumped to TTX XML files in a temporary directory and
    ``command`` is executed with the two XML file paths appended as its last
    two arguments.  Because this is a generator, nothing runs until the
    caller starts iterating.

    :param command: command line string for the external diff tool, split
        into arguments with ``shlex.split``
    :param filepath_a: path or URL of the pre/left font
    :param filepath_b: path or URL of the post/right font
    :param include_tables: iterable of table tags to include, or None
    :param exclude_tables: iterable of table tags to exclude, or None
    :param use_multiprocess: bool, request parallel XML dumps when True
    :returns: yields ``(line, None)`` for every line that the tool writes to
        stdout, followed by one final ``("", exit_status)`` tuple
    :raises: KeyError if an include/exclude table tag is in neither font
    :raises: fdiff.exceptions.AIOError if a remote file fetch fails
    :raises: fdiff.exceptions.AIOError if the tool wrote anything to stderr
    """
    with tempfile.TemporaryDirectory() as tmpdirpath:
        # define the file paths with either local file requests
        # or HTTP GET requests of remote files based on the command line request
        left_ttxpath, right_ttxpath, *_ = _get_fonts_and_save_xml(
            filepath_a,
            filepath_b,
            tmpdirpath,
            include_tables,
            exclude_tables,
            use_multiprocess,
        )

        # Split only the user supplied command.  The XML paths are appended
        # as ready-made arguments so that whitespace or backslashes in the
        # temporary directory path are not re-interpreted by shlex.
        argv = shlex.split(command.strip()) + [left_ttxpath, right_ttxpath]

        # stderr is collected in a temporary file instead of a pipe: a pipe
        # that is not drained while stdout is being read can fill up and
        # block the child process, which deadlocks the read loop below.
        with tempfile.TemporaryFile(mode="w+", encoding="utf8") as stderr_file:
            # the context manager closes the stdout pipe and waits for the
            # process, including when the consumer abandons the generator
            with subprocess.Popen(
                argv,
                stdout=subprocess.PIPE,
                stderr=stderr_file,
                encoding="utf8",
            ) as process:
                for line in process.stdout:
                    yield line, None
                # stdout reached EOF; block until the exit status is known
                # rather than polling in a busy loop
                exit_status = process.wait()

            stderr_file.seek(0)
            err = stderr_file.read()
            if err:
                raise AIOError(err)
            yield "", exit_status

0 commit comments

Comments
 (0)