Skip to content

Commit ad3501f

Browse files
authored
Merge pull request #93 from scailfin/feature/yaml-include
Feature/yaml include
2 parents f4aa528 + da5de97 commit ad3501f

File tree

15 files changed

+151
-23
lines changed

15 files changed

+151
-23
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,3 +142,9 @@
142142
* Support different storage volumes for different types of workers.
143143
* Add support for workflow steps that are implemented as Jupyter Notebooks (\#79).
144144
* Include flowapp to run workflow templates using streamlit GUI.
145+
146+
147+
### 0.9.1 - 2021-06-22
148+
149+
* Add option to resolve references to external files when reading Yaml files.
150+
* Support inclusion and import of files in instructions markdown text.

flowserv/client/cli/workflow.py

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from flowserv.client.api import service
1616
from flowserv.client.cli.table import ResultTable
1717
from flowserv.model.parameter.base import PARA_INT, PARA_STRING
18+
from flowserv.model.workflow.manifest import read_instructions
1819

1920
import flowserv.view.files as flbls
2021
import flowserv.view.run as rlbls
@@ -319,21 +320,3 @@ def cli_workflow_download():
319320
cli_benchmark.add_command(list_workflows, name='list')
320321
cli_benchmark.add_command(show_ranking, name='ranking')
321322
cli_benchmark.add_command(get_workflow, name='show')
322-
323-
324-
# -- Helper Methods -----------------------------------------------------------
325-
326-
def read_instructions(filename):
327-
"""Read instruction text from a given file. If the filename is None the
328-
result will be None as well.
329-
330-
Returns
331-
-------
332-
string
333-
"""
334-
# Read instructions from file if given
335-
instruction_text = None
336-
if filename is not None:
337-
with open(filename, 'r') as f:
338-
instruction_text = f.read().strip()
339-
return instruction_text

flowserv/model/workflow/manifest.py

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
"""Helper functions to read workflow manifest files."""
1010

1111
import os
12+
import re
1213

1314
from typing import List, Tuple
1415

@@ -36,6 +37,9 @@
3637
# Flowserv manifest file names..
3738
MANIFEST_FILES = ['flowserv{}'.format(suffix) for suffix in DEFAULT_SUFFIXES]
3839

40+
"""Regular expression for file includes in markdown."""
41+
REGEX_INCLUDE = r'\{\{(.*?)\}\}'
42+
3943

4044
class WorkflowManifest(object):
4145
"""The workflow manifest contains the workflow specification, the workflow
@@ -190,8 +194,7 @@ def load(
190194
manifest_value=doc.get('instructions'),
191195
user_argument=instructions
192196
)
193-
with open(filename, 'r') as f:
194-
doc['instructions'] = f.read().strip()
197+
doc['instructions'] = read_instructions(filename)
195198
# Get the workflow specification file.
196199
filename = getfile(
197200
basedir=basedir,
@@ -256,6 +259,46 @@ def getfile(basedir, manifest_value, user_argument):
256259
return os.path.join(basedir, manifest_value)
257260

258261

262+
def read_instructions(filename: str) -> str:
263+
"""Read instruction text from a given file. If the filename is None the
264+
result will be None as well.
265+
266+
Returns
267+
-------
268+
string
269+
"""
270+
# Read instructions from file if given.
271+
instruction_text = None
272+
if filename is not None:
273+
with open(filename, 'r') as f:
274+
instruction_text = f.read().strip()
275+
parent = os.path.dirname(os.path.abspath(filename))
276+
277+
# Replace function for file imports.
278+
def replace_include(match):
279+
"""Function to replace references to files in markdown text.
280+
281+
All file names should be relative to the path of the main document that
282+
imports the file.
283+
284+
Parameters
285+
----------
286+
match: re.MatchObject
287+
Regular expression match object.
288+
289+
Returns
290+
-------
291+
string
292+
"""
293+
ref = match.group()
294+
# Strip expression of import reference syntax.
295+
expr = ref[2:-2]
296+
# Read file and return content.
297+
return read_instructions(os.path.join(parent, expr))
298+
299+
return re.sub(REGEX_INCLUDE, replace_include, instruction_text)
300+
301+
259302
def unique_name(name, existing_names):
260303
"""Ensure that the workflow name in the project metadata is not empty, not
261304
longer than 512 character, and unique.

flowserv/util/files.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import shutil
1919
import yaml
2020

21+
from yamlinclude import YamlIncludeConstructor
2122
from typing import Dict, IO, List, Optional, Union
2223

2324

@@ -151,6 +152,10 @@ def read_object(filename: str, format: Optional[str] = None) -> Dict:
151152
else:
152153
format = FORMAT_YAML
153154
if format.upper() == FORMAT_YAML:
155+
YamlIncludeConstructor.add_to_loader_class(
156+
loader_class=yaml.FullLoader,
157+
base_dir=os.path.dirname(os.path.abspath(filename))
158+
)
154159
with open(filename, 'r') as f:
155160
return yaml.load(f.read(), Loader=yaml.FullLoader)
156161
elif format.upper() == FORMAT_JSON:

flowserv/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@
77
# terms of the MIT License; see LICENSE file for more details.
88

99
"""Information about the current version of the flowServ package."""
10-
__version__ = '0.9.0'
10+
__version__ = '0.9.1'

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ requests
1010
SQLAlchemy>=1.3.18
1111
papermill
1212
Click
13+
pyyaml-include
1314
boto3
1415
google-cloud-storage
1516
docker

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
'pyyaml>=5.1',
2626
'requests',
2727
'SQLAlchemy>=1.3.18',
28-
'Click'
28+
'Click',
29+
'pyyaml-include'
2930
]
3031
aws_requires = ['boto3']
3132
docker_requires = ['docker']
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Include me
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Include me, too
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# These are the main instructions.
2+
3+
We also include:
4+
5+
* {{include1.md}}, and
6+
* {{include2.md}}.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
name: names
2+
label: 'Input file'
3+
dtype: file
4+
target: data/names.txt
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
name: sleeptime
2+
label: 'Sleep time (s)'
3+
dtype: int
4+
defaultValue: 10
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
workflow:
2+
version: 0.3.0
3+
inputs:
4+
files:
5+
- $[[code]]
6+
- code/analyze.py
7+
- data/sequences.txt
8+
parameters:
9+
codefile: $[[code]]
10+
inputfile: data/sequences.txt
11+
outputfile: results/predictions.txt
12+
workflow:
13+
type: serial
14+
specification:
15+
steps:
16+
- environment: 'python:3.7'
17+
commands:
18+
- ${python} "${codefile}"
19+
--inputfile "${inputfile}"
20+
--outputfile "${outputfile}"
21+
- ${python} code/analyze.py
22+
--inputfile "${outputfile}"
23+
--outputfile results/eval.json
24+
outputs:
25+
files:
26+
- results/predictions.txt
27+
- results/eval.json
28+
parameters:
29+
- !include parameters/para1.yaml
30+
- !include parameters/para2.yaml

tests/controller/serial/engine/test_postproc_workflow.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def test_postproc_workflow(tmpdir):
6969
user_id = create_user(api)
7070
# Create four groups and run the workflow with a slightly different input
7171
# file.
72+
prev_postproc = None
7273
for i in range(4):
7374
with service(user_id=user_id) as api:
7475
group_id = create_group(api, workflow_id)
@@ -108,13 +109,14 @@ def test_postproc_workflow(tmpdir):
108109
assert 'postproc' in wh
109110
serialize.validate_workflow_handle(wh)
110111
attmpts = 0
111-
while wh['postproc']['state'] in st.ACTIVE_STATES:
112+
while wh['postproc']['state'] in st.ACTIVE_STATES or wh['postproc']['id'] == prev_postproc:
112113
time.sleep(1)
113114
with service() as api:
114115
wh = api.workflows().get_workflow(workflow_id=workflow_id)
115116
attmpts += 1
116117
if attmpts > 60:
117118
raise RuntimeError('max. attempts reached')
119+
prev_postproc = wh['postproc']['id']
118120
serialize.validate_workflow_handle(wh)
119121
with service() as api:
120122
ranking = api.workflows().get_ranking(workflow_id=workflow_id)
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# This file is part of the Reproducible and Reusable Data Analysis Workflow
2+
# Server (flowServ).
3+
#
4+
# Copyright (C) 2019-2021 NYU.
5+
#
6+
# flowServ is free software; you can redistribute it and/or modify it under the
7+
# terms of the MIT License; see LICENSE file for more details.
8+
9+
"""Unit tests for reading a workflow templates that contains references to
10+
additional files using !include (pyyaml-include) and {{}} (in instructions
11+
markdown).
12+
"""
13+
14+
import os
15+
16+
from flowserv.model.workflow.manifest import read_instructions
17+
18+
import flowserv.util as util
19+
20+
21+
DIR = os.path.dirname(os.path.realpath(__file__))
22+
BENCHMARK_DIR = os.path.join(DIR, '../../.files/benchmark/include-test')
23+
INSTRUCTIONS_FILE = os.path.join(BENCHMARK_DIR, 'instructions.md')
24+
TEMPLATE_FILE = os.path.join(BENCHMARK_DIR, 'template.yaml')
25+
26+
27+
def test_read_instructions_with_include():
28+
"""Test reading a template that includes other files."""
29+
text = read_instructions(filename=INSTRUCTIONS_FILE)
30+
assert '# These are the main instructions.' in text
31+
assert '* Include me, and' in text
32+
assert '* Include me, too.' in text
33+
34+
35+
def test_read_template_with_include():
36+
"""Test reading a template that includes other files."""
37+
doc = util.read_object(filename=TEMPLATE_FILE)
38+
assert doc['parameters'] == [
39+
{'name': 'names', 'label': 'Input file', 'dtype': 'file', 'target': 'data/names.txt'},
40+
{'name': 'sleeptime', 'label': 'Sleep time (s)', 'dtype': 'int', 'defaultValue': 10}
41+
]

0 commit comments

Comments
 (0)