Skip to content

Commit 88820df

Browse files
committed
New packaging support
1 parent 28b22ae commit 88820df

24 files changed

+1528
-841
lines changed

metaflow/cli.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from .exception import CommandException, MetaflowException
1616
from .flowspec import _FlowState
1717
from .graph import FlowGraph
18+
from .meta_files import read_included_dist_info
1819
from .metaflow_config import (
1920
DEFAULT_DATASTORE,
2021
DEFAULT_DECOSPECS,
@@ -27,6 +28,7 @@
2728
from .metaflow_current import current
2829
from metaflow.system import _system_monitor, _system_logger
2930
from .metaflow_environment import MetaflowEnvironment
31+
from .package.mfenv import PackagedDistributionFinder
3032
from .plugins import (
3133
DATASTORES,
3234
ENVIRONMENTS,
@@ -326,6 +328,11 @@ def start(
326328
echo(" executing *%s*" % ctx.obj.flow.name, fg="magenta", nl=False)
327329
echo(" for *%s*" % resolve_identity(), fg="magenta")
328330

331+
# Check if we need to setup the distribution finder (if running )
332+
dist_info = read_included_dist_info()
333+
if dist_info:
334+
sys.meta_path.append(PackagedDistributionFinder(dist_info))
335+
329336
# Setup the context
330337
cli_args._set_top_kwargs(ctx.params)
331338
ctx.obj.echo = echo

metaflow/client/core.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,13 @@
3232
from metaflow.includefile import IncludedFile
3333
from metaflow.metaflow_config import DEFAULT_METADATA, MAX_ATTEMPTS
3434
from metaflow.metaflow_environment import MetaflowEnvironment
35+
from metaflow.meta_files import MFCONF_DIR, MFENV_DIR
36+
from metaflow.package.mfenv import MFEnv
3537
from metaflow.plugins import ENVIRONMENTS, METADATA_PROVIDERS
38+
from metaflow.meta_files import MetaFile
3639
from metaflow.unbounded_foreach import CONTROL_TASK_TAG
3740
from metaflow.util import cached_property, is_stringish, resolve_identity, to_unicode
3841

39-
from ..info_file import INFO_FILE
4042
from .filecache import FileCache
4143

4244
if TYPE_CHECKING:
@@ -824,9 +826,8 @@ def __init__(self, flow_name: str, code_package: str):
824826
)
825827
code_obj = BytesIO(blobdata)
826828
self._tar = tarfile.open(fileobj=code_obj, mode="r:gz")
827-
# The JSON module in Python3 deals with Unicode. Tar gives bytes.
828-
info_str = (
829-
self._tar.extractfile(os.path.basename(INFO_FILE)).read().decode("utf-8")
829+
info_str = MFEnv.get_archive_content(self._tar, MetaFile.INFO_FILE).decode(
830+
encoding="utf-8"
830831
)
831832
self._info = json.loads(info_str)
832833
self._flowspec = self._tar.extractfile(self._info["script"]).read()
@@ -917,6 +918,9 @@ def extract(self) -> TemporaryDirectory:
917918
# This file is created when using the conda/pypi features available in
918919
# nflx-metaflow-extensions: https://github.com/Netflix/metaflow-nflx-extensions
919920
"condav2-1.cnd",
921+
# Going forward, we only need to exclude MFENV_DIR and MFCONF_DIR
922+
MFENV_DIR,
923+
MFCONF_DIR,
920924
]
921925
members = [
922926
m

metaflow/cmd/develop/stubs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
if _py_ver >= (3, 8):
1717
from importlib import metadata
1818
elif _py_ver >= (3, 7):
19-
from metaflow._vendor.v3_7 import importlib_metadata as metadata
19+
from metaflow._vendor import importlib_metadata as metadata
2020
else:
2121
from metaflow._vendor.v3_6 import importlib_metadata as metadata
2222

metaflow/decorators.py

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,6 @@
2222

2323
from metaflow._vendor import click
2424

25-
try:
26-
unicode
27-
except NameError:
28-
unicode = str
29-
basestring = str
3025

3126
# Contains the decorators on which _init was called. We want to ensure it is called
3227
# only once on each decorator and, as the _init() function below can be called in
@@ -190,7 +185,7 @@ def make_decorator_spec(self):
190185
# escaping but for more complex types (typically dictionaries or lists),
191186
# we dump using JSON.
192187
for k, v in attrs.items():
193-
if isinstance(v, (int, float, unicode, basestring)):
188+
if isinstance(v, (int, float, str)):
194189
attr_list.append("%s=%s" % (k, str(v)))
195190
else:
196191
attr_list.append("%s=%s" % (k, json.dumps(v).replace('"', '\\"')))
@@ -316,15 +311,26 @@ def package_init(self, flow, step_name, environment):
316311

317312
def add_to_package(self):
318313
"""
319-
Called to add custom packages needed for a decorator. This hook will be
314+
Called to add custom files needed for a decorator. This hook will be
320315
called in the `MetaflowPackage` class where metaflow compiles the code package
321-
tarball. This hook is invoked in the `MetaflowPackage`'s `path_tuples`
322-
function. The `path_tuples` function is a generator that yields a tuple of
323-
`(file_path, arcname)`.`file_path` is the path of the file in the local file system;
324-
the `arcname` is the path of the file in the constructed tarball or the path of the file
325-
after decompressing the tarball.
326-
327-
Returns a list of tuples where each tuple represents (file_path, arcname)
316+
tarball. This hook can return one of two things:
317+
- a generator yielding a tuple of `(file_path, arcname)` to add files to
318+
the code package. `file_path` is the path to the file on the local filesystem
319+
and `arcname` is the path relative to the packaged code.
320+
- a generator yielding a tuple of `(content, arcname, type)` where:
321+
- type is a AddToPackageType
322+
- for CODE_FILE:
323+
- content: path to the file to include
324+
- arcname: path relative to the code directory in the package
325+
- for CODE_MODULE:
326+
- content: name of the module
327+
- arcame: None (ignored)
328+
- for CONFIG_FILE:
329+
- content: path to the file to include
330+
- arcname: path relative to the config directory in the package
331+
- for CONFIG_CONTENT:
332+
- content: bytes to include
333+
- arcname: path relative to the config directory in the package
328334
"""
329335
return []
330336

@@ -686,12 +692,8 @@ def foo(self):
686692
f.is_step = True
687693
f.decorators = []
688694
f.config_decorators = []
689-
try:
690-
# python 3
691-
f.name = f.__name__
692-
except:
693-
# python 2
694-
f.name = f.__func__.func_name
695+
f.wrappers = []
696+
f.name = f.__name__
695697
return f
696698

697699

metaflow/extension_support/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from itertools import chain
1313
from pathlib import Path
1414

15-
from metaflow.info_file import read_info_file
15+
from metaflow.meta_files import read_info_file
1616

1717

1818
#
@@ -313,7 +313,7 @@ def multiload_all(modules, extension_point, dst_globals):
313313
if _py_ver >= (3, 8):
314314
from importlib import metadata
315315
elif _py_ver >= (3, 7):
316-
from metaflow._vendor.v3_7 import importlib_metadata as metadata
316+
from metaflow._vendor import importlib_metadata as metadata
317317
else:
318318
from metaflow._vendor.v3_6 import importlib_metadata as metadata
319319

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
# This file serves as a __init__.py for metaflow_extensions when it is packaged
2-
# and needs to remain empty.
1+
# This file serves as a __init__.py for metaflow_extensions or metaflow
2+
# packages when they are packaged and needs to remain empty.

metaflow/info_file.py

Lines changed: 0 additions & 25 deletions
This file was deleted.

metaflow/meta_files.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
import json
2+
import os
3+
4+
from enum import Enum
5+
from typing import Any, Dict, Optional, Union
6+
7+
from .util import get_metaflow_root
8+
9+
_info_file_content = None
10+
_info_file_present = None
11+
_included_dist_info = None
12+
_included_dist_present = None
13+
14+
# Ideally these would be in package/mfenv.py but that would cause imports to fail so
15+
# moving here. The reason is that this is needed to read extension information which needs
16+
# to happen before mfenv gets packaged.
17+
18+
MFENV_DIR = (
19+
".mfenv" # Directory containing "system" code (metaflow and user dependencies)
20+
)
21+
MFCONF_DIR = ".mfconf" # Directory containing Metaflow's configuration files
22+
MFENV_MARKER = (
23+
".mfenv_install" # Special file containing metadata about how Metaflow is packaged
24+
)
25+
26+
27+
class MetaFile(Enum):
28+
INFO_FILE = "INFO"
29+
CONFIG_FILE = "CONFIG_PARAMETERS"
30+
INCLUDED_DIST_INFO = "INCLUDED_DIST_INFO"
31+
32+
33+
def meta_file_name(name: Union[MetaFile, str]) -> str:
34+
if isinstance(name, MetaFile):
35+
return name.value
36+
return name
37+
38+
39+
def generic_get_filename(
40+
name: Union[MetaFile, str], is_meta: Optional[bool] = None
41+
) -> Optional[str]:
42+
# We are not in a MFEnv package (so this is an old style package). Everything
43+
# is at metaflow root. There is no distinction between code and config.
44+
real_name = meta_file_name(name)
45+
46+
path_to_file = os.path.join(get_metaflow_root(), real_name)
47+
if os.path.isfile(path_to_file):
48+
return path_to_file
49+
return None
50+
51+
52+
def v1_get_filename(
53+
name: Union[MetaFile, str],
54+
meta_info: Dict[str, Any],
55+
is_meta: Optional[bool] = None,
56+
) -> Optional[str]:
57+
if is_meta is None:
58+
is_meta = isinstance(name, MetaFile)
59+
if is_meta:
60+
conf_dir = meta_info.get("conf_dir")
61+
if conf_dir is None:
62+
raise ValueError(
63+
"Invalid package -- package info does not contain conf_dir key"
64+
)
65+
return os.path.join(conf_dir, meta_file_name(name))
66+
# Not meta -- so code
67+
code_dir = meta_info.get("code_dir")
68+
if code_dir is None:
69+
raise ValueError(
70+
"Invalid package -- package info does not contain code_dir key"
71+
)
72+
return os.path.join(code_dir, meta_file_name(name))
73+
74+
75+
get_filname_map = {1: v1_get_filename}
76+
77+
78+
def get_filename(
79+
name: Union[MetaFile, str], is_meta: Optional[bool] = None
80+
) -> Optional[str]:
81+
if os.path.exists(os.path.join(get_metaflow_root(), MFENV_MARKER)):
82+
with open(
83+
os.path.join(get_metaflow_root(), MFENV_MARKER), "r", encoding="utf-8"
84+
) as f:
85+
meta_info = json.load(f)
86+
version = meta_info.get("version")
87+
if version not in get_filname_map:
88+
raise ValueError(
89+
"Unsupported packaging version '%s'. Please update Metaflow" % version
90+
)
91+
return get_filname_map[version](name, meta_info, is_meta)
92+
return generic_get_filename(name, is_meta)
93+
94+
95+
def read_info_file():
96+
# The info file is a bit special because it needs to be read to determine what
97+
# extensions to load. We need to therefore not load anything yet. This explains
98+
# the slightly wheird code structure where there is a bit of the file loading logic
99+
# here that is then used in MFEnv (it logically belongs in MFEnv but that file can't
100+
# be loaded just yet).
101+
global _info_file_content
102+
global _info_file_present
103+
104+
if _info_file_present is None:
105+
info_filename = get_filename(MetaFile.INFO_FILE)
106+
if info_filename is not None:
107+
with open(info_filename, "r", encoding="utf-8") as f:
108+
_info_file_content = json.load(f)
109+
_info_file_present = True
110+
else:
111+
_info_file_present = False
112+
if _info_file_present:
113+
return _info_file_content
114+
return None
115+
116+
117+
def read_included_dist_info():
118+
global _included_dist_info
119+
global _included_dist_present
120+
121+
from metaflow.package.mfenv import MFEnv
122+
123+
if _included_dist_present is None:
124+
c = MFEnv.get_content(MetaFile.INCLUDED_DIST_INFO)
125+
if c is not None:
126+
_included_dist_info = json.loads(c.decode("utf-8"))
127+
_included_dist_present = True
128+
else:
129+
_included_dist_present = False
130+
if _included_dist_present:
131+
return _included_dist_info
132+
return None

metaflow/metaflow_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,7 @@
460460
"stubgen",
461461
"userconf",
462462
"conda",
463+
"package",
463464
]
464465

465466
for typ in DEBUG_OPTIONS:

metaflow/metaflow_environment.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
from metaflow.exception import MetaflowException
99
from metaflow.extension_support import dump_module_info
1010
from metaflow.mflog import BASH_MFLOG, BASH_FLUSH_LOGS
11+
12+
from .meta_files import MFENV_DIR
1113
from . import R
1214

1315

@@ -49,8 +51,26 @@ def bootstrap_commands(self, step_name, datastore_type):
4951

5052
def add_to_package(self):
5153
"""
52-
A list of tuples (file, arcname) to add to the job package.
53-
`arcname` is an alternative name for the file in the job package.
54+
Called to add custom files needed for this environment. This hook will be
55+
called in the `MetaflowPackage` class where metaflow compiles the code package
56+
tarball. This hook can return one of two things:
57+
- a generator yielding a tuple of `(file_path, arcname)` to add files to
58+
the code package. `file_path` is the path to the file on the local filesystem
59+
and `arcname` is the path relative to the packaged code.
60+
- a generator yielding a tuple of `(content, arcname, type)` where:
61+
- type is a AddToPackageType
62+
- for CODE_FILE:
63+
- content: path to the file to include
64+
- arcname: path relative to the code directory in the package
65+
- for CODE_MODULE:
66+
- content: name of the module
67+
- arcame: None (ignored)
68+
- for CONFIG_FILE:
69+
- content: path to the file to include
70+
- arcname: path relative to the config directory in the package
71+
- for CONFIG_CONTENT:
72+
- content: bytes to include
73+
- arcname: path relative to the config directory in the package
5474
"""
5575
return []
5676

@@ -177,6 +197,7 @@ def get_package_commands(self, code_package_url, datastore_type):
177197
"after 6 tries. Exiting...' && exit 1; "
178198
"fi" % code_package_url,
179199
"TAR_OPTIONS='--warning=no-timestamp' tar xf job.tar",
200+
"export PYTHONPATH=`pwd`/%s:$PYTHONPATH" % MFENV_DIR,
180201
"mflog 'Task is starting.'",
181202
"flush_mflogs",
182203
]

metaflow/metaflow_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from os import path, name, environ, listdir
1212

1313
from metaflow.extension_support import update_package_info
14-
from metaflow.info_file import CURRENT_DIRECTORY, read_info_file
14+
from metaflow.meta_files import read_info_file
1515

1616

1717
# True/False correspond to the value `public`` in get_version

0 commit comments

Comments
 (0)