Skip to content

Commit 0cc63f9

Browse files
authored
[Launch] allow the creation of a bundle from the nucleus client (#301)
* first * update python * bump * fix circleci * reformat * lint * infer name * bundle_name instead * fix args * fix query * fix query * poetry fixes * circle ci remove * add comments * use master * fix mypy * sort * import in fn instead of main
1 parent 05a695e commit 0cc63f9

File tree

3 files changed

+585
-633
lines changed

3 files changed

+585
-633
lines changed

nucleus/__init__.py

Lines changed: 191 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040

4141
import os
4242
import warnings
43-
from typing import Dict, List, Optional, Sequence, Union
43+
from typing import Any, Dict, List, Optional, Sequence, Union
4444

4545
import pkg_resources
4646
import pydantic
@@ -518,6 +518,196 @@ def create_model(
518518
client=self,
519519
)
520520

521+
def create_launch_model(
    self,
    name: str,
    reference_id: str,
    bundle_args: Dict[str, Any],
    metadata: Optional[Dict] = None,
) -> Model:
    """
    Adds a :class:`Model` to Nucleus, as well as a Launch bundle from a given function.

    Parameters:
        name: A human-readable name for the model.
        reference_id: Unique, user-controlled ID for the model. This can be
            used, for example, to link to an external storage of models which
            may have its own id scheme.
        bundle_args: Dict for kwargs for the creation of a Launch bundle,
            more details on the keys below.
        metadata: An arbitrary dictionary of additional data about this model
            that can be stored and retrieved. For example, you can store information
            about the hyperparameters used in training this model.

    Returns:
        :class:`Model`: The newly created model as an object.

    Raises:
        ModelCreationError: If a model with ``name`` or a bundle with the
            autogenerated bundle name already exists.

    Details on `bundle_args`:
        Grabs a s3 signed url and uploads a model bundle to Scale Launch.

        A model bundle consists of exactly {predict_fn_or_cls}, {load_predict_fn + model}, or {load_predict_fn + load_model_fn}.
        Pre/post-processing code can be included inside load_predict_fn/model or in predict_fn_or_cls call.

        Parameters:
            model_bundle_name: Name of model bundle you want to create. This acts as a unique identifier.
            predict_fn_or_cls: Function or a Callable class that runs end-to-end (pre/post processing and model inference) on the call.
                I.e. `predict_fn_or_cls(REQUEST) -> RESPONSE`.
            model: Typically a trained Neural Network, e.g. a Pytorch module
            load_predict_fn: Function that when called with model, returns a function that carries out inference
                I.e. `load_predict_fn(model) -> func; func(REQUEST) -> RESPONSE`
            load_model_fn: Function that when run, loads a model, e.g. a Pytorch module
                I.e. `load_predict_fn(load_model_fn()) -> func; func(REQUEST) -> RESPONSE`
            bundle_url: Only for self-hosted mode. Desired location of bundle.
                Overrides any value given by self.bundle_location_fn
            requirements: A list of python package requirements, e.g.
                ["tensorflow==2.3.0", "tensorflow-hub==0.11.0"]. If no list has been passed, will default to the currently
                imported list of packages.
            app_config: Either a Dictionary that represents a YAML file contents or a local path to a YAML file.
            env_params: A dictionary that dictates environment information e.g.
                the use of pytorch or tensorflow, which cuda/cudnn versions to use.
                Specifically, the dictionary should contain the following keys:
                "framework_type": either "tensorflow" or "pytorch".
                "pytorch_version": Version of pytorch, e.g. "1.5.1", "1.7.0", etc. Only applicable if framework_type is pytorch
                "cuda_version": Version of cuda used, e.g. "11.0".
                "cudnn_version" Version of cudnn used, e.g. "cudnn8-devel".
                "tensorflow_version": Version of tensorflow, e.g. "2.3.0". Only applicable if framework_type is tensorflow
            globals_copy: Dictionary of the global symbol table. Normally provided by `globals()` built-in function.
    """
    # Imported lazily so that `launch` is only required when this method is used.
    from launch import LaunchClient

    launch_client = LaunchClient(api_key=self.api_key)

    # The bundle name is derived from the model name; compute it once so the
    # existence check and the creation call can never disagree.
    bundle_name = name + "-nucleus-autogen"

    model_exists = any(model.name == name for model in self.list_models())
    bundle_exists = any(
        bundle.name == bundle_name
        for bundle in launch_client.list_model_bundles()
    )

    if bundle_exists or model_exists:
        # Mention both possibilities: this fires when *either* a model with
        # `name` or a bundle with the autogenerated name already exists.
        raise ModelCreationError(
            "A model or bundle with the given name already exists, "
            "please try a different name"
        )

    kwargs = {
        "model_bundle_name": bundle_name,
        **bundle_args,
    }

    bundle = launch_client.create_model_bundle(**kwargs)
    return self.create_model(
        name,
        reference_id,
        metadata,
        bundle.name,
    )
def create_launch_model_from_dir(
    self,
    name: str,
    reference_id: str,
    bundle_from_dir_args: Dict[str, Any],
    metadata: Optional[Dict] = None,
) -> Model:
    """
    Adds a :class:`Model` to Nucleus, as well as a Launch bundle from a directory.

    Parameters:
        name: A human-readable name for the model.
        reference_id: Unique, user-controlled ID for the model. This can be
            used, for example, to link to an external storage of models which
            may have its own id scheme.
        bundle_from_dir_args: Dict for kwargs for the creation of a bundle from directory,
            more details on the keys below.
        metadata: An arbitrary dictionary of additional data about this model
            that can be stored and retrieved. For example, you can store information
            about the hyperparameters used in training this model.

    Returns:
        :class:`Model`: The newly created model as an object.

    Raises:
        ModelCreationError: If a model with ``name`` or a bundle with the
            autogenerated bundle name already exists.

    Details on `bundle_from_dir_args`
        Packages up code from one or more local filesystem folders and uploads them as a bundle to Scale Launch.
        In this mode, a bundle is just local code instead of a serialized object.

        For example, if you have a directory structure like so, and your current working directory is also `my_root`:

        ```
        my_root/
            my_module1/
                __init__.py
                ...files and directories
                my_inference_file.py
            my_module2/
                __init__.py
                ...files and directories
        ```

        then calling `create_model_bundle_from_dirs` with `base_paths=["my_module1", "my_module2"]` essentially
        creates a zip file without the root directory, e.g.:

        ```
        my_module1/
            __init__.py
            ...files and directories
            my_inference_file.py
        my_module2/
            __init__.py
            ...files and directories
        ```

        and these contents will be unzipped relative to the server side `PYTHONPATH`. Bear these points in mind when
        referencing Python module paths for this bundle. For instance, if `my_inference_file.py` has `def f(...)`
        as the desired inference loading function, then the `load_predict_fn_module_path` argument should be
        `my_module1.my_inference_file.f`.

        Keys for `bundle_from_dir_args`:
            model_bundle_name: Name of model bundle you want to create. This acts as a unique identifier.
            base_paths: The paths on the local filesystem where the bundle code lives.
            requirements_path: A path on the local filesystem where a requirements.txt file lives.
            env_params: A dictionary that dictates environment information e.g.
                the use of pytorch or tensorflow, which cuda/cudnn versions to use.
                Specifically, the dictionary should contain the following keys:
                "framework_type": either "tensorflow" or "pytorch".
                "pytorch_version": Version of pytorch, e.g. "1.5.1", "1.7.0", etc. Only applicable if framework_type is pytorch
                "cuda_version": Version of cuda used, e.g. "11.0".
                "cudnn_version" Version of cudnn used, e.g. "cudnn8-devel".
                "tensorflow_version": Version of tensorflow, e.g. "2.3.0". Only applicable if framework_type is tensorflow
            load_predict_fn_module_path: A python module path for a function that, when called with the output of
                load_model_fn_module_path, returns a function that carries out inference.
            load_model_fn_module_path: A python module path for a function that returns a model. The output feeds into
                the function located at load_predict_fn_module_path.
            app_config: Either a Dictionary that represents a YAML file contents or a local path to a YAML file.
    """
    # Imported lazily so that `launch` is only required when this method is used.
    from launch import LaunchClient

    launch_client = LaunchClient(api_key=self.api_key)

    # The bundle name is derived from the model name; compute it once so the
    # existence check and the creation call can never disagree.
    bundle_name = name + "-nucleus-autogen"

    model_exists = any(model.name == name for model in self.list_models())
    bundle_exists = any(
        bundle.name == bundle_name
        for bundle in launch_client.list_model_bundles()
    )

    if bundle_exists or model_exists:
        # Mention both possibilities: this fires when *either* a model with
        # `name` or a bundle with the autogenerated name already exists.
        raise ModelCreationError(
            "A model or bundle with the given name already exists, "
            "please try a different name"
        )

    kwargs = {
        "model_bundle_name": bundle_name,
        **bundle_from_dir_args,
    }

    bundle = launch_client.create_model_bundle_from_dir(**kwargs)

    return self.create_model(
        name,
        reference_id,
        metadata,
        bundle.name,
    )
521711
@deprecated(
522712
"Model runs have been deprecated and will be removed. Use a Model instead"
523713
)

0 commit comments

Comments
 (0)