Skip to content

Commit d837108

Browse files
committed
Added an XGBoost example, from training on a notebook to deploying on Algorithmia
1 parent 2e6f0fd commit d837108

File tree

9 files changed

+21422
-0
lines changed

9 files changed

+21422
-0
lines changed
6 KB
Binary file not shown.
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Developing an XGBoost model on a Jupyter notebook and deploying to Algorithmia
2+
3+
This Jupyter-notebook-to-Algorithmia example demonstrates how you can programmatically create an algorithm on [Algorithmia](https://algorithmia.com), train a model, and deploy it for serving, all from within your Jupyter notebook.
4+
5+
Step by step, we will:
6+
7+
- Create an algorithm on Algorithmia
8+
- Clone the algorithm's repository on our local machine, so that we can develop it locally
9+
- Create the basic algorithm script and the dependencies file. We will code our script in advance, assuming that our model will be sitting on a remote path on Algorithmia and that our script will load the model from there. We will then make these assumptions true!
10+
- Commit and push these files to Algorithmia and get our Algorithm's container built
11+
- Load our training data
12+
- Preprocess the data
13+
- Set up an XGBoost model and do a mini hyperparameter search
14+
- Fit the data on our model
15+
- Get the predictions
16+
- Check the accuracy
17+
- Repeat the model development iterations until we are happy with our model :)
18+
19+
And finally, once we are happy with the model performance, we will upload it to Algorithmia and have it up and ready to serve our upcoming prediction requests!
20+
21+
22+
You can also check out the built algorithm at https://algorithmia.com/algorithms/asli/xgboost_basic_sentiment_analysis and see the final product in action.
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import Algorithmia
2+
import urllib.parse
3+
from git import Git, Repo, remote
4+
from retry import retry
5+
6+
7+
class Progress(remote.RemoteProgress):
    """Progress handler that echoes git's remote progress output to stdout.

    Instances are passed as the ``progress`` argument of GitPython clone and
    push calls so that the user can follow long-running operations.
    """

    def line_dropped(self, line):
        """Print a progress line handed to this hook by GitPython."""
        print(line)

    def update(self, *_progress_details):
        """Print the progress line currently being processed.

        The positional arguments supplied by GitPython are ignored; only the
        raw line stored in ``self._cur_line`` is shown.
        """
        current_line = self._cur_line
        print(current_line)
13+
14+
15+
class AlgorithmiaUtils:
    """Helper for creating, developing and calling an Algorithmia algorithm.

    Bundles the Algorithmia API client with the local git checkout of the
    algorithm so that a notebook can create the algorithm, push its source,
    upload a model file and call the latest build.

    Attributes:
        api_key: Algorithmia API key used for both the API client and git.
        username: Algorithmia account name that owns the algorithm.
        algo_name: Name of the algorithm.
        local_dir: Local directory under which the repository is cloned.
        algo_namespace: ``"<username>/<algo_name>"``.
        algo_script_path: Local path of the algorithm's main script.
        dependency_file_path: Local path of the ``requirements.txt`` file.
        algo_client: Client returned by ``Algorithmia.client(api_key)``.
        repo: The local ``git.Repo``; ``None`` until the repository has been
            cloned or opened.
    """

    def __init__(self, api_key, username, algo_name, local_dir):
        self.api_key = api_key
        self.username = username
        self.algo_name = algo_name
        self.local_dir = local_dir

        self.algo_namespace = f"{self.username}/{self.algo_name}"
        self.algo_script_path = "{}/{}/src/{}.py".format(
            self.local_dir, algo_name, algo_name
        )
        self.dependency_file_path = "{}/{}/{}".format(
            self.local_dir, algo_name, "requirements.txt"
        )

        self.algo_client = Algorithmia.client(self.api_key)

        # Populated by clone_algorithm_repo(), or opened lazily from disk by
        # push_algo_script_with_dependencies(). Initialising it here keeps the
        # "is the repo loaded yet?" check from raising AttributeError.
        self.repo = None

    def _local_repo_path(self):
        """Return the local checkout directory, ``<local_dir>/<algo_name>``."""
        return "{}/{}".format(self.local_dir, self.algo_name)

    def create_algorithm(self):
        """Create the algorithm on Algorithmia with placeholder details.

        The summary, label and tagline are placeholder strings; the settings
        request a closed-source Python 3.7 algorithm with full network access
        and pipelining enabled.
        """
        details = {
            "summary": "ALGO_SUMMARY",
            "label": "ALGO_LABEL",
            "tagline": "ALGO_TAGLINE",
        }
        settings = {
            "source_visibility": "closed",
            "package_set": "python37",
            "license": "apl",
            "network_access": "full",
            "pipeline_enabled": True,
        }
        self.algo_client.algo(self.algo_namespace).create(details, settings)

    def clone_algorithm_repo(self):
        """Clone the algorithm's git repository into ``<local_dir>/<algo_name>``.

        The resulting ``git.Repo`` is stored on ``self.repo``. Clone progress
        is printed through ``Progress``.
        """
        # Encoding the API key, so we can use it in the git URL
        encoded_api_key = urllib.parse.quote_plus(self.api_key)

        # NOTE: the API key is embedded in the remote URL as the password.
        algo_repo = "https://{}:{}@git.algorithmia.com/git/{}/{}.git".format(
            self.username, encoded_api_key, self.username, self.algo_name
        )
        p = Progress()
        self.repo = Repo.clone_from(algo_repo, self._local_repo_path(), progress=p)

    def push_algo_script_with_dependencies(self):
        """Commit the algorithm script and requirements file, then push them.

        If the repository has not been cloned through this instance, the
        existing checkout at ``<local_dir>/<algo_name>`` is opened instead.
        """
        if self.repo is None:
            # Reuse a checkout left on disk by an earlier session.
            self.repo = Repo(self._local_repo_path())
        files = ["src/{}.py".format(self.algo_name), "requirements.txt"]
        self.repo.index.add(files)
        self.repo.index.commit("Updated algorithm files")
        p = Progress()
        self.repo.remote(name="origin").push(progress=p)

    def upload_model_to_algorithmia(
        self, local_path, algorithmia_data_path, model_name
    ):
        """Upload a local model file to an Algorithmia data directory.

        The remote directory is created first when it does not exist yet.

        Args:
            local_path: Path of the model file on the local machine.
            algorithmia_data_path: Remote data directory to upload into.
            model_name: File name to store the model under in that directory.

        Returns:
            Whatever the Algorithmia client's ``putFile`` call returns.
        """
        remote_dir = self.algo_client.dir(algorithmia_data_path)
        if not remote_dir.exists():
            remote_dir.create()
        algorithmia_path = "{}/{}".format(algorithmia_data_path, model_name)
        return self.algo_client.file(algorithmia_path).putFile(local_path)

    # The hash endpoint of a freshly pushed build may not be available right
    # away, so retry on AlgorithmException: at most 10 attempts, 1 second apart.
    @retry(Algorithmia.errors.AlgorithmException, tries=10, delay=1)
    def call_latest_algo_version(self, input):
        """Call the most recently built version of the algorithm.

        Looks up the git hash of the latest version and pipes ``input`` to
        that exact build with a 60 second timeout and stdout capture disabled.

        Args:
            input: Payload forwarded to the algorithm's ``pipe`` call.

        Returns:
            The result object returned by ``pipe``.
        """
        latest_algo_hash = (
            self.algo_client.algo(self.algo_namespace).info().version_info.git_hash
        )
        algo = self.algo_client.algo(
            "{}/{}".format(self.algo_namespace, latest_algo_hash)
        )
        algo.set_options(timeout=60, stdout=False)
        algo_pipe_result = algo.pipe(input)
        return algo_pipe_result
Binary file not shown.

0 commit comments

Comments
 (0)