Skip to content

Commit b38540b

Browse files
Add simplified version of the tf text classification example (#9)
1 parent 2ae6575 commit b38540b

File tree

5 files changed

+1161
-0
lines changed

5 files changed

+1161
-0
lines changed
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
import Algorithmia
2+
import argparse
3+
from Algorithmia.errors import AlgorithmException
4+
5+
def parse_arguments():
    """Parse the deployment options from the command line.

    All flags are optional (nargs="?"); only --data_path has a default.
    Returns the populated argparse.Namespace.
    """
    parser = argparse.ArgumentParser()

    # Simple string options with no default value.
    for short_flag, long_flag in [
        ("-k", "--api_key"),
        ("-u", "--username"),
        ("-a", "--algoname"),
        ("-m", "--model_script"),
        ("-d", "--model_dependency_file"),
    ]:
        parser.add_argument(short_flag, long_flag, nargs="?")

    # Data collection path, defaulting to the user's own collection.
    parser.add_argument("-p", "--data_path", nargs="?", default=".my/mycollection")

    return parser.parse_args()
16+
17+
def main(args=None):
    """Entry point: run the deployment with CLI-parsed or injected arguments.

    Accepting an optional pre-built namespace lets tests and other callers
    bypass the command line.
    """
    # FIX: the original used `isinstance(args, type(None))`; `is None` is the
    # canonical identity test for the None singleton.
    if args is None:
        args = parse_arguments()
    deploy(args)
21+
22+
def deploy(args):
    """Create, upload, compile, test, and publish the algorithm on Algorithmia.

    Expects ``args`` to carry: api_key, username, algoname, model_script,
    model_dependency_file, and data_path (see parse_arguments()).
    Performs network and filesystem I/O throughout; raises on API/git errors.
    """
    ### 1. Data collection setup ###

    # A data collection, where we'll be storing our files
    data_path = "data://{}".format(args.data_path)

    # Create a new Algorithmia client
    client = Algorithmia.client(args.api_key)

    # Create data collection if it doesn't exist
    if not client.dir(data_path).exists():
        client.dir(data_path).create()

    ### 2. Create local directory for algorithm files ###

    import os
    import shutil

    # Define local work directory
    local_dir = "algo"

    # Delete local directory if it already exists, so each run starts clean
    if os.path.exists(local_dir):
        shutil.rmtree(local_dir)

    # Create local work directory
    os.makedirs(local_dir)

    ### 3. Upload model file ###

    model_name = "text_classification_model.h5"
    local_model = "{}/{}".format(local_dir, model_name)
    data_model = "{}/{}".format(data_path, model_name)

    # Serialize the model locally.
    # NOTE(review): `model` is not defined anywhere in this script — in the
    # full (non-simplified) example it comes from a training step. As written
    # this line raises NameError; confirm where the trained model should
    # come from.
    model.save(local_model)

    # Upload our model file to our data collection
    _ = client.file(data_model).putFile(local_model)

    ### 4. Upload word index ###

    import pickle

    word_index_name = "word_index.pickle"
    local_word_index = "{}/{}".format(local_dir, word_index_name)
    data_word_index = "{}/{}".format(data_path, word_index_name)

    # Serialize (pickle) our word index.
    # NOTE(review): `word_index` is likewise undefined in this simplified
    # script (it is produced by the training step in the full example) —
    # TODO confirm.
    with open(local_word_index, "wb") as fh:
        pickle.dump(word_index, fh)

    # Upload word index file to our data collection
    _ = client.file(data_word_index).putFile(local_word_index)

    ### 5. Create new algorithm ###

    # Algorithms are referred to with the following schema: username/algoname
    algo_namespace = "{}/{}".format(args.username, args.algoname)

    # Here are some details you can define for your algorithm
    details = {
        "summary": "This algorithms returns the sentiment of movie reviews.",
        "label": "Movie Review Sentiment Analysis",
        "tagline": "movie_review_sentiment"
    }

    # 1. We're making our algorithm closed-sourced - "source_visibility"
    #
    # 2. We're selecting a package set that has tensorflow-gpu already
    #    installed - "package_set". Even though we could manually install it
    #    later, using the optimized & pre-installed image allows you to
    #    compile things faster.
    #
    # 3. We're selecting the Algorithmia Platform License (aka "apl") -
    #    "license"
    #
    # 4. We're giving our algorithm internet access - "network_access"
    #
    # 5. We're allowing our algorithm to call other algorithms -
    #    "pipeline_enabled"
    settings = {
        "source_visibility": "closed",
        "package_set": "tensorflow-gpu-1.12",
        "license": "apl",
        "network_access": "full",
        "pipeline_enabled": True
    }

    # Let's also provide a sample input for our algorithm
    version_info = {
        "sample_input": '{\n "text": "This is a positive review. I really liked it! It was great!"\n}'
    }

    # FIX: original print said "namepace" (typo) in this user-facing message.
    print("Algorithm namespace: {}".format(algo_namespace))

    # Create a new algorithm
    client.algo(algo_namespace).create(details, settings, version_info)

    # Print the URL to the algorithm
    print("Algorithm URL: https://algorithmia.com/algorithms/{}".format(algo_namespace))

    ### 6. Git clone our algorithm locally ###

    import urllib.parse
    from git import Git, Repo, remote

    # Encode API key, so we can use it in the git URL
    encoded_api_key = urllib.parse.quote_plus(args.api_key)

    algo_repo = "https://{}:{}@git.algorithmia.com/git/{}/{}.git".format(args.username, encoded_api_key, args.username, args.algoname)

    _ = Repo.clone_from(algo_repo, "{}/{}".format(local_dir, args.algoname))

    cloned_repo = Repo("{}/{}".format(local_dir, args.algoname))

    ### 7. The algorithm script & dependency file ###

    algo_script_path = "{}/{}/src/{}.py".format(local_dir, args.algoname, args.algoname)
    dependency_file_path = "{}/{}/{}".format(local_dir, args.algoname, "requirements.txt")

    shutil.copyfile(args.model_script, algo_script_path)
    shutil.copyfile(args.model_dependency_file, dependency_file_path)

    ### 8. Upload our source code ###

    files = ["src/{}.py".format(args.algoname), "requirements.txt"]
    cloned_repo.index.add(files)

    cloned_repo.index.commit("Add algorithm files")

    origin = cloned_repo.remote(name='origin')

    print("Pushing source code upstream, uploading model file & compiling algorithm...")

    _ = origin.push()

    # Print the URL to the algorithm source code
    print("Algorithm Source Code is available at: https://algorithmia.com/algorithms/{}/source".format(algo_namespace))

    ### 9. Call & test our algorithm ###

    import time

    print("Testing new compiled algorithm via API endpoint...")
    latest_hash = client.algo(algo_namespace).info().version_info.git_hash

    # Call algorithm until the algo hash endpoint becomes available, up to
    # 10 attempts, 1 second apart.
    # FIX: the original decorated this with `@retry(AlgorithmException,
    # tries=10, delay=1)` but never imported `retry`, which raises NameError
    # at definition time. This stdlib loop reproduces the same behavior.
    def get_probability(algo, version, algo_input):
        last_error = None
        for attempt in range(10):
            try:
                return client.algo("{}/{}".format(algo, version)).pipe(algo_input).result["prob"]
            except AlgorithmException as err:
                last_error = err
                if attempt < 9:  # no sleep after the final failed attempt
                    time.sleep(1)
        raise last_error

    algo_input = {
        "text": "This is a very positive review for the movie. I absolutely loved it!"
    }

    # Call the algorithm endpoint with the latest hash
    prob = get_probability(algo_namespace, latest_hash, algo_input)

    print("Test complete!")

    ### 10. Publish our algorithm ###

    print("Publishing and deploying algorithm...")

    # Now let's publish/deploy our algorithm
    client.algo(algo_namespace).publish()

    latest_version = client.algo(algo_namespace).info().version_info.semantic_version

    # Call the algorithm endpoint with the latest version
    prob = get_probability(algo_namespace, latest_version, algo_input)

    print("Algorithm has been deployed!")
189+
190+
# Script entry point: parse CLI flags and run the full deployment pipeline.
if __name__ == "__main__":
    main()
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import Algorithmia
from tensorflow import keras
import numpy as np
import pickle

# Create our Algorithmia client (no API key passed — presumably the hosted
# algorithm environment supplies credentials implicitly; verify if running
# this outside the platform).
client = Algorithmia.client()

# Define where our files live in our data collection (paths match what the
# companion deploy script uploads).
data_model = "data://.my/mycollection/text_classification_model.h5"
data_word_index = "data://.my/mycollection/word_index.pickle"

# Download & initialize our model once at import time, so every apply()
# call reuses the already-loaded model instead of reloading it per request.
model_file = client.file(data_model).getFile().name
model = keras.models.load_model(model_file)

# Download & unpickle the word -> integer-id index used to vectorize input.
word_index_file = client.file(data_word_index).getFile().name
with open(word_index_file, "rb") as fh:
    word_index = pickle.load(fh)
21+
22+
# Function for vectorizing our input text
23+
def vectorize_text(text):
    """Convert raw text into a padded integer sequence for the model.

    Splits ``text`` on single spaces, maps each known word to its integer id
    from the module-level ``word_index`` (unknown words are silently dropped),
    then pads/truncates to length 256 with the index's "<PAD>" id.

    Returns the (1, 256) int array produced by pad_sequences.
    """
    # FIX (idiom): list comprehension replaces the original manual
    # append loop — identical semantics.
    vector = [word_index[word] for word in text.split(" ") if word in word_index]

    return keras.preprocessing.sequence.pad_sequences(
        [np.array(vector, dtype=np.int32)],
        value=word_index["<PAD>"],
        padding='post',
        maxlen=256)
34+
35+
def apply(input):
    """Algorithmia entry point: score the sentiment of input["text"].

    Returns {"prob": p}, where p is the model's output score as a plain
    Python float.
    """
    review_text = input["text"]

    # Turn the raw text into the padded integer sequence the model expects.
    review_vector = vectorize_text(review_text)

    # predict() yields a nested array; unwrap [0][0] and coerce to float so
    # the response is JSON-serializable.
    probability = float(model.predict(review_vector)[0][0])

    return {"prob": probability}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
algorithmia>=1.0.0,<2.0
2+
six
3+
tensorflow-gpu==1.12.0
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
tensorflow==1.12.2
2+
algorithmia==1.2.1
3+
jupyter==1.0.0
4+
gitpython==2.1.11
5+
matplotlib==3.0.2
6+
urllib3==1.24.3
7+
retry==0.9.2
8+
numpy==1.16.1

0 commit comments

Comments
 (0)