Skip to content

Commit 0b1f2d7

Browse files
authored
[SN-131]corrected_HF.ipynb
1 parent 6d27c3c commit 0b1f2d7

File tree

1 file changed

+18
-15
lines changed

1 file changed

+18
-15
lines changed

examples/integrations/huggingface/huggingface.ipynb

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,7 @@
3737
"metadata": {},
3838
"outputs": [],
3939
"source": [
40-
"#install the required packages\n",
41-
"\n",
42-
"!pip install -q \"labelbox[data]\"\n",
40+
"!pip install -q \"labelbox\"\n",
4341
"!pip install -q transformers"
4442
]
4543
},
@@ -56,8 +54,6 @@
5654
"metadata": {},
5755
"outputs": [],
5856
"source": [
59-
"# import libraries\n",
60-
"\n",
6157
"import labelbox as lb\n",
6258
"import transformers\n",
6359
"transformers.logging.set_verbosity(50)\n",
@@ -100,7 +96,8 @@
10096
"metadata": {},
10197
"outputs": [],
10298
"source": [
103-
"# get images from a Labelbox dataset, those images needs to be available so you may need a token from your cloud provider\n",
99+
"# Get images from a Labelbox dataset.\n",
100+
"# Make sure the images are accessible; you may need a token from your cloud provider.\n",
104101
"DATASET_ID = \"\""
105102
]
106103
},
@@ -126,7 +123,7 @@
126123
"\tprint(export_task.errors)\n",
127124
"export_json = export_task.result\n",
128125
"\n",
129-
"data_row_urls = [i['data_row']['row_data'] for i in export_json]"
126+
"data_row_urls = [dr_url['data_row']['row_data'] for dr_url in export_json]"
130127
]
131128
},
132129
{
@@ -142,7 +139,7 @@
142139
"metadata": {},
143140
"outputs": [],
144141
"source": [
145-
"# get ResNet-50 from HuggingFace\n",
142+
"# Get ResNet-50 from HuggingFace\n",
146143
"image_processor = transformers.AutoImageProcessor.from_pretrained(\"microsoft/resnet-50\")\n",
147144
"model = transformers.ResNetModel.from_pretrained(\"microsoft/resnet-50\")"
148145
]
@@ -160,11 +157,11 @@
160157
"metadata": {},
161158
"outputs": [],
162159
"source": [
163-
"#create a new embedding in your workspace, use the right dimensions to your use case, here we use 2048 for ResNet-50\n",
160+
"# Create a new embedding in your workspace. Use the right dimensions for your use case; here we use 2048 for ResNet-50.\n",
164161
"new_custom_embedding_id = client.create_embedding(name=\"My new awesome embedding\", dims=2048).id\n",
165162
"\n",
166-
"#or use an existing embedding from your workspace\n",
167-
"#existing_embedding_id = client.get_embedding_by_name(name=\"ResNet img 2048\").id"
163+
"# Or use an existing embedding from your workspace\n",
164+
"# existing_embedding_id = client.get_embedding_by_name(name=\"ResNet img 2048\").id"
168165
]
169166
},
170167
{
@@ -180,15 +177,19 @@
180177
"metadata": {},
181178
"outputs": [],
182179
"source": [
183-
"data_rows = []\n",
184180
"img_emb = []\n",
185181
"\n",
186182
"for url in tqdm(data_row_urls):\n",
187183
" try:\n",
188184
" response = requests.get(url, stream=True)\n",
189185
" if response.status_code == 200:\n",
186+
" # Open the image, convert to RGB, and resize to 224x224\n",
190187
" image = Image.open(response.raw).convert('RGB').resize((224, 224))\n",
188+
"\n",
189+
" # Preprocess the image for model input\n",
191190
" img_hf = image_processor(image, return_tensors=\"pt\")\n",
191+
"\n",
192+
" # Pass the image through the model to get embeddings\n",
192193
" with torch.no_grad():\n",
193194
" last_layer = model(**img_hf, output_hidden_states=True).last_hidden_state\n",
194195
" resnet_embeddings = F.adaptive_avg_pool2d(last_layer, (1, 1))\n",
@@ -199,12 +200,14 @@
199200
" except Exception as e:\n",
200201
" print(f\"Error processing URL: {url}. Exception: {e}\")\n",
201202
" continue\n",
203+
"\n",
204+
"data_rows = []\n",
202205
" \n",
203-
"# create data rows payload to send to a dataset\n",
206+
"# Create data rows payload to send to a dataset\n",
204207
"for url, embedding in tqdm(zip(data_row_urls, img_emb)):\n",
205208
" data_rows.append({\n",
206209
" \"row_data\": url,\n",
207-
" \"embeddings\": [{\"embedding_id\": existing_embedding_id, \"vector\": embedding[0].tolist()}]\n",
210+
" \"embeddings\": [{\"embedding_id\": new_custom_embedding_id, \"vector\": embedding[0].tolist()}]\n",
208211
" })"
209212
]
210213
},
@@ -214,7 +217,7 @@
214217
"metadata": {},
215218
"outputs": [],
216219
"source": [
217-
"#upload to a new dataset\n",
220+
"# Upload to a new dataset\n",
218221
"dataset = client.create_dataset(name='image_custom_embedding_resnet', iam_integration=None)\n",
219222
"task = dataset.create_data_rows(data_rows)\n",
220223
"print(task.errors)"

0 commit comments

Comments
 (0)