|
37 | 37 | "metadata": {},
|
38 | 38 | "outputs": [],
|
39 | 39 | "source": [
|
40 |    | - "#install the required packages\n",

41 |    | - "\n",

42 |    | - "!pip install -q \"labelbox[data]\"\n",

   | 40 | + "!pip install -q \"labelbox\"\n",
43 | 41 | "!pip install -q transformers"
|
44 | 42 | ]
|
45 | 43 | },
|
|
56 | 54 | "metadata": {},
|
57 | 55 | "outputs": [],
|
58 | 56 | "source": [
|
59 |    | - "# import libraries\n",

60 |    | - "\n",
61 | 57 | "import labelbox as lb\n",
|
62 | 58 | "import transformers\n",
|
63 | 59 | "transformers.logging.set_verbosity(50)\n",
|
|
100 | 96 | "metadata": {},
|
101 | 97 | "outputs": [],
|
102 | 98 | "source": [
|
103 |     | - "# get images from a Labelbox dataset, those images needs to be available so you may need a token from your cloud provider\n",

    |  99 | + "# Get images from a Labelbox dataset,\n",

    | 100 | + "# Ensure the images are available by obtaining a token from your cloud provider if necessary\n",
104 | 101 | "DATASET_ID = \"\""
|
105 | 102 | ]
|
106 | 103 | },
|
|
126 | 123 | "\tprint(export_task.errors)\n",
|
127 | 124 | "export_json = export_task.result\n",
|
128 | 125 | "\n",
|
129 |     | - "data_row_urls = [i['data_row']['row_data'] for i in export_json]"

    | 126 | + "data_row_urls = [dr_url['data_row']['row_data'] for dr_url in export_json]"
130 | 127 | ]
|
131 | 128 | },
|
132 | 129 | {
|
|
142 | 139 | "metadata": {},
|
143 | 140 | "outputs": [],
|
144 | 141 | "source": [
|
145 |     | - "# get ResNet-50 from HuggingFace\n",

    | 142 | + "# Get ResNet-50 from HuggingFace\n",
146 | 143 | "image_processor = transformers.AutoImageProcessor.from_pretrained(\"microsoft/resnet-50\")\n",
|
147 | 144 | "model = transformers.ResNetModel.from_pretrained(\"microsoft/resnet-50\")"
|
148 | 145 | ]
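
Side note on this hunk (not part of the diff): the dims=2048 used in the next cell matches the channel depth of ResNet-50's last hidden state. A minimal sketch, reusing the model and image_processor loaded above, to confirm the vector size before creating the Labelbox embedding:

    import torch
    import torch.nn.functional as F
    from PIL import Image

    # Blank 224x224 test image, used only to probe the output shape
    dummy = Image.new("RGB", (224, 224))
    inputs = image_processor(dummy, return_tensors="pt")
    with torch.no_grad():
        last_layer = model(**inputs, output_hidden_states=True).last_hidden_state
    vec = F.adaptive_avg_pool2d(last_layer, (1, 1)).flatten()
    print(vec.shape)  # expected: torch.Size([2048]), matching dims=2048 below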
|
|
160 | 157 | "metadata": {},
|
161 | 158 | "outputs": [],
|
162 | 159 | "source": [
|
163 |     | - "#create a new embedding in your workspace, use the right dimensions to your use case, here we use 2048 for ResNet-50\n",

    | 160 | + "# Create a new embedding in your workspace, use the right dimensions to your use case, here we use 2048 for ResNet-50\n",
164 | 161 | "new_custom_embedding_id = client.create_embedding(name=\"My new awesome embedding\", dims=2048).id\n",
|
165 | 162 | "\n",
|
166 |     | - "#or use an existing embedding from your workspace\n",

167 |     | - "#existing_embedding_id = client.get_embedding_by_name(name=\"ResNet img 2048\").id"

    | 163 | + "# Or use an existing embedding from your workspace\n",

    | 164 | + "# existing_embedding_id = client.get_embedding_by_name(name=\"ResNet img 2048\").id"
168 | 165 | ]
|
169 | 166 | },
|
170 | 167 | {
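
Because the payload later in the diff switches from existing_embedding_id to new_custom_embedding_id, here is a hedged sketch (not in the diff) of how the two options in this cell could be reconciled: reuse the embedding if the name already exists, otherwise create it. It uses only the two client calls shown in the cell; the name is an example value and the lookup is assumed to raise when nothing matches.

    EMBEDDING_NAME = "ResNet img 2048"  # example name, adjust to your workspace
    try:
        # Reuse an embedding that already exists in the workspace
        embedding_id = client.get_embedding_by_name(name=EMBEDDING_NAME).id
    except Exception:
        # Otherwise create one with the 2048 dims used for ResNet-50
        embedding_id = client.create_embedding(name=EMBEDDING_NAME, dims=2048).id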
|
|
180 | 177 | "metadata": {},
|
181 | 178 | "outputs": [],
|
182 | 179 | "source": [
|
183 |     | - "data_rows = []\n",
184 | 180 | "img_emb = []\n",
|
185 | 181 | "\n",
|
186 | 182 | "for url in tqdm(data_row_urls):\n",
|
187 | 183 | " try:\n",
|
188 | 184 | " response = requests.get(url, stream=True)\n",
|
189 | 185 | " if response.status_code == 200:\n",
|
    | 186 | + " # Open the image, convert to RGB, and resize to 224x224\n",
190 | 187 | " image = Image.open(response.raw).convert('RGB').resize((224, 224))\n",
|
    | 188 | + "\n",

    | 189 | + " # Preprocess the image for model input\n",
191 | 190 | " img_hf = image_processor(image, return_tensors=\"pt\")\n",
|
    | 191 | + "\n",

    | 192 | + " # Pass the image through the model to get embeddings\n",
192 | 193 | " with torch.no_grad():\n",
|
193 | 194 | " last_layer = model(**img_hf, output_hidden_states=True).last_hidden_state\n",
|
194 | 195 | " resnet_embeddings = F.adaptive_avg_pool2d(last_layer, (1, 1))\n",
|
|
199 | 200 | " except Exception as e:\n",
|
200 | 201 | " print(f\"Error processing URL: {url}. Exception: {e}\")\n",
|
201 | 202 | " continue\n",
|
    | 203 | + "\n",

    | 204 | + "data_rows = []\n",
202 | 205 | " \n",
|
203 |     | - "# create data rows payload to send to a dataset\n",

    | 206 | + "# Create data rows payload to send to a dataset\n",
204 | 207 | "for url, embedding in tqdm(zip(data_row_urls, img_emb)):\n",
|
205 | 208 | " data_rows.append({\n",
|
206 | 209 | " \"row_data\": url,\n",
|
207 |     | - " \"embeddings\": [{\"embedding_id\": existing_embedding_id, \"vector\": embedding[0].tolist()}]\n",

    | 210 | + " \"embeddings\": [{\"embedding_id\": new_custom_embedding_id, \"vector\": embedding[0].tolist()}]\n",
208 | 211 | " })"
|
209 | 212 | ]
|
210 | 213 | },
|
|
214 | 217 | "metadata": {},
|
215 | 218 | "outputs": [],
|
216 | 219 | "source": [
|
217 |     | - "#upload to a new dataset\n",

    | 220 | + "# Upload to a new dataset\n",
218 | 221 | "dataset = client.create_dataset(name='image_custom_embedding_resnet', iam_integration=None)\n",
|
219 | 222 | "task = dataset.create_data_rows(data_rows)\n",
|
220 | 223 | "print(task.errors)"
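
One more hedged sketch (not in the diff): create_data_rows runs asynchronously, so waiting on the task before reading task.errors gives a more reliable check. Here task and data_rows are the objects defined in the cells above.

    task.wait_till_done()
    if task.errors:
        print(task.errors)
    else:
        print(f"Uploaded {len(data_rows)} data rows with custom ResNet-50 embeddings")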
|
|