|
5 | 5 | "id": "complimentary-passing",
|
6 | 6 | "metadata": {},
|
7 | 7 | "source": [
|
8 |
| - "# Basics" |
| 8 | + "# Basics\n" |
9 | 9 | ]
|
10 | 10 | },
|
11 | 11 | {
|
|
24 | 24 | "* For more details : https://docs.labelbox.com/python-sdk/en/index-en#labelbox-python-sdk"
|
25 | 25 | ]
|
26 | 26 | },
|
| 27 | + { |
| 28 | + "cell_type": "markdown", |
| 29 | + "id": "cheap-damages", |
| 30 | + "metadata": {}, |
| 31 | + "source": [ |
| 32 | + "#### The remainder of this notebook is an interactive version of the fundamental concepts docs.\n", |
| 33 | + "* For more details you can read the docs here: \n", |
| 34 | + " * https://docs.labelbox.com/python-sdk/en/index-en#fundamental-concepts" |
| 35 | + ] |
| 36 | + }, |
27 | 37 | {
|
28 | 38 | "cell_type": "code",
|
29 |
| - "execution_count": 1, |
| 39 | + "execution_count": 5, |
30 | 40 | "id": "everyday-street",
|
31 | 41 | "metadata": {},
|
32 | 42 | "outputs": [],
|
|
56 | 66 | },
|
57 | 67 | {
|
58 | 68 | "cell_type": "code",
|
59 |
| - "execution_count": 9, |
| 69 | + "execution_count": 6, |
60 | 70 | "id": "instructional-reply",
|
61 | 71 | "metadata": {},
|
62 | 72 | "outputs": [],
|
|
67 | 77 | "DATASET_NAME = \"Example Jellyfish Dataset\""
|
68 | 78 | ]
|
69 | 79 | },
|
| 80 | + { |
| 81 | + "cell_type": "markdown", |
| 82 | + "id": "chinese-playing", |
| 83 | + "metadata": {}, |
| 84 | + "source": [ |
| 85 | + "#### Client\n", |
| 86 | + "* Starting point for all db interactions" |
| 87 | + ] |
| 88 | + }, |
70 | 89 | {
|
71 | 90 | "cell_type": "code",
|
72 |
| - "execution_count": 10, |
| 91 | + "execution_count": 7, |
73 | 92 | "id": "thick-gasoline",
|
74 | 93 | "metadata": {},
|
75 | 94 | "outputs": [],
|
|
81 | 100 | },
|
82 | 101 | {
|
83 | 102 | "cell_type": "code",
|
84 |
| - "execution_count": 11, |
| 103 | + "execution_count": 8, |
85 | 104 | "id": "victorian-consumer",
|
86 | 105 | "metadata": {},
|
87 | 106 | "outputs": [],
|
|
93 | 112 | },
|
94 | 113 | {
|
95 | 114 | "cell_type": "code",
|
96 |
| - "execution_count": 12, |
| 115 | + "execution_count": 9, |
97 | 116 | "id": "industrial-onion",
|
98 | 117 | "metadata": {},
|
99 | 118 | "outputs": [
|
|
103 | 122 | "<Project ID: ckk4q1viuc0w20704eh69u28h>"
|
104 | 123 | ]
|
105 | 124 | },
|
106 |
| - "execution_count": 12, |
| 125 | + "execution_count": 9, |
107 | 126 | "metadata": {},
|
108 | 127 | "output_type": "execute_result"
|
109 | 128 | }
|
|
112 | 131 | "project"
|
113 | 132 | ]
|
114 | 133 | },
|
| 134 | + { |
| 135 | + "cell_type": "markdown", |
| 136 | + "id": "popular-nylon", |
| 137 | + "metadata": {}, |
| 138 | + "source": [ |
| 139 | + "#### Fields\n", |
| 140 | + "* All db objects have fields (look at the source code to see them https://github.com/Labelbox/labelbox-python/blob/develop/labelbox/schema/project.py)\n", |
| 141 | + "* These fields are attributes of the object" |
| 142 | + ] |
| 143 | + }, |
115 | 144 | {
|
116 | 145 | "cell_type": "code",
|
117 |
| - "execution_count": null, |
118 |
| - "id": "superb-revolution", |
| 146 | + "execution_count": 12, |
| 147 | + "id": "guided-institute", |
119 | 148 | "metadata": {},
|
120 |
| - "outputs": [], |
121 |
| - "source": [] |
| 149 | + "outputs": [ |
| 150 | + { |
| 151 | + "name": "stdout", |
| 152 | + "output_type": "stream", |
| 153 | + "text": [ |
| 154 | + "Sample Project\n", |
| 155 | + "Demonstrating image segmentation and object detection\n", |
| 156 | + "Example Jellyfish Dataset\n" |
| 157 | + ] |
| 158 | + } |
| 159 | + ], |
| 160 | + "source": [ |
| 161 | + "print(project.name)\n", |
| 162 | + "print(project.description)\n", |
| 163 | + "print(dataset.name)" |
| 164 | + ] |
| 165 | + }, |
| 166 | + { |
| 167 | + "cell_type": "markdown", |
| 168 | + "id": "protective-multimedia", |
| 169 | + "metadata": {}, |
| 170 | + "source": [ |
| 171 | + "* Fields can be updated. This will be reflected server side (you will see it in Labelbox)." |
| 172 | + ] |
122 | 173 | },
|
123 | 174 | {
|
124 | 175 | "cell_type": "code",
|
125 | 176 | "execution_count": 13,
|
126 |
| - "id": "cubic-joint", |
| 177 | + "id": "according-subdivision", |
| 178 | + "metadata": {}, |
| 179 | + "outputs": [], |
| 180 | + "source": [ |
| 181 | + "project.update(description = \"new description field\")\n", |
| 182 | + "print(project.description)" |
| 183 | + ] |
| 184 | + }, |
| 185 | + { |
| 186 | + "cell_type": "markdown", |
| 187 | + "id": "viral-power", |
| 188 | + "metadata": {}, |
| 189 | + "source": [ |
| 190 | + "#### Pagination\n", |
| 191 | + "* Queries that return a list of database objects return them as a PaginatedCollection\n", |
| 192 | + "* The goal here is to limit the data being returned to only the necessary data." |
| 193 | + ] |
| 194 | + }, |
| 195 | + { |
| 196 | + "cell_type": "code", |
| 197 | + "execution_count": 17, |
| 198 | + "id": "ideal-processing", |
127 | 199 | "metadata": {},
|
128 | 200 | "outputs": [
|
129 | 201 | {
|
130 | 202 | "data": {
|
131 | 203 | "text/plain": [
|
132 |
| - "<labelbox.pagination.PaginatedCollection at 0x10caa6160>" |
| 204 | + "<labelbox.pagination.PaginatedCollection at 0x1110afe80>" |
133 | 205 | ]
|
134 | 206 | },
|
135 |
| - "execution_count": 13, |
| 207 | + "execution_count": 17, |
136 | 208 | "metadata": {},
|
137 | 209 | "output_type": "execute_result"
|
138 | 210 | }
|
139 | 211 | ],
|
140 | 212 | "source": [
|
141 |
| - "#Or you can fetch all based on a condition\n", |
142 |
| - "projects = client.get_projects(where = Project.name == PROJECT_NAME)\n", |
143 |
| - "datasets = client.get_datasets(where = Dataset.name == DATASET_NAME)\n", |
144 |
| - "projects" |
| 213 | + "labels_paginated_collection = project.labels()\n", |
| 214 | + "labels_paginated_collection" |
145 | 215 | ]
|
146 | 216 | },
|
147 | 217 | {
|
148 | 218 | "cell_type": "code",
|
149 |
| - "execution_count": null, |
150 |
| - "id": "rational-marshall", |
| 219 | + "execution_count": 19, |
| 220 | + "id": "convinced-force", |
151 | 221 | "metadata": {},
|
152 |
| - "outputs": [], |
| 222 | + "outputs": [ |
| 223 | + { |
| 224 | + "data": { |
| 225 | + "text/plain": [ |
| 226 | + "<Label ID: cklw9cboq00063h68gqrsvi15>" |
| 227 | + ] |
| 228 | + }, |
| 229 | + "execution_count": 19, |
| 230 | + "metadata": {}, |
| 231 | + "output_type": "execute_result" |
| 232 | + } |
| 233 | + ], |
153 | 234 | "source": [
|
| 235 | + "#Iterate over them to get the items out.\n", |
| 236 | + "next(labels_paginated_collection)\n", |
| 237 | + "#Be careful not to call list(paginated_collection) on a large collection" |
| 238 | + ] |
| 239 | + }, |
| 240 | + { |
| 241 | + "cell_type": "markdown", |
| 242 | + "id": "widespread-startup", |
| 243 | + "metadata": {}, |
| 244 | + "source": [ |
| 245 | + "#### Query parameters\n", |
| 246 | + "* Query with the following conventions:\n", |
| 247 | + " * `DbObject.Field`" |
| 248 | + ] |
| 249 | + }, |
| 250 | + { |
| 251 | + "cell_type": "code", |
| 252 | + "execution_count": 28, |
| 253 | + "id": "cubic-joint", |
| 254 | + "metadata": {}, |
| 255 | + "outputs": [ |
| 256 | + { |
| 257 | + "name": "stdout", |
| 258 | + "output_type": "stream", |
| 259 | + "text": [ |
| 260 | + "<labelbox.pagination.PaginatedCollection object at 0x114255640>\n", |
| 261 | + "<Project {'auto_audit_number_of_labels': 3, 'auto_audit_percentage': 0.1, 'created_at': datetime.datetime(2021, 1, 20, 1, 2, 31, tzinfo=datetime.timezone.utc), 'description': 'new description field', 'last_activity_time': datetime.datetime(2021, 3, 19, 13, 46, 50, 920000, tzinfo=datetime.timezone.utc), 'name': 'Sample Project', 'setup_complete': datetime.datetime(2021, 1, 20, 1, 2, 31, 152000, tzinfo=datetime.timezone.utc), 'uid': 'ckk4q1viuc0w20704eh69u28h', 'updated_at': datetime.datetime(2021, 3, 19, 13, 46, 50, 920000, tzinfo=datetime.timezone.utc)}>\n", |
| 262 | + "None\n", |
| 263 | + "None\n" |
| 264 | + ] |
| 265 | + } |
| 266 | + ], |
| 267 | + "source": [ |
| 268 | + "datasets = client.get_datasets(where = Dataset.name == DATASET_NAME )\n", |
| 269 | + "\n", |
| 270 | + "projects = client.get_projects(where = (\n", |
| 271 | + " (Project.name == PROJECT_NAME)\n", |
| 272 | + " & \n", |
| 273 | + " (Project.description == \"new description field\")\n", |
| 274 | + "))\n", |
| 275 | + " \n", |
154 | 276 | "#The above two queries return PaginatedCollections because the filter parameters aren't guaranteed to be unique.\n",
|
155 |
| - "#This object is an iterable containing the query results\n", |
156 |
| - "next(projects)" |
| 277 | + "#So even if there is one element returned it is in a PaginatedCollection.\n", |
| 278 | + "print(projects)\n", |
| 279 | + "print(next(projects, None))\n", |
| 280 | + "print(next(projects, None))\n", |
| 281 | + "print(next(projects, None))\n", |
| 282 | + "#We can see there is only one." |
| 283 | + ] |
| 284 | + }, |
| 285 | + { |
| 286 | + "cell_type": "markdown", |
| 287 | + "id": "french-toner", |
| 288 | + "metadata": {}, |
| 289 | + "source": [ |
| 290 | + "#### Querying Limitations\n", |
| 291 | + "* The DbObject used for the query must be the same as the DbObject returned by the querying function. \n", |
| 292 | + "* e.g. the following is not valid since get_projects returns Projects but we are filtering on a Dataset\n", |
| 293 | + "> `>>> projects = client.get_projects(where = Dataset.name == \"dataset_name\")`\n" |
| 294 | + ] |
| 295 | + }, |
| 296 | + { |
| 297 | + "cell_type": "markdown", |
| 298 | + "id": "defensive-bidder", |
| 299 | + "metadata": {}, |
| 300 | + "source": [ |
| 301 | + "#### Relationship\n", |
| 302 | + "* This solves the above problem of querying by a relationship\n", |
| 303 | + "* You can find all relationships of a DB object in the source code\n", |
| 304 | + " * e.g. for a Project (https://github.com/Labelbox/labelbox-python/blob/develop/labelbox/schema/project.py)" |
157 | 305 | ]
|
158 | 306 | },
|
159 | 307 | {
|
160 | 308 | "cell_type": "code",
|
161 |
| - "execution_count": null, |
| 309 | + "execution_count": 31, |
162 | 310 | "id": "handmade-yugoslavia",
|
163 | 311 | "metadata": {},
|
164 |
| - "outputs": [], |
| 312 | + "outputs": [ |
| 313 | + { |
| 314 | + "data": { |
| 315 | + "text/plain": [ |
| 316 | + "[<Project ID: ckk4q1viuc0w107041siuht7p>]" |
| 317 | + ] |
| 318 | + }, |
| 319 | + "execution_count": 31, |
| 320 | + "metadata": {}, |
| 321 | + "output_type": "execute_result" |
| 322 | + } |
| 323 | + ], |
165 | 324 | "source": [
|
166 |
| - "# Filtering is only supported using the object you are querying for\n", |
167 |
| - "#eg. is not valid since get_project returns a Project but we are filtering on a Dataset\n", |
168 |
| - "projects = client.get_projects(where = Dataset.name == \"dataset_name\") #INVALID\n", |
169 |
| - "\n", |
170 |
| - "## Instead we should use relationships.\n", |
171 |
| - "#If we want all projects where there is a particular attached dataset we can do\n", |
172 |
| - "list(dataset.project())\n", |
173 |
| - "#Filtering Takeaways\n", |
174 |
| - "#1. Filtering only works on a single object type at a time\n", |
175 |
| - "#2. The where clause requires that we pass an object that is of the same type that is being retured by the query\n", |
176 |
| - "#3. If we want to filter based on a relationship, we should use the relationship attribute of objects" |
| 325 | + "#Dataset has a Relationship to a Project so we can use the following\n", |
| 326 | + "list(dataset.projects())\n", |
| 327 | + "#This will return all projects that are attached to this dataset" |
177 | 328 | ]
|
178 | 329 | },
|
179 | 330 | {
|
180 | 331 | "cell_type": "code",
|
181 |
| - "execution_count": 15, |
| 332 | + "execution_count": 32, |
182 | 333 | "id": "future-bargain",
|
183 | 334 | "metadata": {},
|
184 | 335 | "outputs": [
|
|
188 | 339 | "[<Dataset ID: cklv1qzlv1oqn0y9ne7b9gtpb>]"
|
189 | 340 | ]
|
190 | 341 | },
|
191 |
| - "execution_count": 15, |
| 342 | + "execution_count": 32, |
192 | 343 | "metadata": {},
|
193 | 344 | "output_type": "execute_result"
|
194 | 345 | }
|
195 | 346 | ],
|
196 | 347 | "source": [
|
197 |
| - "# If you are interested in the relationship between objects then \n", |
198 |
| - "#You can only filter on attributes of either a dataset or a project.\n", |
199 |
| - "#If you want all datasets that belongs to a particular project then you can do that with the following query.\n", |
200 | 348 | "sample_project_datasets = project.datasets()\n",
|
201 | 349 | "list(sample_project_datasets)"
|
202 | 350 | ]
|
203 | 351 | },
|
| 352 | + { |
| 353 | + "cell_type": "markdown", |
| 354 | + "id": "metric-speaker", |
| 355 | + "metadata": {}, |
| 356 | + "source": [ |
| 357 | + "#### Delete\n", |
| 358 | + "* Most DBObjects support deletion" |
| 359 | + ] |
| 360 | + }, |
| 361 | + { |
| 362 | + "cell_type": "code", |
| 363 | + "execution_count": 37, |
| 364 | + "id": "persistent-briefs", |
| 365 | + "metadata": {}, |
| 366 | + "outputs": [], |
| 367 | + "source": [ |
| 368 | + "#Eg.\n", |
| 369 | + "##### project.delete()\n", |
| 370 | + "##### dataset.delete()\n", |
| 371 | + "##### data_row.delete()" |
| 372 | + ] |
| 373 | + }, |
| 374 | + { |
| 375 | + "cell_type": "markdown", |
| 376 | + "id": "confused-peace", |
| 377 | + "metadata": {}, |
| 378 | + "source": [ |
| 379 | + "* We recommend using bulk operations where possible.\n", |
| 380 | + "* You can find specific deletion instructions in tutorials on each object." |
| 381 | + ] |
| 382 | + }, |
204 | 383 | {
|
205 | 384 | "cell_type": "code",
|
206 | 385 | "execution_count": null,
|
207 |
| - "id": "bacterial-yield", |
| 386 | + "id": "thirty-interval", |
208 | 387 | "metadata": {},
|
209 | 388 | "outputs": [],
|
210 | 389 | "source": []
|
|
0 commit comments