Skip to content

Commit 14cf3ce

Browse files
committed
[DEV] Recursive TXT and YOLO parsing
1 parent 249af87 commit 14cf3ce

File tree

1 file changed

+88
-47
lines changed

1 file changed

+88
-47
lines changed

src/globox/annotationset.py

Lines changed: 88 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
from .atomic import open_atomic
2424
from .boundingbox import BoundingBox, BoxFormat
2525
from .errors import ParsingError, UnknownImageFormat
26-
from .file_utils import PathLike, glob
27-
from .image_utils import IMAGE_EXTENSIONS, get_image_size
26+
from .file_utils import PathLike
27+
from .image_utils import IMAGE_EXTENSIONS, get_image_size, glob_images
2828
from .thread_utils import thread_map
2929

3030
T = TypeVar("T")
@@ -216,19 +216,61 @@ def from_iter(
216216
def from_folder(
217217
folder: PathLike,
218218
*,
219-
extension: str,
220219
parser: Callable[[Path], Annotation],
221-
recursive=False,
220+
extension: Optional[str] = None,
221+
is_ann_file: Optional[Callable[[Path], bool]] = None,
222+
recursive: bool = False,
222223
verbose: bool = False,
223224
) -> "AnnotationSet":
225+
"""Create an AnnotationSet from a folder of annotation files.
226+
227+
Parameters
228+
----------
229+
folder : PathLike
230+
The path to the folder containing the annotation files.
231+
parser : Callable[[Path], Annotation]
232+
A function that takes a file path and returns an Annotation object.
233+
extension : Optional[str], optional
234+
The file extension of the annotation files (e.g., ".json"). If provided, only files with this extension will be considered. If None, `is_ann_file` must be provided.
235+
is_ann_file : Optional[Callable[[Path], bool]], optional
236+
A function that takes a file path and returns True if the file is an annotation file. It is used to filter files only when `extension` is None; when `extension` is provided, the extension check takes precedence and this function is ignored. If None, `extension` must be provided.
237+
recursive : bool, optional
238+
Whether to search for annotation files recursively in subdirectories, by default False
239+
verbose : bool, optional
240+
Whether to print tqdm progress output during parsing, by default False
241+
242+
Returns
243+
-------
244+
AnnotationSet
245+
A set of annotations parsed from the files in the folder.
246+
247+
Raises
248+
------
249+
ValueError
250+
If the folder is not a directory or does not exist, if both `extension` and `is_ann_file` are None, or if `extension` is provided but does not start with a dot.
251+
"""
224252
folder = Path(folder).expanduser().resolve()
225253

226-
assert (
227-
folder.is_dir()
228-
), f"Filepath '{folder}' is not a folder or does not exist."
254+
if not folder.is_dir():
255+
raise ValueError(f"Path '{folder}' is not a folder or does not exist.")
256+
257+
if extension is None and is_ann_file is None:
258+
raise ValueError("Either `extension` or `is_ann_file` must be provided.")
259+
260+
if extension is not None:
261+
if not extension.startswith("."):
262+
raise ValueError("`extension` must start with a dot.")
263+
264+
def is_ann_file(p: Path) -> bool:
265+
return p.suffix.lower() == extension
266+
267+
annotation_files = [
268+
f
269+
for f in (folder.glob("**/*") if recursive else folder.glob("*"))
270+
if is_ann_file(f) and not f.name.startswith(".")
271+
]
229272

230-
files = list(glob(folder, extension, recursive=recursive))
231-
return AnnotationSet.from_iter(parser, files, verbose=verbose)
273+
return AnnotationSet.from_iter(parser, annotation_files, verbose=verbose)
232274

233275
@staticmethod
234276
def from_txt(
@@ -241,69 +283,58 @@ def from_txt(
241283
image_extension: str = ".jpg",
242284
separator: Optional[str] = None,
243285
conf_last: bool = False,
286+
recursive: bool = False,
244287
verbose: bool = False,
245288
) -> "AnnotationSet":
246-
"""This method won't try to retreive the image sizes by default. Specify `image_folder` if you need them.
247-
`image_folder` is required when `relative` is True."""
248-
# TODO: Add error handling
249-
250289
folder = Path(folder).expanduser().resolve()
251290

252-
assert folder.is_dir()
253-
assert image_extension.startswith(".")
254-
255-
if relative:
256-
assert (
257-
image_folder is not None
258-
), "When `relative` is set to True, `image_folder` must be provided to read image sizes."
291+
if relative and not image_folder:
292+
raise ParsingError(
293+
"When `relative` is set to True, `image_folder` must be provided to read image sizes."
294+
)
259295

260296
if image_folder is not None:
261297
image_folder = Path(image_folder).expanduser().resolve()
262-
assert image_folder.is_dir()
263298

264-
def _get_annotation(file: Path) -> Annotation:
265-
if image_folder is not None:
266-
image_path: Path | None = None
299+
if not image_folder.is_dir():
300+
raise ParsingError("Invalid `image_folder`: not a directory.")
267301

268-
for image_ext in IMAGE_EXTENSIONS:
269-
image_id = file.with_suffix(image_ext).name
270-
path = image_folder / image_id # type: ignore
302+
if not image_extension.startswith("."):
303+
raise ParsingError("`image_extension` must start with a dot.")
271304

272-
if path.is_file():
273-
image_path = path
274-
break
305+
img_paths = {
306+
p.stem: p
307+
for p in glob_images(image_folder, recursive=recursive)
308+
if p.suffix == image_extension
309+
}
275310

276-
assert (
277-
image_path is not None
278-
), f"Image {file.name} does not exist, unable to read the image size."
311+
def get_ann_img_size(p: Path) -> tuple[int, int] | None:
312+
img_path = img_paths[p.stem]
313+
return get_image_size(img_path)
314+
else:
279315

280-
image_id = image_path.name
316+
def get_ann_img_size(p: Path) -> tuple[int, int] | None:
317+
return None
281318

282-
try:
283-
image_size = get_image_size(image_path)
284-
except UnknownImageFormat:
285-
raise ParsingError(
286-
f"Unable to read image size of file {image_path}. "
287-
f"The file may be corrupted or the file format not supported."
288-
)
289-
else:
290-
image_size = None
291-
image_id = file.with_suffix(image_extension).name
319+
def parse_annotation(p: Path) -> Annotation:
320+
image_size = get_ann_img_size(p)
321+
image_id = p.with_suffix(image_extension).name
292322

293323
return Annotation.from_txt(
294-
file_path=file,
324+
p,
295325
image_id=image_id,
326+
image_size=image_size,
296327
box_format=box_format,
297328
relative=relative,
298-
image_size=image_size,
299329
separator=separator,
300330
conf_last=conf_last,
301331
)
302332

303333
return AnnotationSet.from_folder(
304334
folder,
335+
parser=parse_annotation,
305336
extension=file_extension,
306-
parser=_get_annotation,
337+
recursive=recursive,
307338
verbose=verbose,
308339
)
309340

@@ -314,6 +345,7 @@ def _from_yolo(
314345
image_folder: PathLike,
315346
image_extension=".jpg",
316347
conf_last: bool = False,
348+
recursive: bool = False,
317349
verbose: bool = False,
318350
) -> "AnnotationSet":
319351
return AnnotationSet.from_txt(
@@ -324,6 +356,7 @@ def _from_yolo(
324356
image_extension=image_extension,
325357
separator=None,
326358
conf_last=conf_last,
359+
recursive=recursive,
327360
verbose=verbose,
328361
)
329362

@@ -334,6 +367,7 @@ def from_yolo(
334367
image_folder: PathLike,
335368
image_extension=".jpg",
336369
conf_last: bool = False,
370+
recursive: bool = False,
337371
verbose: bool = False,
338372
) -> "AnnotationSet":
339373
warn(
@@ -347,6 +381,7 @@ def from_yolo(
347381
image_folder=image_folder,
348382
image_extension=image_extension,
349383
conf_last=conf_last,
384+
recursive=recursive,
350385
verbose=verbose,
351386
)
352387

@@ -356,13 +391,15 @@ def from_yolo_darknet(
356391
*,
357392
image_folder: PathLike,
358393
image_extension=".jpg",
394+
recursive: bool = False,
359395
verbose: bool = False,
360396
) -> "AnnotationSet":
361397
return AnnotationSet._from_yolo(
362398
folder,
363399
image_folder=image_folder,
364400
image_extension=image_extension,
365401
conf_last=False,
402+
recursive=recursive,
366403
verbose=verbose,
367404
)
368405

@@ -372,13 +409,15 @@ def from_yolo_v5(
372409
*,
373410
image_folder: PathLike,
374411
image_extension=".jpg",
412+
recursive: bool = False,
375413
verbose: bool = False,
376414
) -> "AnnotationSet":
377415
return AnnotationSet._from_yolo(
378416
folder,
379417
image_folder=image_folder,
380418
image_extension=image_extension,
381419
conf_last=True,
420+
recursive=recursive,
382421
verbose=verbose,
383422
)
384423

@@ -388,12 +427,14 @@ def from_yolo_v7(
388427
*,
389428
image_folder: PathLike,
390429
image_extension=".jpg",
430+
recursive: bool = False,
391431
verbose: bool = False,
392432
) -> "AnnotationSet":
393433
return AnnotationSet.from_yolo_v5(
394434
folder,
395435
image_folder=image_folder,
396436
image_extension=image_extension,
437+
recursive=recursive,
397438
verbose=verbose,
398439
)
399440

0 commit comments

Comments
 (0)