Skip to content

Commit

Permalink
from_coco_dict_or_path, added threading for processing coco_dict["ima…
Browse files Browse the repository at this point in the history
…ges"] (#827)

Co-authored-by: fatih <[email protected]>
  • Loading branch information
ilkermanap and fcakyon authored Feb 12, 2023
1 parent 4992e2e commit f598518
Showing 1 changed file with 91 additions and 25 deletions.
116 changes: 91 additions & 25 deletions sahi/utils/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
import copy
import logging
import os
import threading
from collections import Counter, defaultdict
from dataclasses import dataclass
from multiprocessing import Pool
from pathlib import Path
from threading import Lock, Thread
from typing import Dict, List, Optional, Set, Union

import numpy as np
Expand Down Expand Up @@ -992,6 +994,8 @@ def from_coco_dict_or_path(
remapping_dict: Optional[Dict] = None,
ignore_negative_samples: bool = False,
clip_bboxes_to_img_dims: bool = False,
use_threads: bool = False,
num_threads: int = 10,
):
"""
Creates coco object from COCO formatted dict or COCO dataset file path.
Expand All @@ -1008,6 +1012,10 @@ def from_coco_dict_or_path(
If True ignores images without annotations in all operations.
clip_bboxes_to_img_dims: bool = False
Limits bounding boxes to image dimensions.
use_threads: bool = False
Use threads when processing the json image list, defaults to False
num_threads: int = 10
Number of chunks the image list is split into, one thread per chunk; only used when use_threads is True, defaults to 10
Properties:
images: list of CocoImage
Expand All @@ -1030,6 +1038,8 @@ def from_coco_dict_or_path(
else:
coco_dict = coco_dict_or_path

dict_size = len(coco_dict["images"])

# arrange image id to annotation id mapping
coco.add_categories_from_coco_category_list(coco_dict["categories"])
image_id_to_annotation_list = get_imageid2annotationlist_mapping(coco_dict)
Expand All @@ -1038,33 +1048,89 @@ def from_coco_dict_or_path(
# https://github.com/obss/sahi/issues/98
image_id_set: Set = set()

for coco_image_dict in tqdm(coco_dict["images"], "Loading coco annotations"):
coco_image = CocoImage.from_coco_image_dict(coco_image_dict)
image_id = coco_image_dict["id"]
# https://github.com/obss/sahi/issues/98
if image_id in image_id_set:
print(f"duplicate image_id: {image_id}, will be ignored.")
continue
else:
image_id_set.add(image_id)
# select annotations of the image
annotation_list = image_id_to_annotation_list[image_id]
for coco_annotation_dict in annotation_list:
# apply category remapping if remapping_dict is provided
if coco.remapping_dict is not None:
# apply category remapping (id:id)
category_id = coco.remapping_dict[coco_annotation_dict["category_id"]]
# update category id
coco_annotation_dict["category_id"] = category_id
lock = Lock()

def fill_image_id_set(start, finish, image_list, _image_id_set, _image_id_to_annotation_list, _coco, lock):
    """
    Builds a CocoImage for every entry of image_list[start:finish] and adds it to _coco.

    Intended to be run by several threads at once, each on its own slice.

    Args:
        start: index of the first image dict to process
        finish: index one past the last image dict to process
        image_list: list of coco formatted image dicts
        _image_id_set: set of image ids that have already been claimed, shared between workers
        _image_id_to_annotation_list: mapping from image id to the list of its annotation dicts
        _coco: object that receives the finished images, its remapping_dict is also read
        lock: lock guarding _image_id_set
    """
    for coco_image_dict in tqdm(
        image_list[start:finish], f"Loading coco annotations between {start} and {finish}"
    ):
        coco_image = CocoImage.from_coco_image_dict(coco_image_dict)
        image_id = coco_image_dict["id"]
        # https://github.com/obss/sahi/issues/98
        # the membership test and the insertion have to happen under the same lock,
        # otherwise two workers can both see the id as unseen and both keep the image
        with lock:
            is_duplicate = image_id in _image_id_set
            if not is_duplicate:
                _image_id_set.add(image_id)
        if is_duplicate:
            print(f"duplicate image_id: {image_id}, will be ignored.")
            continue

        # select annotations of the image
        annotation_list = _image_id_to_annotation_list[image_id]
        for coco_annotation_dict in annotation_list:
            # apply category remapping if remapping_dict is provided
            if _coco.remapping_dict is not None:
                # apply category remapping (id:id)
                category_id = _coco.remapping_dict[coco_annotation_dict["category_id"]]
                # update category id (this modifies the annotation dict in place)
                coco_annotation_dict["category_id"] = category_id
            else:
                category_id = coco_annotation_dict["category_id"]
            # get category name (id:name), category_mapping comes from the enclosing scope
            category_name = category_mapping[category_id]
            coco_annotation = CocoAnnotation.from_coco_annotation_dict(
                category_name=category_name, annotation_dict=coco_annotation_dict
            )
            coco_image.add_annotation(coco_annotation)
        # NOTE(review): add_image is called without holding the lock, confirm it is safe
        # to call from several threads at once
        _coco.add_image(coco_image)

# ceil division: slice bounds have to be integers (a float bound raises TypeError),
# and rounding up makes sure the last images are not left out when dict_size is not
# a multiple of num_threads
chunk_size = -(-dict_size // num_threads)

if use_threads is True:
    workers = []
    for i in range(num_threads):
        start = i * chunk_size
        if start >= dict_size:
            # nothing left to hand out, do not start threads for empty chunks
            break
        finish = min(start + chunk_size, dict_size)
        t = Thread(
            target=fill_image_id_set,
            args=(start, finish, coco_dict["images"], image_id_set, image_id_to_annotation_list, coco, lock),
        )
        t.start()
        workers.append(t)

    # wait only for the threads started here; joining everything returned by
    # threading.enumerate() would also block on unrelated threads of the caller
    for t in workers:
        t.join()

else:
for coco_image_dict in tqdm(coco_dict["images"], "Loading coco annotations"):
coco_image = CocoImage.from_coco_image_dict(coco_image_dict)
image_id = coco_image_dict["id"]
# https://github.com/obss/sahi/issues/98
if image_id in image_id_set:
print(f"duplicate image_id: {image_id}, will be ignored.")
continue
else:
image_id_set.add(image_id)
# select annotations of the image
annotation_list = image_id_to_annotation_list[image_id]
for coco_annotation_dict in annotation_list:
# apply category remapping if remapping_dict is provided
if coco.remapping_dict is not None:
# apply category remapping (id:id)
category_id = coco.remapping_dict[coco_annotation_dict["category_id"]]
# update category id
coco_annotation_dict["category_id"] = category_id
else:
category_id = coco_annotation_dict["category_id"]
# get category name (id:name)
category_name = category_mapping[category_id]
coco_annotation = CocoAnnotation.from_coco_annotation_dict(
category_name=category_name, annotation_dict=coco_annotation_dict
)
coco_image.add_annotation(coco_annotation)
coco.add_image(coco_image)

if clip_bboxes_to_img_dims:
coco = coco.get_coco_with_clipped_bboxes()
Expand Down

0 comments on commit f598518

Please sign in to comment.