diff --git a/datumaro/datumaro/components/project.py b/datumaro/datumaro/components/project.py index bcc210e96469..4f23639b442a 100644 --- a/datumaro/datumaro/components/project.py +++ b/datumaro/datumaro/components/project.py @@ -367,6 +367,8 @@ def categories(self): def get(self, item_id, subset=None, path=None): if path: raise KeyError("Requested dataset item path is not found") + if subset is None: + subset = '' return self._subsets[subset].items[item_id] def put(self, item, item_id=None, subset=None, path=None): diff --git a/datumaro/datumaro/plugins/labelme_format.py b/datumaro/datumaro/plugins/labelme_format.py index 22a07d701211..41069da9dab9 100644 --- a/datumaro/datumaro/plugins/labelme_format.py +++ b/datumaro/datumaro/plugins/labelme_format.py @@ -59,7 +59,9 @@ def get_subset(self, name): def _parse(self, path): categories = { - AnnotationType.label: LabelCategories(attributes={'occluded'}) + AnnotationType.label: LabelCategories(attributes={ + 'occluded', 'username' + }) } items = [] @@ -136,10 +138,17 @@ def get_label_id(label): if deleted_elem is not None and deleted_elem.text: deleted = bool(int(deleted_elem.text)) + user = '' + poly_elem = obj_elem.find('polygon') segm_elem = obj_elem.find('segm') type_elem = obj_elem.find('type') # the only value is 'bounding_box' if poly_elem is not None: + user_elem = poly_elem.find('username') + if user_elem is not None and user_elem.text: + user = user_elem.text + attributes.append(('username', user)) + points = [] for point_elem in poly_elem.iter('pt'): x = float(point_elem.find('x').text) @@ -153,20 +162,25 @@ def get_label_id(label): ymin = min(points[1::2]) ymax = max(points[1::2]) ann_items.append(Bbox(xmin, ymin, xmax - xmin, ymax - ymin, - label=label, attributes=attributes, + label=label, attributes=attributes, id=obj_id, )) else: ann_items.append(Polygon(points, - label=label, attributes=attributes, + label=label, attributes=attributes, id=obj_id, )) elif segm_elem is not None: + user_elem = segm_elem.find('username') + if user_elem is not None and user_elem.text: + user = user_elem.text + attributes.append(('username', user)) + mask_path = osp.join(dataset_root, LabelMePath.MASKS_DIR, segm_elem.find('mask').text) if not osp.isfile(mask_path): raise Exception("Can't find mask at '%s'" % mask_path) mask = load_mask(mask_path) mask = np.any(mask, axis=2) - ann_items.append(Mask(image=mask, label=label, + ann_items.append(Mask(image=mask, label=label, id=obj_id, attributes=attributes)) if not deleted: @@ -368,7 +382,7 @@ def _save_item(self, item, subset_dir): ET.SubElement(obj_elem, 'deleted').text = '0' ET.SubElement(obj_elem, 'verified').text = '0' ET.SubElement(obj_elem, 'occluded').text = \ - 'yes' if ann.attributes.get('occluded') == True else 'no' + 'yes' if ann.attributes.pop('occluded', '') == True else 'no' ET.SubElement(obj_elem, 'date').text = '' ET.SubElement(obj_elem, 'id').text = str(obj_id) @@ -390,7 +404,8 @@ def _save_item(self, item, subset_dir): ET.SubElement(point_elem, 'x').text = '%.2f' % x ET.SubElement(point_elem, 'y').text = '%.2f' % y - ET.SubElement(poly_elem, 'username').text = '' + ET.SubElement(poly_elem, 'username').text = \ + str(ann.attributes.pop('username', '')) elif ann.type == AnnotationType.polygon: poly_elem = ET.SubElement(obj_elem, 'polygon') for x, y in zip(ann.points[::2], ann.points[1::2]): @@ -398,7 +413,8 @@ def _save_item(self, item, subset_dir): ET.SubElement(point_elem, 'x').text = '%.2f' % x ET.SubElement(point_elem, 'y').text = '%.2f' % y - ET.SubElement(poly_elem, 'username').text = '' + ET.SubElement(poly_elem, 'username').text = \ + str(ann.attributes.pop('username', '')) elif ann.type == AnnotationType.mask: mask_filename = '%s_mask_%s.png' % (item.id, obj_id) save_image(osp.join(subset_dir, LabelMePath.MASKS_DIR, @@ -416,13 +432,14 @@ def _save_item(self, item, subset_dir): '%.2f' % (bbox[0] + bbox[2]) ET.SubElement(box_elem, 'ymax').text = \ '%.2f' % (bbox[1] + bbox[3]) + + ET.SubElement(segm_elem, 'username').text = \ + str(ann.attributes.pop('username', '')) else: raise NotImplementedError("Unknown shape type '%s'" % ann.type) attrs = [] for k, v in ann.attributes.items(): - if k == 'occluded': - continue if isinstance(v, bool): attrs.append(k) else: diff --git a/datumaro/tests/assets/labelme_dataset/Masks/img1_mask_1.png b/datumaro/tests/assets/labelme_dataset/Masks/img1_mask_1.png new file mode 100644 index 000000000000..a37c5508f9b6 Binary files /dev/null and b/datumaro/tests/assets/labelme_dataset/Masks/img1_mask_1.png differ diff --git a/datumaro/tests/assets/labelme_dataset/Masks/img1_mask_5.png b/datumaro/tests/assets/labelme_dataset/Masks/img1_mask_5.png new file mode 100644 index 000000000000..c20e4871ae4c Binary files /dev/null and b/datumaro/tests/assets/labelme_dataset/Masks/img1_mask_5.png differ diff --git a/datumaro/tests/assets/labelme_dataset/Scribbles/img1_scribble_1.png b/datumaro/tests/assets/labelme_dataset/Scribbles/img1_scribble_1.png new file mode 100644 index 000000000000..6a582819f323 Binary files /dev/null and b/datumaro/tests/assets/labelme_dataset/Scribbles/img1_scribble_1.png differ diff --git a/datumaro/tests/assets/labelme_dataset/Scribbles/img1_scribble_5.png b/datumaro/tests/assets/labelme_dataset/Scribbles/img1_scribble_5.png new file mode 100644 index 000000000000..415e1f88b2ca Binary files /dev/null and b/datumaro/tests/assets/labelme_dataset/Scribbles/img1_scribble_5.png differ diff --git a/datumaro/tests/assets/labelme_dataset/img1.png b/datumaro/tests/assets/labelme_dataset/img1.png new file mode 100644 index 000000000000..26f7b564ab91 Binary files /dev/null and b/datumaro/tests/assets/labelme_dataset/img1.png differ diff --git a/datumaro/tests/assets/labelme_dataset/img1.xml b/datumaro/tests/assets/labelme_dataset/img1.xml new file mode 100644 index 000000000000..ff8ae1b46e3b --- /dev/null +++ b/datumaro/tests/assets/labelme_dataset/img1.xml @@ -0,0 +1 @@ +img1.pngexample_folderThe MIT-CSAIL database of objects and scenesLabelMe Webtoolwindow0025-May-2012 00:09:480admin433445344537433777102license plate00no27-Jul-2014 02:58:501brussell58666268img1_mask_1.png58666268img1_scribble_1.pngo100yesa13,415-Nov-2019 14:38:512anonymous3012422124261522181422122712q100nokj215-Nov-2019 14:39:003anonymous352143224028283131223225b100yeshg215-Nov-2019 14:39:094bounding_boxanonymous1319231923301330m100nod615-Nov-2019 14:39:305bounding_boxanonymous56147023img1_mask_5.png55137023img1_scribble_5.pnghg00nogfd lkj lkj hi515-Nov-2019 14:41:576anonymous642174247232623460276222 \ No newline at end of file diff --git a/datumaro/tests/test_labelme_format.py b/datumaro/tests/test_labelme_format.py index 2ec731e33ab3..35fa2ca848b4 100644 --- a/datumaro/tests/test_labelme_format.py +++ b/datumaro/tests/test_labelme_format.py @@ -1,11 +1,14 @@ import numpy as np +import os.path as osp from unittest import TestCase from datumaro.components.extractor import (Extractor, DatasetItem, AnnotationType, Bbox, Mask, Polygon, LabelCategories ) -from datumaro.plugins.labelme_format import LabelMeImporter, LabelMeConverter +from datumaro.components.project import Dataset +from datumaro.plugins.labelme_format import LabelMeExtractor, LabelMeImporter, \ + LabelMeConverter from datumaro.util.test_utils import TestDir, compare_datasets @@ -35,7 +38,8 @@ def __iter__(self): Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={ 'occluded': True }), - Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2), + Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, + attributes={ 'username': 'test' }), Bbox(1, 2, 3, 4, group=3), Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3, attributes={ 'occluded': True } @@ -58,20 +62,28 @@ def __iter__(self): DatasetItem(id=1, subset='train', image=np.ones((16, 16, 3)), annotations=[ - Bbox(0, 4, 4, 8, label=0, group=2, attributes={ - 'occluded': False - }), - Polygon([0, 4, 4, 4, 5, 6], label=1, attributes={ - 'occluded': True - }), + Bbox(0, 4, 4, 8, label=0, group=2, id=0, + attributes={ + 'occluded': False, 'username': '', + } + ), + Polygon([0, 4, 4, 4, 5, 6], label=1, id=1, + attributes={ + 'occluded': True, 'username': '', + } + ), Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, - attributes={ 'occluded': False } + id=2, attributes={ + 'occluded': False, 'username': 'test' + } ), - Bbox(1, 2, 3, 4, group=1, attributes={ - 'occluded': False + Bbox(1, 2, 3, 4, group=1, id=3, attributes={ + 'occluded': False, 'username': '', }), Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1, - attributes={ 'occluded': True } + id=4, attributes={ + 'occluded': True, 'username': '' + } ), ] ), @@ -90,31 +102,113 @@ def categories(self): SrcExtractor(), LabelMeConverter(save_images=True), test_dir, target_dataset=DstExtractor()) -class LabelMeImporterTest(TestCase): - def test_can_detect(self): - class TestExtractor(Extractor): + +DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset') + +class LabelMeExtractorTest(TestCase): + def test_can_load(self): + class DstExtractor(Extractor): def __iter__(self): + img1 = np.ones((77, 102, 3)) * 255 + img1[6:32, 7:41] = 0 + + mask1 = np.zeros((77, 102), dtype=int) + mask1[67:69, 58:63] = 1 + + mask2 = np.zeros((77, 102), dtype=int) + mask2[13:25, 54:71] = [ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ] + return iter([ - DatasetItem(id=1, subset='train', - image=np.ones((16, 16, 3)), + DatasetItem(id='img1', image=img1, annotations=[ - Bbox(0, 4, 4, 8, label=2), + Polygon([43, 34, 45, 34, 45, 37, 43, 37], + label=0, id=0, + attributes={ + 'occluded': False, + 'username': 'admin' + } + ), + Mask(mask1, label=1, id=1, + attributes={ + 'occluded': False, + 'username': 'brussell' + } + ), + Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12], + label=2, group=2, id=2, + attributes={ + 'a1': '1', + 'occluded': True, + 'username': 'anonymous' + } + ), + Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25], + label=3, group=2, id=3, + attributes={ + 'kj': '1', + 'occluded': False, + 'username': 'anonymous' + } + ), + Bbox(13, 19, 10, 11, label=4, group=2, id=4, + attributes={ + 'hg': '1', + 'occluded': True, + 'username': 'anonymous' + } + ), + Mask(mask2, label=5, group=1, id=5, + attributes={ + 'd': '1', + 'occluded': False, + 'username': 'anonymous' + } + ), + Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22], + label=6, group=1, id=6, + attributes={ + 'gfd lkj lkj hi': '1', + 'occluded': False, + 'username': 'anonymous' + } + ), ] ), ]) def categories(self): label_cat = LabelCategories() - for label in range(10): - label_cat.add('label_' + str(label)) + label_cat.add('window') + label_cat.add('license plate') + label_cat.add('o1') + label_cat.add('q1') + label_cat.add('b1') + label_cat.add('m1') + label_cat.add('hg') return { AnnotationType.label: label_cat, } - def generate_dummy(path): - LabelMeConverter()(TestExtractor(), save_dir=path) + parsed = Dataset.from_extractors(LabelMeExtractor(DUMMY_DATASET_DIR)) + compare_datasets(self, expected=DstExtractor(), actual=parsed) - with TestDir() as test_dir: - generate_dummy(test_dir) +class LabelMeImporterTest(TestCase): + def test_can_detect(self): + self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR)) - self.assertTrue(LabelMeImporter.detect(test_dir)) \ No newline at end of file + def test_can_import(self): + parsed = LabelMeImporter()(DUMMY_DATASET_DIR).make_dataset() + self.assertEqual(1, len(parsed))