Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dataset export facility #813

Merged
merged 44 commits into from
Nov 22, 2019
Merged
Changes from 1 commit
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
d3314c0
Add datumaro django application
zhiltsov-max Oct 29, 2019
9d79a5f
Add cvat task datumaro bindings
zhiltsov-max Nov 1, 2019
70fd1f2
Add REST api for task export
zhiltsov-max Nov 1, 2019
de9e2de
Add scheduler service
zhiltsov-max Nov 1, 2019
ef8761a
prod django version compatibility
zhiltsov-max Nov 1, 2019
7d2b926
Add scheduler start to debugger
zhiltsov-max Nov 1, 2019
e19d490
Add Datumaro sources
zhiltsov-max Nov 1, 2019
0faace7
Fix dockerfile
zhiltsov-max Nov 1, 2019
8a523a8
Fix module placement
zhiltsov-max Nov 5, 2019
9c4ea0e
Fix custom modules import
zhiltsov-max Nov 5, 2019
5a62e30
Add remote cvat image extractor
zhiltsov-max Nov 5, 2019
a3a2a8d
Add generic VOC converter
zhiltsov-max Nov 5, 2019
15d19a9
Rename app
zhiltsov-max Nov 7, 2019
352a4d2
Fix dataset conversions
zhiltsov-max Nov 7, 2019
9e2eb39
Update dockerfile
zhiltsov-max Nov 7, 2019
2b60ade
Codacy fixes
zhiltsov-max Nov 8, 2019
8062319
Move datumaro to the root dir
zhiltsov-max Nov 11, 2019
b72cab1
Fix attributes conversion
zhiltsov-max Nov 11, 2019
e1d60c5
Fix dockerfile
zhiltsov-max Nov 12, 2019
490e9ee
Add default subset name
zhiltsov-max Nov 13, 2019
7f68974
Codacy fixes
zhiltsov-max Nov 13, 2019
fae5449
Codacy, requirements, put datumaro in dataset archive
zhiltsov-max Nov 13, 2019
d2f9afb
Codacy
zhiltsov-max Nov 13, 2019
bdf72eb
Codacy
zhiltsov-max Nov 13, 2019
3ba5ed0
Codacy
zhiltsov-max Nov 14, 2019
333e967
Move redis scheduler to cvat
zhiltsov-max Nov 14, 2019
bbdd97b
Building enhancements
zhiltsov-max Nov 15, 2019
bdcc71e
Codacy
zhiltsov-max Nov 15, 2019
27e3fec
Include datumaro tests in CI
zhiltsov-max Nov 15, 2019
bdb5b96
Fix voc test
zhiltsov-max Nov 15, 2019
4af1c17
Codacy
zhiltsov-max Nov 15, 2019
a0cd75c
Fix codacy issues
Nov 15, 2019
8f638f4
Codacy
zhiltsov-max Nov 20, 2019
9d7d2d7
Merge branch 'zm/datumaro-integration' of https://github.com/opencv/c…
zhiltsov-max Nov 20, 2019
d46c3b6
Return pretty ctor interface
zhiltsov-max Nov 20, 2019
6766f8d
Codacy
zhiltsov-max Nov 20, 2019
098cf05
Add documentation
zhiltsov-max Nov 21, 2019
6cb4025
Make matplotlib optional
zhiltsov-max Nov 21, 2019
458c550
Codacy
zhiltsov-max Nov 21, 2019
ac50d9c
Codacy
zhiltsov-max Nov 21, 2019
afad234
Codacy
zhiltsov-max Nov 21, 2019
c8c15d1
Move datumaro directory info from model
zhiltsov-max Nov 22, 2019
852379d
Merge branch 'develop' into zm/datumaro-integration
Nov 22, 2019
9aad324
Updated CHANGELOG.md
Nov 22, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Codacy
zhiltsov-max committed Nov 20, 2019
commit 8f638f402c4b3a2bb0965cb6f31b3d49c2b3405f
8 changes: 6 additions & 2 deletions cvat/apps/dataset_manager/bindings.py
Original file line number Diff line number Diff line change
@@ -32,6 +32,8 @@ def __init__(self, url):
items = OrderedDict(items)
self._items = items

self._subsets = None

def __iter__(self):
for item in self._items.values():
yield item
@@ -40,7 +42,7 @@ def __len__(self):
return len(self._items)

def subsets(self):
return None
return self._subsets

def get(self, item_id, subset=None, path=None):
if path or subset:
@@ -75,6 +77,8 @@ def __init__(self, url, db_task, user):
dm_annotations = sorted(dm_annotations, key=lambda e: e[0])
self._items = OrderedDict(dm_annotations)

self._subsets = None

def __iter__(self):
for item in self._items.values():
yield item
@@ -83,7 +87,7 @@ def __len__(self):
return len(self._items)

def subsets(self):
return None
return self._subsets

def get(self, item_id, subset=None, path=None):
if path or subset:
7 changes: 4 additions & 3 deletions datumaro/datumaro/cli/project/diff.py
Original file line number Diff line number Diff line change
@@ -134,7 +134,8 @@ def update_bbox_confusion(self, bbox_diff):
for b_bbox in b_unmatched:
self.bbox_confusion_matrix[(self._UNMATCHED_LABEL, b_bbox.label)] += 1

def draw_text_with_background(self, frame, text, origin,
@classmethod
def draw_text_with_background(cls, frame, text, origin,
font=cv2.FONT_HERSHEY_SIMPLEX, scale=1.0,
color=(0, 0, 0), thickness=1, bgcolor=(1, 1, 1)):
text_size, baseline = cv2.getTextSize(text, font, scale, thickness)
@@ -178,7 +179,7 @@ def get_label_diff_file(self):
return self.label_diff_writer

def save_item_label_diff(self, item_a, item_b, diff):
matches, a_unmatched, b_unmatched = diff
_, a_unmatched, b_unmatched = diff

if 0 < len(a_unmatched) + len(b_unmatched):
if self.output_format is Format.simple:
@@ -198,7 +199,7 @@ def save_item_label_diff(self, item_a, item_b, diff):
'<%s\n' % self.get_label(b_label))

def save_item_bbox_diff(self, item_a, item_b, diff):
matches, mispred, a_unmatched, b_unmatched = diff
_, mispred, a_unmatched, b_unmatched = diff

if 0 < len(a_unmatched) + len(b_unmatched) + len(mispred):
img_a = item_a.image.copy()
2 changes: 2 additions & 0 deletions datumaro/datumaro/components/algorithms/rise.py
Original file line number Diff line number Diff line change
@@ -3,6 +3,8 @@
#
# SPDX-License-Identifier: MIT

# pylint: disable=unused-variable

import cv2
import numpy as np
from math import ceil
2 changes: 2 additions & 0 deletions datumaro/datumaro/components/comparator.py
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@ def __init__(self,
def iou(box_a, box_b):
return box_a.iou(box_b)

# pylint: disable=no-self-use
def compare_dataset_labels(self, extractor_a, extractor_b):
a_label_cat = extractor_a.categories().get(AnnotationType.label)
b_label_cat = extractor_b.categories().get(AnnotationType.label)
@@ -34,6 +35,7 @@ def compare_dataset_labels(self, extractor_a, extractor_b):
if a_label != b_label:
mismatches.append((a_label, b_label))
return mismatches
# pylint: enable=no-self-use

def compare_item_labels(self, item_a, item_b):
conf_threshold = self.conf_threshold
2 changes: 2 additions & 0 deletions datumaro/datumaro/components/converters/datumaro.py
Original file line number Diff line number Diff line change
@@ -3,6 +3,8 @@
#
# SPDX-License-Identifier: MIT

# pylint: disable=no-self-use

import cv2
import json
import os
5 changes: 1 addition & 4 deletions datumaro/datumaro/components/converters/ms_coco.py
Original file line number Diff line number Diff line change
@@ -13,10 +13,7 @@

from datumaro.components.converter import Converter
from datumaro.components.extractor import (
DEFAULT_SUBSET_NAME,
AnnotationType, Annotation,
LabelObject, MaskObject, PointsObject, PolygonObject,
PolyLineObject, BboxObject, CaptionObject,
DEFAULT_SUBSET_NAME, AnnotationType, PointsObject, BboxObject
)
from datumaro.components.formats.ms_coco import CocoAnnotationType, CocoPath
from datumaro.util import find
5 changes: 1 addition & 4 deletions datumaro/datumaro/components/converters/voc.py
Original file line number Diff line number Diff line change
@@ -10,10 +10,7 @@
from lxml import etree as ET

from datumaro.components.converter import Converter
from datumaro.components.extractor import (
DEFAULT_SUBSET_NAME,
AnnotationType, LabelObject, MaskObject, BboxObject, CaptionObject,
)
from datumaro.components.extractor import DEFAULT_SUBSET_NAME, AnnotationType
from datumaro.components.formats.voc import VocLabel, VocAction, \
VocBodyPart, VocPose, VocTask, VocPath, VocColormap, VocInstColormap
from datumaro.util import find
2 changes: 1 addition & 1 deletion datumaro/datumaro/components/extractors/datumaro.py
Original file line number Diff line number Diff line change
@@ -49,7 +49,7 @@ def __init__(self, path):
parsed_anns = None
subsets = {}
for subset_name, subset_path in found_subsets.items():
if subset_name == DatumaroPath.DEFAULT_SUBSET:
if subset_name == DEFAULT_SUBSET_NAME:
subset_name = None
subset = self.Subset(subset_name, self)
with open(subset_path, 'r') as f:
4 changes: 3 additions & 1 deletion datumaro/datumaro/components/extractors/ms_coco.py
Original file line number Diff line number Diff line change
@@ -13,7 +13,7 @@
AnnotationType,
LabelObject, MaskObject, PointsObject, PolygonObject,
BboxObject, CaptionObject,
LabelCategories, MaskCategories, PointsCategories
LabelCategories, PointsCategories
)
from datumaro.components.formats.ms_coco import CocoAnnotationType, CocoPath
from datumaro.util.image import lazy_image
@@ -117,6 +117,7 @@ def _load_categories(self):
self._load_person_kp_categories(person_kp_loader)
self._categories[AnnotationType.points] = person_kp_categories

# pylint: disable=no-self-use
def _load_label_categories(self, loader):
catIds = loader.getCatIds()
cats = loader.loadCats(catIds)
@@ -128,6 +129,7 @@ def _load_label_categories(self, loader):
categories.add(name=cat['name'], parent=cat['supercategory'])

return categories, label_map
# pylint: enable=no-self-use

def _load_person_kp_categories(self, loader):
catIds = loader.getCatIds()
6 changes: 4 additions & 2 deletions datumaro/datumaro/components/extractors/voc.py
Original file line number Diff line number Diff line change
@@ -22,6 +22,7 @@

_inverse_inst_colormap = invert_colormap(VocInstColormap)

# pylint: disable=pointless-statement
def _make_voc_categories():
categories = {}

@@ -41,6 +42,7 @@ def label_id(class_index):
categories[AnnotationType.mask] = mask_categories

return categories
# pylint: enable=pointless-statement

class VocExtractor(Extractor):
class Subset(Extractor):
@@ -558,7 +560,7 @@ def __init__(self, path):
ann_parts = filter(None, ann_dir.strip().split('_'))
if len(ann_parts) != 4:
continue
comp, subset_name, mark = ann_parts
_, subset_name, mark = ann_parts
if mark not in ['cls', 'inst']:
continue

@@ -617,7 +619,7 @@ def __init__(self, path):
ann_parts = filter(None, ann_file.strip().split('_'))
if len(ann_parts) != 4:
continue
comp, mark, subset_name, label = ann_parts
comp, mark, subset_name, _ = ann_parts
if mark != task_desc['mark']:
continue

2 changes: 0 additions & 2 deletions datumaro/datumaro/components/formats/datumaro.py
Original file line number Diff line number Diff line change
@@ -8,7 +8,5 @@ class DatumaroPath:
ANNOTATIONS_DIR = 'annotations'
MASKS_DIR = 'masks'

DEFAULT_SUBSET = '_default'

IMAGE_EXT = '.jpg'
MASK_EXT = '.png'
2 changes: 2 additions & 0 deletions datumaro/datumaro/components/launcher.py
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
from datumaro.components.extractor import DatasetItem, Extractor


# pylint: disable=no-self-use
class Launcher:
def __init__(self):
pass
@@ -20,6 +21,7 @@ def preferred_input_size(self):

def get_categories(self):
return None
# pylint: enable=no-self-use

class InferenceWrapper(Extractor):
class ItemWrapper(DatasetItem):
6 changes: 4 additions & 2 deletions datumaro/datumaro/components/launchers/openvino.py
Original file line number Diff line number Diff line change
@@ -3,6 +3,8 @@
#
# SPDX-License-Identifier: MIT

# pylint: disable=exec-used

import cv2
import os
import os.path as osp
@@ -25,11 +27,11 @@ def __init__(self, path):

process_outputs = context['process_outputs']
assert callable(process_outputs)
self.process_outputs = process_outputs
self.__dict__['process_outputs'] = process_outputs

get_categories = context.get('get_categories')
assert callable(get_categories) or get_categories is None
self.get_categories = get_categories
self.__dict__['get_categories'] = get_categories

@staticmethod
def get_categories():
4 changes: 3 additions & 1 deletion datumaro/datumaro/components/project.py
Original file line number Diff line number Diff line change
@@ -705,6 +705,8 @@ def local_model_dir(self, model_name):
def local_source_dir(self, source_name):
return osp.join(self.config.sources_dir, source_name)

# pylint: disable=function-redefined
def load_project_as_dataset(url):
# implement the function declared above
return Project.load(url).make_dataset()
return Project.load(url).make_dataset()
# pylint: enable=function-redefined
2 changes: 2 additions & 0 deletions datumaro/datumaro/util/test_utils.py
Original file line number Diff line number Diff line change
@@ -21,11 +21,13 @@ def __init__(self, path, is_dir=False, ignore_errors=False):
def __enter__(self):
return self

# pylint: disable=redefined-builtin
def __exit__(self, type=None, value=None, traceback=None):
if self.is_dir:
shutil.rmtree(self.path, ignore_errors=self.ignore_errors)
else:
os.remove(self.path)
# pylint: enable=redefined-builtin

class TestDir(FileRemover):
def __init__(self, path=None, ignore_errors=False):