diff --git a/CHANGELOG.md b/CHANGELOG.md index 95b2667ced..412ae53e78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 () - Add intermediate skill docs for filter () +- Add VocInstanceSegmentationImporter and VocInstanceSegmentationExporter + () ### Enhancements - Use autosummary for fully-automatic Python module docs generation @@ -29,6 +31,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 () - Fix Synthia-Rand data format () +- Fix `person_layout` categories and `action_classification` attributes in imported Pascal-VOC dataset + () ## 04/05/2023 - Release 1.2.1 ### Bug fixes diff --git a/datumaro/plugins/data_formats/voc/base.py b/datumaro/plugins/data_formats/voc/base.py index faf2728c5e..9653463f26 100644 --- a/datumaro/plugins/data_formats/voc/base.py +++ b/datumaro/plugins/data_formats/voc/base.py @@ -45,11 +45,11 @@ T = TypeVar("T") -class _VocBase(SubsetBase): +class VocBase(SubsetBase): def __init__( self, path: str, - task: VocTask, + task: Optional[VocTask] = VocTask.voc, *, subset: Optional[str] = None, ctx: Optional[ImportContext] = None, @@ -68,19 +68,20 @@ def __init__( self._categories = self._load_categories(self._dataset_dir) - label_color = lambda label_idx: self._categories[AnnotationType.mask].colormap.get( - label_idx, None - ) - log.debug( - "Loaded labels: %s", - ", ".join( - "'%s' %s" % (l.name, ("(%s, %s, %s)" % c) if c else "") - for i, l, c in ( - (i, l, label_color(i)) - for i, l in enumerate(self._categories[AnnotationType.label].items) - ) - ), - ) + if self._task in [VocTask.voc, VocTask.voc_segmentation, VocTask.voc_instance_segmentation]: + label_color = lambda label_idx: self._categories[AnnotationType.mask].colormap.get( + label_idx, None + ) + log.debug( + "Loaded labels: %s", + ", ".join( + "'%s' %s" % (l.name, ("(%s, %s, %s)" % c) if c else "") + for i, l, c in ( + (i, l, label_color(i)) + for i, l in enumerate(self._categories[AnnotationType.label].items) + ) + ), + ) self._items = {item: None for item in self._load_subset_list(path)} def _get_label_id(self, label: str) -> int: @@ -98,7 +99,7 @@ def _load_categories(self, dataset_path): if osp.isfile(label_map_path): label_map = parse_label_map(label_map_path) - return make_voc_categories(label_map) + return make_voc_categories(label_map, self._task) def _load_subset_list(self, subset_path): subset_list = [] @@ -108,7 +109,7 @@ def _load_subset_list(self, subset_path): if not line or line[0] == "#": continue - if self._task == VocTask.person_layout: + if self._task == VocTask.voc_layout: objects = line.split('"') if 1 < len(objects): if len(objects) == 3: @@ -125,14 +126,7 @@ def _load_subset_list(self, subset_path): subset_list.append(line) return subset_list - -class VocClassificationBase(_VocBase): - def __init__(self, path, **kwargs): - super().__init__(path, VocTask.classification, **kwargs) - def __iter__(self): - annotations = self._load_annotations() - image_dir = osp.join(self._dataset_dir, VocPath.IMAGES_DIR) if osp.isdir(image_dir): images = { @@ -142,62 +136,9 @@ def __iter__(self): else: images = {} - for item_id in self._ctx.progress_reporter.iter( - self._items, desc=f"Parsing labels in '{self._subset}'" - ): - log.debug("Reading item '%s'", item_id) - image = images.get(item_id) - if image: - image = Image.from_file(path=image) - yield DatasetItem( - id=item_id, subset=self._subset, media=image, annotations=annotations.get(item_id) - ) - - def 
_load_annotations(self): - annotations = {} - task_dir = osp.dirname(self._path) - for label_id, label in enumerate(self._categories[AnnotationType.label]): - ann_file = osp.join(task_dir, f"{label.name}_{self._subset}.txt") - if not osp.isfile(ann_file): - continue - - with open(ann_file, encoding="utf-8") as f: - for i, line in enumerate(f): - line = line.strip() - if not line or line[0] == "#": - continue - - parts = line.rsplit(maxsplit=1) - if len(parts) != 2: - raise InvalidAnnotationError( - f"{osp.basename(ann_file)}:{i+1}: " - "invalid number of fields in line, expected 2" - ) - - item, present = parts - if present not in ["-1", "0", "1"]: - # Both -1 and 0 are used in the original VOC, they mean the same - raise InvalidAnnotationError( - f"{osp.basename(ann_file)}:{i+1}: " - f"unexpected class existence value '{present}', expected -1, 0 or 1" - ) - - if present == "1": - annotations.setdefault(item, []).append(Label(label_id)) - - return annotations - - -class _VocXmlBase(_VocBase): - def __iter__(self): - image_dir = osp.join(self._dataset_dir, VocPath.IMAGES_DIR) - if osp.isdir(image_dir): - images = { - osp.splitext(osp.relpath(p, image_dir))[0].replace("\\", "/"): p - for p in find_images(image_dir, recursive=True) - } - else: - images = {} + annotations = ( + self._parse_labels() if self._task in [VocTask.voc, VocTask.voc_classification] else {} + ) anno_dir = osp.join(self._dataset_dir, VocPath.ANNOTATIONS_DIR) @@ -208,11 +149,14 @@ def __iter__(self): size = None try: - anns = [] + anns = annotations.get(item_id, []) image = None ann_file = osp.join(anno_dir, item_id + ".xml") - if osp.isfile(ann_file): + if osp.isfile(ann_file) and self._task not in [ + VocTask.voc_classification, + VocTask.voc_segmentation, + ]: root_elem = ElementTree.parse(ann_file).getroot() if root_elem.tag != "annotation": raise MissingFieldError("annotation") @@ -226,7 +170,14 @@ def __iter__(self): if filename_elem is not None: image = osp.join(image_dir, filename_elem.text) - anns = self._parse_annotations(root_elem, item_id=(item_id, self._subset)) + anns += self._parse_annotations(root_elem, item_id=(item_id, self._subset)) + + if self._task in [ + VocTask.voc, + VocTask.voc_segmentation, + VocTask.voc_instance_segmentation, + ]: + anns += self._parse_masks(item_id) if image is None: image = images.pop(item_id, None) @@ -271,76 +222,30 @@ def _parse_bool_field(root, xpath: str, default: bool = False) -> Optional[bool] raise InvalidFieldError(xpath) return elem.text == "1" - def _parse_annotations(self, root_elem, *, item_id: Tuple[str, str]) -> List[Annotation]: - item_annotations = [] + def _parse_attribute(self, object_elem): + attributes = {} - for obj_id, object_elem in enumerate(root_elem.iterfind("object")): - try: - obj_id += 1 - attributes = {} - group = obj_id - - obj_label_id = self._get_label_id(self._parse_field(object_elem, "name")) + for key in ["difficult", "truncated", "occluded"]: + attributes[key] = self._parse_bool_field(object_elem, key, default=False) - obj_bbox = self._parse_bbox(object_elem) - - for key in ["difficult", "truncated", "occluded"]: - attributes[key] = self._parse_bool_field(object_elem, key, default=False) - - pose_elem = object_elem.find("pose") - if pose_elem is not None: - attributes["pose"] = pose_elem.text - - point_elem = object_elem.find("point") - if point_elem is not None: - point_x = self._parse_field(point_elem, "x", float) - point_y = self._parse_field(point_elem, "y", float) - attributes["point"] = (point_x, point_y) - - actions_elem = 
object_elem.find("actions") - actions = { - a: False - for a in self._categories[AnnotationType.label].items[obj_label_id].attributes - } - if actions_elem is not None: - for action_elem in actions_elem: - actions[action_elem.tag] = self._parse_bool_field( - actions_elem, action_elem.tag - ) - for action, present in actions.items(): - attributes[action] = present - - has_parts = False - for part_elem in object_elem.findall("part"): - part_label_id = self._get_label_id(self._parse_field(part_elem, "name")) - part_bbox = self._parse_bbox(part_elem) - - if self._task is not VocTask.person_layout: - break - has_parts = True - item_annotations.append(Bbox(*part_bbox, label=part_label_id, group=group)) - - attributes_elem = object_elem.find("attributes") - if attributes_elem is not None: - for attr_elem in attributes_elem.iter("attribute"): - attributes[self._parse_field(attr_elem, "name")] = self._parse_field( - attr_elem, "value" - ) + pose_elem = object_elem.find("pose") + if pose_elem is not None: + attributes["pose"] = pose_elem.text - if self._task is VocTask.person_layout and not has_parts: - continue - if self._task is VocTask.action_classification and not actions: - continue + point_elem = object_elem.find("point") + if point_elem is not None: + point_x = self._parse_field(point_elem, "x", float) + point_y = self._parse_field(point_elem, "y", float) + attributes["point"] = (point_x, point_y) - item_annotations.append( - Bbox( - *obj_bbox, label=obj_label_id, attributes=attributes, id=obj_id, group=group - ) + attributes_elem = object_elem.find("attributes") + if attributes_elem is not None: + for attr_elem in attributes_elem.iter("attribute"): + attributes[self._parse_field(attr_elem, "name")] = self._parse_field( + attr_elem, "value" ) - except Exception as e: - self._ctx.error_policy.report_annotation_error(e, item_id=item_id) - return item_annotations + return attributes @classmethod def _parse_bbox(cls, object_elem): @@ -354,60 +259,65 @@ def _parse_bbox(cls, object_elem): ymax = cls._parse_field(bbox_elem, "ymax", float) return [xmin, ymin, xmax - xmin, ymax - ymin] + def _parse_annotations(self, root_elem, *, item_id: Tuple[str, str]) -> List[Annotation]: + item_annotations = [] -class VocDetectionBase(_VocXmlBase): - def __init__(self, path, **kwargs): - super().__init__(path, task=VocTask.detection, **kwargs) - - -class VocLayoutBase(_VocXmlBase): - def __init__(self, path, **kwargs): - super().__init__(path, task=VocTask.person_layout, **kwargs) - - -class VocActionBase(_VocXmlBase): - def __init__(self, path, **kwargs): - super().__init__(path, task=VocTask.action_classification, **kwargs) - + obj_id = 0 + for object_elem in root_elem.iterfind("object"): + try: + label_name = self._parse_field(object_elem, "name") -class VocSegmentationBase(_VocBase): - def __init__(self, path, **kwargs): - super().__init__(path, task=VocTask.segmentation, **kwargs) + # person_layout and action_classification are only available for background and person + if self._task in [VocTask.voc_layout, VocTask.voc_action] and ( + label_name not in ["person", "background"] + ): + continue - def __iter__(self): - image_dir = osp.join(self._dataset_dir, VocPath.IMAGES_DIR) - if osp.isdir(image_dir): - images = { - osp.splitext(osp.relpath(p, image_dir))[0].replace("\\", "/"): p - for p in find_images(image_dir, recursive=True) - } - else: - images = {} + obj_label_id = self._get_label_id(label_name) + obj_bbox = self._parse_bbox(object_elem) + attributes = self._parse_attribute(object_elem) - for item_id 
in self._ctx.progress_reporter.iter( - self._items, desc=f"Parsing segmentation in '{self._subset}'" - ): - log.debug("Reading item '%s'", item_id) + group = obj_id - image = images.get(item_id) - if image: - image = Image.from_file(path=image) + if self._task in [VocTask.voc, VocTask.voc_layout]: + for part_elem in object_elem.findall("part"): + part_label_id = self._get_label_id(self._parse_field(part_elem, "name")) + part_bbox = self._parse_bbox(part_elem) + + item_annotations.append(Bbox(*part_bbox, label=part_label_id, group=group)) + + if self._task in [VocTask.voc, VocTask.voc_action]: + actions_elem = object_elem.find("actions") + actions = { + a: False + for a in self._categories[AnnotationType.label] + .items[obj_label_id] + .attributes + } + if actions_elem is not None: + for action_elem in actions_elem: + actions[action_elem.tag] = self._parse_bool_field( + actions_elem, action_elem.tag + ) + for action, present in actions.items(): + attributes[action] = present - try: - yield DatasetItem( - id=item_id, - subset=self._subset, - media=image, - annotations=self._load_annotations(item_id), + item_annotations.append( + Bbox( + *obj_bbox, label=obj_label_id, attributes=attributes, id=obj_id, group=group + ) ) + obj_id += 1 except Exception as e: - self._ctx.error_policy.report_item_error(e, item_id=(item_id, self._subset)) + self._ctx.error_policy.report_annotation_error(e, item_id=item_id) + + return item_annotations @staticmethod def _lazy_extract_mask(mask, c): return lambda: mask == c - def _load_annotations(self, item_id): + def _parse_masks(self, item_id): item_annotations = [] class_mask = None @@ -457,3 +367,67 @@ def _load_annotations(self, item_id): item_annotations.append(Mask(image=image, label=label_id)) return item_annotations + + def _parse_labels(self): + annotations = {} + task_dir = osp.dirname(self._path) + for label_id, label in enumerate(self._categories[AnnotationType.label]): + ann_file = osp.join(task_dir, f"{label.name}_{self._subset}.txt") + if not osp.isfile(ann_file): + continue + + with open(ann_file, encoding="utf-8") as f: + for i, line in enumerate(f): + line = line.strip() + if not line or line[0] == "#": + continue + + parts = line.rsplit(maxsplit=1) + if len(parts) != 2: + raise InvalidAnnotationError( + f"{osp.basename(ann_file)}:{i+1}: " + "invalid number of fields in line, expected 2" + ) + + item, present = parts + if present not in ["-1", "0", "1"]: + # Both -1 and 0 are used in the original VOC, they mean the same + raise InvalidAnnotationError( + f"{osp.basename(ann_file)}:{i+1}: " + f"unexpected class existence value '{present}', expected -1, 0 or 1" + ) + + if present == "1": + annotations.setdefault(item, []).append(Label(label_id)) + + return annotations + + +class VocClassificationBase(VocBase): + def __init__(self, path, **kwargs): + super().__init__(path, task=VocTask.voc_classification, **kwargs) + + +class VocDetectionBase(VocBase): + def __init__(self, path, **kwargs): + super().__init__(path, task=VocTask.voc_detection, **kwargs) + + +class VocSegmentationBase(VocBase): + def __init__(self, path, **kwargs): + super().__init__(path, task=VocTask.voc_segmentation, **kwargs) + + +class VocInstanceSegmentationBase(VocBase): + def __init__(self, path, **kwargs): + super().__init__(path, task=VocTask.voc_instance_segmentation, **kwargs) + + +class VocLayoutBase(VocBase): + def __init__(self, path, **kwargs): + super().__init__(path, task=VocTask.voc_layout, **kwargs) + + +class VocActionBase(VocBase): + def __init__(self, path, 
**kwargs): + super().__init__(path, task=VocTask.voc_action, **kwargs) diff --git a/datumaro/plugins/data_formats/voc/exporter.py b/datumaro/plugins/data_formats/voc/exporter.py index b15dbf05a4..e3172ac437 100644 --- a/datumaro/plugins/data_formats/voc/exporter.py +++ b/datumaro/plugins/data_formats/voc/exporter.py @@ -25,7 +25,7 @@ ) from datumaro.components.dataset import ItemStatus from datumaro.components.dataset_base import DatasetItem -from datumaro.components.errors import InvalidAnnotationError, MediaTypeError +from datumaro.components.errors import DatasetExportError, InvalidAnnotationError, MediaTypeError from datumaro.components.exporter import Exporter from datumaro.components.media import Image from datumaro.util import find, str_to_bool @@ -72,6 +72,12 @@ def _write_xml_bbox(bbox, parent_elem): class LabelmapType(Enum): voc = auto() + voc_classification = auto() + voc_detection = auto() + voc_segmentation = auto() + voc_instance_segmentation = auto() + voc_layout = auto() + voc_action = auto() source = auto() @@ -89,7 +95,7 @@ class VocExporter(Exporter): BUILTIN_ATTRS = {"difficult", "pose", "truncated", "occluded"} @staticmethod - def _split_tasks_string(s): + def _split_task_string(s): return [VocTask[i.strip()] for i in s.split(",")] @staticmethod @@ -132,10 +138,11 @@ def build_cmdline_parser(cls, **kwargs): help="Write subset lists even if they are empty " "(default: %(default)s)", ) parser.add_argument( - "--tasks", - type=cls._split_tasks_string, - help="VOC task filter, comma-separated list of {%s} " - "(default: all)" % ", ".join(t.name for t in VocTask), + "--task", + type=cls._split_task_string, + default=VocTask.voc, + help="VOC task filter, one of list {%s} " + "(default: voc)" % ", ".join(t.name for t in VocTask), ) return parser @@ -144,7 +151,7 @@ def __init__( self, extractor, save_dir, - tasks=None, + task=None, apply_colormap=True, label_map=None, allow_attributes=True, @@ -153,14 +160,13 @@ def __init__( ): super().__init__(extractor, save_dir, **kwargs) - assert tasks is None or isinstance(tasks, (VocTask, list, set)) - if tasks is None: - tasks = set(VocTask) - elif isinstance(tasks, VocTask): - tasks = {tasks} - else: - tasks = set(t if t in VocTask else VocTask[t] for t in tasks) - self._tasks = tasks + task = VocTask.voc if task is None else task + if not isinstance(task, VocTask): + raise DatasetExportError( + f"The task must be an instance of {VocTask} but {task} is given." 
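Note: the exporter now takes a single `task` instead of the old `tasks` set. A minimal usage sketch (the `dataset` variable and the output directory are assumptions; `VocExporter.convert`, `VocTask`, and the `task` keyword come from this patch):

    from datumaro.plugins.data_formats.voc.exporter import VocExporter
    from datumaro.plugins.data_formats.voc.format import VocTask

    # Export only the detection artifacts; any non-VocTask value raises DatasetExportError.
    VocExporter.convert(dataset, save_dir="./voc_out", task=VocTask.voc_detection, save_media=True)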
+ ) + + self._task = task self._apply_colormap = apply_colormap self._allow_attributes = allow_attributes @@ -184,10 +190,10 @@ def apply(self): def make_dirs(self): save_dir = self._save_dir subsets_dir = osp.join(save_dir, VocPath.SUBSETS_DIR) - cls_subsets_dir = osp.join(subsets_dir, VocPath.TASK_DIR[VocTask.classification]) - action_subsets_dir = osp.join(subsets_dir, VocPath.TASK_DIR[VocTask.action_classification]) - layout_subsets_dir = osp.join(subsets_dir, VocPath.TASK_DIR[VocTask.person_layout]) - segm_subsets_dir = osp.join(subsets_dir, VocPath.TASK_DIR[VocTask.segmentation]) + cls_subsets_dir = osp.join(subsets_dir, VocPath.TASK_DIR[VocTask.voc_classification]) + action_subsets_dir = osp.join(subsets_dir, VocPath.TASK_DIR[VocTask.voc_action]) + layout_subsets_dir = osp.join(subsets_dir, VocPath.TASK_DIR[VocTask.voc_layout]) + segm_subsets_dir = osp.join(subsets_dir, VocPath.TASK_DIR[VocTask.voc_segmentation]) ann_dir = osp.join(save_dir, VocPath.ANNOTATIONS_DIR) img_dir = osp.join(save_dir, VocPath.IMAGES_DIR) segm_dir = osp.join(save_dir, VocPath.SEGMENTATION_DIR) @@ -236,20 +242,26 @@ def save_subsets(self): except Exception as e: self._ctx.error_policy.report_item_error(e, item_id=(item.id, item.subset)) - if self._tasks & { - VocTask.classification, - VocTask.detection, - VocTask.action_classification, - VocTask.person_layout, - }: + if self._task in [ + VocTask.voc, + VocTask.voc_classification, + VocTask.voc_detection, + VocTask.voc_action, + VocTask.voc_layout, + VocTask.voc_instance_segmentation, + ]: self.save_clsdet_lists(subset_name, lists.clsdet_list) - if self._tasks & {VocTask.classification}: + if self._task in [VocTask.voc, VocTask.voc_classification]: self.save_class_lists(subset_name, lists.class_lists) - if self._tasks & {VocTask.action_classification}: + if self._task in [VocTask.voc, VocTask.voc_action]: self.save_action_lists(subset_name, lists.action_list) - if self._tasks & {VocTask.person_layout}: + if self._task in [VocTask.voc, VocTask.voc_layout]: self.save_layout_lists(subset_name, lists.layout_list) - if self._tasks & {VocTask.segmentation}: + if self._task in [ + VocTask.voc, + VocTask.voc_segmentation, + VocTask.voc_instance_segmentation, + ]: self.save_segm_lists(subset_name, lists.segm_list) def _export_annotations(self, item: DatasetItem, *, image_filename: str, lists: _SubsetLists): @@ -264,7 +276,13 @@ def _export_annotations(self, item: DatasetItem, *, image_filename: str, lists: elif isinstance(a, Mask): masks.append(a) - if self._tasks & {VocTask.detection, VocTask.person_layout, VocTask.action_classification}: + if self._task in [ + VocTask.voc, + VocTask.voc_detection, + VocTask.voc_instance_segmentation, + VocTask.voc_layout, + VocTask.voc_action, + ]: root_elem = ET.Element("annotation") if "_" in item.id: folder = item.id[: item.id.find("_")] @@ -298,10 +316,10 @@ def _export_annotations(self, item: DatasetItem, *, image_filename: str, lists: layout_bboxes = [] for bbox in bboxes: label = self.get_label(bbox.label) - if self._is_part(label): - layout_bboxes.append(bbox) - elif self._is_label(label): + if self._is_label(label): main_bboxes.append(bbox) + elif self._is_part(label): + layout_bboxes.append(bbox) for new_obj_id, obj in enumerate(main_bboxes): attr = obj.attributes @@ -328,14 +346,16 @@ def _export_annotations(self, item: DatasetItem, *, image_filename: str, lists: if bbox is not None: _write_xml_bbox(bbox, obj_elem) - for part_bbox in filter( - lambda x: obj.group and obj.group == x.group, layout_bboxes - ): - part_elem = 
ET.SubElement(obj_elem, "part") - ET.SubElement(part_elem, "name").text = self.get_label(part_bbox.label) - _write_xml_bbox(part_bbox.get_bbox(), part_elem) + if self._task in [VocTask.voc, VocTask.voc_layout]: + for part_bbox in layout_bboxes: + if part_bbox.group != obj.group: + continue - objects_with_parts.append(new_obj_id) + part_elem = ET.SubElement(obj_elem, "part") + ET.SubElement(part_elem, "name").text = self.get_label(part_bbox.label) + _write_xml_bbox(part_bbox.get_bbox(), part_elem) + + objects_with_parts.append(new_obj_id) label_actions = self._get_actions(obj_label) actions_elem = ET.Element("actions") @@ -349,6 +369,7 @@ def _export_annotations(self, item: DatasetItem, *, image_filename: str, lists: ET.SubElement(actions_elem, action).text = "%d" % present objects_with_actions[new_obj_id][action] = present + if len(actions_elem) != 0: obj_elem.append(actions_elem) @@ -366,11 +387,6 @@ def _export_annotations(self, item: DatasetItem, *, image_filename: str, lists: if len(attrs_elem): obj_elem.append(attrs_elem) - if self._tasks & { - VocTask.detection, - VocTask.person_layout, - VocTask.action_classification, - }: ann_path = osp.join(self._ann_dir, item.id + ".xml") os.makedirs(osp.dirname(ann_path), exist_ok=True) with open(ann_path, "w", encoding="utf-8") as f: @@ -378,10 +394,10 @@ def _export_annotations(self, item: DatasetItem, *, image_filename: str, lists: lists.clsdet_list[item.id] = True - if objects_with_parts: + if self._task in [VocTask.voc, VocTask.voc_layout] and objects_with_parts: lists.layout_list[item.id] = objects_with_parts - if objects_with_actions: + if self._task in [VocTask.voc, VocTask.voc_action] and objects_with_actions: lists.action_list[item.id] = objects_with_actions for label_ann in labels: @@ -394,7 +410,10 @@ def _export_annotations(self, item: DatasetItem, *, image_filename: str, lists: lists.clsdet_list[item.id] = True - if masks and VocTask.segmentation in self._tasks: + if ( + self._task in [VocTask.voc, VocTask.voc_segmentation, VocTask.voc_instance_segmentation] + and masks + ): compiled_mask = CompiledMask.from_instance_masks( masks, instance_labels=[self._label_id_mapping(m.label) for m in masks] ) @@ -522,6 +541,7 @@ def save_clsdet_lists(self, subset_name, clsdet_list): ann_file = osp.join(self._cls_subsets_dir, subset_name + ".txt") items = {k: True for k in clsdet_list} + if self._patch and osp.isfile(ann_file): self._get_filtered_lines(ann_file, self._patch, subset_name, items) @@ -592,9 +612,11 @@ def save_label_map(self): write_label_map(path, self._label_map) def _load_categories(self, label_map_source): - if label_map_source == LabelmapType.voc.name: - # use the default VOC colormap - label_map = make_voc_label_map() + if ( + label_map_source in [t.name for t in LabelmapType] + and label_map_source != LabelmapType.source.name + ): + label_map = make_voc_label_map(task=self._task) elif ( label_map_source == LabelmapType.source.name @@ -642,13 +664,21 @@ def _load_categories(self, label_map_source): label_map[bg_label] = [color, [], []] label_map.move_to_end(bg_label, last=False) - self._categories = make_voc_categories(label_map) + self._categories = make_voc_categories(label_map, task=self._task) # Update colors with assigned values - colormap = self._categories[AnnotationType.mask].colormap - for label_id, color in colormap.items(): - label_desc = label_map[self._categories[AnnotationType.label].items[label_id].name] - label_desc[0] = color + if label_map_source in [ + LabelmapType.voc.name, + 
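Note: with the extended `LabelmapType` above, any task-specific preset name now selects the matching default label map. A hedged sketch of what those presets contain (`make_voc_label_map` is defined in format.py later in this patch; each entry is `[color, parts, attributes]`):

    import datumaro.plugins.data_formats.voc.format as VOC
    from datumaro.plugins.data_formats.voc.format import VocTask

    label_map = VOC.make_voc_label_map(task=VocTask.voc_action)
    print(list(label_map))         # ['background', 'person']
    print(label_map["person"][2])  # the VocAction attribute names

    label_map = VOC.make_voc_label_map(task=VocTask.voc_layout)
    print(label_map["person"][1])  # the VocBodyPart part names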
LabelmapType.voc_segmentation.name, + LabelmapType.voc_instance_segmentation.name, + ]: + colormap = self._categories[AnnotationType.mask].colormap + for label_id, color in colormap.items(): + if label_id: + label_desc = label_map[ + self._categories[AnnotationType.label].items[label_id].name + ] + label_desc[0] = color self._label_map = label_map self._label_id_mapping = self._make_label_id_map() @@ -678,7 +708,7 @@ def _make_label_id_map(self): ) void_labels = [ - src_label for src_id, src_label in src_labels.items() if src_label not in dst_labels + src_label for src_label in src_labels.values() if src_label not in dst_labels ] if void_labels: log.warning( @@ -741,11 +771,13 @@ def patch(cls, dataset, patch, save_dir, **kwargs): if not to_remove: continue - if conv._tasks & { - VocTask.detection, - VocTask.action_classification, - VocTask.person_layout, - }: + if conv._task in [ + VocTask.voc, + VocTask.voc_detection, + VocTask.voc_instance_segmentation, + VocTask.voc_action, + VocTask.voc_layout, + ]: ann_path = osp.join(conv._ann_dir, item.id + ".xml") if osp.isfile(ann_path): os.remove(ann_path) @@ -766,29 +798,35 @@ def patch(cls, dataset, patch, save_dir, **kwargs): class VocClassificationExporter(VocExporter): def __init__(self, *args, **kwargs): - kwargs["tasks"] = VocTask.classification + kwargs["task"] = VocTask.voc_classification super().__init__(*args, **kwargs) class VocDetectionExporter(VocExporter): def __init__(self, *args, **kwargs): - kwargs["tasks"] = VocTask.detection + kwargs["task"] = VocTask.voc_detection super().__init__(*args, **kwargs) -class VocLayoutExporter(VocExporter): +class VocSegmentationExporter(VocExporter): def __init__(self, *args, **kwargs): - kwargs["tasks"] = VocTask.person_layout + kwargs["task"] = VocTask.voc_segmentation super().__init__(*args, **kwargs) -class VocActionExporter(VocExporter): +class VocInstanceSegmentationExporter(VocExporter): def __init__(self, *args, **kwargs): - kwargs["tasks"] = VocTask.action_classification + kwargs["task"] = VocTask.voc_instance_segmentation super().__init__(*args, **kwargs) -class VocSegmentationExporter(VocExporter): +class VocLayoutExporter(VocExporter): + def __init__(self, *args, **kwargs): + kwargs["task"] = VocTask.voc_layout + super().__init__(*args, **kwargs) + + +class VocActionExporter(VocExporter): def __init__(self, *args, **kwargs): - kwargs["tasks"] = VocTask.segmentation + kwargs["task"] = VocTask.voc_action super().__init__(*args, **kwargs) diff --git a/datumaro/plugins/data_formats/voc/format.py b/datumaro/plugins/data_formats/voc/format.py index ce86af423e..cffb536556 100644 --- a/datumaro/plugins/data_formats/voc/format.py +++ b/datumaro/plugins/data_formats/voc/format.py @@ -24,11 +24,13 @@ class VocTask(Enum): - classification = auto() - detection = auto() - segmentation = auto() - action_classification = auto() - person_layout = auto() + voc = auto() + voc_classification = auto() + voc_detection = auto() + voc_segmentation = auto() + voc_instance_segmentation = auto() + voc_action = auto() + voc_layout = auto() class VocLabel(Enum): @@ -116,11 +118,13 @@ class VocPath: LABELMAP_FILE = "labelmap.txt" TASK_DIR = { - VocTask.classification: "Main", - VocTask.detection: "Main", - VocTask.segmentation: "Segmentation", - VocTask.action_classification: "Action", - VocTask.person_layout: "Layout", + VocTask.voc: "Main", + VocTask.voc_classification: "Main", + VocTask.voc_detection: "Main", + VocTask.voc_segmentation: "Segmentation", + VocTask.voc_instance_segmentation: "Segmentation", + 
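For reference, each task resolves to a standard VOC `ImageSets` subdirectory through `VocPath.TASK_DIR` (the mapping being completed just below); a quick inspection sketch:

    from datumaro.plugins.data_formats.voc.format import VocPath, VocTask

    for task in VocTask:
        print(f"{task.name} -> ImageSets/{VocPath.TASK_DIR[task]}")
    # voc, voc_classification, voc_detection      -> Main
    # voc_segmentation, voc_instance_segmentation -> Segmentation
    # voc_action -> Action; voc_layout -> Layout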
VocTask.voc_action: "Action", + VocTask.voc_layout: "Layout", } @@ -131,11 +135,47 @@ class VocPath: # TODO: refactor, make type annotations conform with actual usage -def make_voc_label_map() -> LabelMapConfig: - labels = sorted(VocLabel, key=lambda l: l.value) - label_map = OrderedDict((label.name, [VocColormap[label.value], [], []]) for label in labels) - label_map[VocLabel.person.name][1] = [p.name for p in VocBodyPart] - label_map[VocLabel.person.name][2] = [a.name for a in VocAction] +def make_voc_label_map(task: VocTask = None) -> LabelMapConfig: + if task == VocTask.voc_action: + label_map = OrderedDict( + { + VocLabel.background.name: [VocColormap[VocLabel.background.value], [], []], + VocLabel.person.name: [ + VocColormap[VocLabel.person.value], + [], + [a.name for a in VocAction], + ], + } + ) + elif task == VocTask.voc_layout: + label_map = OrderedDict( + { + VocLabel.background.name: [VocColormap[VocLabel.background.value], [], []], + VocLabel.person.name: [ + VocColormap[VocLabel.person.value], + [p.name for p in VocBodyPart], + [], + ], + } + ) + elif task in [ + VocTask.voc_classification, + VocTask.voc_detection, + VocTask.voc_segmentation, + VocTask.voc_instance_segmentation, + ]: + labels = sorted(VocLabel, key=lambda l: l.value) + label_map = OrderedDict( + (label.name, [VocColormap[label.value], [], []]) for label in labels + ) + else: + labels = sorted(VocLabel, key=lambda l: l.value) + label_map = OrderedDict( + (label.name, [VocColormap[label.value], [], []]) for label in labels + ) + label_map[VocLabel.person.name][1] = [p.name for p in VocBodyPart] + label_map[VocLabel.person.name][2] = [a.name for a in VocAction] + return label_map @@ -277,9 +317,11 @@ def write_meta_file(path: str, label_map: LabelMapConfig): dump_json_file(get_meta_file(path), dataset_meta) -def make_voc_categories(label_map: Optional[LabelMapConfig] = None) -> CategoriesInfo: +def make_voc_categories( + label_map: Optional[LabelMapConfig] = None, task: Optional[VocTask] = VocTask.voc +) -> CategoriesInfo: if label_map is None: - label_map = make_voc_label_map() + label_map = make_voc_label_map(task) categories = {} @@ -288,10 +330,18 @@ def make_voc_categories(label_map: Optional[LabelMapConfig] = None) -> Categorie for label, desc in label_map.items(): label_categories.add(label, attributes=desc[2]) - for part in OrderedDict((k, None) for k in chain(*(desc[1] for desc in label_map.values()))): - label_categories.add(part) + + if task in [VocTask.voc, VocTask.voc_layout]: + for part in OrderedDict( + (k, None) for k in chain(*(desc[1] for desc in label_map.values())) + ): + label_categories.add(part) + categories[AnnotationType.label] = label_categories + if task not in [VocTask.voc, VocTask.voc_segmentation, VocTask.voc_instance_segmentation]: + return categories + has_colors = any(v[0] is not None for v in label_map.values()) if not has_colors: # generate new colors colormap = generate_colormap(len(label_map)) diff --git a/datumaro/plugins/data_formats/voc/importer.py b/datumaro/plugins/data_formats/voc/importer.py index ab4b8c2662..38aef25b4d 100644 --- a/datumaro/plugins/data_formats/voc/importer.py +++ b/datumaro/plugins/data_formats/voc/importer.py @@ -10,13 +10,15 @@ from .format import VocPath, VocTask -class VocImporter(Importer): +class _VocImporter(Importer): _TASKS = { - VocTask.classification: ("voc_classification", "Main"), - VocTask.detection: ("voc_detection", "Main"), - VocTask.segmentation: ("voc_segmentation", "Segmentation"), - VocTask.person_layout: ("voc_layout", 
"Layout"), - VocTask.action_classification: ("voc_action", "Action"), + VocTask.voc: ("voc", "Main"), + VocTask.voc_classification: ("voc_classification", "Main"), + VocTask.voc_detection: ("voc_detection", "Main"), + VocTask.voc_segmentation: ("voc_segmentation", "Segmentation"), + VocTask.voc_instance_segmentation: ("voc_instance_segmentation", "Segmentation"), + VocTask.voc_layout: ("voc_layout", "Layout"), + VocTask.voc_action: ("voc_action", "Action"), } @classmethod @@ -25,8 +27,6 @@ def detect(cls, context: FormatDetectionContext) -> None: # `voc_detection`, etc. To remove the ambiguity (and thus make it # possible to use autodetection with the VOC datasets), disable # autodetection for the single-task formats. - if len(cls._TASKS) == 1: - context.raise_unsupported() with context.require_any(): task_dirs = {task_dir for _, task_dir in cls._TASKS.values()} @@ -64,26 +64,36 @@ def find_sources(cls, path): return subsets -class VocClassificationImporter(VocImporter): - _TASK = VocTask.classification - _TASKS = {_TASK: VocImporter._TASKS[_TASK]} +class VocImporter(_VocImporter): + _TASK = VocTask.voc + _TASKS = {_TASK: _VocImporter._TASKS[_TASK]} -class VocDetectionImporter(VocImporter): - _TASK = VocTask.detection - _TASKS = {_TASK: VocImporter._TASKS[_TASK]} +class VocClassificationImporter(_VocImporter): + _TASK = VocTask.voc_classification + _TASKS = {_TASK: _VocImporter._TASKS[_TASK]} -class VocSegmentationImporter(VocImporter): - _TASK = VocTask.segmentation - _TASKS = {_TASK: VocImporter._TASKS[_TASK]} +class VocDetectionImporter(_VocImporter): + _TASK = VocTask.voc_detection + _TASKS = {_TASK: _VocImporter._TASKS[_TASK]} -class VocLayoutImporter(VocImporter): - _TASK = VocTask.person_layout - _TASKS = {_TASK: VocImporter._TASKS[_TASK]} +class VocSegmentationImporter(_VocImporter): + _TASK = VocTask.voc_segmentation + _TASKS = {_TASK: _VocImporter._TASKS[_TASK]} -class VocActionImporter(VocImporter): - _TASK = VocTask.action_classification - _TASKS = {_TASK: VocImporter._TASKS[_TASK]} +class VocInstanceSegmentationImporter(_VocImporter): + _TASK = VocTask.voc_instance_segmentation + _TASKS = {_TASK: _VocImporter._TASKS[_TASK]} + + +class VocLayoutImporter(_VocImporter): + _TASK = VocTask.voc_layout + _TASKS = {_TASK: _VocImporter._TASKS[_TASK]} + + +class VocActionImporter(_VocImporter): + _TASK = VocTask.voc_action + _TASKS = {_TASK: _VocImporter._TASKS[_TASK]} diff --git a/tests/integration/cli/test_merge.py b/tests/integration/cli/test_merge.py index 774927936c..8f4985f6ef 100644 --- a/tests/integration/cli/test_merge.py +++ b/tests/integration/cli/test_merge.py @@ -181,8 +181,8 @@ def test_can_run_self_merge(self): 3, 4, label=2, - id=1, - group=1, + id=0, + group=0, attributes={ "score": 0.5, "occluded": False, @@ -196,8 +196,8 @@ def test_can_run_self_merge(self): 2, 3, label=3, - id=2, - group=2, + id=1, + group=1, attributes={ "score": 0.5, "occluded": False, @@ -294,8 +294,8 @@ def test_can_run_multimerge(self): 3, 4, label=2, - id=1, - group=1, + id=0, + group=0, attributes={ "score": 0.5, "occluded": False, @@ -309,8 +309,8 @@ def test_can_run_multimerge(self): 2, 3, label=3, - id=2, - group=2, + id=1, + group=1, attributes={ "score": 0.5, "occluded": False, diff --git a/tests/integration/cli/test_voc_format.py b/tests/integration/cli/test_voc_format.py index b74c4ca41c..6033305640 100644 --- a/tests/integration/cli/test_voc_format.py +++ b/tests/integration/cli/test_voc_format.py @@ -96,9 +96,9 @@ def test_preparing_dataset_for_train_model(self): 8.0, 5.0, 
attributes={"truncated": False, "occluded": False, "difficult": False}, - id=1, + id=0, label=2, - group=1, + group=0, ) ], ), @@ -112,21 +112,21 @@ def test_preparing_dataset_for_train_model(self): 4.0, 4.0, attributes={"truncated": False, "occluded": False, "difficult": False}, - id=1, + id=0, label=3, - group=1, + group=0, ) ], ), ], - categories=VOC.make_voc_categories(), + categories=VOC.make_voc_categories(task=VOC.VocTask.voc_detection), ) dataset_path = osp.join(DUMMY_DATASETS_DIR, "voc_dataset2") with TestDir() as test_dir: run(self, "project", "create", "-o", test_dir) - run(self, "project", "import", "-p", test_dir, "-f", "voc", dataset_path) + run(self, "project", "import", "-p", test_dir, "-f", "voc_detection", dataset_path) run( self, @@ -163,15 +163,15 @@ def test_preparing_dataset_for_train_model(self): "-p", test_dir, "-f", - "voc", + "voc_detection", "-o", export_path, "--", "--label-map", - "voc", + "voc_detection", ) - parsed_dataset = Dataset.import_from(export_path, format="voc") + parsed_dataset = Dataset.import_from(export_path, format="voc_detection") compare_datasets(self, expected_dataset, parsed_dataset) @mark_requirement(Requirements.DATUM_GENERAL_REQ) @@ -193,9 +193,9 @@ def test_export_to_voc_format(self): 4.0, 2.0, attributes={"difficult": False, "truncated": False, "occluded": False}, - id=1, + id=0, label=3, - group=1, + group=0, ), Bbox( 3.0, @@ -203,9 +203,9 @@ def test_export_to_voc_format(self): 2.0, 3.0, attributes={"difficult": False, "truncated": False, "occluded": False}, - id=2, + id=1, label=5, - group=2, + group=1, ), ], ) @@ -276,9 +276,9 @@ def test_convert_to_voc_format(self): "visibility": "1.0", "ignored": "False", }, - id=1, + id=0, label=3, - group=1, + group=0, ) ], ) @@ -350,7 +350,7 @@ def test_convert_from_voc_format(self): self, "convert", "-if", - "voc", + "voc_classification", "-i", voc_dir, "-f", @@ -365,15 +365,14 @@ def test_convert_from_voc_format(self): compare_datasets(self, expected_dataset, target_dataset, require_media=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_and_load_voc_dataset(self): + def test_can_save_and_load_voc_instance_segmentation_dataset(self): source_dataset = Dataset.from_iterable( [ DatasetItem( id="2007_000001", subset="train", media=Image.from_numpy(data=np.ones((10, 20, 3))), - annotations=[Label(i) for i in range(22) if i % 2 == 1] - + [ + annotations=[ Bbox( 4.0, 5.0, @@ -386,7 +385,6 @@ def test_can_save_and_load_voc_dataset(self): "difficult": False, "truncated": False, "occluded": False, - **{a.name: a.value % 2 == 1 for a in VOC.VocAction}, }, ), Bbox( @@ -395,8 +393,8 @@ def test_can_save_and_load_voc_dataset(self): 2.0, 2.0, label=8, - id=2, - group=2, + id=0, + group=0, attributes={ "difficult": False, "truncated": True, @@ -404,7 +402,6 @@ def test_can_save_and_load_voc_dataset(self): "pose": "Unspecified", }, ), - Bbox(5.5, 6.0, 2.0, 2.0, label=22, id=0, group=1), Mask(image=np.ones([10, 20]), label=2, group=1), ], ), @@ -414,12 +411,18 @@ def test_can_save_and_load_voc_dataset(self): media=Image.from_numpy(data=np.ones((10, 20, 3))), ), ], - categories=VOC.make_voc_categories(), + categories=VOC.make_voc_categories(task=VOC.VocTask.voc_instance_segmentation), ) voc_dir = osp.join(DUMMY_DATASETS_DIR, "voc_dataset1") with TestDir() as test_dir: - self._test_can_save_and_load(test_dir, voc_dir, source_dataset, "voc", label_map="voc") + self._test_can_save_and_load( + test_dir, + voc_dir, + source_dataset, + "voc_instance_segmentation", + 
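As context for this round trip, a sketch of the new instance segmentation path (the dataset directory is hypothetical; the `voc_instance_segmentation` format string comes from the importer and exporter added in this patch):

    from datumaro.components.dataset import Dataset

    dataset = Dataset.import_from("VOCdevkit/VOC2012", "voc_instance_segmentation")
    dataset.export("./voc_out", "voc_instance_segmentation", save_media=True)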
label_map="voc_instance_segmentation", + ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_and_load_voc_layout_dataset(self): @@ -435,17 +438,16 @@ def test_can_save_and_load_voc_layout_dataset(self): 5.0, 2.0, 2.0, - label=15, - id=1, - group=1, + label=1, + id=0, + group=0, attributes={ "difficult": False, "truncated": False, "occluded": False, - **{a.name: a.value % 2 == 1 for a in VOC.VocAction}, }, ), - Bbox(5.5, 6.0, 2.0, 2.0, label=22, id=0, group=1), + Bbox(5.5, 6.0, 2.0, 2.0, label=2, id=0, group=0), ], ), DatasetItem( @@ -454,27 +456,14 @@ def test_can_save_and_load_voc_layout_dataset(self): media=Image.from_numpy(data=np.ones((10, 20, 3))), ), ], - categories=VOC.make_voc_categories(), + categories=VOC.make_voc_categories(task=VOC.VocTask.voc_layout), ) dataset_dir = osp.join(DUMMY_DATASETS_DIR, "voc_dataset1") - rpath = osp.join("ImageSets", "Layout", "train.txt") - matrix = [ - ("voc_layout", "", ""), - ("voc_layout", "train", rpath), - ("voc", "train", rpath), - ] - for format, subset, path in matrix: - with self.subTest(format=format, subset=subset, path=path): - if subset: - expected = expected_dataset.get_subset(subset) - else: - expected = expected_dataset - - with TestDir() as test_dir: - self._test_can_save_and_load( - test_dir, dataset_dir, expected, format, result_path=path, label_map="voc" - ) + with TestDir() as test_dir: + self._test_can_save_and_load( + test_dir, dataset_dir, expected_dataset, "voc_layout", label_map="voc_layout" + ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_and_load_voc_classification_dataset(self): @@ -492,7 +481,7 @@ def test_can_save_and_load_voc_classification_dataset(self): media=Image.from_numpy(data=np.ones((10, 20, 3))), ), ], - categories=VOC.make_voc_categories(), + categories=VOC.make_voc_categories(task=VOC.VocTask.voc_classification), ) dataset_dir = osp.join(DUMMY_DATASETS_DIR, "voc_dataset1") @@ -510,7 +499,12 @@ def test_can_save_and_load_voc_classification_dataset(self): with TestDir() as test_dir: self._test_can_save_and_load( - test_dir, dataset_dir, expected, format, result_path=path, label_map="voc" + test_dir, + dataset_dir, + expected, + format, + result_path=path, + label_map="voc_classification", ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) @@ -528,13 +522,12 @@ def test_can_save_and_load_voc_detection_dataset(self): 2.0, 2.0, label=15, - id=2, - group=2, + id=1, + group=1, attributes={ "difficult": False, "truncated": False, "occluded": False, - **{a.name: a.value % 2 == 1 for a in VOC.VocAction}, }, ), Bbox( @@ -543,8 +536,8 @@ def test_can_save_and_load_voc_detection_dataset(self): 2.0, 2.0, label=8, - id=1, - group=1, + id=0, + group=0, attributes={ "difficult": False, "truncated": True, @@ -560,7 +553,7 @@ def test_can_save_and_load_voc_detection_dataset(self): media=Image.from_numpy(data=np.ones((10, 20, 3))), ), ], - categories=VOC.make_voc_categories(), + categories=VOC.make_voc_categories(task=VOC.VocTask.voc_detection), ) dataset_dir = osp.join(DUMMY_DATASETS_DIR, "voc_dataset1") @@ -578,7 +571,12 @@ def test_can_save_and_load_voc_detection_dataset(self): with TestDir() as test_dir: self._test_can_save_and_load( - test_dir, dataset_dir, expected, format, result_path=path, label_map="voc" + test_dir, + dataset_dir, + expected, + format, + result_path=path, + label_map="voc_detection", ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) @@ -597,7 +595,7 @@ def test_can_save_and_load_voc_segmentation_dataset(self): media=Image.from_numpy(data=np.ones((10, 20, 
3))), ), ], - categories=VOC.make_voc_categories(), + categories=VOC.make_voc_categories(task=VOC.VocTask.voc_segmentation), ) dataset_dir = osp.join(DUMMY_DATASETS_DIR, "voc_dataset1") @@ -605,7 +603,6 @@ def test_can_save_and_load_voc_segmentation_dataset(self): matrix = [ ("voc_segmentation", "", ""), ("voc_segmentation", "train", rpath), - ("voc", "train", rpath), ] for format, subset, path in matrix: with self.subTest(format=format, subset=subset, path=path): @@ -616,7 +613,12 @@ def test_can_save_and_load_voc_segmentation_dataset(self): with TestDir() as test_dir: self._test_can_save_and_load( - test_dir, dataset_dir, expected, format, result_path=path, label_map="voc" + test_dir, + dataset_dir, + expected, + format, + result_path=path, + label_map="voc_segmentation", ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) @@ -633,9 +635,9 @@ def test_can_save_and_load_voc_action_dataset(self): 5.0, 2.0, 2.0, - label=15, - id=1, - group=1, + label=1, + id=0, + group=0, attributes={ "difficult": False, "truncated": False, @@ -651,7 +653,7 @@ def test_can_save_and_load_voc_action_dataset(self): media=Image.from_numpy(data=np.ones((10, 20, 3))), ), ], - categories=VOC.make_voc_categories(), + categories=VOC.make_voc_categories(task=VOC.VocTask.voc_action), ) dataset_dir = osp.join(DUMMY_DATASETS_DIR, "voc_dataset1") @@ -659,7 +661,6 @@ def test_can_save_and_load_voc_action_dataset(self): matrix = [ ("voc_action", "", ""), ("voc_action", "train", rpath), - ("voc", "train", rpath), ] for format, subset, path in matrix: with self.subTest(format=format, subset=subset, path=path): @@ -670,7 +671,12 @@ def test_can_save_and_load_voc_action_dataset(self): with TestDir() as test_dir: self._test_can_save_and_load( - test_dir, dataset_dir, expected, format, result_path=path, label_map="voc" + test_dir, + dataset_dir, + expected, + format, + result_path=path, + label_map="voc_action", ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) @@ -694,8 +700,8 @@ def test_label_projection_with_masks(self): "difficult": False, "occluded": False, }, - id=1, - group=1, + id=0, + group=0, ), ], ), diff --git a/tests/integration/cli/test_yolo_format.py b/tests/integration/cli/test_yolo_format.py index ef82b877e7..f269319ac2 100644 --- a/tests/integration/cli/test_yolo_format.py +++ b/tests/integration/cli/test_yolo_format.py @@ -100,7 +100,7 @@ def test_can_convert_voc_to_yolo(self): annotations=[ Bbox(1.0, 2.0, 2.0, 2.0, label=8), Bbox(4.0, 5.0, 2.0, 2.0, label=15), - Bbox(5.5, 6, 2, 2, label=22), + Bbox(5.5, 6.0, 2.0, 2.0, label=22), ], ), DatasetItem( @@ -109,7 +109,10 @@ def test_can_convert_voc_to_yolo(self): media=Image.from_numpy(data=np.ones((10, 20, 3))), ), ], - categories=[label.name for label in VOC.make_voc_categories()[AnnotationType.label]], + categories=[ + label.name + for label in VOC.make_voc_categories(task=VOC.VocTask.voc)[AnnotationType.label] + ], ) with TestDir() as test_dir: diff --git a/tests/unit/data_formats/base.py b/tests/unit/data_formats/base.py index f13a0aa38f..96d5b26efd 100644 --- a/tests/unit/data_formats/base.py +++ b/tests/unit/data_formats/base.py @@ -20,9 +20,15 @@ class TestDataFormatBase: EXPORTER: Exporter @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_detect(self, fxt_dataset_dir: str): + def test_can_detect(self, fxt_dataset_dir: str, importer: Optional[Importer] = None): + if importer is None: + importer = getattr(self, "IMPORTER", None) + + if importer is None: + pytest.skip(reason="importer is None.") + detected_formats = 
DEFAULT_ENVIRONMENT.detect_dataset(fxt_dataset_dir) - assert [self.IMPORTER.NAME] == detected_formats + assert [importer.NAME] == detected_formats @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_import( @@ -31,9 +37,17 @@ def test_can_import( fxt_expected_dataset: Dataset, fxt_import_kwargs: Dict[str, Any], request: pytest.FixtureRequest, + importer: Optional[Importer] = None, ): + if importer is None: + importer = getattr(self, "IMPORTER", None) + + if importer is None: + pytest.skip(reason="importer is None.") + helper_tc = request.getfixturevalue("helper_tc") - dataset = Dataset.import_from(fxt_dataset_dir, self.IMPORTER.NAME, **fxt_import_kwargs) + dataset = Dataset.import_from(fxt_dataset_dir, importer.NAME, **fxt_import_kwargs) + compare_datasets(helper_tc, fxt_expected_dataset, dataset, require_media=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) @@ -61,4 +75,5 @@ def test_can_export_and_import( fxt_expected_dataset, save_dir=test_dir, save_media=True, **fxt_export_kwargs ) dataset = Dataset.import_from(test_dir, importer.NAME, **fxt_import_kwargs) + compare_datasets(helper_tc, fxt_expected_dataset, dataset, require_media=True) diff --git a/tests/unit/data_formats/test_voc_format.py b/tests/unit/data_formats/test_voc_format.py new file mode 100644 index 0000000000..674d54a71e --- /dev/null +++ b/tests/unit/data_formats/test_voc_format.py @@ -0,0 +1,1588 @@ +# Copyright (C) 2023 Intel Corporation +# +# SPDX-License-Identifier: MIT +import os +import os.path as osp +import pickle +from typing import Any, Dict + +import numpy as np +import pytest +from lxml import etree as ElementTree # nosec + +import datumaro.plugins.data_formats.voc.format as VOC +from datumaro.components.annotation import ( + AnnotationType, + Bbox, + Label, + LabelCategories, + Mask, + MaskCategories, +) +from datumaro.components.dataset import Dataset +from datumaro.components.dataset_base import DatasetItem +from datumaro.components.environment import DEFAULT_ENVIRONMENT +from datumaro.components.errors import ( + AnnotationImportError, + DatasetImportError, + InvalidAnnotationError, + ItemImportError, +) +from datumaro.components.exporter import Exporter +from datumaro.components.importer import Importer +from datumaro.components.media import Image +from datumaro.plugins.data_formats.voc.exporter import ( + VocActionExporter, + VocClassificationExporter, + VocDetectionExporter, + VocInstanceSegmentationExporter, + VocLayoutExporter, + VocSegmentationExporter, +) +from datumaro.plugins.data_formats.voc.format import VocTask +from datumaro.plugins.data_formats.voc.importer import ( + VocActionImporter, + VocClassificationImporter, + VocDetectionImporter, + VocInstanceSegmentationImporter, + VocLayoutImporter, + VocSegmentationImporter, +) +from datumaro.util.image import save_image +from datumaro.util.mask_tools import load_mask + +from ...requirements import Requirements, mark_requirement +from .base import TestDataFormatBase + +from tests.utils.assets import get_test_asset_path +from tests.utils.test_utils import compare_datasets + +DUMMY_DATASET_DIR = get_test_asset_path("voc_dataset", "voc_dataset1") + + +@pytest.fixture +def fxt_classification_dataset(): + return Dataset.from_iterable( + [ + DatasetItem( + id="2007_000001", + subset="train", + media=Image.from_numpy(data=np.ones((10, 20, 3))), + annotations=[Label(label=l) for l in range(len(VOC.VocLabel)) if l % 2 == 1], + ), + DatasetItem( + id="2007_000002", + subset="test", + media=Image.from_numpy(data=np.ones((10, 20, 3))), + ), 
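The classification fixture here mirrors the per-label subset files under `ImageSets/Main` that `_parse_labels` in base.py reads; an illustrative sketch of such a file (the name and contents are made up, the -1/0/1 semantics are from the parser above):

    # ImageSets/Main/aeroplane_train.txt
    # format: <item_id> <presence>, where 1 marks presence and -1/0 both mark absence
    2007_000001 1
    2007_000002 -1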
+ ], + categories=VOC.make_voc_categories(task=VocTask.voc_classification), + ) + + +@pytest.fixture +def fxt_detection_dataset(): + return Dataset.from_iterable( + [ + DatasetItem( + id="2007_000001", + subset="train", + media=Image.from_numpy(data=np.ones((10, 20, 3))), + annotations=[ + Bbox( + 1.0, + 2.0, + 2.0, + 2.0, + label=8, + id=0, + group=0, + attributes={ + "difficult": False, + "truncated": True, + "occluded": False, + "pose": "Unspecified", + }, + ), + Bbox( + 4.0, + 5.0, + 2.0, + 2.0, + label=15, + id=1, + group=1, + attributes={ + "difficult": False, + "truncated": False, + "occluded": False, + }, + ), + ], + ), + DatasetItem( + id="2007_000002", + subset="test", + media=Image.from_numpy(data=np.ones((10, 20, 3))), + ), + ], + categories=VOC.make_voc_categories(task=VocTask.voc_detection), + ) + + +@pytest.fixture +def fxt_segmentation_dataset(): + return Dataset.from_iterable( + [ + DatasetItem( + id="2007_000001", + subset="train", + media=Image.from_numpy(data=np.ones((10, 20, 3))), + annotations=[Mask(image=np.ones([10, 20]), label=2, group=1)], + ), + DatasetItem( + id="2007_000002", + subset="test", + media=Image.from_numpy(data=np.ones((10, 20, 3))), + ), + ], + categories=VOC.make_voc_categories(task=VocTask.voc_segmentation), + ) + + +@pytest.fixture +def fxt_layout_dataset(): + return Dataset.from_iterable( + [ + DatasetItem( + id="2007_000001", + subset="train", + media=Image.from_numpy(data=np.ones((10, 20, 3))), + annotations=[ + Bbox( + 4.0, + 5.0, + 2.0, + 2.0, + label=1, + id=0, + group=0, + attributes={ + "difficult": False, + "truncated": False, + "occluded": False, + }, + ), + Bbox(5.5, 6.0, 2.0, 2.0, label=2, group=0), + ], + ), + DatasetItem( + id="2007_000002", + subset="test", + media=Image.from_numpy(data=np.ones((10, 20, 3))), + ), + ], + categories=VOC.make_voc_categories(task=VocTask.voc_layout), + ) + + +@pytest.fixture +def fxt_action_dataset(): + return Dataset.from_iterable( + [ + DatasetItem( + id="2007_000001", + subset="train", + media=Image.from_numpy(data=np.ones((10, 20, 3))), + annotations=[ + Bbox( + 4.0, + 5.0, + 2.0, + 2.0, + label=1, + id=0, + group=0, + attributes={ + "difficult": False, + "truncated": False, + "occluded": False, + **{a.name: a.value % 2 == 1 for a in VOC.VocAction}, + }, + ) + ], + ), + DatasetItem( + id="2007_000002", + subset="test", + media=Image.from_numpy(data=np.ones((10, 20, 3))), + ), + ], + categories=VOC.make_voc_categories(task=VocTask.voc_action), + ) + + +class VocFormatImportExportTest(TestDataFormatBase): + @pytest.mark.parametrize( + "fxt_dataset_dir, importer", + [ + (DUMMY_DATASET_DIR, VocClassificationImporter), + (DUMMY_DATASET_DIR, VocDetectionImporter), + (DUMMY_DATASET_DIR, VocSegmentationImporter), + (DUMMY_DATASET_DIR, VocLayoutImporter), + (DUMMY_DATASET_DIR, VocActionImporter), + ], + ids=["cls", "det", "seg", "layout", "action"], + ) + def test_can_detect(self, fxt_dataset_dir: str, importer: Importer): + detected_formats = DEFAULT_ENVIRONMENT.detect_dataset(fxt_dataset_dir) + assert importer.NAME in detected_formats + + @pytest.mark.parametrize( + [ + "fxt_dataset_dir", + "fxt_expected_dataset", + "importer", + "fxt_import_kwargs", + ], + [ + (DUMMY_DATASET_DIR, "fxt_classification_dataset", VocClassificationImporter, {}), + (DUMMY_DATASET_DIR, "fxt_detection_dataset", VocDetectionImporter, {}), + (DUMMY_DATASET_DIR, "fxt_segmentation_dataset", VocSegmentationImporter, {}), + (DUMMY_DATASET_DIR, "fxt_layout_dataset", VocLayoutImporter, {}), + (DUMMY_DATASET_DIR, "fxt_action_dataset", 
VocActionImporter, {}), + ], + indirect=["fxt_expected_dataset"], + ids=["cls", "det", "seg", "layout", "action"], + ) + def test_can_import( + self, + fxt_dataset_dir: str, + fxt_expected_dataset: Dataset, + fxt_import_kwargs: Dict[str, Any], + request: pytest.FixtureRequest, + importer: Importer, + ): + return super().test_can_import( + fxt_dataset_dir, + fxt_expected_dataset, + fxt_import_kwargs, + request, + importer=importer, + ) + + @pytest.mark.parametrize( + "fxt_expected_dataset, exporter, fxt_export_kwargs, importer, fxt_import_kwargs", + [ + ( + "fxt_classification_dataset", + VocClassificationExporter, + {"label_map": "voc_classification"}, + VocClassificationImporter, + {}, + ), + ( + "fxt_detection_dataset", + VocDetectionExporter, + {"label_map": "voc_detection"}, + VocDetectionImporter, + {}, + ), + ( + "fxt_segmentation_dataset", + VocSegmentationExporter, + {"label_map": "voc_segmentation"}, + VocSegmentationImporter, + {}, + ), + ( + "fxt_layout_dataset", + VocLayoutExporter, + {"label_map": "voc_layout"}, + VocLayoutImporter, + {}, + ), + ( + "fxt_action_dataset", + VocActionExporter, + {"label_map": "voc_action"}, + VocActionImporter, + {}, + ), + ], + indirect=["fxt_expected_dataset"], + ) + def test_can_export_and_import( + self, + fxt_expected_dataset: Dataset, + test_dir: str, + fxt_import_kwargs: Dict[str, Any], + fxt_export_kwargs: Dict[str, Any], + request: pytest.FixtureRequest, + exporter: Exporter, + importer: Importer, + ): + return super().test_can_export_and_import( + fxt_expected_dataset, + test_dir, + fxt_import_kwargs, + fxt_export_kwargs, + request, + exporter=exporter, + importer=importer, + ) + + @pytest.mark.parametrize( + "fxt_dataset_dir,fxt_format", + [ + (DUMMY_DATASET_DIR, "voc_classification"), + (DUMMY_DATASET_DIR, "voc_detection"), + (DUMMY_DATASET_DIR, "voc_action"), + (DUMMY_DATASET_DIR, "voc_layout"), + (DUMMY_DATASET_DIR, "voc_detection"), + ], + ) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_pickle( + self, fxt_dataset_dir: str, fxt_format: str, request: pytest.FixtureRequest + ): + source = Dataset.import_from(fxt_dataset_dir, format=fxt_format) + + parsed = pickle.loads(pickle.dumps(source)) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, source, parsed) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_colormap_generator(self): + reference = np.array( + [ + [0, 0, 0], + [128, 0, 0], + [0, 128, 0], + [128, 128, 0], + [0, 0, 128], + [128, 0, 128], + [0, 128, 128], + [128, 128, 128], + [64, 0, 0], + [192, 0, 0], + [64, 128, 0], + [192, 128, 0], + [64, 0, 128], + [192, 0, 128], + [64, 128, 128], + [192, 128, 128], + [0, 64, 0], + [128, 64, 0], + [0, 192, 0], + [128, 192, 0], + [0, 64, 128], + [224, 224, 192], # ignored + ] + ) + + assert np.array_equal(reference, list(VOC.VocColormap.values())) == True + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_write_and_parse_labelmap(self, test_dir: str): + src_label_map = VOC.make_voc_label_map() + src_label_map["qq"] = [None, ["part1", "part2"], ["act1", "act2"]] + src_label_map["ww"] = [(10, 20, 30), [], ["act3"]] + + file_path = osp.join(test_dir, "test.txt") + + VOC.write_label_map(file_path, src_label_map) + dst_label_map = VOC.parse_label_map(file_path) + + assert src_label_map == dst_label_map + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_write_and_parse_dataset_meta_file(self, test_dir: str): + src_label_map = VOC.make_voc_label_map() + src_label_map["qq"] = [None, ["part1", 
"part2"], ["act1", "act2"]] + src_label_map["ww"] = [(10, 20, 30), [], ["act3"]] + + VOC.write_meta_file(test_dir, src_label_map) + dst_label_map = VOC.parse_meta_file(test_dir) + + assert src_label_map == dst_label_map + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_report_invalid_line_in_labelmap(self, test_dir: str): + path = osp.join(test_dir, "labelmap.txt") + with open(path, "w") as f: + f.write("a\n") + + with pytest.raises(InvalidAnnotationError) as err_info: + VOC.parse_label_map(path) + assert ( + str(err_info.value) + == "Label description has wrong number of fields '1'. Expected 4 ':'-separated fields." + ) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_report_repeated_label_in_labelmap(self, test_dir: str): + path = osp.join(test_dir, "labelmap.txt") + with open(path, "w") as f: + f.write("a:::\n") + f.write("a:::\n") + + with pytest.raises(InvalidAnnotationError) as err_info: + VOC.parse_label_map(path) + assert str(err_info.value) == "Label 'a' is already defined in the label map" + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_report_invalid_color_in_labelmap(self, test_dir: str): + path = osp.join(test_dir, "labelmap.txt") + with open(path, "w") as f: + f.write("a:10,20::\n") + + with pytest.raises(InvalidAnnotationError) as err_info: + VOC.parse_label_map(path) + assert ( + str(err_info.value) + == "Label 'a' has wrong color '['10', '20']'. Expected an 'r,g,b' triplet." + ) + + +class VocFormatPracticeTest: + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_attributes(self, test_dir: str, request: pytest.FixtureRequest): + src_dataset = Dataset.from_iterable( + [ + DatasetItem( + id="a", + annotations=[ + Bbox( + 2, + 3, + 4, + 5, + label=15, + attributes={"occluded": True, "x": 1, "y": "2"}, + ), + ], + ), + ], + categories=VOC.make_voc_categories(task=VocTask.voc_detection), + ) + + dst_dataset = Dataset.from_iterable( + [ + DatasetItem( + id="a", + annotations=[ + Bbox( + 2, + 3, + 4, + 5, + label=15, + id=0, + group=0, + attributes={ + "truncated": False, + "difficult": False, + "occluded": True, + "x": "1", + "y": "2", # can only read strings + }, + ), + ], + ), + ], + categories=VOC.make_voc_categories(task=VocTask.voc_detection), + ) + + VocDetectionExporter.convert( + src_dataset, + save_dir=test_dir, + save_media=True, + ) + imported = Dataset.import_from(test_dir, VocDetectionImporter.NAME) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, dst_dataset, imported, require_media=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_voc_with_custom_labelmap(self, test_dir: str, request: pytest.FixtureRequest): + def src_categories(): + label_cat = LabelCategories() + label_cat.add(VOC.VocLabel.cat.name) + label_cat.add("non_voc_label") + return { + AnnotationType.label: label_cat, + AnnotationType.mask: MaskCategories(colormap=VOC.generate_colormap(3)), + } + + src_dataset = Dataset.from_iterable( + [ + DatasetItem( + id=1, + annotations=[ + Bbox(2, 3, 4, 5, label=0, id=1), + Bbox(1, 2, 3, 4, label=1, id=2), + ], + ) + ], + categories=src_categories(), + ) + + def dst_categories(): + label_cat = LabelCategories() + label_cat.attributes.update(["difficult", "truncated", "occluded"]) + label_cat.add(VOC.VocLabel.cat.name) + label_cat.add("non_voc_label") + return { + AnnotationType.label: label_cat, + } + + dst_dataset = Dataset.from_iterable( + [ + DatasetItem( + id=1, + annotations=[ + # drop non voc label + Bbox( + 2, + 3, 
+ 4, + 5, + label=0, + id=0, + group=0, + attributes={ + "truncated": False, + "difficult": False, + "occluded": False, + }, + ), + Bbox( + 1, + 2, + 3, + 4, + label=1, + id=1, + group=1, + attributes={ + "truncated": False, + "difficult": False, + "occluded": False, + }, + ), + ], + ) + ], + categories=dst_categories(), + ) + + VocDetectionExporter.convert( + src_dataset, + save_dir=test_dir, + save_media=True, + ) + imported_dataset = Dataset.import_from(test_dir, VocDetectionImporter.NAME) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, dst_dataset, imported_dataset, require_media=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_voc_segm_unpainted(self, test_dir: str, request: pytest.FixtureRequest): + src_dataset = Dataset.from_iterable( + [ + DatasetItem( + id=1, + subset="a", + annotations=[ + # overlapping masks, the first should be truncated + # the second and third are different instances + Mask(image=np.array([[0, 1, 1, 1, 0]]), label=4, z_order=1), + Mask(image=np.array([[1, 1, 0, 0, 0]]), label=3, z_order=2), + Mask(image=np.array([[0, 0, 0, 1, 0]]), label=3, z_order=3), + ], + ), + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable( + ["background", "a", "b", "c", "d"] + ), + AnnotationType.mask: MaskCategories(colormap=VOC.generate_colormap(5)), + }, + ) + + dst_dataset = Dataset.from_iterable( + [ + DatasetItem( + id=1, + subset="a", + annotations=[ + Mask(image=np.array([[0, 0, 1, 0, 0]]), label=4, group=1), + Mask(image=np.array([[1, 1, 0, 0, 0]]), label=3, group=2), + Mask(image=np.array([[0, 0, 0, 1, 0]]), label=3, group=3), + ], + ), + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable( + ["background", "a", "b", "c", "d"], + ), + AnnotationType.mask: MaskCategories(colormap=VOC.generate_colormap(5)), + }, + ) + + VocSegmentationExporter.convert( + src_dataset, + save_dir=test_dir, + save_media=True, + ) + imported_dataset = Dataset.import_from(test_dir, VocSegmentationImporter.NAME) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, dst_dataset, imported_dataset, require_media=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_voc_segm_with_many_instances( + self, test_dir: str, request: pytest.FixtureRequest + ): + def bit(x, y, shape): + mask = np.zeros(shape) + mask[y, x] = 1 + return mask + + src_dataset = Dataset.from_iterable( + [ + DatasetItem( + id=1, + subset="a", + annotations=[ + Mask( + image=bit(x, y, shape=[10, 10]), + label=3, + z_order=10 * y + x + 1, + ) + for y in range(10) + for x in range(10) + ], + ), + ], + categories=VOC.make_voc_categories(task=VocTask.voc_segmentation), + ) + + dst_dataset = Dataset.from_iterable( + [ + DatasetItem( + id=1, + subset="a", + annotations=[ + Mask( + image=bit(x, y, shape=[10, 10]), + label=3, + group=10 * y + x + 1, + ) + for y in range(10) + for x in range(10) + ], + ), + ], + categories=VOC.make_voc_categories(task=VocTask.voc_segmentation), + ) + + VocSegmentationExporter.convert( + src_dataset, save_dir=test_dir, save_media=True, label_map="voc_segmentation" + ) + imported_dataset = Dataset.import_from(test_dir, VocSegmentationImporter.NAME) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, dst_dataset, imported_dataset, require_media=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_dataset_with_source_labelmap_undefined( + self, test_dir: str, request: pytest.FixtureRequest + ): + 
src_dataset = Dataset.from_iterable( + [ + DatasetItem( + id=1, + annotations=[ + Bbox(2, 3, 4, 5, label=0, id=1), + Bbox(1, 2, 3, 4, label=1, id=2), + ], + ), + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable(["Label_1", "label_2"]) + }, + ) + + dst_dataset = Dataset.from_iterable( + [ + DatasetItem( + id=1, + annotations=[ + Bbox( + 2, + 3, + 4, + 5, + label=1, + id=0, + group=0, + attributes={ + "truncated": False, + "difficult": False, + "occluded": False, + }, + ), + Bbox( + 1, + 2, + 3, + 4, + label=2, + id=1, + group=1, + attributes={ + "truncated": False, + "difficult": False, + "occluded": False, + }, + ), + ], + ) + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable( + ["background", "Label_1", "label_2"] + ) + }, + ) + + VocDetectionExporter.convert( + src_dataset, + save_dir=test_dir, + save_media=True, + ) + imported_dataset = Dataset.import_from(test_dir, VocDetectionImporter.NAME) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, dst_dataset, imported_dataset, require_media=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_dataset_with_source_labelmap_defined( + self, test_dir: str, request: pytest.FixtureRequest + ): + src_dataset = Dataset.from_iterable( + [ + DatasetItem( + id=1, + annotations=[ + Bbox(2, 3, 4, 5, label=0, id=1), + Bbox(1, 2, 3, 4, label=2, id=2), + ], + ) + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable( + ["label_1", "background", "label_2"] + ) + }, + ) + + dst_dataset = Dataset.from_iterable( + [ + DatasetItem( + id=1, + annotations=[ + Bbox( + 2, + 3, + 4, + 5, + label=1, + id=0, + group=0, + attributes={ + "truncated": False, + "difficult": False, + "occluded": False, + }, + ), + Bbox( + 1, + 2, + 3, + 4, + label=2, + id=1, + group=1, + attributes={ + "truncated": False, + "difficult": False, + "occluded": False, + }, + ), + ], + ) + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable( + ["background", "label_1", "label_2"] + ) + }, + ) + + VocDetectionExporter.convert( + src_dataset, + save_dir=test_dir, + save_media=True, + ) + imported_dataset = Dataset.import_from(test_dir, VocDetectionImporter.NAME) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, dst_dataset, imported_dataset, require_media=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_inplace_save_writes_only_updated_data_with_transforms( + self, test_dir: str, request: pytest.FixtureRequest + ): + dst_dataset = Dataset.from_iterable( + [ + DatasetItem( + "3", + subset="test", + media=Image.from_numpy(data=np.ones((2, 3, 3))), + annotations=[ + Bbox( + 0, + 1, + 0, + 0, + label=3, + id=0, + group=0, + ) + ], + ), + DatasetItem( + "4", + subset="train", + media=Image.from_numpy(data=np.ones((2, 4, 3))), + annotations=[ + Bbox( + 1, + 0, + 0, + 0, + label=3, + id=0, + group=0, + ), + Mask(np.ones((2, 2)), label=1, group=0), + ], + ), + ], + categories=["a", "b", "c", "d"], + ) + + src_dataset = Dataset.from_iterable( + [ + DatasetItem( + "1", + subset="a", + media=Image.from_numpy(data=np.ones((2, 1, 3))), + annotations=[Bbox(0, 0, 0, 1, label=1)], + ), + DatasetItem( + "2", + subset="b", + media=Image.from_numpy(data=np.ones((2, 2, 3))), + annotations=[ + Bbox(0, 0, 1, 0, label=2), + Mask(np.ones((2, 2)), label=1), + ], + ), + DatasetItem( + "3", + subset="b", + media=Image.from_numpy(data=np.ones((2, 3, 3))), + annotations=[Bbox(0, 1, 0, 0, label=3)], + ), + DatasetItem( + "4", + 
subset="c", + media=Image.from_numpy(data=np.ones((2, 4, 3))), + annotations=[Bbox(1, 0, 0, 0, label=3), Mask(np.ones((2, 2)), label=1)], + ), + ], + categories=["a", "b", "c", "d"], + ) + + src_dataset.export(test_dir, "voc_instance_segmentation", save_media=True) + + src_dataset.filter("/item[id >= 3]") + src_dataset.transform("random_split", splits=(("train", 0.5), ("test", 0.5)), seed=42) + src_dataset.save(save_media=True) + + assert {"3.xml", "4.xml"} == set(os.listdir(osp.join(test_dir, "Annotations"))) + assert {"3.jpg", "4.jpg"} == set(os.listdir(osp.join(test_dir, "JPEGImages"))) + assert {"4.png"} == set(os.listdir(osp.join(test_dir, "SegmentationClass"))) + assert {"4.png"} == set(os.listdir(osp.join(test_dir, "SegmentationObject"))) + assert {"train.txt", "test.txt"} == set(os.listdir(osp.join(test_dir, "ImageSets", "Main"))) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, dst_dataset, src_dataset, require_media=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_inplace_save_writes_only_updated_data_with_direct_changes( + self, test_dir: str, request: pytest.FixtureRequest + ): + dst_dataset = Dataset.from_iterable( + [ + DatasetItem( + 1, + subset="a", + media=Image.from_numpy(data=np.ones((1, 2, 3))), + annotations=[ + # Bbox(0, 0, 0, 0, label=1) # won't find removed anns + ], + ), + DatasetItem( + 2, + subset="b", + media=Image.from_numpy(data=np.ones((3, 2, 3))), + annotations=[ + Bbox( + 0, + 0, + 0, + 0, + label=3, + id=0, + group=0, + attributes={ + "truncated": False, + "difficult": False, + "occluded": False, + }, + ) + ], + ), + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable( + ["background", "a", "b", "c", "d"] + ), + }, + ) + + src_dataset = Dataset.from_iterable( + [ + DatasetItem( + 1, + subset="a", + media=Image.from_numpy(data=np.ones((1, 2, 3))), + annotations=[Bbox(0, 0, 0, 0, label=1)], + ), + DatasetItem(2, subset="b", annotations=[Bbox(0, 0, 0, 0, label=2)]), + DatasetItem( + 3, + subset="c", + media=Image.from_numpy(data=np.ones((2, 2, 3))), + annotations=[Bbox(0, 0, 0, 0, label=3), Mask(np.ones((2, 2)), label=1)], + ), + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable( + ["background", "a", "b", "c", "d"] + ), + AnnotationType.mask: MaskCategories(colormap=VOC.generate_colormap(5)), + }, + ) + + src_dataset.export(test_dir, "voc_detection", save_media=True) + os.unlink(osp.join(test_dir, "Annotations", "1.xml")) + os.unlink(osp.join(test_dir, "Annotations", "2.xml")) + os.unlink(osp.join(test_dir, "Annotations", "3.xml")) + + src_dataset.put( + DatasetItem( + 2, + subset="b", + media=Image.from_numpy(data=np.ones((3, 2, 3))), + annotations=[Bbox(0, 0, 0, 0, label=3)], + ) + ) + src_dataset.remove(3, "c") + src_dataset.save(save_media=True) + + assert {"2.xml"} == set(os.listdir(osp.join(test_dir, "Annotations"))) + assert {"1.jpg", "2.jpg"} == set(os.listdir(osp.join(test_dir, "JPEGImages"))) + assert {"a.txt", "b.txt"} == set(os.listdir(osp.join(test_dir, "ImageSets", "Main"))) + + imported_dataset = Dataset.import_from(test_dir, "voc_detection") + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, dst_dataset, imported_dataset, require_media=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_dataset_with_no_data_images( + self, test_dir: str, request: pytest.FixtureRequest + ): + src_dataset = Dataset.from_iterable( + [ + DatasetItem( + id="frame1", + subset="test", + 
media=Image.from_file(path="frame1.jpg"), + annotations=[ + Bbox( + 1.0, + 2.0, + 3.0, + 4.0, + attributes={ + "difficult": False, + "truncated": False, + "occluded": False, + }, + id=0, + label=0, + group=0, + ) + ], + ) + ], + categories=VOC.make_voc_categories(task=VocTask.voc_detection), + ) + + VocDetectionExporter.convert( + src_dataset, + save_dir=test_dir, + save_media=True, + ) + imported_dataset = Dataset.import_from(test_dir, VocDetectionImporter.NAME) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, src_dataset, imported_dataset, require_media=True) + + @pytest.mark.parametrize( + "fxt_task,fxt_importer,fxt_exporter", + [ + (VocTask.voc_classification, VocClassificationImporter, VocClassificationExporter), + (VocTask.voc_detection, VocDetectionImporter, VocDetectionExporter), + (VocTask.voc_segmentation, VocSegmentationImporter, VocSegmentationExporter), + ( + VocTask.voc_instance_segmentation, + VocInstanceSegmentationImporter, + VocInstanceSegmentationExporter, + ), + (VocTask.voc_layout, VocLayoutImporter, VocLayoutExporter), + (VocTask.voc_action, VocActionImporter, VocActionExporter), + ], + ) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_dataset_with_only_images( + self, fxt_task, fxt_importer, fxt_exporter, test_dir: str, request: pytest.FixtureRequest + ): + def dataset_with_images(task): + return Dataset.from_iterable( + [ + DatasetItem(id=1, subset="a", media=Image.from_numpy(data=np.ones([4, 5, 3]))), + DatasetItem(id=2, subset="a", media=Image.from_numpy(data=np.ones([4, 5, 3]))), + DatasetItem(id=3, subset="b", media=Image.from_numpy(data=np.ones([2, 6, 3]))), + ], + categories=VOC.make_voc_categories(task=task), + ) + + src_dataset = dataset_with_images(fxt_task) + + fxt_exporter.convert( + src_dataset, save_dir=test_dir, save_media=True, label_map=fxt_importer.NAME + ) + imported_dataset = Dataset.import_from(test_dir, fxt_importer.NAME) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, src_dataset, imported_dataset, require_media=True) + + @pytest.mark.parametrize( + "fxt_task,fxt_importer,fxt_exporter", + [ + (VocTask.voc_classification, VocClassificationImporter, VocClassificationExporter), + (VocTask.voc_detection, VocDetectionImporter, VocDetectionExporter), + (VocTask.voc_segmentation, VocSegmentationImporter, VocSegmentationExporter), + ( + VocTask.voc_instance_segmentation, + VocInstanceSegmentationImporter, + VocInstanceSegmentationExporter, + ), + (VocTask.voc_layout, VocLayoutImporter, VocLayoutExporter), + (VocTask.voc_action, VocActionImporter, VocActionExporter), + ], + ) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_dataset_with_no_subsets( + self, fxt_task, fxt_importer, fxt_exporter, test_dir: str, request: pytest.FixtureRequest + ): + def dataset_with_no_subsets(task): + return Dataset.from_iterable( + [ + DatasetItem(id=1), + DatasetItem(id=2), + ], + categories=VOC.make_voc_categories(task=task), + ) + + src_dataset = dataset_with_no_subsets(fxt_task) + + fxt_exporter.convert( + src_dataset, save_dir=test_dir, save_media=True, label_map=fxt_importer.NAME + ) + imported_dataset = Dataset.import_from(test_dir, fxt_importer.NAME) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, src_dataset, imported_dataset, require_media=True) + + @pytest.mark.parametrize( + "fxt_task,fxt_importer,fxt_exporter", + [ + (VocTask.voc_classification, VocClassificationImporter, VocClassificationExporter), + 
(VocTask.voc_detection, VocDetectionImporter, VocDetectionExporter), + (VocTask.voc_segmentation, VocSegmentationImporter, VocSegmentationExporter), + ( + VocTask.voc_instance_segmentation, + VocInstanceSegmentationImporter, + VocInstanceSegmentationExporter, + ), + (VocTask.voc_layout, VocLayoutImporter, VocLayoutExporter), + (VocTask.voc_action, VocActionImporter, VocActionExporter), + ], + ) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_dataset_with_spaces_in_filename( + self, fxt_task, fxt_importer, fxt_exporter, test_dir: str, request: pytest.FixtureRequest + ): + def dataset_with_cyrillic_and_spaces_in_filename(task): + return Dataset.from_iterable( + [ + DatasetItem(id="кириллица с пробелом 1"), + DatasetItem( + id="кириллица с пробелом 2", + media=Image.from_numpy(data=np.ones([4, 5, 3])), + ), + ], + categories=VOC.make_voc_categories(task=task), + ) + + src_dataset = dataset_with_cyrillic_and_spaces_in_filename(fxt_task) + + fxt_exporter.convert( + src_dataset, save_dir=test_dir, save_media=True, label_map=fxt_importer.NAME + ) + imported_dataset = Dataset.import_from(test_dir, fxt_importer.NAME) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, src_dataset, imported_dataset, require_media=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_background_masks_dont_introduce_instances_but_cover_others(self, test_dir: str): + dataset = Dataset.from_iterable( + [ + DatasetItem( + 1, + media=Image.from_numpy(data=np.zeros((4, 1, 1))), + annotations=[ + Mask([1, 1, 1, 1], label=1, attributes={"z_order": 1}), + Mask([0, 0, 1, 1], label=2, attributes={"z_order": 2}), + Mask([0, 0, 1, 1], label=0, attributes={"z_order": 3}), + ], + ) + ], + categories=["background", "a", "b"], + ) + + VocSegmentationExporter.convert(dataset, test_dir, apply_colormap=False) + + cls_mask = load_mask(osp.join(test_dir, "SegmentationClass", "1.png")) + inst_mask = load_mask(osp.join(test_dir, "SegmentationObject", "1.png")) + assert np.array_equal([0, 1], np.unique(cls_mask)) + assert np.array_equal([0, 1], np.unique(inst_mask)) + + @pytest.mark.parametrize( + "fxt_task,fxt_importer,fxt_exporter", + [ + (VocTask.voc_classification, VocClassificationImporter, VocClassificationExporter), + (VocTask.voc_detection, VocDetectionImporter, VocDetectionExporter), + (VocTask.voc_segmentation, VocSegmentationImporter, VocSegmentationExporter), + ( + VocTask.voc_instance_segmentation, + VocInstanceSegmentationImporter, + VocInstanceSegmentationExporter, + ), + (VocTask.voc_layout, VocLayoutImporter, VocLayoutExporter), + (VocTask.voc_action, VocActionImporter, VocActionExporter), + ], + ) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_dataset_with_image_info( + self, fxt_task, fxt_importer, fxt_exporter, test_dir: str, request: pytest.FixtureRequest + ): + def dataset_with_image_info(task): + return Dataset.from_iterable( + [ + DatasetItem(id=1, media=Image.from_file(path="1.jpg", size=(10, 15))), + ], + categories=VOC.make_voc_categories(task=task), + ) + + src_dataset = dataset_with_image_info(fxt_task) + + fxt_exporter.convert( + src_dataset, save_dir=test_dir, save_media=True, label_map=fxt_importer.NAME + ) + imported_dataset = Dataset.import_from(test_dir, fxt_importer.NAME) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, src_dataset, imported_dataset, require_media=True) + + @pytest.mark.parametrize( + "fxt_task,fxt_importer,fxt_exporter", + [ + (VocTask.voc_classification, 
VocClassificationImporter, VocClassificationExporter), + (VocTask.voc_detection, VocDetectionImporter, VocDetectionExporter), + (VocTask.voc_segmentation, VocSegmentationImporter, VocSegmentationExporter), + ( + VocTask.voc_instance_segmentation, + VocInstanceSegmentationImporter, + VocInstanceSegmentationExporter, + ), + (VocTask.voc_layout, VocLayoutImporter, VocLayoutExporter), + (VocTask.voc_action, VocActionImporter, VocActionExporter), + ], + ) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_image_with_arbitrary_extension( + self, fxt_task, fxt_importer, fxt_exporter, test_dir: str, request: pytest.FixtureRequest + ): + def dataset_with_arbitrary_extension(task): + return Dataset.from_iterable( + [ + DatasetItem( + id="q/1", media=Image.from_numpy(data=np.zeros((4, 3, 3)), ext=".JPEG") + ), + DatasetItem(id="a/b/c/2", media=Image.from_numpy(data=np.zeros((3, 4, 3)))), + ], + categories=VOC.make_voc_categories(task=task), + ) + + src_dataset = dataset_with_arbitrary_extension(fxt_task) + + fxt_exporter.convert( + src_dataset, save_dir=test_dir, save_media=True, label_map=fxt_importer.NAME + ) + imported_dataset = Dataset.import_from(test_dir, fxt_importer.NAME) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, src_dataset, imported_dataset, require_media=True) + + @pytest.mark.parametrize( + "fxt_task,fxt_importer,fxt_exporter", + [ + (VocTask.voc_classification, VocClassificationImporter, VocClassificationExporter), + (VocTask.voc_detection, VocDetectionImporter, VocDetectionExporter), + (VocTask.voc_segmentation, VocSegmentationImporter, VocSegmentationExporter), + ( + VocTask.voc_instance_segmentation, + VocInstanceSegmentationImporter, + VocInstanceSegmentationExporter, + ), + (VocTask.voc_layout, VocLayoutImporter, VocLayoutExporter), + (VocTask.voc_action, VocActionImporter, VocActionExporter), + ], + ) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_save_dataset_with_relative_paths( + self, fxt_task, fxt_importer, fxt_exporter, test_dir: str, request: pytest.FixtureRequest + ): + def dataset_with_relative_paths(task): + return Dataset.from_iterable( + [ + DatasetItem(id="1", media=Image.from_numpy(data=np.ones((4, 2, 3)))), + DatasetItem(id="subdir1/1", media=Image.from_numpy(data=np.ones((2, 6, 3)))), + DatasetItem(id="subdir2/1", media=Image.from_numpy(data=np.ones((5, 4, 3)))), + ], + categories=VOC.make_voc_categories(task=task), + ) + + src_dataset = dataset_with_relative_paths(fxt_task) + + fxt_exporter.convert( + src_dataset, save_dir=test_dir, save_media=True, label_map=fxt_importer.NAME + ) + imported_dataset = Dataset.import_from(test_dir, fxt_importer.NAME) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, src_dataset, imported_dataset, require_media=True) + + +class VocFormatErrorTest: + # ?xml... 
must be at the beginning of the file
+    XML_ANNOTATION_TEMPLATE = """\
+<?xml version="1.0" encoding="utf-8"?>
+<annotation>
+<filename>a.jpg</filename>
+<size><width>20</width><height>10</height><depth>3</depth></size>
+<object>
+    <name>person</name>
+    <bndbox><xmin>1</xmin><ymin>2</ymin><xmax>3</xmax><ymax>4</ymax></bndbox>
+    <difficult>1</difficult>
+    <truncated>1</truncated>
+    <occluded>1</occluded>
+    <point><x>1</x><y>1</y></point>
+    <attributes><attribute><name>a</name><value>42</value></attribute></attributes>
+    <actions><jumping>1</jumping></actions>
+    <part>
+        <name>head</name>
+        <bndbox><xmin>1</xmin><ymin>2</ymin><xmax>3</xmax><ymax>4</ymax></bndbox>
+    </part>
+</object>
+</annotation>
+    """
+
+    @classmethod
+    def _write_xml_dataset(cls, root_dir, format_dir="Main", mangle_xml=None):
+        subset_file = osp.join(root_dir, "ImageSets", format_dir, "test.txt")
+        if not osp.exists(subset_file):
+            os.makedirs(osp.dirname(subset_file))
+            with open(subset_file, "w") as f:
+                f.write("a\n" if format_dir != "Layout" else "a 0\n")
+
+        ann_file = osp.join(root_dir, "Annotations", "a.xml")
+        if not osp.exists(ann_file):
+            os.makedirs(osp.dirname(ann_file))
+            with open(ann_file, "wb") as f:
+                xml = ElementTree.fromstring(cls.XML_ANNOTATION_TEMPLATE.encode())
+                if mangle_xml:
+                    mangle_xml(xml)
+                f.write(ElementTree.tostring(xml))
+
+    @pytest.mark.parametrize(
+        "fxt_format,fxt_format_dir",
+        [
+            ("voc_detection", "Main"),
+            ("voc_layout", "Layout"),
+            ("voc_action", "Action"),
+        ],
+    )
+    @mark_requirement(Requirements.DATUM_ERROR_REPORTING)
+    def test_can_parse_xml_without_errors(
+        self, fxt_format: str, fxt_format_dir: str, test_dir: str
+    ):
+        self._write_xml_dataset(test_dir, format_dir=fxt_format_dir)
+
+        dataset = Dataset.import_from(test_dir, fxt_format)
+        assert len(dataset) == 1
+
+    @mark_requirement(Requirements.DATUM_ERROR_REPORTING)
+    def test_can_report_invalid_label_in_xml(self, test_dir: str):
+        def mangle_xml(xml: ElementTree.ElementBase):
+            xml.find("object/name").text = "test"
+
+        self._write_xml_dataset(test_dir, format_dir="Main", mangle_xml=mangle_xml)
+
+        with pytest.raises(AnnotationImportError) as err_info:
+            Dataset.import_from(test_dir, format="voc_detection").init_cache()
+        assert (
+            str(err_info.value)
+            == "Failed to import item ('a', 'test') annotation: Undeclared label 'test'"
+        )
+
+    @pytest.mark.parametrize(
+        "fxt_format,fxt_format_dir",
+        [
+            ("voc_detection", "Main"),
+            ("voc_layout", "Layout"),
+            ("voc_action", "Action"),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "fxt_key,fxt_value",
+        [
+            ("size/width", "a"),
+            ("size/height", "a"),
+            ("object/bndbox/xmin", "a"),
+            ("object/bndbox/ymin", "a"),
+            ("object/bndbox/xmax", "a"),
+            ("object/bndbox/ymax", "a"),
+            ("object/occluded", "a"),
+            ("object/difficult", "a"),
+            ("object/truncated", "a"),
+            ("object/point/x", "a"),
+            ("object/point/y", "a"),
+        ],
+    )
+    @mark_requirement(Requirements.DATUM_ERROR_REPORTING)
+    def test_can_report_invalid_field_in_xml(
+        self, fxt_format: str, fxt_format_dir: str, fxt_key: str, fxt_value: str, test_dir: str
+    ):
+        def mangle_xml(xml: ElementTree.ElementBase):
+            xml.find(fxt_key).text = fxt_value
+
+        self._write_xml_dataset(test_dir, format_dir=fxt_format_dir, mangle_xml=mangle_xml)
+
+        with pytest.raises(ItemImportError) as err_info:
+            Dataset.import_from(test_dir, format=fxt_format).init_cache()
+        assert "Invalid annotation field" in str(err_info.value)
+
+    @pytest.mark.parametrize(
+        "fxt_format,fxt_format_dir",
+        [
+            ("voc_detection", "Main"),
+            ("voc_layout", "Layout"),
+            ("voc_action", "Action"),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "fxt_key",
+        [
+            "object/name",
+            "object/bndbox",
+            "object/bndbox/xmin",
+            "object/bndbox/ymin",
+            "object/bndbox/xmax",
+            "object/bndbox/ymax",
+            "object/point/x",
+            "object/point/y",
+            "object/attributes/attribute/name",
+            "object/attributes/attribute/value",
+        ],
+    )
+    @mark_requirement(Requirements.DATUM_ERROR_REPORTING)
+    def test_can_report_missing_field_in_xml(
+        self, fxt_format: str, fxt_format_dir: str, fxt_key: str, test_dir: str
+    ):
+        def mangle_xml(xml: 
ElementTree.ElementBase): + for elem in xml.findall(fxt_key): + elem.getparent().remove(elem) + + self._write_xml_dataset(test_dir, format_dir=fxt_format_dir, mangle_xml=mangle_xml) + + with pytest.raises(AnnotationImportError) as err_info: + Dataset.import_from(test_dir, format=fxt_format).init_cache() + assert "Missing annotation field" in str(err_info.value) + + @mark_requirement(Requirements.DATUM_ERROR_REPORTING) + def test_can_parse_classification_without_errors( + self, test_dir: str, request: pytest.FixtureRequest + ): + subset_file = osp.join(test_dir, "ImageSets", "Main", "test.txt") + os.makedirs(osp.dirname(subset_file)) + with open(subset_file, "w") as f: + f.write("a\n") + f.write("b\n") + f.write("c\n") + + ann_file = osp.join(test_dir, "ImageSets", "Main", "cat_test.txt") + with open(ann_file, "w") as f: + f.write("a -1\n") + f.write("b 0\n") + f.write("c 1\n") + + parsed = Dataset.import_from(test_dir, format="voc_classification") + + expected = Dataset.from_iterable( + [ + DatasetItem("a", subset="test"), + DatasetItem("b", subset="test"), + DatasetItem("c", subset="test", annotations=[Label(VOC.VocLabel.cat.value)]), + ], + categories=VOC.make_voc_categories(task=VocTask.voc_classification), + ) + + helper_tc = request.getfixturevalue("helper_tc") + compare_datasets(helper_tc, expected, parsed) + + @mark_requirement(Requirements.DATUM_ERROR_REPORTING) + def test_can_report_missing_field_in_classification(self, test_dir: str): + subset_file = osp.join(test_dir, "ImageSets", "Main", "test.txt") + os.makedirs(osp.dirname(subset_file)) + with open(subset_file, "w") as f: + f.write("a\n") + + ann_file = osp.join(test_dir, "ImageSets", "Main", "cat_test.txt") + with open(ann_file, "w") as f: + f.write("a\n") + + with pytest.raises(InvalidAnnotationError) as err_info: + Dataset.import_from(test_dir, format="voc_classification").init_cache() + assert str(err_info.value) == "cat_test.txt:1: invalid number of fields in line, expected 2" + + @mark_requirement(Requirements.DATUM_ERROR_REPORTING) + def test_can_report_invalid_annotation_value_in_classification(self, test_dir: str): + subset_file = osp.join(test_dir, "ImageSets", "Main", "test.txt") + os.makedirs(osp.dirname(subset_file)) + with open(subset_file, "w") as f: + f.write("a\n") + + ann_file = osp.join(test_dir, "ImageSets", "Main", "cat_test.txt") + with open(ann_file, "w") as f: + f.write("a 3\n") + + with pytest.raises(InvalidAnnotationError) as err_info: + Dataset.import_from(test_dir, format="voc_classification").init_cache() + assert ( + str(err_info.value) + == "cat_test.txt:1: unexpected class existence value '3', expected -1, 0 or 1" + ) + + @mark_requirement(Requirements.DATUM_ERROR_REPORTING) + def test_can_report_invalid_label_in_segmentation_cls_mask(self, test_dir: str): + subset_file = osp.join(test_dir, "ImageSets", "Segmentation", "test.txt") + os.makedirs(osp.dirname(subset_file)) + with open(subset_file, "w") as f: + f.write("a\n") + + ann_file = osp.join(test_dir, "SegmentationClass", "a.png") + os.makedirs(osp.dirname(ann_file)) + save_image(ann_file, np.array([[30]], dtype=np.uint8)) + + with pytest.raises(AnnotationImportError) as err_info: + Dataset.import_from(test_dir, format="voc_segmentation").init_cache() + assert ( + str(err_info.value) + == "Failed to import item ('a', 'test') annotation: Undeclared label '30'" + ) + + @mark_requirement(Requirements.DATUM_ERROR_REPORTING) + def test_can_report_invalid_label_in_segmentation_both_masks(self, test_dir: str): + subset_file = osp.join(test_dir, 
"ImageSets", "Segmentation", "test.txt") + os.makedirs(osp.dirname(subset_file)) + with open(subset_file, "w") as f: + f.write("a\n") + + cls_file = osp.join(test_dir, "SegmentationClass", "a.png") + os.makedirs(osp.dirname(cls_file)) + save_image(cls_file, np.array([[30]], dtype=np.uint8)) + + inst_file = osp.join(test_dir, "SegmentationObject", "a.png") + os.makedirs(osp.dirname(inst_file)) + save_image(inst_file, np.array([[1]], dtype=np.uint8)) + + with pytest.raises(AnnotationImportError) as err_info: + Dataset.import_from(test_dir, format="voc_segmentation").init_cache() + assert ( + str(err_info.value) + == "Failed to import item ('a', 'test') annotation: Undeclared label '30'" + ) + + @mark_requirement(Requirements.DATUM_ERROR_REPORTING) + def test_can_report_invalid_quotes_in_lists_of_layout_task(self, test_dir: str): + subset_file = osp.join(test_dir, "ImageSets", "Layout", "test.txt") + os.makedirs(osp.dirname(subset_file)) + with open(subset_file, "w") as f: + f.write('"qwe 1\n') + + with pytest.raises(DatasetImportError) as err_info: + Dataset.import_from(test_dir, format="voc_layout").init_cache() + assert "Failed to import dataset" in str(err_info.value) diff --git a/tests/unit/test_voc_format.py b/tests/unit/test_voc_format.py index c668640269..f2dbb5ed67 100644 --- a/tests/unit/test_voc_format.py +++ b/tests/unit/test_voc_format.py @@ -183,8 +183,8 @@ def __iter__(self): "difficult": False, "occluded": False, }, - id=1, - group=1, + id=0, + group=0, ), # Only main boxes denote instances (have ids) Mask( @@ -204,8 +204,8 @@ def __iter__(self): "occluded": False, **{a.name: a.value % 2 == 1 for a in VOC.VocAction}, }, - id=2, - group=2, + id=1, + group=1, ), # Only main boxes denote instances (have ids) Bbox( @@ -214,7 +214,7 @@ def __iter__(self): 2, 2, label=self._label(VOC.VocBodyPart(1).name), - group=2, + group=1, ), ], ), @@ -226,6 +226,9 @@ def __iter__(self): ] ) + def categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc) + dataset = Dataset.import_from(DUMMY_DATASET_DIR, "voc") compare_datasets(self, DstExtractor(), dataset) @@ -249,9 +252,12 @@ def __iter__(self): subset="test", media=Image.from_numpy(data=np.ones((10, 20, 3))), ), - ] + ], ) + def categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc_classification) + expected_dataset = DstExtractor() rpath = osp.join("ImageSets", "Main", "train.txt") @@ -284,17 +290,16 @@ def test_can_import_voc_layout_dataset(self): 5.0, 2.0, 2.0, - label=15, - id=2, - group=2, + label=1, + id=0, + group=0, attributes={ "difficult": False, "truncated": False, "occluded": False, - **{a.name: a.value % 2 == 1 for a in VOC.VocAction}, }, ), - Bbox(5.5, 6.0, 2.0, 2.0, label=22, group=2), + Bbox(5.5, 6.0, 2.0, 2.0, label=2, group=0), ], ), DatasetItem( @@ -303,14 +308,14 @@ def test_can_import_voc_layout_dataset(self): media=Image.from_numpy(data=np.ones((10, 20, 3))), ), ], - categories=VOC.make_voc_categories(), + categories=VOC.make_voc_categories(task=VOC.VocTask.voc_layout), ) rpath = osp.join("ImageSets", "Layout", "train.txt") matrix = [ ("voc_layout", "", ""), ("voc_layout", "train", rpath), - ("voc", "train", rpath), + # ("voc", "train", rpath), ] for format, subset, path in matrix: with self.subTest(format=format, subset=subset, path=path): @@ -338,8 +343,8 @@ def test_can_import_voc_detection_dataset(self): 2.0, 2.0, label=8, - id=1, - group=1, + id=0, + group=0, attributes={ "difficult": False, "truncated": True, @@ -353,13 +358,12 @@ def test_can_import_voc_detection_dataset(self): 
2.0, 2.0, label=15, - id=2, - group=2, + id=1, + group=1, attributes={ "difficult": False, "truncated": False, "occluded": False, - **{a.name: a.value % 2 == 1 for a in VOC.VocAction}, }, ), ], @@ -370,7 +374,7 @@ def test_can_import_voc_detection_dataset(self): media=Image.from_numpy(data=np.ones((10, 20, 3))), ), ], - categories=VOC.make_voc_categories(), + categories=VOC.make_voc_categories(task=VOC.VocTask.voc_detection), ) rpath = osp.join("ImageSets", "Main", "train.txt") @@ -405,14 +409,14 @@ def test_can_import_voc_segmentation_dataset(self): media=Image.from_numpy(data=np.ones((10, 20, 3))), ), ], - categories=VOC.make_voc_categories(), + categories=VOC.make_voc_categories(task=VOC.VocTask.voc_segmentation), ) rpath = osp.join("ImageSets", "Segmentation", "train.txt") matrix = [ ("voc_segmentation", "", ""), ("voc_segmentation", "train", rpath), - ("voc", "train", rpath), + # ("voc", "train", rpath), ] for format, subset, path in matrix: with self.subTest(format=format, subset=subset, path=path): @@ -439,9 +443,9 @@ def test_can_import_voc_action_dataset(self): 5.0, 2.0, 2.0, - label=15, - id=2, - group=2, + label=1, + id=0, + group=0, attributes={ "difficult": False, "truncated": False, @@ -457,14 +461,14 @@ def test_can_import_voc_action_dataset(self): media=Image.from_numpy(data=np.ones((10, 20, 3))), ), ], - categories=VOC.make_voc_categories(), + categories=VOC.make_voc_categories(task=VOC.VocTask.voc_action), ) rpath = osp.join("ImageSets", "Action", "train.txt") matrix = [ ("voc_action", "", ""), ("voc_action", "train", rpath), - ("voc", "train", rpath), + # ("voc", "train", rpath), ] for format, subset, path in matrix: with self.subTest(format=format, subset=subset, path=path): @@ -484,7 +488,7 @@ def test_can_detect_voc(self): for path in [DUMMY_DATASET_DIR, DUMMY_DATASET2_DIR]: with self.subTest(path=path): detected_formats = env.detect_dataset(path) - self.assertEqual([VocImporter.NAME], detected_formats) + self.assertIn(VocImporter.NAME, detected_formats) @mark_requirement(Requirements.DATUM_BUG_583) def test_can_import_voc_dataset_with_empty_lines_in_subset_lists(self): @@ -501,8 +505,8 @@ def test_can_import_voc_dataset_with_empty_lines_in_subset_lists(self): 2.0, 2.0, label=8, - id=1, - group=1, + id=0, + group=0, attributes={ "difficult": False, "truncated": True, @@ -513,7 +517,7 @@ def test_can_import_voc_dataset_with_empty_lines_in_subset_lists(self): ], ) ], - categories=VOC.make_voc_categories(), + categories=VOC.make_voc_categories(task=VOC.VocTask.voc_detection), ) rpath = osp.join("ImageSets", "Main", "train.txt") @@ -630,8 +634,8 @@ def test_can_report_invalid_quotes_in_lists_of_layout_task(self): def test_can_report_invalid_label_in_xml(self): formats = [ ("voc_detection", "Main"), - ("voc_layout", "Layout"), - ("voc_action", "Action"), + # ("voc_layout", "Layout"), + # ("voc_action", "Action"), ] for fmt, fmt_dir in formats: @@ -665,11 +669,6 @@ def test_can_report_missing_field_in_xml(self): "object/bndbox/ymin", "object/bndbox/xmax", "object/bndbox/ymax", - "object/part/name", - "object/part/bndbox/xmin", - "object/part/bndbox/ymin", - "object/part/bndbox/xmax", - "object/part/bndbox/ymax", "object/point/x", "object/point/y", "object/attributes/attribute/name", @@ -702,22 +701,17 @@ def test_can_report_invalid_field_in_xml(self): for fmt, fmt_dir in formats: with self.subTest(fmt=fmt): for key, value in [ + ("size/width", "a"), + ("size/height", "a"), ("object/bndbox/xmin", "a"), ("object/bndbox/ymin", "a"), ("object/bndbox/xmax", "a"), 
("object/bndbox/ymax", "a"), - ("object/part/bndbox/xmin", "a"), - ("object/part/bndbox/ymin", "a"), - ("object/part/bndbox/xmax", "a"), - ("object/part/bndbox/ymax", "a"), - ("size/width", "a"), - ("size/height", "a"), ("object/occluded", "a"), ("object/difficult", "a"), ("object/truncated", "a"), ("object/point/x", "a"), ("object/point/y", "a"), - ("object/actions/jumping", "a"), ]: with self.subTest(key=key): with TestDir() as test_dir: @@ -773,7 +767,7 @@ def test_can_parse_classification_without_errors(self): DatasetItem("b", subset="test"), DatasetItem("c", subset="test", annotations=[Label(VOC.VocLabel.cat.value)]), ], - categories=VOC.make_voc_categories(), + categories=VOC.make_voc_categories(task=VOC.VocTask.voc_classification), ) compare_datasets(self, expected, parsed) @@ -833,14 +827,21 @@ def test_can_report_invalid_label_in_segmentation_both_masks(self): class VocExporterTest(TestCase): def _test_save_and_load( - self, source_dataset, converter, test_dir, target_dataset=None, importer_args=None, **kwargs + self, + source_dataset, + converter, + test_dir, + importer, + target_dataset=None, + importer_args=None, + **kwargs, ): return check_save_and_load( self, source_dataset, converter, test_dir, - importer="voc", + importer=importer, target_dataset=target_dataset, importer_args=importer_args, **kwargs, @@ -871,11 +872,15 @@ def __iter__(self): ] ) + def categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc_classification) + with TestDir() as test_dir: self._test_save_and_load( TestExtractor(), - partial(VocClassificationExporter.convert, label_map="voc"), + partial(VocClassificationExporter.convert, label_map="voc_classification"), test_dir, + importer="voc_classification", ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) @@ -916,6 +921,9 @@ def __iter__(self): ] ) + def categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc_detection) + class DstExtractor(TestExtractorBase): def __iter__(self): return iter( @@ -930,8 +938,8 @@ def __iter__(self): 4, 5, label=2, - id=1, - group=1, + id=0, + group=0, attributes={ "truncated": False, "difficult": False, @@ -944,8 +952,8 @@ def __iter__(self): 4, 5, label=3, - id=2, - group=2, + id=1, + group=1, attributes={ "truncated": True, "difficult": False, @@ -964,8 +972,8 @@ def __iter__(self): 6, 5, label=3, - id=1, - group=1, + id=0, + group=0, attributes={ "truncated": False, "difficult": True, @@ -977,11 +985,15 @@ def __iter__(self): ] ) + def categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc_detection) + with TestDir() as test_dir: self._test_save_and_load( TestExtractor(), - partial(VocDetectionExporter.convert, label_map="voc"), + partial(VocDetectionExporter.convert, label_map="voc_detection"), test_dir, + importer="voc_detection", target_dataset=DstExtractor(), ) @@ -1005,6 +1017,9 @@ def __iter__(self): ] ) + def categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc_segmentation) + class DstExtractor(TestExtractorBase): def __iter__(self): return iter( @@ -1021,11 +1036,15 @@ def __iter__(self): ] ) + def categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc_segmentation) + with TestDir() as test_dir: self._test_save_and_load( TestExtractor(), - partial(VocSegmentationExporter.convert, label_map="voc"), + partial(VocSegmentationExporter.convert, label_map="voc_segmentation"), test_dir, + importer="voc_segmentation", target_dataset=DstExtractor(), ) @@ -1049,6 +1068,9 @@ def __iter__(self): ] ) + def categories(self): + return 
VOC.make_voc_categories(task=VOC.VocTask.voc_segmentation) + class DstExtractor(TestExtractorBase): def __iter__(self): return iter( @@ -1065,11 +1087,19 @@ def __iter__(self): ] ) + def categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc_segmentation) + with TestDir() as test_dir: self._test_save_and_load( TestExtractor(), - partial(VocSegmentationExporter.convert, label_map="voc", apply_colormap=False), + partial( + VocSegmentationExporter.convert, + label_map="voc_segmentation", + apply_colormap=False, + ), test_dir, + importer="voc_segmentation", target_dataset=DstExtractor(), ) @@ -1100,6 +1130,9 @@ def __iter__(self): ] ) + def categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc_segmentation) + class DstExtractor(TestExtractorBase): def __iter__(self): return iter( @@ -1120,11 +1153,15 @@ def __iter__(self): ] ) + def categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc_segmentation) + with TestDir() as test_dir: self._test_save_and_load( TestExtractor(), - partial(VocSegmentationExporter.convert, label_map="voc"), + partial(VocSegmentationExporter.convert, label_map="voc_segmentation"), test_dir, + importer="voc_segmentation", target_dataset=DstExtractor(), ) @@ -1143,30 +1180,35 @@ def __iter__(self): 3, 4, 5, - label=2, - id=1, - group=1, + label=self._label("person"), + id=0, + group=0, attributes={ - "pose": VOC.VocPose(1).name, "truncated": True, "difficult": False, "occluded": False, }, ), Bbox( - 2, 3, 1, 1, label=self._label(VOC.VocBodyPart(1).name), group=1 + 2, 3, 1, 1, label=self._label(VOC.VocBodyPart(1).name), group=0 ), Bbox( - 5, 4, 3, 2, label=self._label(VOC.VocBodyPart(2).name), group=1 + 5, 4, 3, 2, label=self._label(VOC.VocBodyPart(2).name), group=0 ), ], ), ] ) + def categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc_layout) + with TestDir() as test_dir: self._test_save_and_load( - TestExtractor(), partial(VocLayoutExporter.convert, label_map="voc"), test_dir + TestExtractor(), + partial(VocLayoutExporter.convert, label_map="voc_layout"), + test_dir, + importer="voc_layout", ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) @@ -1184,12 +1226,7 @@ def __iter__(self): 3, 4, 5, - label=2, - attributes={ - "truncated": True, - VOC.VocAction(1).name: True, - VOC.VocAction(2).name: True, - }, + label=0, ), Bbox( 5, @@ -1208,6 +1245,9 @@ def __iter__(self): ] ) + def categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc_action) + class DstExtractor(TestExtractorBase): def __iter__(self): return iter( @@ -1221,11 +1261,11 @@ def __iter__(self): 3, 4, 5, - label=2, - id=1, - group=1, + label=0, + id=0, + group=0, attributes={ - "truncated": True, + "truncated": False, "difficult": False, "occluded": False, # no attributes here in the label categories @@ -1237,8 +1277,8 @@ def __iter__(self): 3, 2, label=self._label("person"), - id=2, - group=2, + id=1, + group=1, attributes={ "truncated": True, "difficult": False, @@ -1257,17 +1297,25 @@ def __iter__(self): ] ) + def categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc_action) + with TestDir() as test_dir: self._test_save_and_load( TestExtractor(), - partial(VocActionExporter.convert, label_map="voc", allow_attributes=False), + partial(VocActionExporter.convert, label_map="voc_action", allow_attributes=False), test_dir, + importer="voc_action", target_dataset=DstExtractor(), ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_no_subsets(self): class 
TestExtractor(TestExtractorBase): + def __init__(self, task): + super().__init__() + self._task = task + def __iter__(self): return iter( [ @@ -1276,17 +1324,25 @@ def __iter__(self): ] ) - for task in [None] + list(VOC.VocTask): + def categories(self): + return VOC.make_voc_categories(task=self._task) + + for task in list(VOC.VocTask): with self.subTest(subformat=task), TestDir() as test_dir: self._test_save_and_load( - TestExtractor(), - partial(VocExporter.convert, label_map="voc", tasks=task), + TestExtractor(task), + partial(VocExporter.convert, label_map=task.name, task=task), test_dir, + importer=task.name, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): class TestExtractor(TestExtractorBase): + def __init__(self, task): + super().__init__() + self._task = task + def __iter__(self): return iter( [ @@ -1298,18 +1354,26 @@ def __iter__(self): ] ) - for task in [None] + list(VOC.VocTask): + def categories(self): + return VOC.make_voc_categories(task=self._task) + + for task in list(VOC.VocTask): with self.subTest(subformat=task), TestDir() as test_dir: self._test_save_and_load( - TestExtractor(), - partial(VocExporter.convert, label_map="voc", tasks=task, save_media=True), + TestExtractor(task), + partial(VocExporter.convert, label_map=task.name, task=task, save_media=True), test_dir, + importer=task.name, require_media=True, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_images(self): class TestExtractor(TestExtractorBase): + def __init__(self, task): + super().__init__() + self._task = task + def __iter__(self): return iter( [ @@ -1325,12 +1389,16 @@ def __iter__(self): ] ) - for task in [None] + list(VOC.VocTask): + def categories(self): + return VOC.make_voc_categories(task=self._task) + + for task in list(VOC.VocTask): with self.subTest(subformat=task), TestDir() as test_dir: self._test_save_and_load( - TestExtractor(), - partial(VocExporter.convert, label_map="voc", save_media=True, tasks=task), + TestExtractor(task), + partial(VocExporter.convert, label_map=task.name, task=task, save_media=True), test_dir, + importer=task.name, require_media=True, ) @@ -1366,8 +1434,8 @@ def __iter__(self): 4, 5, label=self._label("cat"), - id=1, - group=1, + id=0, + group=0, attributes={ "truncated": False, "difficult": False, @@ -1385,6 +1453,7 @@ def categories(self): SrcExtractor(), partial(VocExporter.convert, label_map="voc"), test_dir, + importer="voc", target_dataset=DstExtractor(), ) @@ -1419,8 +1488,8 @@ def __iter__(self): 4, 5, label=self._label("Label_1"), - id=1, - group=1, + id=0, + group=0, attributes={ "truncated": False, "difficult": False, @@ -1433,8 +1502,8 @@ def __iter__(self): 3, 4, label=self._label("label_2"), - id=2, - group=2, + id=1, + group=1, attributes={ "truncated": False, "difficult": False, @@ -1456,6 +1525,7 @@ def categories(self): SrcExtractor(), partial(VocExporter.convert, label_map="source"), test_dir, + importer="voc", target_dataset=DstExtractor(), ) @@ -1489,8 +1559,8 @@ def __iter__(self): 4, 5, label=self._label("label_1"), - id=1, - group=1, + id=0, + group=0, attributes={ "truncated": False, "difficult": False, @@ -1503,8 +1573,8 @@ def __iter__(self): 3, 4, label=self._label("label_2"), - id=2, - group=2, + id=1, + group=1, attributes={ "truncated": False, "difficult": False, @@ -1526,6 +1596,7 @@ def categories(self): SrcExtractor(), partial(VocExporter.convert, label_map="source"), test_dir, + importer="voc", 
target_dataset=DstExtractor(), ) @@ -1562,8 +1633,8 @@ def __iter__(self): 4, 5, label=self._label("label_1"), - id=1, - group=1, + id=0, + group=0, attributes={ "act1": False, "act2": False, @@ -1583,6 +1654,7 @@ def categories(self): SrcExtractor(), partial(VocExporter.convert, label_map=label_map, save_dataset_meta=True), test_dir, + importer="voc", target_dataset=DstExtractor(), ) self.assertTrue(osp.isfile(osp.join(test_dir, "dataset_meta.json"))) @@ -1642,8 +1714,8 @@ def __iter__(self): 3, 4, label=self._label("label"), - id=1, - group=1, + id=0, + group=0, attributes={ "act1": True, "act2": False, @@ -1652,8 +1724,8 @@ def __iter__(self): "occluded": False, }, ), - Bbox(2, 3, 4, 5, label=self._label("label_part1"), group=1), - Bbox(2, 3, 4, 6, label=self._label("label_part2"), group=1), + Bbox(2, 3, 4, 5, label=self._label("label_part1"), group=0), + Bbox(2, 3, 4, 6, label=self._label("label_part2"), group=0), ], ) @@ -1665,6 +1737,7 @@ def categories(self): SrcExtractor(), partial(VocExporter.convert, label_map=label_map), test_dir, + importer="voc", target_dataset=DstExtractor(), ) @@ -1696,6 +1769,10 @@ def test_background_masks_dont_introduce_instances_but_cover_others(self): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_image_info(self): class TestExtractor(TestExtractorBase): + def __init__(self, task): + super().__init__() + self._task = task + def __iter__(self): return iter( [ @@ -1703,17 +1780,25 @@ def __iter__(self): ] ) - for task in [None] + list(VOC.VocTask): + def categories(self): + return VOC.make_voc_categories(task=self._task) + + for task in list(VOC.VocTask): with self.subTest(subformat=task), TestDir() as test_dir: self._test_save_and_load( - TestExtractor(), - partial(VocExporter.convert, label_map="voc", tasks=task), + TestExtractor(task), + partial(VocExporter.convert, label_map=task.name, task=task), test_dir, + importer=task.name, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_and_load_image_with_arbitrary_extension(self): class TestExtractor(TestExtractorBase): + def __init__(self, task): + super().__init__() + self._task = task + def __iter__(self): return iter( [ @@ -1724,18 +1809,26 @@ def __iter__(self): ] ) - for task in [None] + list(VOC.VocTask): + def categories(self): + return VOC.make_voc_categories(task=self._task) + + for task in list(VOC.VocTask): with self.subTest(subformat=task), TestDir() as test_dir: self._test_save_and_load( - TestExtractor(), - partial(VocExporter.convert, label_map="voc", tasks=task, save_media=True), + TestExtractor(task), + partial(VocExporter.convert, label_map=task.name, task=task, save_media=True), test_dir, + importer=task.name, require_media=True, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_relative_paths(self): class TestExtractor(TestExtractorBase): + def __init__(self, task): + super().__init__() + self._task = task + def __iter__(self): return iter( [ @@ -1749,12 +1842,16 @@ def __iter__(self): ] ) - for task in [None] + list(VOC.VocTask): + def categories(self): + return VOC.make_voc_categories(task=self._task) + + for task in list(VOC.VocTask): with self.subTest(subformat=task), TestDir() as test_dir: self._test_save_and_load( - TestExtractor(), - partial(VocExporter.convert, label_map="voc", save_media=True, tasks=task), + TestExtractor(task), + partial(VocExporter.convert, label_map=task.name, save_media=True, task=task), test_dir, + importer=task.name, require_media=True, ) @@ -1780,6 +1877,9 @@ def __iter__(self): ] ) + def 
categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc) + class DstExtractor(TestExtractorBase): def __iter__(self): return iter( @@ -1793,8 +1893,8 @@ def __iter__(self): 4, 5, label=2, - id=1, - group=1, + id=0, + group=0, attributes={ "truncated": False, "difficult": False, @@ -1808,11 +1908,15 @@ def __iter__(self): ] ) + def categories(self): + return VOC.make_voc_categories(task=VOC.VocTask.voc) + with TestDir() as test_dir: self._test_save_and_load( TestExtractor(), partial(VocExporter.convert, label_map="voc"), test_dir, + importer="voc", target_dataset=DstExtractor(), ) @@ -1839,8 +1943,8 @@ def test_inplace_save_writes_only_updated_data_with_direct_changes(self): 0, 0, label=4, - id=1, - group=1, + id=0, + group=0, attributes={ "truncated": False, "difficult": False, @@ -1919,8 +2023,8 @@ def test_inplace_save_writes_only_updated_data_with_transforms(self): 0, 0, label=4, - id=1, - group=1, + id=0, + group=0, attributes={ "truncated": False, "difficult": False, @@ -1940,8 +2044,8 @@ def test_inplace_save_writes_only_updated_data_with_transforms(self): 0, 0, label=4, - id=1, - group=1, + id=0, + group=0, attributes={ "truncated": False, "difficult": False, @@ -2033,9 +2137,9 @@ def __iter__(self): "truncated": False, "occluded": False, }, - id=1, + id=0, label=0, - group=1, + group=0, ) ], ) @@ -2043,9 +2147,12 @@ def __iter__(self): ) def categories(self): - return VOC.make_voc_categories() + return VOC.make_voc_categories(task=VOC.VocTask.voc) with TestDir() as test_dir: self._test_save_and_load( - TestExtractor(), partial(VocExporter.convert, label_map="voc"), test_dir + TestExtractor(), + partial(VocExporter.convert, label_map="voc"), + test_dir, + importer="voc", )
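
For reference, the task-specific round-trip these tests exercise can be driven from user code as in the minimal sketch below. It is not part of the patch: the plugin name ("voc_detection") and the make_voc_categories(task=...) signature come from the changes above, the import paths mirror the ones used in these tests (the VOC alias is assumed to resolve to datumaro.plugins.data_formats.voc.format), and the output directory name is arbitrary.

    import numpy as np

    import datumaro.plugins.data_formats.voc.format as VOC
    from datumaro.components.annotation import Bbox
    from datumaro.components.dataset import Dataset
    from datumaro.components.dataset_base import DatasetItem
    from datumaro.components.media import Image

    # One detection item using the default VOC label map, mirroring the fixtures
    # above; VOC.VocLabel.cat.value is the index of "cat" in that map.
    dataset = Dataset.from_iterable(
        [
            DatasetItem(
                id="sample",
                subset="train",
                media=Image.from_numpy(data=np.ones((10, 20, 3), dtype=np.uint8)),
                annotations=[Bbox(2, 3, 4, 5, label=VOC.VocLabel.cat.value)],
            )
        ],
        categories=VOC.make_voc_categories(task=VOC.VocTask.voc_detection),
    )

    # Export with the task-specific writer, then read it back with the matching
    # importer. On re-import the bbox gains id/group and the default flag
    # attributes (difficult/truncated/occluded), as asserted throughout this suite.
    dataset.export("./voc_det_export", "voc_detection", save_media=True)  # arbitrary dir
    roundtrip = Dataset.import_from("./voc_det_export", "voc_detection")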