From a2bff45a883fa0381616cb19a629ee45993a3d2c Mon Sep 17 00:00:00 2001 From: felixdittrich92 Date: Tue, 16 Nov 2021 08:56:24 +0100 Subject: [PATCH] hash check changes and rm tests ftm --- docs/source/datasets.rst | 1 + doctr/datasets/__init__.py | 1 + doctr/datasets/synthtext.py | 18 +++++++----------- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst index d0bd5c4358..7d68e136d3 100644 --- a/docs/source/datasets.rst +++ b/docs/source/datasets.rst @@ -21,6 +21,7 @@ Here are all datasets that are available through docTR: .. autoclass:: DocArtefacts .. autoclass:: IIIT5K .. autoclass:: SVT +.. autoclass:: SynthText Data Loading diff --git a/doctr/datasets/__init__.py b/doctr/datasets/__init__.py index 0754a783cf..bef74405df 100644 --- a/doctr/datasets/__init__.py +++ b/doctr/datasets/__init__.py @@ -10,6 +10,7 @@ from .recognition import * from .sroie import * from .svt import * +from .synthtext import * from .utils import * from .vocabs import * diff --git a/doctr/datasets/synthtext.py b/doctr/datasets/synthtext.py index c24eca197a..89f805651c 100644 --- a/doctr/datasets/synthtext.py +++ b/doctr/datasets/synthtext.py @@ -4,7 +4,6 @@ # See LICENSE or go to for full license details. import os -from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Tuple import numpy as np @@ -51,15 +50,16 @@ def __init__( # Load mat data tmp_root = os.path.join(self.root, 'SynthText') mat_data = sio.loadmat(os.path.join(tmp_root, 'gt.mat')) + split = int(len(mat_data['imnames'][0]) * 0.9) + paths = mat_data['imnames'][0][:split] if self.train else mat_data['imnames'][0][split:] + boxes = mat_data['wordBB'][0][:split] if self.train else mat_data['wordBB'][0][split:] + labels = mat_data['txt'][0][:split] if self.train else mat_data['txt'][0][split:] - self.data: List[Tuple[Path, Dict[str, Any]]] = [] + self.data: List[Tuple[str, Dict[str, Any]]] = [] np_dtype = np.float16 if self.fp16 else np.float32 - for img_path, word_boxes, txt in tqdm(iterable=zip( - mat_data['imnames'][0], - mat_data['wordBB'][0], - mat_data['txt'][0] - ), desc='Load SynthText', total=len(mat_data['imnames'][0])): + for img_path, word_boxes, txt in tqdm(iterable=zip(paths, boxes, labels), + desc='Load SynthText', total=len(paths)): # File existence check if not os.path.exists(os.path.join(tmp_root, img_path[0])): @@ -79,10 +79,6 @@ def __init__( self.data.append((img_path[0], dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=labels))) - if self.train: - self.data = self.data[:int(len(self.data) * 0.9)] - else: - self.data = self.data[int(len(self.data) * 0.9):] self.root = tmp_root def extra_repr(self) -> str: