Skip to content

Commit

Permalink
Hash check changes and remove tests for the moment
Browse files: browse the repository at this point in the history
  • Loading branch information
felixdittrich92 committed Nov 16, 2021
1 parent 58740c2 commit a2bff45
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 11 deletions.
1 change: 1 addition & 0 deletions docs/source/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Here are all datasets that are available through docTR:
.. autoclass:: DocArtefacts
.. autoclass:: IIIT5K
.. autoclass:: SVT
.. autoclass:: SynthText


Data Loading
Expand Down
1 change: 1 addition & 0 deletions doctr/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .recognition import *
from .sroie import *
from .svt import *
from .synthtext import *
from .utils import *
from .vocabs import *

Expand Down
18 changes: 7 additions & 11 deletions doctr/datasets/synthtext.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0.txt> for full license details.

import os
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple

import numpy as np
Expand Down Expand Up @@ -51,15 +50,16 @@ def __init__(
# Load mat data
tmp_root = os.path.join(self.root, 'SynthText')
mat_data = sio.loadmat(os.path.join(tmp_root, 'gt.mat'))
split = int(len(mat_data['imnames'][0]) * 0.9)
paths = mat_data['imnames'][0][:split] if self.train else mat_data['imnames'][0][split:]
boxes = mat_data['wordBB'][0][:split] if self.train else mat_data['wordBB'][0][split:]
labels = mat_data['txt'][0][:split] if self.train else mat_data['txt'][0][split:]

self.data: List[Tuple[Path, Dict[str, Any]]] = []
self.data: List[Tuple[str, Dict[str, Any]]] = []
np_dtype = np.float16 if self.fp16 else np.float32

for img_path, word_boxes, txt in tqdm(iterable=zip(
mat_data['imnames'][0],
mat_data['wordBB'][0],
mat_data['txt'][0]
), desc='Load SynthText', total=len(mat_data['imnames'][0])):
for img_path, word_boxes, txt in tqdm(iterable=zip(paths, boxes, labels),
desc='Load SynthText', total=len(paths)):

# File existence check
if not os.path.exists(os.path.join(tmp_root, img_path[0])):
Expand All @@ -79,10 +79,6 @@ def __init__(

self.data.append((img_path[0], dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=labels)))

if self.train:
self.data = self.data[:int(len(self.data) * 0.9)]
else:
self.data = self.data[int(len(self.data) * 0.9):]
self.root = tmp_root

def extra_repr(self) -> str:
Expand Down

0 comments on commit a2bff45

Please sign in to comment.