Merge branch 'main' into artefact-update
fg-mindee committed Nov 12, 2021
2 parents bd74611 + ab26073 commit 0c7c672
Showing 9 changed files with 99 additions and 7 deletions.
5 changes: 5 additions & 0 deletions .gitignore
@@ -132,3 +132,8 @@ dmypy.json
doctr/version.py
logs/
wandb/

+# Checkpoints
+*.pt
+*.pb
+*.index
4 changes: 2 additions & 2 deletions README.md
@@ -114,9 +114,9 @@ We try to keep framework-specific dependencies to a minimum. You can install fra

```shell
# for TensorFlow
-pip install python-doctr[tf]
+pip install "python-doctr[tf]"
# for PyTorch
-pip install python-doctr[torch]
+pip install "python-doctr[torch]"
```
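*Note: the added quotes keep shells such as zsh from expanding the bracketed extras (`[tf]`, `[torch]`) as a glob pattern; unquoted, the command can abort with a "no matches found" error.*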

### Developer mode
1 change: 1 addition & 0 deletions docs/source/datasets.rst
@@ -19,6 +19,7 @@ Here are all datasets that are available through docTR:
.. autoclass:: OCRDataset
.. autoclass:: CharacterGenerator
.. autoclass:: DocArtefacts
+.. autoclass:: IIIT5K


Data Loading
4 changes: 2 additions & 2 deletions docs/source/installing.rst
@@ -40,9 +40,9 @@ We strive towards reducing framework-specific dependencies to a minimum, but som
.. code:: bash
# for TensorFlow
-pip install python-doctr[tf]
+pip install "python-doctr[tf]"
# for PyTorch
-pip install python-doctr[torch]
+pip install "python-doctr[torch]"
Via Git
1 change: 1 addition & 0 deletions doctr/datasets/__init__.py
@@ -5,6 +5,7 @@
from .detection import *
from .doc_artefacts import *
from .funsd import *
+from .iiit5k import *
from .ocr import *
from .recognition import *
from .sroie import *
5 changes: 2 additions & 3 deletions doctr/datasets/datasets/base.py
@@ -6,7 +6,7 @@
import os
from pathlib import Path
from typing import Any, Callable, List, Optional, Tuple, Union
-from zipfile import ZipFile
+import shutil

from doctr.utils.data import download_from_url

@@ -100,7 +100,6 @@ def __init__(
        archive_path = Path(archive_path)
        dataset_path = archive_path.parent.joinpath(archive_path.stem)
        if not dataset_path.is_dir() or overwrite:
-           with ZipFile(archive_path, 'r') as f:
-               f.extractall(path=dataset_path)
+           shutil.unpack_archive(archive_path, dataset_path)

        super().__init__(dataset_path if extract_archive else archive_path, fp16)
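Context for the swap above: `shutil.unpack_archive` dispatches on the archive's filename and handles zip as well as tar-based formats, whereas `ZipFile` only covered zips; the IIIT5K archive added below ships as a `.tar.gz`. A minimal sketch of the call, with illustrative, hypothetical paths:

```python
import shutil
from pathlib import Path

# Hypothetical cache location; the dataset class resolves the real path itself
archive_path = Path.home() / ".cache" / "doctr" / "datasets" / "some_archive.tar.gz"
dataset_path = archive_path.parent / archive_path.stem

# unpack_archive infers the format from the filename (zip, tar, gztar,
# bztar, xztar) and extracts everything into dataset_path
shutil.unpack_archive(archive_path, dataset_path)
```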
82 changes: 82 additions & 0 deletions doctr/datasets/iiit5k.py
@@ -0,0 +1,82 @@
# Copyright (C) 2021, Mindee.

# This program is licensed under the Apache License version 2.
# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0.txt> for full license details.

import os
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple

import numpy as np
import scipy.io as sio

from .datasets import VisionDataset

__all__ = ['IIIT5K']


class IIIT5K(VisionDataset):
    """IIIT-5K character-level localization dataset from
    `"BMVC 2012 Scene Text Recognition using Higher Order Language Priors"
    <https://cdn.iiit.ac.in/cdn/cvit.iiit.ac.in/images/Projects/SceneTextUnderstanding/home/mishraBMVC12.pdf>`_.

    Example::
        >>> # NOTE: this dataset is for character-level localization
        >>> from doctr.datasets import IIIT5K
        >>> train_set = IIIT5K(train=True, download=True)
        >>> img, target = train_set[0]

    Args:
        train: whether the subset should be the training one
        sample_transforms: composable transformations that will be applied to each image
        rotated_bbox: whether polygons should be considered as rotated bounding boxes (instead of straight ones)
        **kwargs: keyword arguments from `VisionDataset`.
    """

    URL = 'https://cvit.iiit.ac.in/images/Projects/SceneTextUnderstanding/IIIT5K-Word_V3.0.tar.gz'
    SHA256 = '7872c9efbec457eb23f3368855e7738f72ce10927f52a382deb4966ca0ffa38e'

    def __init__(
        self,
        train: bool = True,
        sample_transforms: Optional[Callable[[Any], Any]] = None,
        rotated_bbox: bool = False,
        **kwargs: Any,
    ) -> None:

        super().__init__(url=self.URL, file_name='IIIT5K-Word-V3.tar',
                         file_hash=self.SHA256, extract_archive=True, **kwargs)
        self.sample_transforms = sample_transforms
        self.train = train

        # Load mat data
        tmp_root = os.path.join(self.root, 'IIIT5K')
        mat_file = 'trainCharBound' if self.train else 'testCharBound'
        mat_data = sio.loadmat(os.path.join(tmp_root, f'{mat_file}.mat'))[mat_file][0]

        self.data: List[Tuple[Path, Dict[str, Any]]] = []
        np_dtype = np.float16 if self.fp16 else np.float32

        for img_path, label, box_targets in mat_data:
            _raw_path = img_path[0]
            _raw_label = label[0]

            # File existence check
            if not os.path.exists(os.path.join(tmp_root, _raw_path)):
                raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, _raw_path)}")

            if rotated_bbox:
                # x_center, y_center, w, h, alpha = 0
                box_targets = [[box[0] + box[2] / 2, box[1] + box[3] / 2, box[2], box[3], 0] for box in box_targets]
            else:
                # x, y, width, height -> xmin, ymin, xmax, ymax
                box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]

            # labels are cast to a list where each char corresponds to the character's bounding box
            self.data.append((_raw_path, dict(boxes=np.asarray(
                box_targets, dtype=np_dtype), labels=list(_raw_label))))

        self.root = tmp_root

    def extra_repr(self) -> str:
        return f"train={self.train}"
2 changes: 2 additions & 0 deletions tests/pytorch/test_datasets_pt.py
@@ -30,6 +30,8 @@ def test_visiondataset():
['CORD', False, [512, 512], 100, False],
['DocArtefacts', True, [512, 512], 2700, False],
['DocArtefacts', False, [512, 512], 300, True],
+['IIIT5K', True, [32, 128], 2000, True],
+['IIIT5K', False, [32, 128], 3000, False],
],
)
def test_dataset(dataset_name, train, input_size, size, rotate):
2 changes: 2 additions & 0 deletions tests/tensorflow/test_datasets_tf.py
@@ -20,6 +20,8 @@
['CORD', False, [512, 512], 100, False],
['DocArtefacts', True, [512, 512], 2700, False],
['DocArtefacts', False, [512, 512], 300, True],
+['IIIT5K', True, [32, 128], 2000, True],
+['IIIT5K', False, [32, 128], 3000, False],
],
)
def test_dataset(dataset_name, train, input_size, size, rotate):
