Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

style: Fixed mypy and moved tool configs to pyproject.toml #966

Merged
merged 16 commits into from
Jul 1, 2022
Merged
2 changes: 0 additions & 2 deletions .coveragerc

This file was deleted.

2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[flake8]
max-line-length = 120
ignore = E402, E265, F403, W503, W504, E731
exclude = .circleci, .git, venv*, docs, build
exclude = .git, venv*, build
per-file-ignores = **/__init__.py:F401
26 changes: 13 additions & 13 deletions .github/workflows/style.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ on:
branches: main

jobs:
flake8-py3:
flake8:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python: [3.7]
python: [3.8]
steps:
- uses: actions/checkout@v2
- name: Set up Python
Expand All @@ -24,14 +24,14 @@ jobs:
run: |
pip install flake8
flake8 --version
flake8 ./
flake8

isort-py3:
isort:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python: [3.7]
python: [3.8]
steps:
- uses: actions/checkout@v2
- name: Set up Python
Expand All @@ -46,12 +46,12 @@ jobs:
isort .
if [ -n "$(git status --porcelain --untracked-files=no)" ]; then exit 1; else echo "All clear"; fi

mypy-py3:
mypy:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python: [3.7]
python: [3.8]
steps:
- uses: actions/checkout@v2
- name: Set up Python
Expand All @@ -67,19 +67,19 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e .[tf] --upgrade
pip install -e .[all] --upgrade
pip install mypy
- name: Run mypy
run: |
mypy --version
mypy --config-file mypy.ini doctr/
mypy

pydocstyle-py3:
pydocstyle:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python: [3.7]
python: [3.8]
steps:
- uses: actions/checkout@v2
- name: Set up Python
Expand All @@ -89,6 +89,6 @@ jobs:
architecture: x64
- name: Run pydocstyle
run: |
pip install pydocstyle
pip install pydocstyle[toml]
pydocstyle --version
pydocstyle doctr/
pydocstyle
5 changes: 0 additions & 5 deletions .isort.cfg

This file was deleted.

3 changes: 0 additions & 3 deletions .pydocstyle

This file was deleted.

5 changes: 3 additions & 2 deletions api/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@

import time

from app import config as cfg
from app.routes import detection, ocr, recognition
from fastapi import FastAPI, Request
from fastapi.openapi.utils import get_openapi

from app import config as cfg
from app.routes import detection, ocr, recognition

app = FastAPI(title=cfg.PROJECT_NAME, description=cfg.PROJECT_DESCRIPTION, debug=cfg.DEBUG, version=cfg.VERSION)


Expand Down
4 changes: 2 additions & 2 deletions api/app/routes/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

from typing import List

from app.schemas import DetectionOut
from app.vision import det_predictor
from fastapi import APIRouter, File, UploadFile, status

from app.schemas import DetectionOut
from app.vision import det_predictor
from doctr.io import decode_img_as_tensor

router = APIRouter()
Expand Down
4 changes: 2 additions & 2 deletions api/app/routes/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

from typing import List

from app.schemas import OCROut
from app.vision import predictor
from fastapi import APIRouter, File, UploadFile, status

from app.schemas import OCROut
from app.vision import predictor
from doctr.io import decode_img_as_tensor

router = APIRouter()
Expand Down
4 changes: 2 additions & 2 deletions api/app/routes/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
# This program is licensed under the Apache License version 2.
# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0.txt> for full license details.

from app.schemas import RecognitionOut
from app.vision import reco_predictor
from fastapi import APIRouter, File, UploadFile, status

from app.schemas import RecognitionOut
from app.vision import reco_predictor
from doctr.io import decode_img_as_tensor

router = APIRouter()
Expand Down
3 changes: 2 additions & 1 deletion api/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@

import pytest
import requests
from app.main import app
from httpx import AsyncClient

from app.main import app


@pytest.fixture(scope="session")
def mock_recognition_image(tmpdir_factory):
Expand Down
13 changes: 7 additions & 6 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.napoleon',
'sphinx.ext.intersphinx',
'sphinx.ext.viewcode',
'sphinx.ext.autodoc',
'sphinx.ext.napoleon',
'sphinx.ext.intersphinx',
'sphinx.ext.viewcode',
'sphinx.ext.coverage',
'sphinx.ext.mathjax',
'sphinx.ext.autosectionlabel',
Expand Down Expand Up @@ -114,9 +114,10 @@
# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']

# Add googleanalytics id
# ref: https://github.com/orenhecht/googleanalytics/blob/master/sphinxcontrib/googleanalytics.py

def add_ga_javascript(app, pagename, templatename, context, doctree):
# Add googleanalytics id
# ref: https://github.com/orenhecht/googleanalytics/blob/master/sphinxcontrib/googleanalytics.py

metatags = context.get('metatags', '')
metatags += """
Expand Down
1 change: 1 addition & 0 deletions doctr/datasets/cord.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def __init__(
if len(word["text"]) > 0:
x = word["quad"]["x1"], word["quad"]["x2"], word["quad"]["x3"], word["quad"]["x4"]
y = word["quad"]["y1"], word["quad"]["y2"], word["quad"]["y3"], word["quad"]["y4"]
box: Union[List[float], np.ndarray]
if use_polygons:
# (x, y) coordinates of top left, top right, bottom right, bottom left corners
box = np.array([
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def __getitem__(

if self.img_transforms is not None:
# typing issue cf. https://github.com/python/mypy/issues/5485
img = self.img_transforms(img) # type: ignore[call-arg]
img = self.img_transforms(img)

if self.sample_transforms is not None:
img, target = self.sample_transforms(img, target)
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def __init__(
if not os.path.exists(os.path.join(self.root, img_name)):
raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_name)}")

polygons = np.asarray(label['polygons'], dtype=np_dtype)
polygons: np.ndarray = np.asarray(label['polygons'], dtype=np_dtype)
geoms = polygons if use_polygons else np.concatenate((polygons.min(axis=1), polygons.max(axis=1)), axis=1)

self.data.append((img_name, np.asarray(geoms, dtype=np_dtype)))
4 changes: 2 additions & 2 deletions doctr/datasets/doc_artefacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ def __init__(
raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_name)}")

# xmin, ymin, xmax, ymax
boxes = np.asarray([obj['geometry'] for obj in label], dtype=np_dtype)
classes = np.asarray([self.CLASSES.index(obj['label']) for obj in label], dtype=np.int64)
boxes: np.ndarray = np.asarray([obj['geometry'] for obj in label], dtype=np_dtype)
classes: np.ndarray = np.asarray([self.CLASSES.index(obj['label']) for obj in label], dtype=np.int64)
if use_polygons:
# (x, y) coordinates of top left, top right, bottom right, bottom left corners
boxes = np.stack(
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/ic03.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def __init__(

# filter images without boxes
if len(_boxes) > 0:
boxes = np.asarray(_boxes, dtype=np_dtype)
boxes: np.ndarray = np.asarray(_boxes, dtype=np_dtype)
# Get the labels
labels = [lab.text for rect in rectangles for lab in rect if lab.text]

Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/ic13.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def __init__(
]
labels = [line[-1].replace("\"", "") for line in _lines]
# xmin, ymin, xmax, ymax
box_targets = np.array([list(map(int, line[:4])) for line in _lines], dtype=np_dtype)
box_targets: np.ndarray = np.array([list(map(int, line[:4])) for line in _lines], dtype=np_dtype)
if use_polygons:
# (x, y) coordinates of top left, top right, bottom right, bottom left corners
box_targets = np.array(
Expand Down
4 changes: 2 additions & 2 deletions doctr/datasets/sroie.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ def __init__(
labels = [",".join(row[8:]) for row in _rows]
# reorder coordinates (8 -> (4,2) ->
# (x, y) coordinates of top left, top right, bottom right, bottom left corners) and filter empty lines
coords = np.stack([np.array(list(map(int, row[:8])), dtype=np_dtype).reshape((4, 2))
for row in _rows], axis=0)
coords: np.ndarray = np.stack([np.array(list(map(int, row[:8])), dtype=np_dtype).reshape((4, 2))
for row in _rows], axis=0)

if not use_polygons:
# xmin, ymin, xmax, ymax
Expand Down
4 changes: 2 additions & 2 deletions doctr/datasets/svhn.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def __init__(
box_dict = {k: [int(f[v[0]][()].item()) for v in vals] for k, vals in box.items()}

# Convert it to the right format
coords = np.array([
coords: np.ndarray = np.array([
box_dict['left'],
box_dict['top'],
box_dict['width'],
Expand All @@ -94,7 +94,7 @@ def __init__(

if use_polygons:
# (x, y) coordinates of top left, top right, bottom right, bottom left corners
box_targets = np.stack(
box_targets: np.ndarray = np.stack(
[
np.stack([coords[:, 0], coords[:, 1]], axis=-1),
np.stack([coords[:, 0] + coords[:, 2], coords[:, 1]], axis=-1),
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/svt.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def __init__(
for rect in rectangles
]

boxes = np.asarray(_boxes, dtype=np_dtype)
boxes: np.ndarray = np.asarray(_boxes, dtype=np_dtype)
# Get the labels
labels = [lab.text for rect in rectangles for lab in rect]

Expand Down
8 changes: 4 additions & 4 deletions doctr/datasets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,13 @@ def encode_string(
A list encoding the input_string"""

try:
return list(map(vocab.index, input_string)) # type: ignore[arg-type]
return list(map(vocab.index, input_string))
except ValueError:
raise ValueError("some characters cannot be found in 'vocab'")


def decode_sequence(
input_seq: Union[np.array, SequenceType[int]],
input_seq: Union[np.ndarray, SequenceType[int]],
mapping: str,
) -> str:
"""Given a predefined mapping, decode the sequence of numbers to a string
Expand Down Expand Up @@ -145,7 +145,7 @@ def encode_sequences(
default_symbol = pad
else: # pad with eos symbol
default_symbol = eos
encoded_data = np.full([len(sequences), target_size], default_symbol, dtype=np.int32)
encoded_data: np.ndarray = np.full([len(sequences), target_size], default_symbol, dtype=np.int32)

# Encode the strings
for idx, seq in enumerate(map(partial(encode_string, vocab=vocab), sequences)):
Expand Down Expand Up @@ -176,7 +176,7 @@ def crop_bboxes_from_image(img_path: Union[str, Path], geoms: np.ndarray) -> Lis
Returns:
a list of cropped images
"""
img = np.array(Image.open(img_path).convert('RGB'))
img: np.ndarray = np.array(Image.open(img_path).convert('RGB'))
# Polygon
if geoms.ndim == 3 and geoms.shape[1:] == (4, 2):
return extract_rcrops(img, geoms.astype(dtype=int))
Expand Down
16 changes: 10 additions & 6 deletions doctr/io/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def __init__(
if geometry is None:
# Check whether this is a rotated or straight box
box_resolution_fn = resolve_enclosing_rbbox if len(words[0].geometry) == 4 else resolve_enclosing_bbox
geometry = box_resolution_fn([w.geometry for w in words]) # type: ignore[operator, misc]
geometry = box_resolution_fn([w.geometry for w in words]) # type: ignore[operator]

super().__init__(words=words)
self.geometry = geometry
Expand Down Expand Up @@ -188,7 +188,7 @@ def __init__(
box_resolution_fn = resolve_enclosing_rbbox if isinstance(
lines[0].geometry, np.ndarray
) else resolve_enclosing_bbox
geometry = box_resolution_fn(line_boxes + artefact_boxes) # type: ignore[operator, arg-type]
geometry = box_resolution_fn(line_boxes + artefact_boxes) # type: ignore[operator]

super().__init__(lines=lines, artefacts=artefacts)
self.geometry = geometry
Expand Down Expand Up @@ -287,7 +287,11 @@ def export_as_xml(self, file_title: str = 'docTR - XML export (hOCR)') -> Tuple[
head = SubElement(page_hocr, 'head')
SubElement(head, 'title').text = file_title
SubElement(head, 'meta', attrib={'http-equiv': 'Content-Type', 'content': 'text/html; charset=utf-8'})
SubElement(head, 'meta', attrib={'name': 'ocr-system', 'content': f"python-doctr {doctr.__version__}"})
SubElement(
head,
'meta',
attrib={'name': 'ocr-system', 'content': f"python-doctr {doctr.__version__}"}, # type: ignore[attr-defined]
)
SubElement(head, 'meta', attrib={'name': 'ocr-capabilities',
'content': 'ocr_page ocr_carea ocr_par ocr_line ocrx_word'})
# Create the body
Expand All @@ -301,7 +305,7 @@ def export_as_xml(self, file_title: str = 'docTR - XML export (hOCR)') -> Tuple[
for block in self.blocks:
if len(block.geometry) != 2:
raise TypeError("XML export is only available for straight bounding boxes for now.")
(xmin, ymin), (xmax, ymax) = block.geometry # type: ignore[misc]
(xmin, ymin), (xmax, ymax) = block.geometry
block_div = SubElement(body, 'div', attrib={
'class': 'ocr_carea',
'id': f'block_{block_count}',
Expand All @@ -316,7 +320,7 @@ def export_as_xml(self, file_title: str = 'docTR - XML export (hOCR)') -> Tuple[
})
block_count += 1
for line in block.lines:
(xmin, ymin), (xmax, ymax) = line.geometry # type: ignore[misc]
(xmin, ymin), (xmax, ymax) = line.geometry
# NOTE: baseline, x_size, x_descenders, x_ascenders are currently initialized to 0
line_span = SubElement(paragraph, 'span', attrib={
'class': 'ocr_line',
Expand All @@ -327,7 +331,7 @@ def export_as_xml(self, file_title: str = 'docTR - XML export (hOCR)') -> Tuple[
})
line_count += 1
for word in line.words:
(xmin, ymin), (xmax, ymax) = word.geometry # type: ignore[misc]
(xmin, ymin), (xmax, ymax) = word.geometry
conf = word.confidence
word_div = SubElement(line_span, 'span', attrib={
'class': 'ocrx_word',
Expand Down
2 changes: 1 addition & 1 deletion doctr/io/image/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
if is_tf_available():
from .tensorflow import *
elif is_torch_available():
from .pytorch import * # type: ignore[misc]
from .pytorch import *
4 changes: 2 additions & 2 deletions doctr/io/image/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def read_img_as_numpy(
raise FileNotFoundError(f"unable to access {file}")
img = cv2.imread(str(file), cv2.IMREAD_COLOR)
elif isinstance(file, bytes):
file = np.frombuffer(file, np.uint8)
img = cv2.imdecode(file, cv2.IMREAD_COLOR)
_file: np.ndarray = np.frombuffer(file, np.uint8)
img = cv2.imdecode(_file, cv2.IMREAD_COLOR)
else:
raise TypeError("unsupported object type for argument 'file'")

Expand Down
Loading