diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 4004dc6..60142a6 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -21,22 +21,16 @@ jobs: uses: actions/setup-python@v1 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + - name: Install package and dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt + pip install .[dev] - name: Lint with flake8 run: | - pip install flake8 # stop the build if there are Python syntax errors or undefined names - flake8 **/*.py --count --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 **/*.py --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Install package - run: | - # required because of entry points - pip install . + flake8 --filename=*.py --count --show-source --max-line-length=119 --statistics + # exit-zero treats all errors as warnings + flake8 --filename=*.py --count --exit-zero --max-line-length=119 --max-complexity=10 --statistics - name: Test with pytest run: | - pip install pytest python -m pytest -vvv tests diff --git a/.gitignore b/.gitignore index 894a44c..0912e33 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Ignore models dir +spacy_udpipe/models + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..85d21a3 --- /dev/null +++ b/Makefile @@ -0,0 +1,14 @@ +.PHONY: lint test + +# Lint source code + +lint: + # stop the build if there are Python syntax errors or undefined names + flake8 --filename=*.py --count --show-source --max-line-length=119 --statistics + # exit-zero treats all errors as warnings + flake8 --filename=*.py --count --exit-zero --max-line-length=119 --max-complexity=10 --statistics + +# Run tests + +test: + python -m pytest -vvv tests diff --git a/README.md 
b/README.md index 352c3b9..38d8e14 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,14 @@ This can be done for any of the languages supported by spaCy. For an exhaustive Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change. Please make sure to update the tests as appropriate. Tests are run automatically for each pull request on the master branch. -To start the tests locally, first, install the package with `pip install -e .`, then run [`pytest`](https://docs.pytest.org/en/latest/contents.html) in the root source directory. +To start the tests locally, first, install the package with `pip install -e ".[dev]"`, then run [`pytest`](https://docs.pytest.org/en/latest/contents.html) in the root source directory as follows: +```bash +make test +``` +Additionally, run [`flake8`](https://flake8.pycqa.org/en/latest) with the following command to check for coding mistakes: +```bash +make lint +``` ## License * Source code: [MIT](https://choosealicense.com/licenses/mit/) © Text Analysis and Knowledge Engineering Lab (TakeLab) diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 0258023..0000000 --- a/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -spacy>=3.0.0,<4.0.0 -ufal.udpipe~=1.2.0 -# Development dependencies -pytest>=5.0.0 \ No newline at end of file diff --git a/setup.py b/setup.py index dc402b2..54ff015 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ import os -import setuptools +from setuptools import find_packages, setup def get_version(fname: str) -> str: @@ -9,11 +9,11 @@ def get_version(fname: str) -> str: "spacy_udpipe", fname ) - with open(full_path, "r") as fp: - for l in fp: - if l.startswith("__version__"): - delim = '"' if '"' in l else "'" - return l.split(delim)[1] + with open(full_path, "r", encoding="utf-8") as fp: + for line in fp: + if line.startswith("__version__"): + delim = '"' if '"' in line else "'" + return line.split(delim)[1] else: raise RuntimeError( 
"Unable to find version string." @@ -22,10 +22,10 @@ def get_version(fname: str) -> str: URL = "https://github.com/TakeLab/spacy-udpipe" -with open("README.md", "r") as f: +with open("README.md", "r", encoding="utf-8") as f: readme = f.read() -setuptools.setup( +setup( name="spacy_udpipe", version=get_version("__init__.py"), description="Use fast UDPipe models directly in spaCy", @@ -36,8 +36,11 @@ def get_version(fname: str) -> str: author_email="takelab@fer.hr", license="MIT", keywords="nlp udpipe spacy python", - packages=setuptools.find_packages(), + packages=find_packages(), install_requires=["spacy>=3.0.0,<4.0.0", "ufal.udpipe>=1.2.0"], + extras_require={ + "dev": ["flake8", "pytest"], + }, python_requires=">=3.6", entry_points={ "spacy_tokenizers": [ @@ -49,9 +52,15 @@ def get_version(fname: str) -> str: classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Text Processing", ], project_urls={ "SpaCy": "https://spacy.io/", diff --git a/spacy_udpipe/utils.py b/spacy_udpipe/utils.py index 5512451..1725163 100644 --- a/spacy_udpipe/utils.py +++ b/spacy_udpipe/utils.py @@ -10,10 +10,7 @@ BASE_URL = "https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-3131" # noqa: E501 MODELS_DIR = os.path.join(os.path.dirname(__file__), "models") with open( - os.path.join( - os.path.dirname(__file__), - "languages.json" - ), + os.path.join(os.path.dirname(__file__), "languages.json"), encoding="utf-8" ) as f: LANGUAGES = json.load(f) diff --git a/tests/languages/fr/test_fr_language.py b/tests/languages/fr/test_fr_language.py index e5c9b54..0bb5f4e 100644 --- 
a/tests/languages/fr/test_fr_language.py +++ b/tests/languages/fr/test_fr_language.py @@ -1,5 +1,3 @@ -from typing import List - import pytest from spacy.lang.fr import FrenchDefaults from spacy.language import BaseDefaults @@ -24,12 +22,12 @@ def test_get_defaults(lang: str) -> None: def test_spacy_udpipe(lang: str) -> None: nlp = load(lang=lang) - + text = "Attention aux articles contractés!" - doc = nlp (text=text) - + doc = nlp(text=text) + assert [t.orth_ for t in doc] == ["Attention", "à", "les", "articles", "contractés", "!"] - + pos = [{"INTJ", "NOUN"}, {"ADP"}, {"DET"}, {"NOUN"}, {"VERB", "ADJ"}, {"PUNCT"}] for i, t in enumerate(doc): assert t.pos_ in pos[i]