-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add proper support for multiword tokens
- Loading branch information
1 parent
fa121f5
commit 263c880
Showing
2 changed files
with
51 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
from typing import List | ||
|
||
import pytest | ||
from spacy.lang.fr import FrenchDefaults | ||
from spacy.language import BaseDefaults | ||
from spacy_udpipe import download | ||
from spacy_udpipe.language import load | ||
from spacy_udpipe.utils import get_defaults | ||
|
||
# Language code handed out by the ``lang`` fixture to the tests in this module.
FR: str = "fr"
|
||
|
||
@pytest.fixture
def lang() -> str:
    """Return the language code (the module-level ``FR`` constant) used by the tests."""
    return FR
|
||
|
||
@pytest.fixture(autouse=True)
def download_lang(lang: str) -> None:
    """Call ``download`` for the fixture language.

    Declared ``autouse`` so tests do not have to request it explicitly.
    """
    download(lang)
|
||
|
||
def test_get_defaults(lang: str) -> None:
    """Check what ``get_defaults`` returns for the fixture language and for "blabla".

    The fixture language must map to ``FrenchDefaults``; the string "blabla"
    must map to ``BaseDefaults``.
    """
    defaults_for_lang = get_defaults(lang)
    assert defaults_for_lang == FrenchDefaults

    defaults_for_other = get_defaults("blabla")
    assert defaults_for_other == BaseDefaults
|
||
|
||
def test_spacy_udpipe(lang: str) -> None:
    """Run the loaded pipeline on one sentence and check its token-level output.

    Verifies the pipeline's ``lang`` metadata, then the surface forms, POS
    tags, head indices and dependency labels of the resulting tokens.
    """
    nlp = load(lang=lang)
    assert nlp._meta["lang"] == f"udpipe_{lang}"

    text = "Attention aux articles contractés!"
    doc = nlp(text=text)

    # The input contains "aux"; the expected tokens contain "à" and "les" instead.
    expected_orth = ["Attention", "à", "les", "articles", "contractés", "!"]
    assert [token.orth_ for token in doc] == expected_orth

    # Each position accepts any tag from its set.
    allowed_pos = [
        {"INTJ", "NOUN"},
        {"ADP"},
        {"DET"},
        {"NOUN"},
        {"VERB", "ADJ"},
        {"PUNCT"},
    ]
    for token, candidates in zip(doc, allowed_pos):
        assert token.pos_ in candidates

    expected_heads = [0, 3, 3, 0, 3, 0]
    assert [token.head.i for token in doc] == expected_heads

    # Each position accepts any dependency label from its set.
    allowed_dep = [
        {"ROOT", "root"},
        {"case"},
        {"det"},
        {"nmod", "obl", "obl:arg"},
        {"acl", "amod"},
        {"punct"},
    ]
    for token, candidates in zip(doc, allowed_dep):
        assert token.dep_ in candidates