From 15dd3e6f6a81506b5b2a70ded8846d0e8abdd1f7 Mon Sep 17 00:00:00 2001 From: Juneja Sarjil Date: Mon, 20 Jan 2025 02:23:00 +0530 Subject: [PATCH] deduplication and made style --- docs/source/modules/datasets.rst | 4 ++-- doctr/datasets/vocabs.py | 14 ++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/docs/source/modules/datasets.rst b/docs/source/modules/datasets.rst index 140e824a0..8671204fc 100644 --- a/docs/source/modules/datasets.rst +++ b/docs/source/modules/datasets.rst @@ -181,8 +181,8 @@ of vocabs. - 70 - অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ০১২৩৪৫৬৭৮৯ * - gujarati - - 107 - - અઆઇઈઉઊઋએઐઓઔઅંઅઃકખગઘચછજઝઞટઠડઢણતથદધનપફબભમયરલવશષસહળક્ષજ્ઞ૦૧૨૩૪૫૬૭૮૯!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~૰ઽ◌ંઃ॥ૐ઼ ઁ૱ + - 103 + - અઆઇઈઉઊઋએઐઓઔકખગઘચછજઝઞટઠડઢણતથદધનપફબભમયરલવશષસહળક્ષજ્ઞ૦૧૨૩૪૫૬૭૮૯!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~૰ઽ◌ંઃ॥ૐ઼ ઁ૱ * - multilingual - 195 - english & french & german & italian & spanish & portuguese & czech & polish & dutch & norwegian & danish & finnish & swedish & § diff --git a/doctr/datasets/vocabs.py b/doctr/datasets/vocabs.py index 342c138d1..99ae3cc37 100644 --- a/doctr/datasets/vocabs.py +++ b/doctr/datasets/vocabs.py @@ -22,9 +22,9 @@ "hindi_letters": "अआइईउऊऋॠऌॡएऐओऔंःकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसह", "hindi_digits": "०१२३४५६७८९", "hindi_punctuation": "।,?!:्ॐ॰॥", - "gujarati_vowels": "અઆઇઈઉઊઋએઐઓઔઅંઅઃ", - "gujarati_consonants":"કખગઘચછજઝઞટઠડઢણતથદધનપફબભમયરલવશષસહળક્ષજ્ઞ", - "gujarati_digits":"૦૧૨૩૪૫૬૭૮૯", + "gujarati_vowels": "અઆઇઈઉઊઋએઐઓ", + "gujarati_consonants": "કખગઘચછજઝઞટઠડઢણતથદધનપફબભમયરલવશષસહળક્ષજ્ઞ", + "gujarati_digits": "૦૧૨૩૪૫૬૭૮૯", "gujarati_punctuation": "૰ઽ◌ંઃ॥ૐ઼ઁ" + "૱", "bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ", "bangla_digits": "০১২৩৪৫৬৭৮৯", @@ -62,7 +62,13 @@ ) VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪" VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"] -VOCABS["gujarati"] = VOCABS["gujarati_vowels"] + VOCABS["gujarati_consonants"] + VOCABS["gujarati_digits"] + VOCABS["gujarati_punctuation"] + VOCABS["punctuation"] +VOCABS["gujarati"] = ( + VOCABS["gujarati_vowels"] + + VOCABS["gujarati_consonants"] + + VOCABS["gujarati_digits"] + + VOCABS["gujarati_punctuation"] + + VOCABS["punctuation"] +) VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"] VOCABS["ukrainian"] = ( VOCABS["generic_cyrillic_letters"] + VOCABS["digits"] + VOCABS["punctuation"] + VOCABS["currency"] + "ґіїєҐІЇЄ₴"