diff --git a/.gitignore b/.gitignore index d7fb88407f..a0a7f4a82c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,5 +4,6 @@ dist ld codespell.egg-info *.pyc +*.orig .cache/ .pytest_cache/ diff --git a/codespell_lib/data/dictionary.txt b/codespell_lib/data/dictionary.txt index 86e8396dba..befac5afff 100644 --- a/codespell_lib/data/dictionary.txt +++ b/codespell_lib/data/dictionary.txt @@ -1,6 +1,4 @@ 1nd->1st -a a->a -a bout->about, a bout, aare->are abailable->available abandonded->abandoned @@ -419,7 +417,6 @@ affilate->affiliate affilliate->affiliate affort->afford, effort, affraid->afraid -afore mentioned->aforementioned aforememtioned->aforementioned aforementionned->aforementioned aformentioned->aforementioned @@ -678,12 +675,6 @@ amoutns->amounts amuch->much amung->among amunition->ammunition -an an->an -an other->another -an we->and we -an will->and will -an window->a window -an with->and with analagous->analogous analises->analysis, analyses, analitic->analytic @@ -711,7 +702,6 @@ ancapsulate->encapsulate ancestory->ancestry anchord->anchored ancilliary->ancillary -and and->and andd->and andlers->handlers, antlers, androgenous->androgynous @@ -1398,7 +1388,6 @@ basicly->basically batery->battery bcak->back bcause->because -be be->be beacause->because beachead->beachhead beacuse->because @@ -2197,7 +2186,6 @@ cnat->can't cnter->center co-incided->coincided cobvers->covers -Coca Cola->Coca-Cola coctail->cocktail codepoitn->codepoint codespel->codespell @@ -4169,7 +4157,6 @@ dota->data doub->doubt, daub, doube->double doubel->double -double click->double-click doubleclick->double-click doucment->document doulbe->double @@ -4200,7 +4187,6 @@ dreasm->dreams dreawn->drawn driectly->directly drnik->drink -drop down->drop-down dropable->droppable droped->dropped droping->dropping @@ -5988,7 +5974,6 @@ idicates->indicates idicating->indicating idiosyncracy->idiosyncrasy idividual->individual -if if->if iff->if, disabled due to valid mathematical concept ignonre->ignore ignorence->ignorance @@ -6925,7 +6910,6 @@ irrelvant->irrelevant irreplacable->irreplaceable irresistable->irresistible irresistably->irresistibly -is is->is, it is, is it, is'nt->isn't isconnection->isconnected iserting->inserting @@ -6948,7 +6932,6 @@ istance->instance istead->instead istener->listeners isue->issue -it it->it, it is, is it, iteger->integer iterater->iterator iteratered->iterated @@ -6967,7 +6950,6 @@ itializing->initializing itnernal->internal itnervals->intervals itnroduced->introduced -its is->it is, it's, itsef->itself itselfs->itself itselt->itself @@ -7889,7 +7871,6 @@ muscial->musical muscician->musician muscicians->musicians musn't->mustn't -must aligned->must align, must be aligned, mustator->mutator muste->must mut->must, mutt, moot, @@ -8065,8 +8046,6 @@ normnal->normal northen->northern northereastern->northeastern nortmally->normally -not either->neither -not quit->not quite notabley->notably notaion->notation notasion->notation @@ -8233,7 +8212,6 @@ ocurrence->occurrence ocurrences->occurrences oder->order, odor, oen->one -of of->of offcers->officers offcially->officially offereings->offerings @@ -8389,7 +8367,6 @@ optmizations->optimizations optmize->optimize optmized->optimized optomism->optimism -or or->or orded->ordered orderd->ordered orgamise->organise @@ -8481,7 +8458,6 @@ ouputarea->outputarea ouputs->outputs ouputted->outputted ouputting->outputting -our our->our ourselfs->ourselves ourselve->ourselves ourselvs->ourselves @@ -11862,14 +11838,12 @@ thansk->thanks thant->than thare->there thast->that, that's, -that that->that that, that, that the, that they, that this, that;s->that's thatn->that, than, thats'->that's thats->that's thats;->that's thck->thick -the the->the theather->theater theer->there theese->these @@ -11899,7 +11873,6 @@ thess->this, these, thest->test thether->tether, whether, thev->the -they they->they theyre->they're thgat->that thge->the @@ -11917,7 +11890,6 @@ thikns->thinks thimngs->things thinigs->things thinn->thin -this this->this, this is, is this, thise->these thist->this thiunk->think @@ -11959,7 +11931,6 @@ thron->thrown, throne, throrough->thorough throttoling->throttling throug->through -through out->throughout throughly->thoroughly throught->thought, through, throughout, througout->throughout @@ -12023,7 +11994,6 @@ tkae->take tkaes->takes tkaing->taking tlaking->talking -to to->to, to do, tobbaco->tobacco tobot->robot toches->touches @@ -12958,8 +12928,6 @@ warnig->warning warnigs->warnings warrent->warrant warrriors->warriors -was occured->has occurred -was occurred->has occurred was'nt->wasn't was't->wasn't was;t->wasn't @@ -12978,7 +12946,6 @@ wavelenght->wavelength wavelenghts->wavelengths wavelnes->wavelines wayword->wayward -we we->we weant->want, wean, weaponary->weaponry weas->was diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index e4e951b5f7..b468f9ba74 100644 --- a/codespell_lib/tests/test_basic.py +++ b/codespell_lib/tests/test_basic.py @@ -6,7 +6,6 @@ import os import os.path as op import subprocess -import re import sys import tempfile import warnings @@ -398,37 +397,6 @@ def FakeStdin(text): sys.stdin = oldin -def test_dictionary_formatting(): - """Test that all dictionary entries are in lower case and non-empty.""" - err_dict = dict() - with open(op.join(op.dirname(__file__), '..', 'data', - 'dictionary.txt'), 'rb') as fid: - for line in fid: - err, rep = line.decode('utf-8').split('->') - err = err.lower() - assert err not in err_dict, 'entry already exists' - rep = rep.rstrip('\n') - assert len(rep) > 0, ('%s: correction %r must be non-empty' - % (err, rep)) - assert not re.match('^\s.*', rep), ('%s: correction %r cannot ' - 'start with whitespace' - % (err, rep)) - if rep.count(','): - if not rep.endswith(','): - assert 'disabled' in rep.split(',')[-1], \ - ('currently corrections must end with trailing "," (if' - ' multiple corrections are available) or ' - 'have "disabled" in the comment') - err_dict[err] = rep - reps = [r.strip() for r in rep.lower().split(',')] - reps = [r for r in reps if len(r)] - unique = list() - for r in reps: - if r not in unique: - unique.append(r) - assert reps == unique, 'entries are not (lower-case) unique' - - def test_case_handling(reload_codespell_lib): """Test that capitalized entries get detected properly.""" # Some simple Unicode things diff --git a/codespell_lib/tests/test_dictionary.py b/codespell_lib/tests/test_dictionary.py new file mode 100644 index 0000000000..19c85dd03e --- /dev/null +++ b/codespell_lib/tests/test_dictionary.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- + +import os.path as op +import re + + +def test_dictionary_formatting(): + """Test that all dictionary entries are in lower case and non-empty.""" + err_dict = dict() + ws = re.compile(r'.*\s.*') # whitespace + with open(op.join(op.dirname(__file__), '..', 'data', + 'dictionary.txt'), 'rb') as fid: + for line in fid: + err, rep = line.decode('utf-8').split('->') + err = err.lower() + assert err not in err_dict, 'error %r already exists' % err + assert ws.match(err) is None, 'error %r has whitespace' % err + rep = rep.rstrip('\n') + assert len(rep) > 0, ('error %s: correction %r must be non-empty' + % (err, rep)) + assert not re.match('^\s.*', rep), ('error %s: correction %r ' + 'cannot start with whitespace' + % (err, rep)) + if rep.count(','): + if not rep.endswith(','): + assert 'disabled' in rep.split(',')[-1], \ + ('currently corrections must end with trailing "," (if' + ' multiple corrections are available) or ' + 'have "disabled" in the comment') + err_dict[err] = rep + reps = [r.strip() for r in rep.lower().split(',')] + reps = [r for r in reps if len(r)] + unique = list() + for r in reps: + if r not in unique: + unique.append(r) + assert reps == unique, 'entries are not (lower-case) unique'