From 5b8e13ebcddd43107d692f8269a37cf29069c462 Mon Sep 17 00:00:00 2001 From: Edward Mac Gillavry Date: Tue, 4 May 2021 15:21:15 +0200 Subject: [PATCH] Tests, code, and configs for '-daal', '-burg', '-baan'. Addressing #131 and #133. --- classifier/CompoundStreetClassifier.js | 6 ++++++ .../nl/concatenated_suffixes_inseparable.txt | 1 + .../nl/concatenated_suffixes_separable.txt | 3 +++ test/address.nld.test.js | 15 +++++++++++++++ 4 files changed, 25 insertions(+) create mode 100644 resources/pelias/dictionaries/libpostal/nl/concatenated_suffixes_inseparable.txt diff --git a/classifier/CompoundStreetClassifier.js b/classifier/CompoundStreetClassifier.js index 21c2874d..8383df78 100644 --- a/classifier/CompoundStreetClassifier.js +++ b/classifier/CompoundStreetClassifier.js @@ -15,6 +15,12 @@ class CompoundStreetClassifier extends WordClassifier { // this removes suffixes such as 'r.' which can be ambiguous minlength: 3 }) + + libpostal.load(this.suffixes, ['de', 'nl'], 'concatenated_suffixes_inseparable.txt', { + // remove any suffixes which contain less than 3 characters (excluding a period) + // this removes suffixes such as 'r.' which can be ambiguous + minlength: 3 + }) } each (span) { diff --git a/resources/pelias/dictionaries/libpostal/nl/concatenated_suffixes_inseparable.txt b/resources/pelias/dictionaries/libpostal/nl/concatenated_suffixes_inseparable.txt new file mode 100644 index 00000000..a4904a2c --- /dev/null +++ b/resources/pelias/dictionaries/libpostal/nl/concatenated_suffixes_inseparable.txt @@ -0,0 +1 @@ +burg|brg|bg \ No newline at end of file diff --git a/resources/pelias/dictionaries/libpostal/nl/concatenated_suffixes_separable.txt b/resources/pelias/dictionaries/libpostal/nl/concatenated_suffixes_separable.txt index 63ab1abe..747f4c5c 100644 --- a/resources/pelias/dictionaries/libpostal/nl/concatenated_suffixes_separable.txt +++ b/resources/pelias/dictionaries/libpostal/nl/concatenated_suffixes_separable.txt @@ -1,3 +1,6 @@ +baan +daal dijk !plain|pln. plein|pln +burg|brg|bg \ No newline at end of file diff --git a/test/address.nld.test.js b/test/address.nld.test.js index 423b5486..2de763f1 100644 --- a/test/address.nld.test.js +++ b/test/address.nld.test.js @@ -25,6 +25,16 @@ const testcase = (test, common) => { { street: 'Achter Clarenburg' }, { locality: 'Utrecht' } ]) + assert('Rozenburg', [ + [{ locality: 'Rozenburg' }], + [{ street: 'Rozenburg' }] + ], false) + + assert('Bloemendaal', [ + [{ locality: 'Bloemendaal' }], + [{ street: 'Bloemendaal' }] + ], false) + assert('Brinkstraat 87, 7512EC, Enschede', [ { street: 'Brinkstraat' }, { housenumber: '87' }, { postcode: '7512EC' }, { locality: 'Enschede' } ]) @@ -56,6 +66,11 @@ const testcase = (test, common) => { assert('Burgemeester Martenssingel, Gouda', [ { street: 'Burgemeester Martenssingel' }, { locality: 'Gouda' } ]) + + assert('Agorabaan, Lelystad', [ + { street: 'Agorabaan' }, { locality: 'Lelystad' } + ]) + } module.exports.all = (tape, common) => {