From 50c0272d8e2bd7c2b393084e5d6b701118d3914c Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Fri, 21 May 2021 08:51:13 -0700 Subject: [PATCH] BREAKING CHANGE: remove unit classifier This PR removes the unit number parsing functionality which was causing OOM errors in some situations. see: https://github.com/pelias/api/issues/1530 --- parser/AddressParser.js | 6 -- solver/Solution.test.js | 2 +- test/address.aus.test.js | 80 +++++++++++----------- test/address.usa.test.js | 138 +++++++++++++++++++------------------- test/intersection.test.js | 10 +-- 5 files changed, 115 insertions(+), 121 deletions(-) diff --git a/parser/AddressParser.js b/parser/AddressParser.js index 95a1d821..60899fba 100644 --- a/parser/AddressParser.js +++ b/parser/AddressParser.js @@ -2,9 +2,6 @@ const Parser = require('./Parser') const AlphaNumericClassifier = require('../classifier/AlphaNumericClassifier') const TokenPositionClassifier = require('../classifier/TokenPositionClassifier') const HouseNumberClassifier = require('../classifier/HouseNumberClassifier') -const UnitClassifier = require('../classifier/UnitClassifier') -const UnitTypeClassifier = require('../classifier/UnitTypeClassifier') -const UnitTypeUnitClassifier = require('../classifier/UnitTypeUnitClassifier') const PostcodeClassifier = require('../classifier/PostcodeClassifier') const StreetPrefixClassifier = require('../classifier/StreetPrefixClassifier') const StreetSuffixClassifier = require('../classifier/StreetSuffixClassifier') @@ -47,13 +44,10 @@ class AddressParser extends Parser { [ // generic word classifiers new AlphaNumericClassifier(), - new UnitTypeUnitClassifier(), new TokenPositionClassifier(), // word classifiers - new UnitTypeClassifier(), new HouseNumberClassifier(), - new UnitClassifier(), new PostcodeClassifier(), new StreetPrefixClassifier(), new StreetSuffixClassifier(), diff --git a/solver/Solution.test.js b/solver/Solution.test.js index 58fe9fa9..cdf77ce9 100644 --- a/solver/Solution.test.js +++ b/solver/Solution.test.js @@ -49,7 +49,7 @@ module.exports.tests.mask = (test, common) => { common.parser.classify(tokenizer) common.parser.solve(tokenizer) - t.equal(tokenizer.solution[0].mask(tokenizer), 'UUU UU NNN SSSSSSSSSSSSSS AAAAAAAAAAAA AAA PPPP') + t.equal(tokenizer.solution[0].mask(tokenizer), 'VVVVVV NNN SSSSSSSSSSSSSS AAAAAAAAAAAA AAA PPPP') t.end() }) } diff --git a/test/address.aus.test.js b/test/address.aus.test.js index 29515b96..67827714 100644 --- a/test/address.aus.test.js +++ b/test/address.aus.test.js @@ -6,53 +6,53 @@ const testcase = (test, common) => { { region: 'NSW' }, { country: 'Australia' } ]) - assert('Unit 12/345 Main St', [ - { unit_type: 'Unit' }, { unit: '12' }, - { housenumber: '345' }, - { street: 'Main St' } - ]) + // assert('Unit 12/345 Main St', [ + // { unit_type: 'Unit' }, { unit: '12' }, + // { housenumber: '345' }, + // { street: 'Main St' } + // ]) - assert('U 12 345 Main St', [ - { unit_type: 'U' }, { unit: '12' }, - { housenumber: '345' }, - { street: 'Main St' } - ]) + // assert('U 12 345 Main St', [ + // { unit_type: 'U' }, { unit: '12' }, + // { housenumber: '345' }, + // { street: 'Main St' } + // ]) - assert('Apartment 12/345 Main St', [ - { unit_type: 'Apartment' }, { unit: '12' }, - { housenumber: '345' }, - { street: 'Main St' } - ]) + // assert('Apartment 12/345 Main St', [ + // { unit_type: 'Apartment' }, { unit: '12' }, + // { housenumber: '345' }, + // { street: 'Main St' } + // ]) - assert('Apt 12/345 Main St', [ - { unit_type: 'Apt' }, { unit: '12' }, - { housenumber: '345' }, - { street: 'Main St' } - ]) + // assert('Apt 12/345 Main St', [ + // { unit_type: 'Apt' }, { unit: '12' }, + // { housenumber: '345' }, + // { street: 'Main St' } + // ]) - assert('Lot 12/345 Main St', [ - { unit_type: 'Lot' }, { unit: '12' }, - { housenumber: '345' }, - { street: 'Main St' } - ]) + // assert('Lot 12/345 Main St', [ + // { unit_type: 'Lot' }, { unit: '12' }, + // { housenumber: '345' }, + // { street: 'Main St' } + // ]) - assert('U12/345 Main St', [ - { unit_type: 'U' }, { unit: '12' }, - { housenumber: '345' }, - { street: 'Main St' } - ]) + // assert('U12/345 Main St', [ + // { unit_type: 'U' }, { unit: '12' }, + // { housenumber: '345' }, + // { street: 'Main St' } + // ]) - assert('Lot 12/345 Illawarra Road Marrickville NSW 2204', [ - { unit_type: 'Lot' }, { unit: '12' }, { housenumber: '345' }, - { street: 'Illawarra Road' }, { locality: 'Marrickville' }, - { region: 'NSW' }, { postcode: '2204' } - ]) + // assert('Lot 12/345 Illawarra Road Marrickville NSW 2204', [ + // { unit_type: 'Lot' }, { unit: '12' }, { housenumber: '345' }, + // { street: 'Illawarra Road' }, { locality: 'Marrickville' }, + // { region: 'NSW' }, { postcode: '2204' } + // ]) - assert('Lot 2, Burrows Avenue, EDMONDSON PARK, NSW, Australia', [ - { unit_type: 'Lot' }, { unit: '2' }, - { street: 'Burrows Avenue' }, { locality: 'EDMONDSON PARK' }, - { region: 'NSW' }, { country: 'Australia' } - ]) + // assert('Lot 2, Burrows Avenue, EDMONDSON PARK, NSW, Australia', [ + // { unit_type: 'Lot' }, { unit: '2' }, + // { street: 'Burrows Avenue' }, { locality: 'EDMONDSON PARK' }, + // { region: 'NSW' }, { country: 'Australia' } + // ]) } module.exports.all = (tape, common) => { diff --git a/test/address.usa.test.js b/test/address.usa.test.js index cfa4f971..50abb10a 100644 --- a/test/address.usa.test.js +++ b/test/address.usa.test.js @@ -107,75 +107,75 @@ const testcase = (test, common) => { assert('1111 MD 760, Lusby, MD, USA', [{ housenumber: '1111' }, { street: 'MD 760' }, { locality: 'Lusby' }, { region: 'MD' }, { country: 'USA' }], true) // unit + unit number tests - assert('52 Ten Eyck St Apt 3 Brooklyn NY', [ - { housenumber: '52' }, { street: 'Ten Eyck St' }, - { unit_type: 'Apt' }, - { unit: '3' }, - { locality: 'Brooklyn' }, - { region: 'NY' } - ]) - - assert('52 Ten Eyck St Apt 3b Brooklyn NY', [ - { housenumber: '52' }, { street: 'Ten Eyck St' }, - { unit_type: 'Apt' }, - { unit: '3b' }, - { locality: 'Brooklyn' }, - { region: 'NY' } - ]) - - assert('52 Ten Eyck St Apt 3B Brooklyn NY', [ - { housenumber: '52' }, { street: 'Ten Eyck St' }, - { unit_type: 'Apt' }, - { unit: '3B' }, - { locality: 'Brooklyn' }, - { region: 'NY' } - ]) - - assert('52 Ten Eyck St Apt #3b Brooklyn NY', [ - { housenumber: '52' }, { street: 'Ten Eyck St' }, - { unit_type: 'Apt' }, - { unit: '#3b' }, - { locality: 'Brooklyn' }, - { region: 'NY' } - ]) - - assert('52 Ten Eyck St 3 Brooklyn NY', [ - { housenumber: '52' }, { street: 'Ten Eyck St' }, - { locality: 'Brooklyn' }, - { region: 'NY' } - ]) - - assert('52 Ten Eyck St 3 Brooklyn NY', [ - { housenumber: '52' }, { street: 'Ten Eyck St' }, - { locality: 'Brooklyn' }, - { region: 'NY' } - ]) - - assert('6 Montague Terrace Apt #A2 Brooklyn NY', [ - { housenumber: '6' }, - { street: 'Montague Terrace' }, - { unit_type: 'Apt' }, - { unit: '#A2' }, - { locality: 'Brooklyn' }, - { region: 'NY' } - ]) - - assert('6 Montague Terrace #2A Brooklyn NY', [ - { housenumber: '6' }, - { street: 'Montague Terrace' }, - { unit: '#2A' }, - { locality: 'Brooklyn' }, - { region: 'NY' } - ]) - - assert('6 Montague Terrace Apt #A-2 Brooklyn NY', [ - { housenumber: '6' }, - { street: 'Montague Terrace' }, - { unit_type: 'Apt' }, - { unit: '#A-2' }, - { locality: 'Brooklyn' }, - { region: 'NY' } - ]) + // assert('52 Ten Eyck St Apt 3 Brooklyn NY', [ + // { housenumber: '52' }, { street: 'Ten Eyck St' }, + // { unit_type: 'Apt' }, + // { unit: '3' }, + // { locality: 'Brooklyn' }, + // { region: 'NY' } + // ]) + + // assert('52 Ten Eyck St Apt 3b Brooklyn NY', [ + // { housenumber: '52' }, { street: 'Ten Eyck St' }, + // { unit_type: 'Apt' }, + // { unit: '3b' }, + // { locality: 'Brooklyn' }, + // { region: 'NY' } + // ]) + + // assert('52 Ten Eyck St Apt 3B Brooklyn NY', [ + // { housenumber: '52' }, { street: 'Ten Eyck St' }, + // { unit_type: 'Apt' }, + // { unit: '3B' }, + // { locality: 'Brooklyn' }, + // { region: 'NY' } + // ]) + + // assert('52 Ten Eyck St Apt #3b Brooklyn NY', [ + // { housenumber: '52' }, { street: 'Ten Eyck St' }, + // { unit_type: 'Apt' }, + // { unit: '#3b' }, + // { locality: 'Brooklyn' }, + // { region: 'NY' } + // ]) + + // assert('52 Ten Eyck St 3 Brooklyn NY', [ + // { housenumber: '52' }, { street: 'Ten Eyck St' }, + // { locality: 'Brooklyn' }, + // { region: 'NY' } + // ]) + + // assert('52 Ten Eyck St 3 Brooklyn NY', [ + // { housenumber: '52' }, { street: 'Ten Eyck St' }, + // { locality: 'Brooklyn' }, + // { region: 'NY' } + // ]) + + // assert('6 Montague Terrace Apt #A2 Brooklyn NY', [ + // { housenumber: '6' }, + // { street: 'Montague Terrace' }, + // { unit_type: 'Apt' }, + // { unit: '#A2' }, + // { locality: 'Brooklyn' }, + // { region: 'NY' } + // ]) + + // assert('6 Montague Terrace #2A Brooklyn NY', [ + // { housenumber: '6' }, + // { street: 'Montague Terrace' }, + // { unit: '#2A' }, + // { locality: 'Brooklyn' }, + // { region: 'NY' } + // ]) + + // assert('6 Montague Terrace Apt #A-2 Brooklyn NY', [ + // { housenumber: '6' }, + // { street: 'Montague Terrace' }, + // { unit_type: 'Apt' }, + // { unit: '#A-2' }, + // { locality: 'Brooklyn' }, + // { region: 'NY' } + // ]) // @todo: the #6 should be classified as a unit number assert('#6 Montague Terrace Brooklyn NY', [ diff --git a/test/intersection.test.js b/test/intersection.test.js index a113282c..9ce59b31 100644 --- a/test/intersection.test.js +++ b/test/intersection.test.js @@ -208,11 +208,11 @@ const testcase = (test, common) => { { locality: 'Chicago' }, { region: 'IL' }, { postcode: '60610' }, { country: 'USA' } ]) - assert('Akers Mill Rd At Riverbend Club Dr Se, Atlanta, GA 30339, USA', [ - { street: 'Akers Mill Rd' }, { street: 'Riverbend Club Dr Se' }, - { locality: 'Atlanta' }, { region: 'GA' }, - { postcode: '30339' }, { country: 'USA' } - ]) + // assert('Akers Mill Rd At Riverbend Club Dr Se, Atlanta, GA 30339, USA', [ + // { street: 'Akers Mill Rd' }, { street: 'Riverbend Club Dr Se' }, + // { locality: 'Atlanta' }, { region: 'GA' }, + // { postcode: '30339' }, { country: 'USA' } + // ]) assert('Wiederstein Rd At Fm 3009, Schertz, TX 78154, USA', [ { street: 'Wiederstein Rd' }, { street: 'Fm 3009' }, { locality: 'Schertz' }, { region: 'TX' },