Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
BREAKING CHANGE: remove unit classifier
Browse files Browse the repository at this point in the history
This PR removes the unit number parsing functionality, which was causing out-of-memory (OOM) errors in some situations.
see: pelias/api#1530
orangejulius authored and missinglink committed Oct 5, 2021

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent e9c5608 commit af2254c
Showing 5 changed files with 115 additions and 121 deletions.
6 changes: 0 additions & 6 deletions parser/AddressParser.js
Original file line number Diff line number Diff line change
@@ -2,9 +2,6 @@ const Parser = require('./Parser')
const AlphaNumericClassifier = require('../classifier/AlphaNumericClassifier')
const TokenPositionClassifier = require('../classifier/TokenPositionClassifier')
const HouseNumberClassifier = require('../classifier/HouseNumberClassifier')
const UnitClassifier = require('../classifier/UnitClassifier')
const UnitTypeClassifier = require('../classifier/UnitTypeClassifier')
const UnitTypeUnitClassifier = require('../classifier/UnitTypeUnitClassifier')
const PostcodeClassifier = require('../classifier/PostcodeClassifier')
const StreetPrefixClassifier = require('../classifier/StreetPrefixClassifier')
const StreetSuffixClassifier = require('../classifier/StreetSuffixClassifier')
@@ -47,13 +44,10 @@ class AddressParser extends Parser {
[
// generic word classifiers
new AlphaNumericClassifier(),
new UnitTypeUnitClassifier(),
new TokenPositionClassifier(),

// word classifiers
new UnitTypeClassifier(),
new HouseNumberClassifier(),
new UnitClassifier(),
new PostcodeClassifier(),
new StreetPrefixClassifier(),
new StreetSuffixClassifier(),
2 changes: 1 addition & 1 deletion solver/Solution.test.js
Original file line number Diff line number Diff line change
@@ -49,7 +49,7 @@ module.exports.tests.mask = (test, common) => {
common.parser.classify(tokenizer)
common.parser.solve(tokenizer)

t.equal(tokenizer.solution[0].mask(tokenizer), 'UUU UU NNN SSSSSSSSSSSSSS AAAAAAAAAAAA AAA PPPP')
t.equal(tokenizer.solution[0].mask(tokenizer), 'VVVVVV NNN SSSSSSSSSSSSSS AAAAAAAAAAAA AAA PPPP')
t.end()
})
}
80 changes: 40 additions & 40 deletions test/address.aus.test.js
Original file line number Diff line number Diff line change
@@ -6,53 +6,53 @@ const testcase = (test, common) => {
{ region: 'NSW' }, { country: 'Australia' }
])

assert('Unit 12/345 Main St', [
{ unit_type: 'Unit' }, { unit: '12' },
{ housenumber: '345' },
{ street: 'Main St' }
])
// assert('Unit 12/345 Main St', [
// { unit_type: 'Unit' }, { unit: '12' },
// { housenumber: '345' },
// { street: 'Main St' }
// ])

assert('U 12 345 Main St', [
{ unit_type: 'U' }, { unit: '12' },
{ housenumber: '345' },
{ street: 'Main St' }
])
// assert('U 12 345 Main St', [
// { unit_type: 'U' }, { unit: '12' },
// { housenumber: '345' },
// { street: 'Main St' }
// ])

assert('Apartment 12/345 Main St', [
{ unit_type: 'Apartment' }, { unit: '12' },
{ housenumber: '345' },
{ street: 'Main St' }
])
// assert('Apartment 12/345 Main St', [
// { unit_type: 'Apartment' }, { unit: '12' },
// { housenumber: '345' },
// { street: 'Main St' }
// ])

assert('Apt 12/345 Main St', [
{ unit_type: 'Apt' }, { unit: '12' },
{ housenumber: '345' },
{ street: 'Main St' }
])
// assert('Apt 12/345 Main St', [
// { unit_type: 'Apt' }, { unit: '12' },
// { housenumber: '345' },
// { street: 'Main St' }
// ])

assert('Lot 12/345 Main St', [
{ unit_type: 'Lot' }, { unit: '12' },
{ housenumber: '345' },
{ street: 'Main St' }
])
// assert('Lot 12/345 Main St', [
// { unit_type: 'Lot' }, { unit: '12' },
// { housenumber: '345' },
// { street: 'Main St' }
// ])

assert('U12/345 Main St', [
{ unit_type: 'U' }, { unit: '12' },
{ housenumber: '345' },
{ street: 'Main St' }
])
// assert('U12/345 Main St', [
// { unit_type: 'U' }, { unit: '12' },
// { housenumber: '345' },
// { street: 'Main St' }
// ])

assert('Lot 12/345 Illawarra Road Marrickville NSW 2204', [
{ unit_type: 'Lot' }, { unit: '12' }, { housenumber: '345' },
{ street: 'Illawarra Road' }, { locality: 'Marrickville' },
{ region: 'NSW' }, { postcode: '2204' }
])
// assert('Lot 12/345 Illawarra Road Marrickville NSW 2204', [
// { unit_type: 'Lot' }, { unit: '12' }, { housenumber: '345' },
// { street: 'Illawarra Road' }, { locality: 'Marrickville' },
// { region: 'NSW' }, { postcode: '2204' }
// ])

assert('Lot 2, Burrows Avenue, EDMONDSON PARK, NSW, Australia', [
{ unit_type: 'Lot' }, { unit: '2' },
{ street: 'Burrows Avenue' }, { locality: 'EDMONDSON PARK' },
{ region: 'NSW' }, { country: 'Australia' }
])
// assert('Lot 2, Burrows Avenue, EDMONDSON PARK, NSW, Australia', [
// { unit_type: 'Lot' }, { unit: '2' },
// { street: 'Burrows Avenue' }, { locality: 'EDMONDSON PARK' },
// { region: 'NSW' }, { country: 'Australia' }
// ])
}

module.exports.all = (tape, common) => {
138 changes: 69 additions & 69 deletions test/address.usa.test.js
Original file line number Diff line number Diff line change
@@ -107,75 +107,75 @@ const testcase = (test, common) => {
assert('1111 MD 760, Lusby, MD, USA', [{ housenumber: '1111' }, { street: 'MD 760' }, { locality: 'Lusby' }, { region: 'MD' }, { country: 'USA' }], true)

// unit + unit number tests
assert('52 Ten Eyck St Apt 3 Brooklyn NY', [
{ housenumber: '52' }, { street: 'Ten Eyck St' },
{ unit_type: 'Apt' },
{ unit: '3' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])

assert('52 Ten Eyck St Apt 3b Brooklyn NY', [
{ housenumber: '52' }, { street: 'Ten Eyck St' },
{ unit_type: 'Apt' },
{ unit: '3b' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])

assert('52 Ten Eyck St Apt 3B Brooklyn NY', [
{ housenumber: '52' }, { street: 'Ten Eyck St' },
{ unit_type: 'Apt' },
{ unit: '3B' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])

assert('52 Ten Eyck St Apt #3b Brooklyn NY', [
{ housenumber: '52' }, { street: 'Ten Eyck St' },
{ unit_type: 'Apt' },
{ unit: '#3b' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])

assert('52 Ten Eyck St 3 Brooklyn NY', [
{ housenumber: '52' }, { street: 'Ten Eyck St' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])

assert('52 Ten Eyck St 3 Brooklyn NY', [
{ housenumber: '52' }, { street: 'Ten Eyck St' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])

assert('6 Montague Terrace Apt #A2 Brooklyn NY', [
{ housenumber: '6' },
{ street: 'Montague Terrace' },
{ unit_type: 'Apt' },
{ unit: '#A2' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])

assert('6 Montague Terrace #2A Brooklyn NY', [
{ housenumber: '6' },
{ street: 'Montague Terrace' },
{ unit: '#2A' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])

assert('6 Montague Terrace Apt #A-2 Brooklyn NY', [
{ housenumber: '6' },
{ street: 'Montague Terrace' },
{ unit_type: 'Apt' },
{ unit: '#A-2' },
{ locality: 'Brooklyn' },
{ region: 'NY' }
])
// assert('52 Ten Eyck St Apt 3 Brooklyn NY', [
// { housenumber: '52' }, { street: 'Ten Eyck St' },
// { unit_type: 'Apt' },
// { unit: '3' },
// { locality: 'Brooklyn' },
// { region: 'NY' }
// ])

// assert('52 Ten Eyck St Apt 3b Brooklyn NY', [
// { housenumber: '52' }, { street: 'Ten Eyck St' },
// { unit_type: 'Apt' },
// { unit: '3b' },
// { locality: 'Brooklyn' },
// { region: 'NY' }
// ])

// assert('52 Ten Eyck St Apt 3B Brooklyn NY', [
// { housenumber: '52' }, { street: 'Ten Eyck St' },
// { unit_type: 'Apt' },
// { unit: '3B' },
// { locality: 'Brooklyn' },
// { region: 'NY' }
// ])

// assert('52 Ten Eyck St Apt #3b Brooklyn NY', [
// { housenumber: '52' }, { street: 'Ten Eyck St' },
// { unit_type: 'Apt' },
// { unit: '#3b' },
// { locality: 'Brooklyn' },
// { region: 'NY' }
// ])

// assert('52 Ten Eyck St 3 Brooklyn NY', [
// { housenumber: '52' }, { street: 'Ten Eyck St' },
// { locality: 'Brooklyn' },
// { region: 'NY' }
// ])

// assert('52 Ten Eyck St 3 Brooklyn NY', [
// { housenumber: '52' }, { street: 'Ten Eyck St' },
// { locality: 'Brooklyn' },
// { region: 'NY' }
// ])

// assert('6 Montague Terrace Apt #A2 Brooklyn NY', [
// { housenumber: '6' },
// { street: 'Montague Terrace' },
// { unit_type: 'Apt' },
// { unit: '#A2' },
// { locality: 'Brooklyn' },
// { region: 'NY' }
// ])

// assert('6 Montague Terrace #2A Brooklyn NY', [
// { housenumber: '6' },
// { street: 'Montague Terrace' },
// { unit: '#2A' },
// { locality: 'Brooklyn' },
// { region: 'NY' }
// ])

// assert('6 Montague Terrace Apt #A-2 Brooklyn NY', [
// { housenumber: '6' },
// { street: 'Montague Terrace' },
// { unit_type: 'Apt' },
// { unit: '#A-2' },
// { locality: 'Brooklyn' },
// { region: 'NY' }
// ])

// @todo: the #6 should be classified as a unit number
assert('#6 Montague Terrace Brooklyn NY', [
10 changes: 5 additions & 5 deletions test/intersection.test.js
Original file line number Diff line number Diff line change
@@ -208,11 +208,11 @@ const testcase = (test, common) => {
{ locality: 'Chicago' }, { region: 'IL' },
{ postcode: '60610' }, { country: 'USA' }
])
assert('Akers Mill Rd At Riverbend Club Dr Se, Atlanta, GA 30339, USA', [
{ street: 'Akers Mill Rd' }, { street: 'Riverbend Club Dr Se' },
{ locality: 'Atlanta' }, { region: 'GA' },
{ postcode: '30339' }, { country: 'USA' }
])
// assert('Akers Mill Rd At Riverbend Club Dr Se, Atlanta, GA 30339, USA', [
// { street: 'Akers Mill Rd' }, { street: 'Riverbend Club Dr Se' },
// { locality: 'Atlanta' }, { region: 'GA' },
// { postcode: '30339' }, { country: 'USA' }
// ])
assert('Wiederstein Rd At Fm 3009, Schertz, TX 78154, USA', [
{ street: 'Wiederstein Rd' }, { street: 'Fm 3009' },
{ locality: 'Schertz' }, { region: 'TX' },

0 comments on commit af2254c

Please sign in to comment.