From 434a8b09c32dfba7c071ced97846819ab2c3b83f Mon Sep 17 00:00:00 2001
From: Joxit
Date: Mon, 9 Sep 2019 11:29:22 +0200
Subject: [PATCH] feat(hyphen): Add hyphen in pelias `peliasNameTokenizer` and
 `peliasStreetTokenizer`

---
 integration/analyzer_peliasIndexOneEdgeGram.js  | 4 ++--
 integration/analyzer_peliasPhrase.js            | 2 +-
 integration/analyzer_peliasQuery.js             | 2 +-
 integration/analyzer_peliasQueryFullToken.js    | 2 +-
 integration/analyzer_peliasQueryPartialToken.js | 2 +-
 integration/analyzer_peliasStreet.js            | 2 +-
 settings.js                                     | 4 ++--
 test/fixtures/expected.json                     | 4 ++--
 8 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/integration/analyzer_peliasIndexOneEdgeGram.js b/integration/analyzer_peliasIndexOneEdgeGram.js
index 879e9817..649ed1b3 100644
--- a/integration/analyzer_peliasIndexOneEdgeGram.js
+++ b/integration/analyzer_peliasIndexOneEdgeGram.js
@@ -45,7 +45,7 @@ module.exports.tests.analyze = function(test, common){

   assertAnalysis( 'keyword_street_suffix', 'ctr', ['c', 'ct', 'ctr', 'ce', 'cen', 'cent', 'cente', 'center'] );
   assertAnalysis( 'peliasIndexOneEdgeGramFilter', '1 a ab abc abcdefghij', [
-    '1', 'a', 'a', 'ab', 'a', 'ab', 'abc', 'a', 'ab', 'abc',
+    '1', 'a', 'a', 'ab', 'a', 'ab', 'abc', 'a', 'ab', 'abc',
     'abcd', 'abcde', 'abcdef', 'abcdefg', 'abcdefgh', 'abcdefghi', 'abcdefghij'
   ]);
   assertAnalysis( 'removeAllZeroNumericPrefix', '00001', ['1'] );
@@ -58,7 +58,7 @@ module.exports.tests.analyze = function(test, common){
   assertAnalysis( 'no kstem', 'peoples', ['p', 'pe', 'peo', 'peop', 'peopl', 'people', 'peoples'] );

   // remove punctuation (handled by the char_filter)
-  assertAnalysis( 'punctuation', punctuation.all.join(''), ['-','-&'] );
+  assertAnalysis( 'punctuation', punctuation.all.join(''), ['&', 'a', 'an', 'and', 'u', 'un', 'und'] );
   assertAnalysis( 'punctuation', 'Hawai‘i', ['h', 'ha', 'haw', 'hawa', 'hawai', 'hawaii'] );

   // ensure that very large grams are created
diff --git a/integration/analyzer_peliasPhrase.js b/integration/analyzer_peliasPhrase.js
index b825b0ad..a8213afe 100644
--- a/integration/analyzer_peliasPhrase.js
+++ b/integration/analyzer_peliasPhrase.js
@@ -45,7 +45,7 @@ module.exports.tests.analyze = function(test, common){
   assertAnalysis( 'stem direction synonyms', '20 bear road northeast', ['0:20', '1:bear', '2:road', '2:rd', '3:northeast', '3:ne'], true );

   // remove punctuation (handled by the char_filter)
-  assertAnalysis( 'punctuation', punctuation.all.join(''), [ '-&' ] );
+  assertAnalysis( 'punctuation', punctuation.all.join(''), ['&', 'and', 'und'] );
   assertAnalysis( 'punctuation', 'Hawai‘i', ['hawaii'] );

   suite.run( t.end );
diff --git a/integration/analyzer_peliasQuery.js b/integration/analyzer_peliasQuery.js
index c18e5466..4280cf67 100644
--- a/integration/analyzer_peliasQuery.js
+++ b/integration/analyzer_peliasQuery.js
@@ -33,7 +33,7 @@ module.exports.tests.analyze = function(test, common){
   assertAnalysis('no kstem', 'peoples', ['peoples']);

   // remove punctuation (handled by the char_filter)
-  assertAnalysis( 'punctuation', punctuation.all.join(''), ['-&'] );
+  assertAnalysis( 'punctuation', punctuation.all.join(''), ['&'] );

   suite.run( t.end );
 });
diff --git a/integration/analyzer_peliasQueryFullToken.js b/integration/analyzer_peliasQueryFullToken.js
index 2eca69ef..c08b2c5e 100644
--- a/integration/analyzer_peliasQueryFullToken.js
+++ b/integration/analyzer_peliasQueryFullToken.js
@@ -43,7 +43,7 @@ module.exports.tests.analyze = function(test, common){
   assertAnalysis( 'no kstem', 'peoples', ['peoples'] );

   // remove punctuation (handled by the char_filter)
-  assertAnalysis( 'punctuation', punctuation.all.join(''), ['-&'] );
+  assertAnalysis( 'punctuation', punctuation.all.join(''), ['&', 'and', 'und'] );

   // ensure that very large tokens are created
   assertAnalysis( 'largeGrams', 'grolmanstrasse', [ 'grolmanstrasse' ]);
diff --git a/integration/analyzer_peliasQueryPartialToken.js b/integration/analyzer_peliasQueryPartialToken.js
index 5521bd0c..f08220ca 100644
--- a/integration/analyzer_peliasQueryPartialToken.js
+++ b/integration/analyzer_peliasQueryPartialToken.js
@@ -40,7 +40,7 @@ module.exports.tests.analyze = function(test, common){
   assertAnalysis( 'no kstem', 'peoples', ['peoples'] );

   // remove punctuation (handled by the char_filter)
-  assertAnalysis( 'punctuation', punctuation.all.join(''), ['-&'] );
+  assertAnalysis( 'punctuation', punctuation.all.join(''), ['&', 'and', 'und'] );

   // ensure that very large grams are created
   assertAnalysis( 'largeGrams', 'grolmanstrasse', ['grolmanstrasse']);
diff --git a/integration/analyzer_peliasStreet.js b/integration/analyzer_peliasStreet.js
index 1ab89f14..09f1e9a6 100644
--- a/integration/analyzer_peliasStreet.js
+++ b/integration/analyzer_peliasStreet.js
@@ -15,7 +15,7 @@ module.exports.tests.analyze = function(test, common){
   suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

   assertAnalysis( 'lowercase', 'F', ['f']);
-  assertAnalysis( 'asciifolding', 'Max-Beer-Straße', ['max-beer-strasse']);
+  assertAnalysis( 'asciifolding', 'Max-Beer-Straße', ['max', 'beer', 'strasse', 'str']);
   assertAnalysis( 'trim', ' f ', ['f'] );
   assertAnalysis( 'keyword_street_suffix', 'foo Street', ['0:foo', '1:street', '1:st'], true );
   assertAnalysis( 'keyword_street_suffix', 'foo Road', ['0:foo', '1:road', '1:rd'], true );
diff --git a/settings.js b/settings.js
index b6c14f6a..1385d653 100644
--- a/settings.js
+++ b/settings.js
@@ -27,11 +27,11 @@ function generate(){
     "tokenizer": {
       "peliasNameTokenizer": {
         "type": "pattern",
-        "pattern": "[\\s,/\\\\]+"
+        "pattern": "[\\s,/\\\\-]+"
       },
       "peliasStreetTokenizer": {
         "type": "pattern",
-        "pattern": "[\\s,/\\\\]+"
+        "pattern": "[\\s,/\\\\-]+"
       }
     },
     "analyzer": {
diff --git a/test/fixtures/expected.json b/test/fixtures/expected.json
index 5d03242b..8fe7dda4 100644
--- a/test/fixtures/expected.json
+++ b/test/fixtures/expected.json
@@ -4,11 +4,11 @@
     "tokenizer": {
      "peliasNameTokenizer": {
        "type": "pattern",
-        "pattern": "[\\s,/\\\\]+"
+        "pattern": "[\\s,/\\\\-]+"
      },
      "peliasStreetTokenizer": {
        "type": "pattern",
-        "pattern": "[\\s,/\\\\]+"
+        "pattern": "[\\s,/\\\\-]+"
      }
    },
    "analyzer": {
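
Note (illustrative, not part of the patch): both `peliasNameTokenizer` and `peliasStreetTokenizer` are Elasticsearch `pattern` tokenizers, so adding `-` to the character class makes a hyphen act as a token separator just like whitespace, commas and slashes. The sketch below is a minimal JavaScript approximation of that splitting step; `tokenize`, `OLD_PATTERN` and `NEW_PATTERN` are hypothetical names used only for this example, and the real tokenization runs as a Java regex inside Elasticsearch, followed by the analyzers' lowercase/asciifolding/synonym filters.

```js
// Minimal sketch of the tokenizer change, assuming a plain JS regex split is
// a fair stand-in for Elasticsearch's `pattern` tokenizer (illustration only).
const OLD_PATTERN = /[\s,/\\]+/;  // before: hyphens stay inside tokens
const NEW_PATTERN = /[\s,/\\-]+/; // after: hyphens split tokens too

function tokenize(input, pattern) {
  // The pattern tokenizer emits the text between matches and discards
  // zero-length tokens; filter(Boolean) mimics that here.
  return input.split(pattern).filter(Boolean);
}

console.log(tokenize('Max-Beer-Straße', OLD_PATTERN)); // [ 'Max-Beer-Straße' ]
console.log(tokenize('Max-Beer-Straße', NEW_PATTERN)); // [ 'Max', 'Beer', 'Straße' ]
// Lowercasing, asciifolding ('Straße' -> 'strasse') and the 'str' street
// synonym are added later by token filters, which is why the expected output
// in analyzer_peliasStreet.js becomes ['max', 'beer', 'strasse', 'str'].
```

The updated punctuation expectations appear to follow the same logic: once the tokenizer strips `-`, the hyphen-bearing tokens (`'-'`, `'-&'`) disappear and only `&`, plus the `and`/`und` forms that later filters expand it into, remain in the expected arrays.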