Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Elasticsearch2 #138

Merged
merged 8 commits into from
Jun 30, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ env:
- CXX=g++-4.8
matrix:
- TEST_SUITE=test
- TEST_SUITE=integration
script: "npm run $TEST_SUITE"
addons:
apt:
Expand Down
26 changes: 25 additions & 1 deletion integration/analyzer_peliasIndexOneEdgeGram.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ module.exports.tests.analyze = function(test, common){

assertAnalysis( 'peliasIndexOneEdgeGramFilter', '1 a ab abc abcdefghij', ['1','a','ab','abc','abcd','abcde','abcdef','abcdefg','abcdefgh','abcdefghi','abcdefghij'] );
assertAnalysis( 'removeAllZeroNumericPrefix', '00001', ['1'] );

assertAnalysis( 'unique', '1 1 1', ['1'] );
assertAnalysis( 'notnull', ' / / ', [] );

Expand Down Expand Up @@ -119,7 +120,30 @@ module.exports.tests.functional = function(test, common){
]);

assertAnalysis( 'address', '101 mapzen place', [
'1', '10', '101', 'm', 'ma', 'map', 'mapz', 'mapze', 'mapzen', 'p', 'pl', 'pla', 'plac', 'place'
'101', 'm', 'ma', 'map', 'mapz', 'mapze', 'mapzen', 'p', 'pl', 'pla', 'plac', 'place'
]);

suite.run( t.end );
});
};

module.exports.tests.address = function(test, common){
test( 'address', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasIndexOneEdgeGram' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'address', '101 mapzen place', [
'101', 'm', 'ma', 'map', 'mapz', 'mapze', 'mapzen', 'p', 'pl', 'pla', 'plac', 'place'
]);

assertAnalysis( 'address', '30 w 26 st', [
'30', 'w', 'we', 'wes', 'west', '26', 's', 'st'
]);

assertAnalysis( 'address', '4B 921 83 st', [
'4b', '921', '83', 's', 'st'
]);

suite.run( t.end );
Expand Down
23 changes: 23 additions & 0 deletions integration/analyzer_peliasQueryFullToken.js
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,29 @@ module.exports.tests.slop = function(test, common){
});
};

// address: run street-address style inputs through the 'peliasQueryFullToken' analyzer
module.exports.tests.address = function(test, common){
  test( 'address', function(t){

    var suite = new elastictest.Suite( null, { schema: schema } );

    // wait for es to bring some shards up
    suite.action( function( done ){
      setTimeout( done, 500 );
    });

    // [ input text, token array ] pairs, forwarded to analyze() in this order
    var cases = [
      [ '101 mapzen place', [ '101', 'mapzen', 'place' ] ],
      [ '30 w 26 st',       [ '30', 'west', '26', 'st' ] ],
      [ '4B 921 83 st',     [ '4b', '921', '83', 'st' ] ]
    ];

    cases.forEach( function( pair ){
      analyze.call( null, suite, t, 'peliasQueryFullToken', 'address', pair[0], pair[1] );
    });

    suite.run( t.end );
  });
};

module.exports.all = function (tape, common) {

function test(name, testFunction) {
Expand Down
23 changes: 23 additions & 0 deletions integration/analyzer_peliasQueryPartialToken.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,29 @@ module.exports.tests.functional = function(test, common){
});
};

// address: run street-address style inputs through the 'peliasQueryPartialToken' analyzer
module.exports.tests.address = function(test, common){
  test( 'address', function(t){

    var suite = new elastictest.Suite( null, { schema: schema } );

    // wait for es to bring some shards up
    suite.action( function( done ){
      setTimeout( done, 500 );
    });

    // [ input text, token array ] pairs, forwarded to analyze() in this order
    var cases = [
      [ '101 mapzen place', [ '101', 'mapzen', 'place' ] ],
      [ '30 w 26 st',       [ '30', 'w', '26', 'st' ] ],
      [ '4B 921 83 st',     [ '4b', '921', '83', 'st' ] ]
    ];

    cases.forEach( function( pair ){
      analyze.call( null, suite, t, 'peliasQueryPartialToken', 'address', pair[0], pair[1] );
    });

    suite.run( t.end );
  });
};

module.exports.all = function (tape, common) {

function test(name, testFunction) {
Expand Down
2 changes: 1 addition & 1 deletion integration/dynamic_templates.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ module.exports.tests = {};
// 'admin' mappings have a different 'name' dynamic_template to the other types
module.exports.tests.dynamic_templates_name = function(test, common){
test( 'admin->name', nameAssertion( 'country', 'peliasIndexOneEdgeGram' ) );
test( 'document->name', nameAssertion( 'myType', 'peliasIndexTwoEdgeGram' ) );
test( 'document->name', nameAssertion( 'myType', 'peliasIndexOneEdgeGram' ) );
};

// all types share the same phrase mapping
Expand Down
8 changes: 4 additions & 4 deletions integration/validate.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ module.exports.tests = {};

module.exports.tests.validate = function(test, common){
test( 'schema', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );

suite.assert( function( done ){
suite.client.info({}, function( err, res ){
t.equal( res.status, 200 );
suite.client.info({}, function( err, res, status ){
t.equal( status, 200 );
done();
});
});
Expand All @@ -33,4 +33,4 @@ module.exports.all = function (tape, common) {
for( var testCase in module.exports.tests ){
module.exports.tests[testCase](test, common);
}
};
};
15 changes: 5 additions & 10 deletions mappings/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,19 @@ var schema = {
properties: {
name: {
type: 'string',
index_analyzer: 'keyword',
search_analyzer: 'keyword'
analyzer: 'keyword',
},
number: {
type: 'string',
index_analyzer: 'peliasHousenumber',
search_analyzer: 'peliasHousenumber'
analyzer: 'peliasHousenumber',
},
street: {
type: 'string',
index_analyzer: 'peliasStreet',
search_analyzer: 'peliasStreet'
analyzer: 'peliasStreet',
},
zip: {
type: 'string',
index_analyzer: 'peliasZip',
search_analyzer: 'peliasZip'
analyzer: 'peliasZip',
}
}
},
Expand Down Expand Up @@ -115,9 +111,8 @@ var schema = {
match_mapping_type: 'string',
mapping: {
type: 'string',
analyzer: 'peliasIndexTwoEdgeGram',
analyzer: 'peliasIndexOneEdgeGram',
fielddata : {
format : 'fst',
loading: 'eager_global_ordinals'
}
}
Expand Down
9 changes: 2 additions & 7 deletions mappings/partial/centroid.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,7 @@ var schema = {
/* store geohashes (with prefixes) in order to facilitate the geohash_cell filter */
'geohash': true,
'geohash_prefix': true,
'geohash_precision': 18,

/* eager loading should be enabled to prevent cold starts */
'fielddata' : {
'loading': 'eager_global_ordinals'
}
'geohash_precision': 18
};

module.exports = schema;
module.exports = schema;
5 changes: 2 additions & 3 deletions mappings/partial/literal.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{
"type": "string",
"index_analyzer": "keyword",
"search_analyzer": "keyword",
"analyzer": "keyword",
"store": "yes"
}
}
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"dependencies": {
"colors": "^1.1.2",
"mergeable": "latest",
"pelias-config": "latest"
"pelias-config": "~2.0.0"
},
"devDependencies": {
"difflet": "^1.0.1",
Expand Down
46 changes: 13 additions & 33 deletions schema.js
Original file line number Diff line number Diff line change
@@ -1,22 +1,5 @@
var doc = require('./mappings/document');

var oneGramMapping = {
dynamic_templates: [{
nameGram: {
path_match: 'name.*',
match_mapping_type: 'string',
mapping: {
type: 'string',
analyzer: 'peliasIndexOneEdgeGram',
fielddata : {
format : 'fst',
loading: 'eager_global_ordinals'
}
}
}
}]
};

var schema = {
settings: require('./settings')(),
mappings: {
Expand All @@ -28,25 +11,22 @@ var schema = {

/**
these `_type`s are created when the index is created, while all other `_type`
are dynamically created as required at run time, this served two purposes:
are dynamically created as required at run time due to:

1) creating at least one _type will avoid errors when searching against
an empty database. Having at least one _type means that 0 documents are
returned instead of a error from elasticsearch.
creating at least one _type will avoid errors when searching against
an empty database. Having at least one _type means that 0 documents are
returned instead of an error from elasticsearch.

2) allows us to define their analysis differently from the other `_type`s.
in this case, we will elect to use the $oneGramMapping so that these
_type can be searched with a single character. doing so on *all* _type
would result in much larger indeces and decreased search performance.
querying against non-existent _types will result in errors.
**/
country: oneGramMapping,
macroregion: oneGramMapping,
region: oneGramMapping,
macrocounty: oneGramMapping,
county: oneGramMapping,
localadmin: oneGramMapping,
locality: oneGramMapping,
borough: oneGramMapping
country: doc,
macroregion: doc,
region: doc,
macrocounty: doc,
county: doc,
localadmin: doc,
locality: doc,
borough: doc
}
};

Expand Down
3 changes: 3 additions & 0 deletions scripts/info.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@

// call info() on the pelias-esclient client and print every callback argument to the console
var client = require('pelias-esclient')();
client.info( {}, function(){
  console.log.apply( console, arguments );
});
47 changes: 47 additions & 0 deletions settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,13 @@ function generate(){
"ampersand",
"remove_ordinals",
"removeAllZeroNumericPrefix",
"surround_single_characters_with_word_markers",
"house_number_word_delimiter",
"remove_single_characters",
"surround_house_numbers_with_word_markers",
"peliasOneEdgeGramFilter",
"eliminate_tokens_starting_with_word_marker",
"remove_encapsulating_word_markers",
"unique",
"notnull"
]
Expand Down Expand Up @@ -213,7 +219,48 @@ function generate(){
"type" : "pattern_replace",
"pattern": " +",
"replacement": " "
},

// START OF COMPLICATED FILTERS TO ANALYZE HOUSE NUMBERS
// @see: https://github.com/pelias/schema/pull/133
// note: we use \x02 (start-of-text) and \x03 (end-of-text) characters to mark word borders
"surround_single_characters_with_word_markers":{
"description": "wraps single characters with markers, needed to protect valid single characters and not those extracted from house numbers (14a creates an 'a' token)",
"type": "pattern_replace",
"pattern": "^(.{1})$",
"replacement": "\x02$1\x03"
},
"house_number_word_delimiter": {
"description": "splits on letter-to-number transition and vice versa, splits 14a -> [14, 14a, a]",
"type": "word_delimiter",
"split_on_numerics": "true",
"preserve_original": "true"
},
"remove_single_characters": {
"description": "removes single characters created from house_number_word_delimiter, removes the letter portion of a house number",
"type": "length",
"min": 2
},
"surround_house_numbers_with_word_markers": {
"description": "surrounds house numbers with markers, needed to protect whole house numbers from elimination step after prefix n-gramming",
"type": "pattern_replace",
"pattern": "^([0-9]+[a-z]?)$",
"replacement": "\x02$1\x03"
},
"eliminate_tokens_starting_with_word_marker": {
"description": "remove tokens starting but not ending with markers, saves whole house numbers wrapped in markers",
"type": "pattern_replace",
"pattern": "^\x02(.*[^\x03])?$",
"replacement": ""
},
"remove_encapsulating_word_markers": {
"description": "extract the stuff between the markers, extract 14 from \x0214\x03 since we're done the prefix n-gramming step",
"type": "pattern_replace",
"pattern": "^\x02(.*)\x03$",
"replacement": "$1"
}
// END OF COMPLICATED FILTERS TO ANALYZE HOUSE NUMBERS

// more generated below
},
"char_filter": {
Expand Down
5 changes: 2 additions & 3 deletions test/compile.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ module.exports.tests.compile = function(test, common) {
module.exports.tests.indeces = function(test, common) {
test('contains "_default_" index definition', function(t) {
t.equal(typeof schema.mappings._default_, 'object', 'mappings present');
t.equal(schema.mappings._default_.dynamic_templates[0].nameGram.mapping.analyzer, 'peliasIndexTwoEdgeGram');
t.equal(schema.mappings._default_.dynamic_templates[0].nameGram.mapping.analyzer, 'peliasIndexOneEdgeGram');
t.end();
});
test('explicitly specify some admin indeces and their analyzer', function(t) {
Expand All @@ -44,7 +44,6 @@ module.exports.tests.dynamic_templates = function(test, common) {
type: 'string',
analyzer: 'peliasIndexOneEdgeGram',
fielddata: {
format: 'fst',
loading: 'eager_global_ordinals'
}
});
Expand All @@ -66,7 +65,7 @@ module.exports.tests.current_schema = function(test, common) {
delete process.env.PELIAS_CONFIG;

// code intentionally commented to allow quick debugging of expected.json
// common.diff(fixture, schemaCopy);
// common.diff(schemaCopy, fixture);

t.deepEqual(schemaCopy, fixture);
t.end();
Expand Down
Loading