Skip to content

Commit

Permalink
Merge pull request #138 from pelias/elasticsearch2
Browse files Browse the repository at this point in the history
Elasticsearch2
  • Loading branch information
orangejulius authored Jun 30, 2016
2 parents 1254169 + 981ac42 commit ba0fc7d
Show file tree
Hide file tree
Showing 19 changed files with 2,263 additions and 197 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ env:
- CXX=g++-4.8
matrix:
- TEST_SUITE=test
- TEST_SUITE=integration
script: "npm run $TEST_SUITE"
addons:
apt:
Expand Down
26 changes: 25 additions & 1 deletion integration/analyzer_peliasIndexOneEdgeGram.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ module.exports.tests.analyze = function(test, common){

assertAnalysis( 'peliasIndexOneEdgeGramFilter', '1 a ab abc abcdefghij', ['1','a','ab','abc','abcd','abcde','abcdef','abcdefg','abcdefgh','abcdefghi','abcdefghij'] );
assertAnalysis( 'removeAllZeroNumericPrefix', '00001', ['1'] );

assertAnalysis( 'unique', '1 1 1', ['1'] );
assertAnalysis( 'notnull', ' / / ', [] );

Expand Down Expand Up @@ -119,7 +120,30 @@ module.exports.tests.functional = function(test, common){
]);

assertAnalysis( 'address', '101 mapzen place', [
'1', '10', '101', 'm', 'ma', 'map', 'mapz', 'mapze', 'mapzen', 'p', 'pl', 'pla', 'plac', 'place'
'101', 'm', 'ma', 'map', 'mapz', 'mapze', 'mapzen', 'p', 'pl', 'pla', 'plac', 'place'
]);

suite.run( t.end );
});
};

module.exports.tests.address = function(test, common){
test( 'address', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasIndexOneEdgeGram' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'address', '101 mapzen place', [
'101', 'm', 'ma', 'map', 'mapz', 'mapze', 'mapzen', 'p', 'pl', 'pla', 'plac', 'place'
]);

assertAnalysis( 'address', '30 w 26 st', [
'30', 'w', 'we', 'wes', 'west', '26', 's', 'st'
]);

assertAnalysis( 'address', '4B 921 83 st', [
'4b', '921', '83', 's', 'st'
]);

suite.run( t.end );
Expand Down
23 changes: 23 additions & 0 deletions integration/analyzer_peliasQueryFullToken.js
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,29 @@ module.exports.tests.slop = function(test, common){
});
};

/**
 * Address test-case for the 'peliasQueryFullToken' analyzer.
 *
 * Each input address is handed to `analyze` (bound to the suite, the tape
 * handle and the analyzer name) together with the tokens it is expected to
 * produce. The expectations below show the street number kept whole, 'w'
 * expanded to 'west' and '4B' lowercased to '4b'.
 */
module.exports.tests.address = function(test, common){
  test( 'address', function(t){

    // [ input text, expected tokens ] pairs; registered in this exact order
    var expectations = [
      [ '101 mapzen place', [ '101', 'mapzen', 'place' ] ],
      [ '30 w 26 st', [ '30', 'west', '26', 'st' ] ],
      [ '4B 921 83 st', [ '4b', '921', '83', 'st' ] ]
    ];

    var suite = new elastictest.Suite( null, { schema: schema } );
    var checkTokens = analyze.bind( null, suite, t, 'peliasQueryFullToken' );

    // wait for es to bring some shards up
    suite.action( function( done ){
      setTimeout( done, 500 );
    });

    expectations.forEach( function( pair ){
      checkTokens( 'address', pair[0], pair[1] );
    });

    suite.run( t.end );
  });
};

module.exports.all = function (tape, common) {

function test(name, testFunction) {
Expand Down
23 changes: 23 additions & 0 deletions integration/analyzer_peliasQueryPartialToken.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,29 @@ module.exports.tests.functional = function(test, common){
});
};

/**
 * Address test-case for the 'peliasQueryPartialToken' analyzer.
 *
 * Each input address is handed to `analyze` (bound to the suite, the tape
 * handle and the analyzer name) together with the tokens it is expected to
 * produce. The expectations below show the street number kept whole, the
 * single letter 'w' left as-is and '4B' lowercased to '4b'.
 */
module.exports.tests.address = function(test, common){
  test( 'address', function(t){

    // [ input text, expected tokens ] pairs; registered in this exact order
    var expectations = [
      [ '101 mapzen place', [ '101', 'mapzen', 'place' ] ],
      [ '30 w 26 st', [ '30', 'w', '26', 'st' ] ],
      [ '4B 921 83 st', [ '4b', '921', '83', 'st' ] ]
    ];

    var suite = new elastictest.Suite( null, { schema: schema } );
    var checkTokens = analyze.bind( null, suite, t, 'peliasQueryPartialToken' );

    // wait for es to bring some shards up
    suite.action( function( done ){
      setTimeout( done, 500 );
    });

    expectations.forEach( function( pair ){
      checkTokens( 'address', pair[0], pair[1] );
    });

    suite.run( t.end );
  });
};

module.exports.all = function (tape, common) {

function test(name, testFunction) {
Expand Down
2 changes: 1 addition & 1 deletion integration/dynamic_templates.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ module.exports.tests = {};
// 'admin' mappings have a different 'name' dynamic_template to the other types
module.exports.tests.dynamic_templates_name = function(test, common){
test( 'admin->name', nameAssertion( 'country', 'peliasIndexOneEdgeGram' ) );
test( 'document->name', nameAssertion( 'myType', 'peliasIndexTwoEdgeGram' ) );
test( 'document->name', nameAssertion( 'myType', 'peliasIndexOneEdgeGram' ) );
};

// all types share the same phrase mapping
Expand Down
8 changes: 4 additions & 4 deletions integration/validate.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ module.exports.tests = {};

module.exports.tests.validate = function(test, common){
test( 'schema', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );

suite.assert( function( done ){
suite.client.info({}, function( err, res ){
t.equal( res.status, 200 );
suite.client.info({}, function( err, res, status ){
t.equal( status, 200 );
done();
});
});
Expand All @@ -33,4 +33,4 @@ module.exports.all = function (tape, common) {
for( var testCase in module.exports.tests ){
module.exports.tests[testCase](test, common);
}
};
};
15 changes: 5 additions & 10 deletions mappings/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,19 @@ var schema = {
properties: {
name: {
type: 'string',
index_analyzer: 'keyword',
search_analyzer: 'keyword'
analyzer: 'keyword',
},
number: {
type: 'string',
index_analyzer: 'peliasHousenumber',
search_analyzer: 'peliasHousenumber'
analyzer: 'peliasHousenumber',
},
street: {
type: 'string',
index_analyzer: 'peliasStreet',
search_analyzer: 'peliasStreet'
analyzer: 'peliasStreet',
},
zip: {
type: 'string',
index_analyzer: 'peliasZip',
search_analyzer: 'peliasZip'
analyzer: 'peliasZip',
}
}
},
Expand Down Expand Up @@ -115,9 +111,8 @@ var schema = {
match_mapping_type: 'string',
mapping: {
type: 'string',
analyzer: 'peliasIndexTwoEdgeGram',
analyzer: 'peliasIndexOneEdgeGram',
fielddata : {
format : 'fst',
loading: 'eager_global_ordinals'
}
}
Expand Down
9 changes: 2 additions & 7 deletions mappings/partial/centroid.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,7 @@ var schema = {
/* store geohashes (with prefixes) in order to facilitate the geohash_cell filter */
'geohash': true,
'geohash_prefix': true,
'geohash_precision': 18,

/* eager loading should be enabled to prevent cold starts */
'fielddata' : {
'loading': 'eager_global_ordinals'
}
'geohash_precision': 18
};

module.exports = schema;
module.exports = schema;
5 changes: 2 additions & 3 deletions mappings/partial/literal.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{
"type": "string",
"index_analyzer": "keyword",
"search_analyzer": "keyword",
"analyzer": "keyword",
"store": "yes"
}
}
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"dependencies": {
"colors": "^1.1.2",
"mergeable": "latest",
"pelias-config": "latest"
"pelias-config": "~2.0.0"
},
"devDependencies": {
"difflet": "^1.0.1",
Expand Down
46 changes: 13 additions & 33 deletions schema.js
Original file line number Diff line number Diff line change
@@ -1,22 +1,5 @@
var doc = require('./mappings/document');

var oneGramMapping = {
dynamic_templates: [{
nameGram: {
path_match: 'name.*',
match_mapping_type: 'string',
mapping: {
type: 'string',
analyzer: 'peliasIndexOneEdgeGram',
fielddata : {
format : 'fst',
loading: 'eager_global_ordinals'
}
}
}
}]
};

var schema = {
settings: require('./settings')(),
mappings: {
Expand All @@ -28,25 +11,22 @@ var schema = {

/**
these `_type`s are created when the index is created, while all other `_type`
are dynamically created as required at run time, this served two purposes:
are dynamically created as required at run time due to:
1) creating at least one _type will avoid errors when searching against
an empty database. Having at least one _type means that 0 documents are
returned instead of a error from elasticsearch.
creating at least one _type will avoid errors when searching against
an empty database. Having at least one _type means that 0 documents are
returned instead of an error from elasticsearch.
2) allows us to define their analysis differently from the other `_type`s.
in this case, we will elect to use the $oneGramMapping so that these
_type can be searched with a single character. doing so on *all* _type
would result in much larger indeces and decreased search performance.
querying against non-existent _types will result in errors.
**/
country: oneGramMapping,
macroregion: oneGramMapping,
region: oneGramMapping,
macrocounty: oneGramMapping,
county: oneGramMapping,
localadmin: oneGramMapping,
locality: oneGramMapping,
borough: oneGramMapping
country: doc,
macroregion: doc,
region: doc,
macrocounty: doc,
county: doc,
localadmin: doc,
locality: doc,
borough: doc
}
};

Expand Down
3 changes: 3 additions & 0 deletions scripts/info.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@

// debugging helper: calls info() on a pelias-esclient instance and prints
// whatever arguments the client passes to the callback.
var createClient = require('pelias-esclient');
var esclient = createClient();
var printResponse = console.log.bind(console);

esclient.info( {}, printResponse );
47 changes: 47 additions & 0 deletions settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,13 @@ function generate(){
"ampersand",
"remove_ordinals",
"removeAllZeroNumericPrefix",
"surround_single_characters_with_word_markers",
"house_number_word_delimiter",
"remove_single_characters",
"surround_house_numbers_with_word_markers",
"peliasOneEdgeGramFilter",
"eliminate_tokens_starting_with_word_marker",
"remove_encapsulating_word_markers",
"unique",
"notnull"
]
Expand Down Expand Up @@ -213,7 +219,48 @@ function generate(){
"type" : "pattern_replace",
"pattern": " +",
"replacement": " "
},

// START OF COMPLICATED FILTERS TO ANALYZE HOUSE NUMBERS
// @see: https://github.com/pelias/schema/pull/133
// note: we use \x02 (start-of-text) and \x03 (end-of-text) characters to mark word borders
"surround_single_characters_with_word_markers":{
"description": "wraps single characters with markers, needed to protect valid single characters and not those extracted from house numbers (14a creates an 'a' token)",
"type": "pattern_replace",
"pattern": "^(.{1})$",
"replacement": "\x02$1\x03"
},
"house_number_word_delimiter": {
"description": "splits on letter-to-number transition and vice versa, splits 14a -> [14, 14a, a]",
"type": "word_delimiter",
"split_on_numerics": "true",
"preserve_original": "true"
},
"remove_single_characters": {
"description": "removes single characters created from house_number_word_delimiter, removes the letter portion of a house number",
"type": "length",
"min": 2
},
"surround_house_numbers_with_word_markers": {
"description": "surrounds house numbers with markers, needed to protect whole house numbers from elimination step after prefix n-gramming",
"type": "pattern_replace",
"pattern": "^([0-9]+[a-z]?)$",
"replacement": "\x02$1\x03"
},
"eliminate_tokens_starting_with_word_marker": {
"description": "remove tokens starting but not ending with markers, saves whole house numbers wrapped in markers",
"type": "pattern_replace",
"pattern": "^\x02(.*[^\x03])?$",
"replacement": ""
},
"remove_encapsulating_word_markers": {
"description": "extract the stuff between the markers, extract 14 from \x0214\x03 since we're done the prefix n-gramming step",
"type": "pattern_replace",
"pattern": "^\x02(.*)\x03$",
"replacement": "$1"
}
// END OF COMPLICATED FILTERS TO ANALYZE HOUSE NUMBERS

// more generated below
},
"char_filter": {
Expand Down
5 changes: 2 additions & 3 deletions test/compile.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ module.exports.tests.compile = function(test, common) {
module.exports.tests.indeces = function(test, common) {
test('contains "_default_" index definition', function(t) {
t.equal(typeof schema.mappings._default_, 'object', 'mappings present');
t.equal(schema.mappings._default_.dynamic_templates[0].nameGram.mapping.analyzer, 'peliasIndexTwoEdgeGram');
t.equal(schema.mappings._default_.dynamic_templates[0].nameGram.mapping.analyzer, 'peliasIndexOneEdgeGram');
t.end();
});
test('explicitly specify some admin indeces and their analyzer', function(t) {
Expand All @@ -44,7 +44,6 @@ module.exports.tests.dynamic_templates = function(test, common) {
type: 'string',
analyzer: 'peliasIndexOneEdgeGram',
fielddata: {
format: 'fst',
loading: 'eager_global_ordinals'
}
});
Expand All @@ -66,7 +65,7 @@ module.exports.tests.current_schema = function(test, common) {
delete process.env.PELIAS_CONFIG;

// code intentionally commented to allow quick debugging of expected.json
// common.diff(fixture, schemaCopy);
// common.diff(schemaCopy, fixture);

t.deepEqual(schemaCopy, fixture);
t.end();
Expand Down
Loading

0 comments on commit ba0fc7d

Please sign in to comment.