diff --git a/src/main/java/de/komoot/photon/App.java b/src/main/java/de/komoot/photon/App.java
index 3ba382da4..33868a6c3 100644
--- a/src/main/java/de/komoot/photon/App.java
+++ b/src/main/java/de/komoot/photon/App.java
@@ -65,7 +65,7 @@ public static void main(String[] rawArgs) throws Exception {
 
         // Working on an existing installation.
         // Update the index settings in case there are any changes.
-        esServer.updateIndexSettings();
+        esServer.updateIndexSettings(args.getSynonymFile());
         esClient.admin().cluster().prepareHealth().setWaitForYellowStatus().get();
 
         if (args.isNominatimUpdate()) {
diff --git a/src/main/java/de/komoot/photon/CommandLineArgs.java b/src/main/java/de/komoot/photon/CommandLineArgs.java
index d7a7bb6f5..9fd3ed453 100644
--- a/src/main/java/de/komoot/photon/CommandLineArgs.java
+++ b/src/main/java/de/komoot/photon/CommandLineArgs.java
@@ -35,6 +35,9 @@ public class CommandLineArgs {
     @Parameter(names = "-extra-tags", description = "additional tags to save for each place")
     private String extraTags = "";
 
+    @Parameter(names = "-synonym-file", description = "list of synonyms to apply at query time")
+    private String synonymFile = null;
+
     @Parameter(names = "-json", description = "import nominatim database and dump it to a json like files in (useful for developing)")
     private String jsonDump = null;
 
diff --git a/src/main/java/de/komoot/photon/elasticsearch/IndexSettings.java b/src/main/java/de/komoot/photon/elasticsearch/IndexSettings.java
index 84f1a65f8..b077cc132 100644
--- a/src/main/java/de/komoot/photon/elasticsearch/IndexSettings.java
+++ b/src/main/java/de/komoot/photon/elasticsearch/IndexSettings.java
@@ -2,9 +2,16 @@
 
 import org.elasticsearch.client.Client;
 import org.elasticsearch.common.xcontent.XContentType;
+import org.json.JSONArray;
 import org.json.JSONObject;
 import org.json.JSONTokener;
 
+import java.io.BufferedReader;
+import java.io.IOException;
 import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Locale;
 
 /**
@@ -41,6 +48,53 @@ public IndexSettings setShards(Integer numShards) {
         return this;
     }
 
+    /**
+     * Add query-time synonyms to the search analyzer.
+     *
+     * Synonyms need to be supplied in a simple UTF-8 text file with one synonym entry per line.
+     * Synonyms need to be comma-separated. Only single-term synonyms are supported at this
+     * time. Spaces in the synonym list are considered a syntax error. Empty lines are skipped.
+     *
+     * The settings are only modified once the file has been read and validated completely.
+     *
+     * @param synonymFile File containing the synonyms. If null, the settings are left untouched.
+     *
+     * @return This object for chaining.
+     *
+     * @throws IOException if the synonym file cannot be read.
+     */
+    public IndexSettings setSynonyms(String synonymFile) throws IOException {
+        if (synonymFile == null) {
+            return this;
+        }
+
+        JSONArray synonyms = new JSONArray();
+        // try-with-resources closes the reader even when a line fails validation.
+        try (BufferedReader br = Files.newBufferedReader(Paths.get(synonymFile), StandardCharsets.UTF_8)) {
+            String line;
+            while ((line = br.readLine()) != null) {
+                if (line.isEmpty()) {
+                    continue;
+                }
+                if (line.indexOf(' ') >= 0) {
+                    throw new RuntimeException("Synonym list must not contain any spaces or multi word terms.");
+                }
+                synonyms.put(line.toLowerCase(Locale.ROOT));
+            }
+        }
+
+        JSONObject filters = (JSONObject) settings.optQuery("/analysis/filter");
+        if (filters == null) {
+            throw new RuntimeException("Analyser update: cannot find filter definition");
+        }
+
+        insertJsonArrayAfter(settings, "/analysis/analyzer/search_ngram/filter", "lowercase", "extra_synonyms");
+        insertJsonArrayAfter(settings, "/analysis/analyzer/search_raw/filter", "lowercase", "extra_synonyms");
+        filters.put("extra_synonyms", new JSONObject().put("type", "synonym").put("synonyms", synonyms));
+
+        return this;
+    }
+
     /**
      * Create a new index using the current index settings.
      *
@@ -65,4 +119,35 @@ public void updateIndex(Client client, String indexName) {
         client.admin().indices().prepareUpdateSettings(PhotonIndex.NAME).setSettings(settings.toString(), XContentType.JSON).execute().actionGet();
         client.admin().indices().prepareOpen(PhotonIndex.NAME).execute().actionGet();
     }
+
+    /**
+     * Insert the given value into the array directly after the first element equal to positionString.
+     * Elements following the marker are shifted one position towards the end of the array.
+     * If the array or the position string is not found, throws a runtime error.
+     *
+     * @param obj JSON object to insert into.
+     * @param jsonPointer Path description of the array to insert into.
+     * @param positionString Marker string after which to insert.
+     * @param value Value to insert.
+     */
+    private void insertJsonArrayAfter(JSONObject obj, String jsonPointer, String positionString, String value) {
+        JSONArray array = (JSONArray) obj.optQuery(jsonPointer);
+        if (array == null) {
+            throw new RuntimeException("Analyser update: cannot find JSON array at " + jsonPointer);
+        }
+
+        for (int i = 0; i < array.length(); i++) {
+            if (positionString.equals(array.opt(i))) {
+                // JSONArray.put(int, Object) replaces the element at that index, so move the
+                // tail one slot to the right, last element first, before writing the new value.
+                for (int j = array.length(); j > i + 1; j--) {
+                    array.put(j, array.get(j - 1));
+                }
+                array.put(i + 1, value);
+                return;
+            }
+        }
+
+        throw new RuntimeException("Analyser update: cannot find position string " + positionString);
+    }
 }
diff --git a/src/main/java/de/komoot/photon/elasticsearch/Server.java b/src/main/java/de/komoot/photon/elasticsearch/Server.java
index 9cdf45474..0919cad17 100644
--- a/src/main/java/de/komoot/photon/elasticsearch/Server.java
+++ b/src/main/java/de/komoot/photon/elasticsearch/Server.java
@@ -178,14 +178,14 @@ public DatabaseProperties recreateIndex(String[] languages) throws IOException {
         return dbProperties;
     }
 
-    public void updateIndexSettings() {
+    public void updateIndexSettings(String synonymFile) throws IOException {
         // Load the settings from the database to make sure it is at the right
         // version. If the version is wrong, we should not be messing with the
         // index.
         DatabaseProperties dbProperties = new DatabaseProperties();
         dbProperties.loadFromDatabase(getClient());
 
-        loadIndexSettings().updateIndex(getClient(), PhotonIndex.NAME);
+        loadIndexSettings().setSynonyms(synonymFile).updateIndex(getClient(), PhotonIndex.NAME);
 
         // Sanity check: legacy databases don't save the languages, so there is no way to update
         // the mappings consistently.