Skip to content

Commit

Permalink
Add ignore_keywords flag to word delimiter graph (#5121)
Browse files Browse the repository at this point in the history
This introduces a new `ignore_keywords` property on the word delimiter graph token filter.
When set to true, the filter skips tokens with a keyword attribute of true.

It relates to the change made in elastic/elasticsearch#59563.
  • Loading branch information
stevejgordon authored and github-actions[bot] committed Nov 26, 2020
1 parent 3113731 commit 19d4e0a
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ public interface IWordDelimiterGraphTokenFilter : ITokenFilter
[JsonFormatter(typeof(NullableStringBooleanFormatter))]
bool? GenerateWordParts { get; set; }

/// <summary>
/// If true, the filter skips tokens with a keyword attribute of true.
/// Optional: when not configured, this defaults to false in Elasticsearch.
/// </summary>
/// <remarks>Maps to the <c>ignore_keywords</c> setting of the token filter.</remarks>
[DataMember(Name = "ignore_keywords")]
[JsonFormatter(typeof(NullableStringBooleanFormatter))]
bool? IgnoreKeywords { get; set; }

/// <summary>
/// If true includes original words in subwords: "500-42" ⇒ "500-42" "500" "42". Defaults to false.
/// </summary>
Expand Down Expand Up @@ -133,6 +140,9 @@ public WordDelimiterGraphTokenFilter() : base("word_delimiter_graph") { }
/// <inheritdoc />
public bool? GenerateWordParts { get; set; }

/// <inheritdoc />
public bool? IgnoreKeywords { get; set; }

/// <inheritdoc />
public bool? PreserveOriginal { get; set; }

Expand Down Expand Up @@ -169,8 +179,8 @@ public class WordDelimiterGraphTokenFilterDescriptor
bool? IWordDelimiterGraphTokenFilter.CatenateWords { get; set; }
bool? IWordDelimiterGraphTokenFilter.GenerateNumberParts { get; set; }
bool? IWordDelimiterGraphTokenFilter.GenerateWordParts { get; set; }
bool? IWordDelimiterGraphTokenFilter.IgnoreKeywords { get; set; }
bool? IWordDelimiterGraphTokenFilter.PreserveOriginal { get; set; }

IEnumerable<string> IWordDelimiterGraphTokenFilter.ProtectedWords { get; set; }
string IWordDelimiterGraphTokenFilter.ProtectedWordsPath { get; set; }
bool? IWordDelimiterGraphTokenFilter.SplitOnCaseChange { get; set; }
Expand All @@ -187,6 +197,14 @@ public WordDelimiterGraphTokenFilterDescriptor GenerateWordParts(bool? generateW
public WordDelimiterGraphTokenFilterDescriptor GenerateNumberParts(bool? generateNumberParts = true) =>
Assign(generateNumberParts, (a, v) => a.GenerateNumberParts = v);

/// <summary>
/// Sets whether the filter skips tokens with a keyword attribute of true.
/// </summary>
/// <remarks>Optional. When left unconfigured, this defaults to false in Elasticsearch.</remarks>
/// <param name="ignoreKeywords">When true, the filter skips tokens with a keyword attribute of true.</param>
/// <returns>A <see cref="WordDelimiterGraphTokenFilterDescriptor" /> so that further calls can be chained.</returns>
public WordDelimiterGraphTokenFilterDescriptor IgnoreKeywords(bool? ignoreKeywords = true)
{
    return Assign(ignoreKeywords, (filter, value) => filter.IgnoreKeywords = value);
}

/// <inheritdoc />
public WordDelimiterGraphTokenFilterDescriptor CatenateWords(bool? catenateWords = true) => Assign(catenateWords, (a, v) => a.CatenateWords = v);

Expand Down
3 changes: 3 additions & 0 deletions tests/Tests/Analysis/TokenFilters/TokenFilterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -923,6 +923,7 @@ public class WordDelimiterGraphTests : TokenFilterAssertionBase<WordDelimiterGra
.CatenateWords()
.GenerateNumberParts()
.GenerateWordParts()
.IgnoreKeywords()
.PreserveOriginal()
.ProtectedWords("x", "y", "z")
.SplitOnCaseChange()
Expand All @@ -939,6 +940,7 @@ public class WordDelimiterGraphTests : TokenFilterAssertionBase<WordDelimiterGra
CatenateWords = true,
GenerateNumberParts = true,
GenerateWordParts = true,
IgnoreKeywords = true,
PreserveOriginal = true,
ProtectedWords = new[] { "x", "y", "z" },
SplitOnCaseChange = true,
Expand All @@ -952,6 +954,7 @@ public class WordDelimiterGraphTests : TokenFilterAssertionBase<WordDelimiterGra
adjust_offsets = true,
generate_word_parts = true,
generate_number_parts = true,
ignore_keywords = true,
catenate_words = true,
catenate_numbers = true,
catenate_all = true,
Expand Down
3 changes: 3 additions & 0 deletions tests/Tests/Analysis/TokenFilters/TokenFilterUsageTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ public class TokenFilterUsageTests : PromiseUsageTestBase<IIndexSettings, IndexS
.CatenateWords()
.GenerateNumberParts()
.GenerateWordParts()
.IgnoreKeywords()
.PreserveOriginal()
.ProtectedWords("x", "y", "z")
.SplitOnCaseChange()
Expand Down Expand Up @@ -301,6 +302,7 @@ public class TokenFilterUsageTests : PromiseUsageTestBase<IIndexSettings, IndexS
CatenateWords = true,
GenerateNumberParts = true,
GenerateWordParts = true,
IgnoreKeywords = true,
PreserveOriginal = true,
ProtectedWords = new[] { "x", "y", "z" },
SplitOnCaseChange = true,
Expand Down Expand Up @@ -624,6 +626,7 @@ public class TokenFilterUsageTests : PromiseUsageTestBase<IIndexSettings, IndexS
type = "word_delimiter_graph",
generate_word_parts = true,
generate_number_parts = true,
ignore_keywords = true,
catenate_words = true,
catenate_numbers = true,
catenate_all = true,
Expand Down

0 comments on commit 19d4e0a

Please sign in to comment.