-
Notifications
You must be signed in to change notification settings - Fork 25k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Source filtering should treat dots in field names as sub objects. #20736
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,12 +19,17 @@ | |
|
||
package org.elasticsearch.common.xcontent.support; | ||
|
||
import org.apache.lucene.util.automaton.Automata; | ||
import org.apache.lucene.util.automaton.Automaton; | ||
import org.apache.lucene.util.automaton.CharacterRunAutomaton; | ||
import org.apache.lucene.util.automaton.Operations; | ||
import org.elasticsearch.ElasticsearchParseException; | ||
import org.elasticsearch.common.Strings; | ||
import org.elasticsearch.common.regex.Regex; | ||
import org.elasticsearch.common.unit.TimeValue; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
@@ -134,115 +139,171 @@ private static Object extractValue(String[] pathElements, int index, Object curr | |
return null; | ||
} | ||
|
||
public static Map<String, Object> filter(Map<String, Object> map, String[] includes, String[] excludes) { | ||
Map<String, Object> result = new HashMap<>(); | ||
filter(map, result, includes == null ? Strings.EMPTY_ARRAY : includes, excludes == null ? Strings.EMPTY_ARRAY : excludes, new StringBuilder()); | ||
return result; | ||
/** | ||
* Only keep properties in {@code map} that match the {@code includes} but | ||
* not the {@code excludes}. An empty list of includes is interpreted as a | ||
* wildcard while an empty list of excludes does not match anything. | ||
* | ||
* If a property matches both an include and an exclude, then the exclude | ||
* wins. | ||
* | ||
* If an object matches, then any of its sub properties are automatically | ||
* considered as matching as well, both for includes and excludes. | ||
* | ||
* Dots in field names are treated as sub objects. So for instance if a | ||
* document contains {@code a.b} as a property and {@code a} is an include, | ||
* then {@code a.b} will be kept in the filtered map. | ||
*/ | ||
public static Map<String, Object> filter(Map<String, ?> map, String[] includes, String[] excludes) { | ||
CharacterRunAutomaton matchAllAutomaton = new CharacterRunAutomaton(Automata.makeAnyString()); | ||
|
||
CharacterRunAutomaton include; | ||
if (includes == null || includes.length == 0) { | ||
include = matchAllAutomaton; | ||
} else { | ||
Automaton includeA = Regex.simpleMatchToAutomaton(includes); | ||
includeA = makeMatchDotsInFieldNames(includeA); | ||
include = new CharacterRunAutomaton(includeA); | ||
} | ||
|
||
Automaton excludeA; | ||
if (excludes == null || excludes.length == 0) { | ||
excludeA = Automata.makeEmpty(); | ||
} else { | ||
excludeA = Regex.simpleMatchToAutomaton(excludes); | ||
excludeA = makeMatchDotsInFieldNames(excludeA); | ||
} | ||
CharacterRunAutomaton exclude = new CharacterRunAutomaton(excludeA); | ||
|
||
// NOTE: We cannot use Operations.minus because of the special case that | ||
// we want all sub properties to match as soon as an object matches | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if you could use |
||
|
||
return filter(map, | ||
include, include.getInitialState(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The initial state is always 0 w/ Lucene's automata ... I'll fix Lucene to make this clearer :) |
||
exclude, exclude.getInitialState(), | ||
matchAllAutomaton); | ||
} | ||
|
||
private static void filter(Map<String, Object> map, Map<String, Object> into, String[] includes, String[] excludes, StringBuilder sb) { | ||
if (includes.length == 0 && excludes.length == 0) { | ||
into.putAll(map); | ||
return; | ||
/** Make matches on objects also match dots in field names. | ||
* For instance, if the original simple regex is `foo`, this will translate | ||
* it into `foo` OR `foo.*`. */ | ||
private static Automaton makeMatchDotsInFieldNames(Automaton automaton) { | ||
return Operations.union( | ||
automaton, | ||
Operations.concatenate(Arrays.asList(automaton, Automata.makeChar('.'), Automata.makeAnyString()))); | ||
} | ||
|
||
private static int step(CharacterRunAutomaton automaton, String key, int state) { | ||
for (int i = 0; state != -1 && i < key.length(); ++i) { | ||
state = automaton.step(state, key.charAt(i)); | ||
} | ||
for (Map.Entry<String, Object> entry : map.entrySet()) { | ||
return state; | ||
} | ||
|
||
private static Map<String, Object> filter(Map<String, ?> map, | ||
CharacterRunAutomaton includeAutomaton, int initialIncludeState, | ||
CharacterRunAutomaton excludeAutomaton, int initialExcludeState, | ||
CharacterRunAutomaton matchAllAutomaton) { | ||
Map<String, Object> filtered = new HashMap<>(); | ||
for (Map.Entry<String, ?> entry : map.entrySet()) { | ||
String key = entry.getKey(); | ||
int mark = sb.length(); | ||
if (sb.length() > 0) { | ||
sb.append('.'); | ||
|
||
int includeState = step(includeAutomaton, key, initialIncludeState); | ||
if (includeState == -1) { | ||
continue; | ||
} | ||
sb.append(key); | ||
String path = sb.toString(); | ||
|
||
if (Regex.simpleMatch(excludes, path)) { | ||
sb.setLength(mark); | ||
int excludeState = step(excludeAutomaton, key, initialExcludeState); | ||
if (excludeState != -1 && excludeAutomaton.isAccept(excludeState)) { | ||
continue; | ||
} | ||
|
||
boolean exactIncludeMatch = false; // true if the current position was specifically mentioned | ||
boolean pathIsPrefixOfAnInclude = false; // true if potentially a sub scope can be included | ||
if (includes.length == 0) { | ||
// implied match anything | ||
exactIncludeMatch = true; | ||
} else { | ||
for (String include : includes) { | ||
// check for prefix matches as well to see if we need to zero in, something like: obj1.arr1.* or *.field | ||
// note, this does not work well with middle matches, like obj1.*.obj3 | ||
if (include.charAt(0) == '*') { | ||
if (Regex.simpleMatch(include, path)) { | ||
exactIncludeMatch = true; | ||
break; | ||
} | ||
pathIsPrefixOfAnInclude = true; | ||
continue; | ||
} | ||
if (include.startsWith(path)) { | ||
if (include.length() == path.length()) { | ||
exactIncludeMatch = true; | ||
break; | ||
} else if (include.length() > path.length() && include.charAt(path.length()) == '.') { | ||
// include might may match deeper paths. Dive deeper. | ||
pathIsPrefixOfAnInclude = true; | ||
continue; | ||
} | ||
} | ||
if (Regex.simpleMatch(include, path)) { | ||
exactIncludeMatch = true; | ||
break; | ||
} | ||
Object value = entry.getValue(); | ||
|
||
CharacterRunAutomaton subIncludeAutomaton = includeAutomaton; | ||
int subIncludeState = includeState; | ||
if (includeAutomaton.isAccept(includeState)) { | ||
if (excludeState == -1 || excludeAutomaton.step(excludeState, '.') == -1) { | ||
// the exclude has no chances to match inner properties | ||
filtered.put(key, value); | ||
continue; | ||
} else { | ||
// the object matched, so consider that the include matches every inner property | ||
// we only care about excludes now | ||
subIncludeAutomaton = matchAllAutomaton; | ||
subIncludeState = includeAutomaton.getInitialState(); | ||
} | ||
} | ||
|
||
if (!(pathIsPrefixOfAnInclude || exactIncludeMatch)) { | ||
// skip subkeys, not interesting. | ||
sb.setLength(mark); | ||
continue; | ||
} | ||
if (value instanceof Map) { | ||
|
||
subIncludeState = subIncludeAutomaton.step(subIncludeState, '.'); | ||
if (subIncludeState == -1) { | ||
continue; | ||
} | ||
if (excludeState != -1) { | ||
excludeState = excludeAutomaton.step(excludeState, '.'); | ||
} | ||
|
||
Map<String, Object> valueAsMap = (Map<String, Object>) value; | ||
Map<String, Object> filteredValue = filter(valueAsMap, | ||
subIncludeAutomaton, subIncludeState, excludeAutomaton, excludeState, matchAllAutomaton); | ||
if (includeAutomaton.isAccept(includeState) || filteredValue.isEmpty() == false) { | ||
filtered.put(key, filteredValue); | ||
} | ||
|
||
} else if (value instanceof Iterable) { | ||
|
||
List<Object> filteredValue = filter((Iterable<?>) value, | ||
subIncludeAutomaton, subIncludeState, excludeAutomaton, excludeState, matchAllAutomaton); | ||
if (includeAutomaton.isAccept(includeState) || filteredValue.isEmpty() == false) { | ||
filtered.put(key, filteredValue); | ||
} | ||
|
||
} else { | ||
|
||
if (entry.getValue() instanceof Map) { | ||
Map<String, Object> innerInto = new HashMap<>(); | ||
// if we had an exact match, we want give deeper excludes their chance | ||
filter((Map<String, Object>) entry.getValue(), innerInto, exactIncludeMatch ? Strings.EMPTY_ARRAY : includes, excludes, sb); | ||
if (exactIncludeMatch || !innerInto.isEmpty()) { | ||
into.put(entry.getKey(), innerInto); | ||
// leaf property | ||
if (includeAutomaton.isAccept(includeState) | ||
&& (excludeState == -1 || excludeAutomaton.isAccept(excludeState) == false)) { | ||
filtered.put(key, value); | ||
} | ||
} else if (entry.getValue() instanceof List) { | ||
List<Object> list = (List<Object>) entry.getValue(); | ||
List<Object> innerInto = new ArrayList<>(list.size()); | ||
// if we had an exact match, we want give deeper excludes their chance | ||
filter(list, innerInto, exactIncludeMatch ? Strings.EMPTY_ARRAY : includes, excludes, sb); | ||
into.put(entry.getKey(), innerInto); | ||
} else if (exactIncludeMatch) { | ||
into.put(entry.getKey(), entry.getValue()); | ||
|
||
} | ||
sb.setLength(mark); | ||
} | ||
} | ||
|
||
private static void filter(List<Object> from, List<Object> to, String[] includes, String[] excludes, StringBuilder sb) { | ||
if (includes.length == 0 && excludes.length == 0) { | ||
to.addAll(from); | ||
return; | ||
} | ||
return filtered; | ||
} | ||
|
||
for (Object o : from) { | ||
if (o instanceof Map) { | ||
Map<String, Object> innerInto = new HashMap<>(); | ||
filter((Map<String, Object>) o, innerInto, includes, excludes, sb); | ||
if (!innerInto.isEmpty()) { | ||
to.add(innerInto); | ||
private static List<Object> filter(Iterable<?> iterable, | ||
CharacterRunAutomaton includeAutomaton, int initialIncludeState, | ||
CharacterRunAutomaton excludeAutomaton, int initialExcludeState, | ||
CharacterRunAutomaton matchAllAutomaton) { | ||
List<Object> filtered = new ArrayList<>(); | ||
for (Object value : iterable) { | ||
if (value instanceof Map) { | ||
int includeState = includeAutomaton.step(initialIncludeState, '.'); | ||
int excludeState = initialExcludeState; | ||
if (excludeState != -1) { | ||
excludeState = excludeAutomaton.step(excludeState, '.'); | ||
} | ||
Map<String, Object> filteredValue = filter((Map<String, ?>)value, | ||
includeAutomaton, includeState, excludeAutomaton, excludeState, matchAllAutomaton); | ||
if (filteredValue.isEmpty() == false) { | ||
filtered.add(filteredValue); | ||
} | ||
} else if (o instanceof List) { | ||
List<Object> innerInto = new ArrayList<>(); | ||
filter((List<Object>) o, innerInto, includes, excludes, sb); | ||
if (!innerInto.isEmpty()) { | ||
to.add(innerInto); | ||
} else if (value instanceof Iterable) { | ||
List<Object> filteredValue = filter((Iterable<?>) value, | ||
includeAutomaton, initialIncludeState, excludeAutomaton, initialExcludeState, matchAllAutomaton); | ||
if (filteredValue.isEmpty() == false) { | ||
filtered.add(filteredValue); | ||
} | ||
} else { | ||
to.add(o); | ||
// TODO: we have tests relying on this behavior on arrays even | ||
// if the path does not match, but this looks like a bug? | ||
filtered.add(value); | ||
} | ||
} | ||
return filtered; | ||
} | ||
|
||
public static boolean isObject(Object node) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -551,4 +551,25 @@ public void testNestedMapInList() throws IOException { | |
parser.list()); | ||
} | ||
} | ||
|
||
public void testDotsInFieldNames() { | ||
Map<String, Object> map = new HashMap<>(); | ||
map.put("a.b", 2); | ||
Map<String, Object> sub = new HashMap<>(); | ||
sub.put("c", 3); | ||
map.put("a", sub); | ||
map.put("d", 5); | ||
|
||
// dots in field names in includes | ||
Map<String, Object> filtered = XContentMapValues.filter(map, new String[] {"a"}, new String[0]); | ||
Map<String, Object> expected = new HashMap<>(map); | ||
expected.remove("d"); | ||
assertEquals(expected, filtered); | ||
|
||
// dots in field names in excludes | ||
filtered = XContentMapValues.filter(map, new String[0], new String[] {"a"}); | ||
expected = new HashMap<>(map); | ||
expected.keySet().retainAll(Collections.singleton("d")); | ||
assertEquals(expected, filtered); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add a test with strings longer than one char? Sadly, you probably also want on that has a character that is two java |
||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't this invoked once per returned hit? We should maybe (separate change) compile the automaton/a up front, and re-use that across all the hits?