Skip to content

Commit

Permalink
Add expand.rst documentation and further updates to tests/implement…
Browse files Browse the repository at this point in the history
…ation.

Signed-off-by: currantw <[email protected]>
  • Loading branch information
currantw committed Feb 5, 2025
1 parent 4b111c0 commit c9bd3d1
Show file tree
Hide file tree
Showing 10 changed files with 124 additions and 65 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

package org.opensearch.sql.data.model;

import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import lombok.RequiredArgsConstructor;
import lombok.ToString;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.expression.ReferenceExpression;
import org.opensearch.sql.utils.PathUtils;

Expand Down Expand Up @@ -64,20 +63,12 @@ private static List<ExprValue> expandExprValueAtPath(ExprValue rootExprValue, St
}

ExprValue targetExprValue = PathUtils.getExprValueAtPath(rootExprValue, path);
List<ExprValue> expandedExprValues = expandExprValue(targetExprValue);
if (!targetExprValue.type().equals(ARRAY)) {
return new LinkedList<>(Collections.singletonList(rootExprValue));
}

return expandedExprValues.stream()
return targetExprValue.collectionValue().stream()
.map(v -> PathUtils.setExprValueAtPath(rootExprValue, path, v))
.collect(Collectors.toCollection(LinkedList::new));
}

/**
 * Computes the list of values that a single {@link ExprValue} expands into.
 *
 * <p>A value whose type is not {@code ARRAY} is returned unchanged as a one-element list. An
 * {@code ARRAY} value yields its collection elements, except that an empty array yields a
 * one-element list holding the null value.
 *
 * @param exprValue the value to expand
 * @return the expanded values; contains at least one element unless the array itself is non-empty
 *     and is returned as-is
 */
private static List<ExprValue> expandExprValue(ExprValue exprValue) {
  // Guard clause: anything that is not an array expands to itself.
  if (!exprValue.type().equals(ARRAY)) {
    return List.of(exprValue);
  }

  List<ExprValue> elements = exprValue.collectionValue();
  if (elements.isEmpty()) {
    // An empty array is represented by a single null value.
    return List.of(ExprValueUtils.nullValue());
  }
  return elements;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,19 +61,19 @@ void testArray() {

@Test
void testArrayEmpty() {
ExprValue inputRow =
inputRow =
ExprValueUtils.tupleValue(Map.of("array_empty", ExprValueUtils.collectionValue(List.of())));
mockInput(inputRow);

actualRows = execute(expand(inputPlan, DSL.ref("array_empty", ARRAY)));
expectedRows = List.of(ExprValueUtils.tupleValue(Map.of("array_empty", nullExprValue)));
expectedRows = List.of();

assertEquals(expectedRows, actualRows);
}

@Test
void testArrayNested() {
ExprValue inputRow =
inputRow =
ExprValueUtils.tupleValue(
Map.of(
"struct",
Expand All @@ -92,18 +92,18 @@ void testArrayNested() {

@Test
void testScalar() {
ExprValue inputValue = ExprValueUtils.tupleValue(Map.of("scalar", stringValue));
mockInput(inputValue);
inputRow = ExprValueUtils.tupleValue(Map.of("scalar", stringValue));
mockInput(inputRow);

actualRows = execute(expand(inputPlan, DSL.ref("scalar", ARRAY)));
expectedRows = List.of(inputValue);
expectedRows = List.of(inputRow);

assertEquals(expectedRows, actualRows);
}

@Test
void testScalarNull() {
ExprValue inputRow = ExprValueUtils.tupleValue(Map.of("scalar_null", nullExprValue));
inputRow = ExprValueUtils.tupleValue(Map.of("scalar_null", nullExprValue));
mockInput(inputRow);

actualRows = execute(expand(inputPlan, DSL.ref("scalar_null", ARRAY)));
Expand Down Expand Up @@ -136,10 +136,10 @@ void testScalarMissing() {

@Test
void testScalarNested() {
ExprValue rowInput =
inputRow =
ExprValueUtils.tupleValue(
Map.of("struct", ExprValueUtils.tupleValue(Map.of("scalar", stringValue))));
mockInput(rowInput);
mockInput(inputRow);

actualRows = execute(expand(inputPlan, DSL.ref("struct.scalar", ARRAY)));
expectedRows =
Expand All @@ -160,11 +160,11 @@ void testPathUnknown() {

@Test
void testAncestorNull() {
ExprValue rowInput = ExprValueUtils.tupleValue(Map.of("struct_null", nullExprValue));
mockInput(rowInput);
inputRow = ExprValueUtils.tupleValue(Map.of("struct_null", nullExprValue));
mockInput(inputRow);

actualRows = execute(expand(inputPlan, DSL.ref("struct_null.unreachable", ARRAY)));
expectedRows = List.of(rowInput);
expectedRows = List.of(inputRow);

assertEquals(expectedRows, actualRows);
}
Expand Down
3 changes: 2 additions & 1 deletion docs/user/dql/metadata.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Example 1: Show All Indices Information
SQL query::

os> SHOW TABLES LIKE '%'
fetched rows / total rows = 11/11
fetched rows / total rows = 12/12
+----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------+
| TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION |
|----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------|
Expand All @@ -44,6 +44,7 @@ SQL query::
| docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | apache | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | books | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | expand | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | json_test | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null |
Expand Down
90 changes: 88 additions & 2 deletions docs/user/ppl/cmd/expand.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,91 @@
=============
flatten
expand
=============

TODO #3016: Add documentation for expand command
.. rubric:: Table of contents

.. contents::
:local:
:depth: 2

Description
============

The ``expand`` command expands a field that contains an array of values to produce a separate row for each value in the
array. If the field does not contain an array, the row is not modified.

Syntax
============

``expand <field>``

* ``field``: reference to the field to expand.

Example 1: Expand a field
=========================

PPL query::

os> source=expand | expand team | fields city, team.name
fetched rows / total rows = 7/7
+--------------+-------------------+
| city | team.name |
|--------------+-------------------|
| Seattle | Seattle Seahawks |
| Seattle | Seattle Kraken |
| Vancouver | Vancouver Canucks |
| Vancouver | BC Lions |
| San Antonio | San Antonio Spurs |
| Null Team | null |
| Missing Team | null |
+--------------+-------------------+

Example 2: Expand a nested field
=================================

PPL query::

os> source=expand | where city = 'San Antonio' | expand team.title | fields team.name, team.title
fetched rows / total rows = 5/5
+-------------------+------------+
| team.name | team.title |
|-------------------+------------|
| San Antonio Spurs | 1999 |
| San Antonio Spurs | 2003 |
| San Antonio Spurs | 2005 |
| San Antonio Spurs | 2007 |
| San Antonio Spurs | 2014 |
+-------------------+------------+

Example 3: Expand multiple fields
==================================

PPL query::

os> source=expand | expand team | expand team.title | fields team.name, team.title
fetched rows / total rows = 16/16
+-------------------+------------+
| team.name | team.title |
|-------------------+------------|
| Seattle Seahawks | 2014 |
| Seattle Kraken | null |
| Vancouver Canucks | null |
| BC Lions | 1964 |
| BC Lions | 1985 |
| BC Lions | 1994 |
| BC Lions | 2000 |
| BC Lions | 2006 |
| BC Lions | 2011 |
| San Antonio Spurs | 1999 |
| San Antonio Spurs | 2003 |
| San Antonio Spurs | 2005 |
| San Antonio Spurs | 2007 |
| San Antonio Spurs | 2014 |
| null | null |
| null | null |
+-------------------+------------+

Example 4: Expand and flatten
=============================

TODO #3016: Test once flatten merged.
9 changes: 5 additions & 4 deletions doctest/test_data/expand.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{"name": "Seattle", "location": { "state": "Washington", "country": "United States", "coordinates": {"latitude": 47.6061, "longitude": -122.3328}}}
{"name": "Vancouver", "location": { "province": "British Columbia", "country": "Canada", "coordinates": {"latitude": 49.2827, "longitude": -123.1207}}}
{"name": "Null Location", "location": null}
{"name": "Null Coordinates", "location": { "state": "Victoria", "country": "Australia", "coordinates": null}}
{"city": "Seattle", "team":[{"name": "Seattle Seahawks", "title": 2014}, {"name": "Seattle Kraken", "title": null}]}
{"city": "Vancouver", "team":[{"name": "Vancouver Canucks", "title": null}, {"name": "BC Lions", "title": [1964, 1985, 1994, 2000, 2006, 2011]}]}
{"city": "San Antonio", "team": {"name": "San Antonio Spurs", "title": [1999, 2003, 2005, 2007, 2014]}}
{"city": "Null Team", "team": null}
{"city": "Missing Team"}
4 changes: 2 additions & 2 deletions doctest/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def set_up_test_indices(test):
load_file("datasources.json", index_name=DATASOURCES)
load_file("weblogs.json", index_name=WEBLOGS)
load_file("json_test.json", index_name=JSON_TEST)
load_file("expand.json", index_name=CITIES)
load_file("expand.json", index_name=EXPAND)


def load_file(filename, index_name):
Expand Down Expand Up @@ -155,7 +155,7 @@ def set_up(test):

def tear_down(test):
# drop leftover tables after each test
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS, JSON_TEST, CITIES], ignore_unavailable=True)
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS, JSON_TEST, EXPAND], ignore_unavailable=True)

docsuite = partial(doctest.DocFileSuite,
tearDown=tear_down,
Expand Down
27 changes: 6 additions & 21 deletions doctest/test_mapping/expand.json
Original file line number Diff line number Diff line change
@@ -1,31 +1,16 @@
{
"mappings": {
"properties": {
"name": {
"city": {
"type": "keyword"
},
"location": {
"type": "object",
"team": {
"properties": {
"state": {
"type": "keyword"
"name": {
"type":"keyword"
},
"province": {
"type": "keyword"
},
"country": {
"type": "keyword"
},
"coordinates": {
"type": "object",
"properties": {
"latitude": {
"type": "double"
},
"longitude": {
"type": "double"
}
}
"title": {
"type": "integer"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,8 @@ public void testBasic() throws IOException {
rows("Vancouver", "Vancouver Canucks"),
rows("Vancouver", "BC Lions"),
rows("San Antonio", "San Antonio Spurs"),
rows("Empty Sports Team", null),
rows("Null Sports Team", null),
rows("Missing Sports Team", null));
rows("Null Team", null),
rows("Missing Team", null));
}

@Test
Expand Down Expand Up @@ -87,7 +86,6 @@ public void testMultiple() throws IOException {
rows("San Antonio Spurs", 2007),
rows("San Antonio Spurs", 2014),
rows(null, null),
rows(null, null),
rows(null, null));
}

Expand Down
8 changes: 3 additions & 5 deletions integ-test/src/test/resources/expand.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
{"index":{"_id":"1"}}
{"city": "Seattle", "team":[{"name": "Seattle Seahawks", "title": 2014}, {"name": "Seattle Kraken", "title": []}]}
{"city": "Seattle", "team":[{"name": "Seattle Seahawks", "title": 2014}, {"name": "Seattle Kraken", "title": null}]}
{"index":{"_id":"2"}}
{"city": "Vancouver", "team":[{"name": "Vancouver Canucks", "title": null}, {"name": "BC Lions", "title": [1964, 1985, 1994, 2000, 2006, 2011]}]}
{"index":{"_id":"3"}}
{"city": "San Antonio", "team": {"name": "San Antonio Spurs", "title": [1999, 2003, 2005, 2007, 2014]}}
{"index":{"_id":"4"}}
{"city": "Empty Sports Team", "team": []}
{"city": "Null Team", "team": null}
{"index":{"_id":"5"}}
{"city": "Null Sports Team", "team": null}
{"index":{"_id":"6"}}
{"city": "Missing Sports Team"}
{"city": "Missing Team"}

0 comments on commit c9bd3d1

Please sign in to comment.