Skip to content

Commit

Permalink
added unit test, added feature
Browse files Browse the repository at this point in the history
rcongiu committed Feb 20, 2014

Verified

This commit was signed with the committer’s verified signature.
snyk-bot Snyk bot
1 parent bade2b8 commit 3faf637
Showing 4 changed files with 71 additions and 2 deletions.
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -75,6 +75,31 @@ select languages[0] from json_nested_test; -- result: German
select religions['catholic'][0] from json_nested_test; -- result: 10
```

### SUPPORT FOR ARRAYS
JSON arrays are also supported as rows. In that case the SerDe expects
the array elements to be in the same order as the Hive columns, just as
you'd see in the regular text/csv SerDes.
For instance, if you do
```sql
CREATE TABLE people ( name string, age int)
```
your data should look like
```javascript
["John", 26 ]
["Mary", 23 ]
```
Arrays can still be nested, so you could have
```sql
CREATE TABLE complex_array (
name string, address struct<street:string,city:string>) ...
-- data:
["John", { "street":"10 green street", "city":"Paris" } ]
```


### MALFORMED DATA
The default behavior on malformed data is throwing an exception.
11 changes: 9 additions & 2 deletions src/main/java/org/openx/data/jsonserde/JsonSerDe.java
Original file line number Diff line number Diff line change
@@ -157,11 +157,15 @@ public Object deserialize(Writable w) throws SerDeException {
deserializedDataSize = rowText.getBytes().length;

// Try parsing row into JSON object
JSONObject jObj = null;
Object jObj = null;


try {
jObj = new JSONObject(rowText.toString()) {
String txt = rowText.toString().trim();

if(txt.startsWith("{")) {

jObj = new JSONObject(txt) {

/**
* In Hive column names are case insensitive, so lower-case all
@@ -203,6 +207,9 @@ public JSONObject put(String key, Object value) throws JSONException {
return super.put(key.toLowerCase(), value);
}
};
} else if (txt.startsWith("[")){
jObj = new JSONArray(txt);
}
} catch (JSONException e) {
// If row is not a JSON object, make the whole row NULL
onMalformedJson("Row is not a valid JSON Object - JSONException: "
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.openx.data.jsonserde.json.JSONArray;
import org.openx.data.jsonserde.json.JSONException;
import org.openx.data.jsonserde.json.JSONObject;

@@ -59,6 +60,8 @@ public Object getStructFieldData(Object data, StructField fieldRef) {
} if (data instanceof List) {
// somehow we have the object parsed already
return getStructFieldDataFromList((List) data, fieldRef );
} else if (data instanceof JSONArray) {
return getStructFieldDataFromList(((JSONArray) data).getAsArrayList(), fieldRef );
} else {
throw new Error("Data is not JSONObject but " + data.getClass().getCanonicalName() +
" with value " + data.toString()) ;
@@ -80,6 +83,8 @@ public Object getStructFieldDataFromList(List data, StructField fieldRef ) {
}
}



public Object getStructFieldDataFromJsonObject(JSONObject data, StructField fieldRef ) {
if (data == null) {
return null;
32 changes: 32 additions & 0 deletions src/test/java/org/openx/data/jsonserde/JsonSerDeTest.java
Original file line number Diff line number Diff line change
@@ -100,6 +100,38 @@ public void initialize2(JsonSerDe instance) throws Exception {

/**
 * Test of deserialize method, of class JsonSerDe, when the row is a JSON
 * array rather than a JSON object. The array elements are expected to be
 * ordered like the table columns.
 * Expects the columns "one,two,three,four" with types
 * "boolean,float,array&lt;string&gt;,string".
 */
@Test
public void testDeserializeArray() throws Exception {
    JsonSerDe instance = new JsonSerDe();
    initialize(instance);

    Writable w = new Text("[true,19.5, [\"red\",\"yellow\",\"orange\"],\"poop\"]");

    // A row that starts with '[' must come back as a JSONArray, not a JSONObject.
    Object result = instance.deserialize(w);
    assertTrue(result instanceof JSONArray);

    StructObjectInspector soi = (StructObjectInspector) instance.getObjectInspector();

    // Column "one" is read from the first array element.
    assertEquals(Boolean.TRUE, soi.getStructFieldData(result, soi.getStructFieldRef("one")));

    // Column "two" is read through its own field inspector. 19.5 is exactly
    // representable in binary floating point, so a zero delta is safe, and
    // assertEquals reports the actual value on failure (assertTrue(a == b) does not).
    JavaStringFloatObjectInspector jsfOi =
            (JavaStringFloatObjectInspector) soi.getStructFieldRef("two").getFieldObjectInspector();
    assertEquals(19.5, jsfOi.get(soi.getStructFieldData(result, soi.getStructFieldRef("two"))), 0.0);

    // Column "three" is a nested array and is surfaced as a JSONArray.
    Object ar = soi.getStructFieldData(result, soi.getStructFieldRef("three"));
    assertTrue(ar instanceof JSONArray);

    JSONArray jar = (JSONArray) ar;
    assertTrue(jar.get(0) instanceof String);
    assertEquals("red", jar.get(0));
}

/**
* Test of deserialize method, of class JsonSerDe.
*/
@Test
public void testDeserialize() throws Exception {

0 comments on commit 3faf637

Please sign in to comment.