Skip to content

Commit

Permalink
Regarding #82: the iterator now lets id_str pass through
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Grimmer committed May 19, 2016
1 parent e52a1ef commit b0254b6
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,11 @@ String reduceJson(String originalString) throws JSONException {

originalJson = new JSONObject(originalString);

// extract created_at

// extract id_str
reducedJson.put("id_str", originalJson.get("id_str"));

// extract created_at
reducedJson.put("created_at", originalJson.get("created_at"));

// extract text
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ public void testExtracor() throws JSONException {
String original2= "{\"created_at\":\"Wed Mar 30 15:05:51 +0000 2016\",\"id\":715193326698364928,\"id_str\":\"715193326698364928\",\"text\":\"@Harrison_Andy very dark #BatmanvSuperman\",\"truncated\":false,\"in_reply_to_status_id\":713860098372845568,\"in_reply_to_status_id_str\":\"713860098372845568\",\"in_reply_to_user_id\":203852692,\"in_reply_to_user_id_str\":\"203852692\",\"in_reply_to_screen_name\":\"Harrison_Andy\",\"user\":{\"id\":703963,\"id_str\":\"703963\",\"name\":\"Philip Oakley\",\"screen_name\":\"philoakley\",\"location\":\"Stafford, Staffordshire, UK.\",\"url\":\"http://philipoakley.org\",\"description\":\"Xero Technologist, Cloud CRM and Social Media. Always happy to talk and connect. Instagram - http://goo.gl/G0ovHF LinkedIn - http://goo.gl/Zr2492\",\"protected\":false,\"verified\":false,\"followers_count\":4860,\"friends_count\":4647,\"listed_count\":284,\"favourites_count\":3096,\"statuses_count\":9026,\"created_at\":\"Thu Jan 25 21:38:37 +0000 2007\",\"utc_offset\":3600,\"time_zone\":\"London\",\"geo_enabled\":true,\"lang\":\"en\",\"contributors_enabled\":false,\"is_translator\":false,\"profile_background_color\":\"9AE4E8\",\"profile_background_image_url\":\"http://pbs.twimg.com/profile_background_images/255980590/twitter_background.jpg\",\"profile_background_image_url_https\":\"https://pbs.twimg.com/profile_background_images/255980590/twitter_background.jpg\",\"profile_background_tile\":true,\"profile_link_color\":\"0000FF\",\"profile_sidebar_border_color\":\"87BC44\",\"profile_sidebar_fill_color\":\"E0FF92\",\"profile_text_color\":\"000000\",\"profile_use_background_image\":true,\"profile_image_url\":\"http://pbs.twimg.com/profile_images/532930093266378752/8wtQek5n_normal.png\",\"profile_image_url_https\":\"https://pbs.twimg.com/profile_images/532930093266378752/8wtQek5n_normal.png\",\"profile_banner_url\":\"https://pbs.twimg.com/profile_banners/703963/1395523718\",\"default_profile\":false,\"default_profile_image\":false,\"following\":null,\"f
ollow_request_sent\":null,\"notifications\":null},\"geo\":null,\"coordinates\":{\"type\":\"Point\",\"coordinates\":[29.21240342,41.0087062]},\"place\":null,\"contributors\":null,\"is_quote_status\":false,\"retweet_count\":0,\"favorite_count\":0,\"entities\":{\"hashtags\":[{\"text\":\"BatmanvSuperman\",\"indices\":[25,41]}],\"urls\":[],\"user_mentions\":[{\"screen_name\":\"Harrison_Andy\",\"name\":\"Andy Harrison\",\"id\":203852692,\"id_str\":\"203852692\",\"indices\":[0,14]}],\"symbols\":[]},\"favorited\":false,\"retweeted\":false,\"filter_level\":\"low\",\"lang\":\"en\",\"timestamp_ms\":\"1459350351391\"}";

// without location and without coordinates
String withoutBoth = "{\"created_at\":\"Wed Mar 30 15:05:51 +0000 2016\",\"text\":\"@Harrison_Andy very dark #BatmanvSuperman\",\"user\":{\"name\":\"Philip Oakley\",\"screen_name\":\"philoakley\",\"profile_image_url\":\"http:\\/\\/pbs.twimg.com\\/profile_images\\/532930093266378752\\/8wtQek5n_normal.png\"}}";
String withoutBoth = "{\"created_at\":\"Wed Mar 30 15:05:51 +0000 2016\",\"id_str\":\"715193326698364928\",\"text\":\"@Harrison_Andy very dark #BatmanvSuperman\",\"user\":{\"name\":\"Philip Oakley\",\"screen_name\":\"philoakley\",\"profile_image_url\":\"http:\\/\\/pbs.twimg.com\\/profile_images\\/532930093266378752\\/8wtQek5n_normal.png\"}}";

// id_str, created_at, text, user.name, user.screen_name, user.profile_image_url, coordinates, place.bounding_box.coordinates
String expected = "{\"created_at\":\"Wed Mar 30 15:05:51 +0000 2016\",\"text\":\"@Harrison_Andy very dark #BatmanvSuperman\",\"user\":{\"name\":\"Philip Oakley\",\"screen_name\":\"philoakley\",\"profile_image_url\":\"http:\\/\\/pbs.twimg.com\\/profile_images\\/532930093266378752\\/8wtQek5n_normal.png\"},\"coordinates\":null,\"place\":{\"bounding_box\":{\"coordinates\":[[[-2.164786,52.546974],[-2.164786,52.637542],[-2.048029,52.637542],[-2.048029,52.546974]]]}}}";
String expected = "{\"id_str\":\"715193326698364928\",\"created_at\":\"Wed Mar 30 15:05:51 +0000 2016\",\"text\":\"@Harrison_Andy very dark #BatmanvSuperman\",\"user\":{\"name\":\"Philip Oakley\",\"screen_name\":\"philoakley\",\"profile_image_url\":\"http:\\/\\/pbs.twimg.com\\/profile_images\\/532930093266378752\\/8wtQek5n_normal.png\"},\"coordinates\":null,\"place\":{\"bounding_box\":{\"coordinates\":[[[-2.164786,52.546974],[-2.164786,52.637542],[-2.048029,52.637542],[-2.048029,52.546974]]]}}}";

ExtractIterator iter = new ExtractIterator();
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,14 +176,15 @@ public void testReduceIterator() throws AccumuloSecurityException, AccumuloExcep
String value = kv.getValue().toString();
JSONObject json = new JSONObject(value);
// needed fields
assertTrue(json.has("id_str"));
assertTrue(json.has("created_at"));
assertTrue(json.has("text"));
assertTrue(json.has("user"));
assertTrue(json.has("coordinates"));
assertTrue(json.has("place"));

// test for some forbidden fields
assertFalse(json.has("id_str"));
assertFalse(json.has("id"));
assertFalse(json.has("entities"));
}
System.out.println("[testReduceIterator end]");
Expand Down

0 comments on commit b0254b6

Please sign in to comment.