apache · fjy · Dec 13, 2016 · Nov 15, 2016 · Dec 11, 2016 · Dec 11, 2016
diff --git a/docs/content/development/extensions-contrib/thrift.md b/docs/content/development/extensions-contrib/thrift.md
@@ -0,0 +1,104 @@
+---
+layout: doc_page
+---
+
+# Thrift
+
+To use this extension, make sure to [include](../../operations/including-extensions.html) `druid-thrift-extensions`.
+
+This extension enables Druid to ingest thrift compact data online (`ByteBuffer`) and offline (SequenceFile of type `<Writable, BytesWritable>` or LzoThriftBlock File).
+
+If you want to use a different version of thrift, change the dependency in the pom and compile the extension yourself.
+
+## Thrift Parser
+
+
+| Field       | Type        | Description                              | Required |
+| ----------- | ----------- | ---------------------------------------- | -------- |
+| type        | String      | This should say `thrift`                 | yes      |
+| parseSpec   | JSON Object | Specifies the timestamp and dimensions of the data. Should be a Json parseSpec. | yes      |
+| jarPath     | String      | Path of the thrift jar. If not provided, the parser will try to find the thrift class on the classpath. For batch ingestion, the thrift jar should be uploaded to HDFS first, and `jobProperties` should be configured with `"tmpjars":"/path/to/your/thrift.jar"` | no       |
+| thriftClass | String      | Fully qualified class name of the thrift class, e.g. `io.druid.data.input.thrift.Book` | yes      |
+
+- Realtime Ingestion (tranquility example)
+
+```json
+{
+  "dataSources": [{
+    "spec": {
+      "dataSchema": {
+        "dataSource": "book",
+        "granularitySpec": {          },
+        "parser": {
+          "type": "thrift",
+          "thriftClass": "io.druid.data.input.thrift.Book",
+          "protocol": "compact",
+          "parseSpec": {
+            "format": "json",
+            ...
+          }
+        },
+        "metricsSpec": [...]
+      },
+      "tuningConfig": {...}
+    },
+    "properties": {...}
+  }],
+  "properties": {...}
+}
+```
+
+To use it with tranquility,
+
+```bash
+bin/tranquility kafka \
+  -configFile $jsonConfig \
+  -Ddruid.extensions.directory=/path/to/extensions \
+  -Ddruid.extensions.loadList='["druid-thrift-extensions"]'
+```
+
+Hadoop-client is also needed; you may copy all the hadoop-client dependency jars into the `druid-thrift-extensions` directory to keep it simple.
+
+
+- Batch Ingestion - `inputFormat` and `tmpjars` should be set.
+
+This is for batch ingestion using the HadoopDruidIndexer. The `inputFormat` of `inputSpec` in `ioConfig` can be either `org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat` or `com.twitter.elephantbird.mapreduce.input.LzoThriftBlockInputFormat`. Be careful: when `LzoThriftBlockInputFormat` is used, the thrift class must be provided twice, once as `thriftClass` in the parser and once as `elephantbird.class.for.MultiInputFormat` in `jobProperties`.
+
+```json
+{
+  "type": "index_hadoop",
+  "spec": {
+    "dataSchema": {
+      "dataSource": "book",
+      "parser": {
+        "type": "thrift",
+        "jarPath": "book.jar",
+        "thriftClass": "io.druid.data.input.thrift.Book",
+        "protocol": "compact",
+        "parseSpec": {
+          "format": "json",
+          ...
+        }
+      },
+      "metricsSpec": [],
+      "granularitySpec": {}
+    },
+    "ioConfig": {
+      "type": "hadoop",
+      "inputSpec": {
+        "type": "static",
+        "inputFormat": "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
+        // "inputFormat": "com.twitter.elephantbird.mapreduce.input.LzoThriftBlockInputFormat",
+        "paths": "/user/to/some/book.seq"
+      }
+    },
+    "tuningConfig": {
+      "type": "hadoop",
+      "jobProperties": {
+        "tmpjars":"/user/h_user_profile/du00/druid/test/book.jar",
+        // "elephantbird.class.for.MultiInputFormat" : "${YOUR_THRIFT_CLASS_NAME}"
+      }
+    }
+  }
+}
+```
diff --git a/docs/content/development/extensions.md b/docs/content/development/extensions.md
@@ -63,6 +63,7 @@ All of these community extensions can be downloaded using *pull-deps* with the c
 |sqlserver-metadata-storage|Microsoft SqlServer deep storage.|[link](../development/extensions-contrib/sqlserver.html)|
 |graphite-emitter|Graphite metrics emitter|[link](../development/extensions-contrib/graphite.html)|
 |statsd-emitter|StatsD metrics emitter|[link](../development/extensions-contrib/statsd.html)|
+|druid-thrift-extensions|Support for thrift ingestion|[link](../development/extensions-contrib/thrift.html)|
 
 ## Promoting Community Extension to Core Extension
 

diff --git a/extensions-contrib/thrift-extensions/example/books.json b/extensions-contrib/thrift-extensions/example/books.json
@@ -0,0 +1,65 @@
+{
+  "type" : "index_hadoop",
+  "spec" : {
+    "dataSchema" : {
+      "dataSource" : "test",
+      "parser" : {
+        "type" : "thrift",
+        "jarPath" : "example/book.jar",
+        "thriftClass": "io.druid.data.input.thrift.Book",
+        "protocol" : "compact",
+        "parseSpec" : {
+          "format" : "json",
+          "timestampSpec" : {
+            "column" : "date",
+            "format" : "auto"
+          },
+          "flattenSpec" : {
+            "useFieldDiscovery" : true,
+            "fields" : [ {
+              "type" : "path",
+              "name" : "lastName",
+              "expr" : "$.author.lastName"
+            }, "title" ]
+          },
+          "dimensionsSpec" : {
+            "dimensions" : [ "title", "lastName" ]
+          }
+        }
+      },
+      "metricsSpec" : [ {
+        "type" : "count",
+        "name" : "count"
+      }, {
+        "type" : "doubleSum",
+        "name" : "cost",
+        "fieldName" : "price"
+      } ],
+      "granularitySpec" : {
+        "type" : "uniform",
+        "segmentGranularity" : "DAY",
+        "queryGranularity" : "DAY",
+        "intervals" : [ "2015-09-01/2015-10-01" ]
+      }
+    },
+    "ioConfig" : {
+      "type" : "hadoop",
+      "inputSpec" : {
+        "type" : "static",
+        "inputFormat" : "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
+        "paths" : "example/book.seq"
+      }
+    },
+    "tuningConfig" : {
+      "type" : "hadoop",
+      "partitionsSpec" : {
+        "type" : "hashed",
+        "targetPartitionSize" : 5000000
+      },
+      "jobProperties" : {
+        "tmpjars": "/user/xxx/druid/test/book.jar"
+      }
+    }
+  },
+  "dataSource" : "test"
+}
diff --git a/extensions-contrib/thrift-extensions/pom.xml b/extensions-contrib/thrift-extensions/pom.xml
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+  <groupId>io.druid.extensions.contrib</groupId>
+  <artifactId>druid-thrift-extensions</artifactId>
+  <name>druid-thrift-extensions</name>
+  <description>druid-thrift-extensions</description>
+
+  <parent>
+    <artifactId>druid</artifactId>
+    <groupId>io.druid</groupId>
+    <version>0.9.3-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+
+  <properties>
+    <thrift.version>0.9.3</thrift.version>
+    <elephantbird.version>4.8</elephantbird.version>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.thrift</groupId>
+      <artifactId>libthrift</artifactId>
+      <version>${thrift.version}</version>
+      <exclusions>
+        <exclusion>
+          <artifactId>commons-logging</artifactId>
+          <groupId>commons-logging</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>httpclient</artifactId>
+          <groupId>org.apache.httpcomponents</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>httpcore</artifactId>
+          <groupId>org.apache.httpcomponents</groupId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>io.druid</groupId>
+      <artifactId>druid-indexing-hadoop</artifactId>
+      <version>${project.parent.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-client</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.twitter.elephantbird</groupId>
+      <artifactId>elephant-bird-core</artifactId>
+      <version>${elephantbird.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.twitter.elephantbird</groupId>
+      <artifactId>elephant-bird-hadoop-compat</artifactId>
+      <version>${elephantbird.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.twitter</groupId>
+      <artifactId>scrooge-core_2.11</artifactId>
+      <version>4.10.0</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>com.twitter</groupId>
+        <artifactId>scrooge-maven-plugin</artifactId>
+        <version>4.11.0</version>
+        <configuration>
+          <language>java</language>
+        </configuration>
+        <executions>
+          <execution>
+            <id>thrift-test-sources</id>
+            <phase>generate-test-sources</phase>
+            <goals>
+              <goal>testCompile</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>