This repository has been archived by the owner on Jun 6, 2024. It is now read-only.

# Decouple hdfs storage from global storage, support config multiple folders for hdfs (#1922)

Merged · 5 commits · Dec 21, 2018
6 changes: 6 additions & 0 deletions deployment/quick-start/services-configuration.yaml.template
@@ -58,6 +58,12 @@ cluster:
# description: Default VC.
# capacity: 100

#Uncomment the following lines if you want to customize hdfs
#hadoop-data-node:
# # Storage path for hdfs; supports a comma-delimited list of directories, e.g. /path/to/folder1,/path/to/folder2 ...
# # If left empty, cluster.common.data-path/hdfs/data will be used
# storage_path:
**Contributor:**

What's the default value?

Why not have it contain the default value, while still giving the user the flexibility to configure multiple paths? e.g.:

storage_path: /datastorage

**Member Author @mzmssg (Dec 21, 2018):**

@YanjieGao
The default value is cluster_config[common][data-path]/hdfs/data, which might not be /datastorage.
The logic is: if the admin gives a specific hdfs storage path, use it; if not, use the global storage.
Ideally, if we allowed referencing other services' config in the yaml, we could set $cluster_config.common.data-path/hdfs/data here, which would be clearer.
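
A sketch of the two cases described here (the folder paths are hypothetical examples, not defaults):

```yaml
# Case 1: the admin sets an explicit hdfs storage path (one or more folders)
hadoop-data-node:
  storage_path: /mnt/disk1/hdfs,/mnt/disk2/hdfs

# Case 2: storage_path is left empty, so the parser falls back to the
# global storage, i.e. cluster.common.data-path + "/hdfs/data"
#hadoop-data-node:
#  storage_path:
```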

**Contributor:**

A user who only views this config file can't find where cluster.common.data-path lives, which will be confusing, because not every user knows to search the github code base for the cluster_config[common][storage] cluster object model config.

It is better to tell the user clearly where the default path is.

We should assume the user may only have the context of the current config file.

**Member Author @mzmssg (Dec 21, 2018):**

Actually, I don't think users should need to look here; the default value is for advanced users or devs. In our design, the user should overwrite this value in services-configuration.yaml, which contains the necessary context.

If we introduce a hard-coded path here (even only in comments), it will couple this file with another service.

**Contributor:**

I see; I misunderstood this as the end-user yaml file. And my intent was not to hard-code anything; it was to tell the user that this config's default value can be found in this file's data-path config.

In my understanding the default value is for non-advanced users, and advanced users will know how to customize it.

**Contributor:**

Not a big problem; we can continue.



# Uncomment the following lines if you want to customize yarn-frameworklauncher
#yarn-frameworklauncher:
7 changes: 7 additions & 0 deletions examples/cluster-configuration/services-configuration.yaml
@@ -60,6 +60,13 @@
# description: Default VC.
# capacity: 100

#Uncomment the following lines if you want to customize hdfs
#hadoop-data-node:
# # Storage path for hdfs; supports a comma-delimited list of directories, e.g. /path/to/folder1,/path/to/folder2 ...
# # If left empty, cluster.common.data-path/hdfs/data will be used
# storage_path:



#Uncomment the following lines if you want to customize yarn-frameworklauncher
#yarn-frameworklauncher:
44 changes: 44 additions & 0 deletions src/hadoop-data-node/config/hadoop-data-node.md
@@ -0,0 +1,44 @@
## Hadoop data node section parser

**Contributor:**

The problem is the same as the one below: if this doc is just for config, we could later surface this info in the service-config doc or another part.

- [Default Configuration](#D_Config)
**Contributor:**

This lacks:
1. What a first-time deploying user should do, step by step. [The steps could refer to some deployment doc.]
2. What an upgrading user should do, step by step. [The steps could refer to some deployment doc.]

**Member Author @mzmssg (Dec 21, 2018):**

@YanjieGao
I think in our design this doc should follow some standard items and serve only as a per-service configuration introduction, not for maintenance or other purposes.
e.g.
https://github.com/Microsoft/pai/blob/master/src/rest-server/config/rest-server.md
https://github.com/Microsoft/pai/blob/master/src/grafana/config/grafana.md

Of course, I will update the hdfs docs after this PR.

**Contributor:**

OK.

- [How to Configure](#HT_Config)
- [Generated Configuration](#G_Config)
- [Data Table](#T_Config)

#### Default configuration <a name="D_Config"></a>

[hadoop-data-node default configuration](hadoop-data-node.yaml)

#### How to configure the hadoop-data-node section in services-configuration.yaml <a name="HT_Config"></a>

All configuration in this section is optional. If you want to customize these values, you can configure them in services-configuration.yaml.

- `storage_path` The hdfs storage folders; supports a comma-delimited list of directories.
  If it isn't specified, `cluster.common.data-path/hdfs/data` will be used (see the example below).
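
A minimal sketch of customizing this in services-configuration.yaml (the folder paths are hypothetical):

```yaml
hadoop-data-node:
  # one folder, or several separated by commas
  storage_path: /mnt/disk1/hdfs,/mnt/disk2/hdfs
```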



#### Generated Configuration <a name="G_Config"></a>

After parsing, the object model will contain a comma-delimited string in which every substring is a directory:
```yaml
storage_path: /path/to/folder1,/path/to/folder2,...
```


#### Table <a name="T_Config"></a>

<table>
<tr>
<td>Data in Configuration File</td>
<td>Data in Cluster Object Model</td>
<td>Data in Jinja2 Template</td>
<td>Data type</td>
</tr>
<tr>
<td>hadoop-data-node.storage_path</td>
<td>com["hadoop-data-node"]["storage_path"]</td>
<td>cluster_cfg["hadoop-data-node"]["storage_path"]</td>
<td>Str</td>
</tr>
</table>
1 change: 1 addition & 0 deletions src/hadoop-data-node/config/hadoop-data-node.yaml
@@ -15,3 +15,4 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

storage_path:
16 changes: 14 additions & 2 deletions src/hadoop-data-node/config/hadoop_data_node.py
@@ -22,17 +22,29 @@

class HadoopDataNode:

def __init__(self, cluster_configuration, service_configuration, default_service_configuraiton):
def __init__(self, cluster_configuration, service_configuration, default_service_configuration):
self.logger = logging.getLogger(__name__)

self.cluster_configuration = cluster_configuration
self.service_configuration = self.merge_service_configuration(service_configuration,
default_service_configuration)

def merge_service_configuration(self, overwrite_srv_cfg, default_srv_cfg):
if overwrite_srv_cfg is None:
return default_srv_cfg
srv_cfg = default_srv_cfg.copy()
for k in overwrite_srv_cfg:
srv_cfg[k] = overwrite_srv_cfg[k]
return srv_cfg

def validation_pre(self):
return True, None

def run(self):
com = {}

# com["storage_path"] = self.service_configuration.get("storage_path") or \
# "{}/hdfs/data".format(self.cluster_configuration["cluster"]["common"]["data-path"])
com["storage_path"] = self.service_configuration.get("storage_path")
return com

def validation_post(self, cluster_object_model):
@@ -26,4 +26,5 @@ cp /hadoop-configuration/mapred-site.xml $HADOOP_CONF_DIR/mapred-site.xml
HOST_NAME=`hostname`
/usr/local/host-configure.py -c /host-configuration/host-configuration.yaml -f $HADOOP_CONF_DIR/hdfs-site.xml -n $HOST_NAME

sed -i "s/{HDFS_ADDRESS}/${HDFS_ADDRESS}/g" $HADOOP_CONF_DIR/core-site.xml
sed -i "s/{HDFS_ADDRESS}/${HDFS_ADDRESS}/g" $HADOOP_CONF_DIR/core-site.xml
sed -i "s#{HADOOP_DATANODE_DATA_DIR}#${HADOOP_DATANODE_DATA_DIR}#g" $HADOOP_CONF_DIR/hdfs-site.xml
@@ -39,7 +39,7 @@

<property>
<name>dfs.datanode.data.dir</name>
<value>file:///var/lib/hdfs/data</value>
<value>{HADOOP_DATANODE_DATA_DIR}</value>
<description>
This property specifies the URIs of the directories where the DataNode stores
blocks.
17 changes: 13 additions & 4 deletions src/hadoop-data-node/deploy/hadoop-data-node.yaml.template
@@ -15,6 +15,7 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

{% set folders = cluster_cfg[ "hadoop-data-node" ][ "storage_path" ] or cluster_cfg["cluster"]["common"][ "data-path" ] + "/hdfs/data" %}
apiVersion: apps/v1
kind: DaemonSet
metadata:
@@ -35,8 +36,12 @@ spec:
image: {{ cluster_cfg["cluster"]["docker-registry"]["prefix"] }}hadoop-run:{{ cluster_cfg["cluster"]["docker-registry"]["tag"] }}
imagePullPolicy: Always
volumeMounts:
- mountPath: /var/lib/hdfs/data
name: hadoop-data-storage
{% set mount_points = [] %}
{% for folder in folders.split(",") %}
- mountPath: /var/lib/hdfs/data-{{ loop.index }}
name: hadoop-data-storage-{{ loop.index }}
{% set ignored = mount_points.append("file:///var/lib/hdfs/data-" + loop.index|string) %}
{% endfor %}
- mountPath: /hadoop-configuration
name: hadoop-data-node-config-volume
- mountPath: /host-configuration
@@ -64,15 +69,19 @@ spec:
value: datanode-generate-script.sh
- name: START_SERVICE
value: datanode-start-service.sh
- name: HADOOP_DATANODE_DATA_DIR
value: {{ mount_points|join(",") }}
imagePullSecrets:
- name: {{ cluster_cfg["cluster"]["docker-registry"]["secret-name"] }}
volumes:
- name: hadoop-tmp-storage
hostPath:
path: {{ cluster_cfg["cluster"]["common"][ "data-path" ] }}/hadooptmp/datanode
- name: hadoop-data-storage
{% for folder in folders.split(",") %}
- name: hadoop-data-storage-{{ loop.index }}
hostPath:
path: {{ cluster_cfg["cluster"]["common"][ "data-path" ] }}/hdfs/data
path: {{ folder }}
{% endfor %}
- name: hadoop-data-node-config-volume
configMap:
name: hadoop-data-node-configuration
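
For illustration: with a hypothetical storage_path of /mnt/disk1/hdfs,/mnt/disk2/hdfs, the storage-related parts of the template above would render roughly as follows (a sketch, trimmed to the relevant fields):

```yaml
volumeMounts:
# one mount per configured folder, numbered by loop.index
- mountPath: /var/lib/hdfs/data-1
  name: hadoop-data-storage-1
- mountPath: /var/lib/hdfs/data-2
  name: hadoop-data-storage-2
env:
# comma-delimited list collected in mount_points by the template
- name: HADOOP_DATANODE_DATA_DIR
  value: file:///var/lib/hdfs/data-1,file:///var/lib/hdfs/data-2
volumes:
# one hostPath volume per configured folder
- name: hadoop-data-storage-1
  hostPath:
    path: /mnt/disk1/hdfs
- name: hadoop-data-storage-2
  hostPath:
    path: /mnt/disk2/hdfs
```

Each configured folder gets its own hostPath volume and mount, and the datanode receives them as comma-separated file:// URIs through HADOOP_DATANODE_DATA_DIR, which the start script substitutes into dfs.datanode.data.dir in hdfs-site.xml.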