diff --git a/src/dshuttle-csi/deploy/delete.sh b/src/dshuttle-csi/deploy/delete.sh
new file mode 100644
index 0000000000..1b952d11cc
--- /dev/null
+++ b/src/dshuttle-csi/deploy/delete.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Delete dshuttle-csi. This script only delegates to stop.sh; it creates no cleanup job of its own.
+# Work from the script's own directory so the relative stop.sh path resolves (quoted: the path may contain spaces).
+pushd "$(dirname "$0")" > /dev/null || exit $?
+
+echo "Call stop script to stop all service first"
+/bin/bash stop.sh || exit $?
+
+popd > /dev/null
\ No newline at end of file
diff --git a/src/dshuttle-csi/deploy/dshuttle-csi-daemon.yaml.template b/src/dshuttle-csi/deploy/dshuttle-csi-daemon.yaml.template
new file mode 100644
index 0000000000..65dccdd1ea
--- /dev/null
+++ b/src/dshuttle-csi/deploy/dshuttle-csi-daemon.yaml.template
@@ -0,0 +1,143 @@
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+kind: DaemonSet  # runs the dshuttle CSI plugin next to the CSI node-driver-registrar sidecar
+apiVersion: apps/v1
+metadata:
+  name: dshuttle-csi-daemon
+spec:
+  selector:
+    matchLabels:
+      app: dshuttle-csi-daemon
+  template:
+    metadata:
+      labels:
+        app: dshuttle-csi-daemon
+    spec:
+      hostNetwork: true
+      dnsPolicy: ClusterFirstWithHostNet
+      initContainers:
+        # Appends fuse.alluxio-fuse to PRUNEFS in the host's /etc/updatedb.conf (unless already listed) so the OS file index skips alluxio-fuse mounts, which would consume much disk space
+        - name: change-updatedb-conf
+          image: dshuttle.azurecr.io/dshuttle/dshuttle-csi:25037dc
+          imagePullPolicy: Always
+          securityContext:
+            runAsUser: 0
+          command: ["/bin/bash", "-c"]
+          args:  # the sed output is quoted so the file's line breaks survive; unquoted, word splitting would join every line into one
+            - FILE=/host-config/updatedb.conf && grep -q 'PRUNEFS=".*fuse.alluxio-fuse.*"' "$FILE" || echo "$(sed '/PRUNEFS=/s/"$/ fuse.alluxio-fuse"/' "$FILE")" > "$FILE"
+          volumeMounts:
+            - name: etc
+              mountPath: /host-config/updatedb.conf
+              subPath: updatedb.conf
+      containers:
+        - name: node-driver-registrar
+          image: quay.io/k8scsi/csi-node-driver-registrar:v1.0.2
+          lifecycle:
+            preStop:
+              exec:
+                command: ["/bin/sh", "-c", "rm -rf /registration/dshuttle-reg.sock /var/lib/kubelet/plugins/csi-dshuttle-plugin"]
+          args:
+            - --v=5
+            - --csi-address=/plugin/csi.sock
+            - --kubelet-registration-path=/var/lib/kubelet/plugins/csi-dshuttle-plugin/csi.sock
+          env:
+            - name: KUBE_NODE_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: spec.nodeName
+          volumeMounts:
+            - name: plugin-dir
+              mountPath: /plugin
+            - name: registration-dir
+              mountPath: /registration
+        - name: dshuttle-csi-daemon
+          securityContext:
+            privileged: true
+            runAsUser: 0
+          image: dshuttle.azurecr.io/dshuttle/dshuttle-csi:25037dc
+          command: ["/usr/local/bin/dshuttle-csi"]
+          args:  # $(NODE_ID) and $(CSI_ENDPOINT) refer to the env entries defined below
+            - "--v=4"
+            - "--nodeid=$(NODE_ID)"
+            - "--endpoint=$(CSI_ENDPOINT)"
+          env:
+            - name: ALLUXIO_CLIENT_HOSTNAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: status.hostIP
+            - name: ALLUXIO_CLIENT_JAVA_OPTS
+              value: " -Dalluxio.user.hostname=$(ALLUXIO_CLIENT_HOSTNAME) -Dalluxio.worker.hostname=$(ALLUXIO_CLIENT_HOSTNAME) "
+            - name: NODE_ID
+              valueFrom:
+                fieldRef:
+                  fieldPath: spec.nodeName
+            - name: CSI_ENDPOINT
+              value: unix://plugin/csi.sock
+          envFrom:
+            - configMapRef:
+                name: dshuttle-config
+          {%- if cluster_cfg['cluster']['common']['qos-switch'] == "true" %}
+          resources:
+            limits:
+              memory: "{{ cluster_cfg['dshuttle']['csi_daemon_limit_mem'] }}"
+            requests:
+              memory: "{{ cluster_cfg['dshuttle']['csi_daemon_request_mem'] }}"
+          {%- endif %}
+          imagePullPolicy: "Always"
+          volumeMounts:
+            - name: plugin-dir
+              mountPath: /plugin
+            - name: pods-mount-dir
+              mountPath: /var/lib/kubelet/pods
+              mountPropagation: "Bidirectional"
+            - name: dshuttle-domain
+              mountPath: /opt/domain
+            - name: fuse-logs
+              mountPath: /opt/alluxio/logs
+            - name: dshuttle-log-config
+              mountPath: /opt/alluxio/conf/log4j.properties
+              subPath: log4j.properties
+      volumes:
+        - name: plugin-dir
+          hostPath:
+            path: /var/lib/kubelet/plugins/csi-dshuttle-plugin
+            type: DirectoryOrCreate
+        - name: pods-mount-dir
+          hostPath:
+            path: /var/lib/kubelet/pods
+            type: Directory
+        - hostPath:
+            path: /var/lib/kubelet/plugins_registry
+            type: Directory
+          name: registration-dir
+        - name: dshuttle-domain
+          hostPath:
+            path: /tmp/alluxio-domain
+            type: "Directory"
+        - name: fuse-logs
+          hostPath:
+            path: /var/log/dshuttle
+            type: DirectoryOrCreate
+        - name: dshuttle-log-config
+          configMap:
+            name: dshuttle-log-config
+        - name: etc
+          hostPath:
+            path: /etc
+      imagePullSecrets:
+        - name: dshuttle-regcred
diff --git a/src/dshuttle-csi/deploy/dshuttle-csi-driver.yaml b/src/dshuttle-csi/deploy/dshuttle-csi-driver.yaml
new file mode 100644
index 0000000000..95eaeecb34
--- /dev/null
+++ b/src/dshuttle-csi/deploy/dshuttle-csi-driver.yaml
@@ -0,0 +1,24 @@
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+kind: CSIDriver  # cluster-level registration object for the CSI driver named "dshuttle"
+apiVersion: storage.k8s.io/v1beta1
+metadata:
+  name: dshuttle
+spec:
+  podInfoOnMount: true  # ask for pod information to be supplied with mount requests
+  attachRequired: false  # the driver declares that it needs no separate attach step
diff --git a/src/dshuttle-csi/deploy/service.yaml b/src/dshuttle-csi/deploy/service.yaml
new file mode 100644
index 0000000000..130ed428a5
--- /dev/null
+++ b/src/dshuttle-csi/deploy/service.yaml
@@ -0,0 +1,36 @@
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+cluster-type:
+- k8s
+
+prerequisite:  # services named as prerequisites of dshuttle-csi
+- cluster-configuration
+- dshuttle-master
+- dshuttle-worker
+
+template-list:  # each entry has a matching *.template file in this directory
+- dshuttle-csi-daemon.yaml
+- start.sh
+- stop.sh
+
+start-script: start.sh
+stop-script: stop.sh
+delete-script: delete.sh
+
+deploy-rules:  # NOTE(review): presumably limits deployment to nodes labelled pai-worker; confirm against the deployment tool
+- in: pai-worker
diff --git a/src/dshuttle-csi/deploy/start.sh.template b/src/dshuttle-csi/deploy/start.sh.template
new file mode 100644
index 0000000000..6cf7779836
--- /dev/null
+++ b/src/dshuttle-csi/deploy/start.sh.template
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+{%- if cluster_cfg['cluster']['common']['dshuttle'] == 'true' %}
+pushd "$(dirname "$0")" > /dev/null || exit $?
+# Deploy dshuttle-csi from this script's directory: apply the CSIDriver object, then the node plugin DaemonSet.
+kubectl apply --overwrite=true -f dshuttle-csi-driver.yaml || exit $?
+kubectl apply --overwrite=true -f dshuttle-csi-daemon.yaml || exit $?
+
+sleep 10
+# Wait until the service is ready (the readiness check is invoked with label app=dshuttle-csi-daemon).
+PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v dshuttle-csi-daemon || exit $?
+
+popd > /dev/null
+{%- endif %}
diff --git a/src/dshuttle-csi/deploy/stop.sh.template b/src/dshuttle-csi/deploy/stop.sh.template
new file mode 100644
index 0000000000..fae7289d9c
--- /dev/null
+++ b/src/dshuttle-csi/deploy/stop.sh.template
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+{%- if cluster_cfg['cluster']['common']['dshuttle'] == 'true' %}
+pushd "$(dirname "$0")" > /dev/null || exit $?
+# Stop dshuttle-csi. Each object is looked up by its exact name: a substring match on the listing could hit an unrelated object and make the delete fail.
+if kubectl get daemonset dshuttle-csi-daemon > /dev/null 2>&1; then
+    kubectl delete daemonset dshuttle-csi-daemon || exit $?
+fi
+
+if kubectl get csidriver dshuttle > /dev/null 2>&1; then
+    kubectl delete csidriver dshuttle || exit $?
+fi
+
+popd > /dev/null
+{%- endif %}
\ No newline at end of file
diff --git a/src/dshuttle-master/deploy/delete.sh b/src/dshuttle-master/deploy/delete.sh
new file mode 100644
index 0000000000..45ce086303
--- /dev/null
+++ b/src/dshuttle-master/deploy/delete.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+pushd "$(dirname "$0")" > /dev/null || exit $?
+# Order: stop the master pods, publish the cleanup script as a configmap, run the cleaner DaemonSet, then remove both.
+echo "Call stop to stop all dshuttle-master pod first"
+/bin/bash stop.sh || exit $?
+
+echo "Create dshuttle-master-delete configmap for deleting data on the host"
+kubectl create configmap dshuttle-master-delete --from-file=dshuttle-master-delete/ --dry-run -o yaml | kubectl apply --overwrite=true -f - || exit $?
+
+echo "Create cleaner daemon"
+kubectl apply --overwrite=true -f delete.yaml || exit $?
+sleep 5
+# Wait on the cleaner pods (label app=delete-batch-job-dshuttle-master) before tearing them down.
+PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v delete-batch-job-dshuttle-master || exit $?
+
+echo "Dshuttle master clean job is done"
+echo "Delete dshuttle master cleaner daemon and configmap"
+if kubectl get daemonset delete-batch-job-dshuttle-master > /dev/null 2>&1; then
+    kubectl delete ds delete-batch-job-dshuttle-master || exit $?
+fi
+# Exact-name lookups: a substring match on the listing could hit an unrelated object and make the delete fail.
+if kubectl get configmap dshuttle-master-delete > /dev/null 2>&1; then
+    kubectl delete configmap dshuttle-master-delete || exit $?
+fi
+sleep 5
+
+popd > /dev/null
\ No newline at end of file
diff --git a/src/dshuttle-master/deploy/delete.yaml.template b/src/dshuttle-master/deploy/delete.yaml.template
new file mode 100644
index 0000000000..4e94814c11
--- /dev/null
+++ b/src/dshuttle-master/deploy/delete.yaml.template
@@ -0,0 +1,81 @@
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+kind: DaemonSet  # one-shot cleaner pod for dshuttle-master data, scheduled only on nodes labelled pai-master=true
+apiVersion: apps/v1
+metadata:
+  name: delete-batch-job-dshuttle-master
+spec:
+  selector:
+    matchLabels:
+      app: delete-batch-job-dshuttle-master
+  template:
+    metadata:
+      labels:
+        app: delete-batch-job-dshuttle-master
+    spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  - key: pai-master
+                    operator: In
+                    values:
+                      - "true"
+      hostNetwork: true
+      hostPID: false
+      containers:
+        - name: cleaning-one-shot
+          image: {{ cluster_cfg["cluster"]["docker-registry"]["prefix"] }}cleaning-image:{{ cluster_cfg["cluster"]["docker-registry"]["tag"] }}
+          imagePullPolicy: Always
+          env:
+            - name: DELETE_CONFIG
+              value: dshuttle-master-delete
+            - name: WORKER_CONFIG
+              value: delete-data.sh
+          readinessProbe:  # Ready once "cat /jobstatus/jobok" succeeds; presumably the cleaning image creates that file when done
+            exec:
+              command:
+                - cat
+                - /jobstatus/jobok
+            initialDelaySeconds: 5
+            periodSeconds: 3
+          volumeMounts:
+            - name: dshuttle-master-delete-config
+              mountPath: /dshuttle-master-delete
+            - name: log-path
+              mountPath: /mnt/log
+            - name: ufs
+              mountPath: /mnt/ufs
+            - name: journal
+              mountPath: /mnt/journal
+      imagePullSecrets:
+        - name: {{ cluster_cfg["cluster"]["docker-registry"]["secret-name"] }}
+      volumes:
+        - name: dshuttle-master-delete-config
+          configMap:
+            name: dshuttle-master-delete
+        - name: log-path
+          hostPath:
+            path: /var/log/dshuttle
+        - name: ufs
+          hostPath:
+            path: /mnt/dshuttle/ufs
+        - name: journal
+          hostPath:
+            path: /mnt/dshuttle/journal
diff --git a/src/dshuttle-master/deploy/dshuttle-config.yaml.template b/src/dshuttle-master/deploy/dshuttle-config.yaml.template
new file mode 100644
index 0000000000..660052eb25
--- /dev/null
+++ b/src/dshuttle-master/deploy/dshuttle-config.yaml.template
@@ -0,0 +1,94 @@
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+kind: ConfigMap  # loaded through envFrom/configMapRef by the dshuttle pods, so every key under data becomes an environment variable
+apiVersion: v1
+metadata:
+  name: dshuttle-config
+data:
+  ALLUXIO_JAVA_OPTS: >-  # ">-" folds the lines below into one space-separated option string
+    -Dalluxio.master.hostname=dshuttle-service.default
+    -Dalluxio.master.ufs.path.cache.capacity=10000000
+    -Dalluxio.job.master.worker.heartbeat.interval=1sec
+    -Dalluxio.job.master.finished.job.retention.time=1sec
+    -Dalluxio.job.worker.threadpool.size=200
+    -Dalluxio.master.journal.type=UFS
+    -Dalluxio.master.journal.folder=/journal
+    -Dalluxio.master.mount.table.root.ufs=/opt/alluxio/underFSStorage
+    -Dalluxio.master.rpc.executor.parallelism=200
+    -Dalluxio.security.stale.channel.purge.interval=365d
+    -Dalluxio.user.metrics.collection.enabled=true
+    -Dalluxio.master.rpc.port={{ cluster_cfg['dshuttle']['master_rpc_port'] }}
+    -Dalluxio.master.web.port={{ cluster_cfg['dshuttle']['master_web_port'] }}
+    -Dalluxio.job.master.rpc.port={{ cluster_cfg['dshuttle']['job_master_rpc_port'] }}
+    -Dalluxio.job.master.web.port={{ cluster_cfg['dshuttle']['job_master_web_port'] }}
+    -Dalluxio.user.block.size.bytes.default=32MB
+    -Dalluxio.user.ufs.block.read.location.policy=alluxio.client.block.policy.MostAvailableFirstPolicy
+    -Dalluxio.user.file.passive.cache.enabled=false
+    -Dalluxio.security.authentication.type=NOSASL
+    -Dalluxio.security.authorization.permission.enabled=false
+    -Dalluxio.user.block.write.location.policy.class=alluxio.client.block.policy.MostAvailableFirstPolicy
+    -Dalluxio.worker.rpc.port={{ cluster_cfg['dshuttle']['worker_rpc_port'] }}
+    -Dalluxio.worker.web.port={{ cluster_cfg['dshuttle']['worker_web_port'] }}
+    -Dalluxio.worker.network.reader.buffer.size=1MB
+    -Dalluxio.user.file.readtype.default=CACHE
+    -Dalluxio.user.streaming.reader.chunk.size.bytes=256KB
+    -Dalluxio.user.streaming.reader.buffer.size.messages=2
+    -Dalluxio.user.network.streaming.flowcontrol.window=1MB
+    -Dalluxio.user.block.worker.client.pool.min=50
+    -Dalluxio.user.update.file.accesstime.disabled=true
+    -Dalluxio.user.metadata.cache.enabled=true
+    -Dalluxio.user.metadata.cache.max.size=2000000
+    -Dalluxio.user.metadata.cache.expiration.time=24h
+    -Dalluxio.user.file.master.client.pool.size.max=50
+    -Dalluxio.fuse.jnifuse.enabled=true
+  ALLUXIO_MASTER_JAVA_OPTS: >-
+    -Dalluxio.master.web.bind.host=0.0.0.0
+    -XX:ActiveProcessorCount={{ cluster_cfg['dshuttle']['master_active_processor_count'] }}
+    -Xms1G -Xmx{{ cluster_cfg['dshuttle']['master_max_heap_size'] }}
+  ALLUXIO_JOB_MASTER_JAVA_OPTS: >-
+    -Dalluxio.job.master.bind.host=0.0.0.0
+    -XX:ActiveProcessorCount={{ cluster_cfg['dshuttle']['job_master_active_processor_count'] }}
+    -Xms1G -Xmx{{ cluster_cfg['dshuttle']['job_master_max_heap_size'] }}
+  ALLUXIO_WORKER_JAVA_OPTS: >-  # the for-loop renders one group of tieredstore.level<N> options per entry of cluster_cfg['dshuttle']['tieredstores']
+    -Dalluxio.worker.bind.host=0.0.0.0
+    -Dalluxio.worker.data.server.domain.socket.address=/opt/domain
+    -Dalluxio.worker.data.server.domain.socket.as.uuid=true
+    -Dalluxio.worker.memory.size={{ cluster_cfg['dshuttle']['worker_request_mem'] }}
+    -Dalluxio.worker.hostname=${ALLUXIO_WORKER_HOSTNAME}
+    -Dalluxio.worker.tieredstore.levels={{ cluster_cfg['dshuttle']['tieredstores']|length|string }}
+    {% for tieredstore in cluster_cfg['dshuttle']['tieredstores'] -%}
+    -Dalluxio.worker.tieredstore.level{{ tieredstore['level'] }}.dirs.mediumtype={{ tieredstore['mediumtype'] }}
+    -Dalluxio.worker.tieredstore.level{{ tieredstore['level'] }}.alias={{ tieredstore['alias'] }}
+    -Dalluxio.worker.tieredstore.level{{ tieredstore['level'] }}.dirs.path={{ tieredstore['path'] }}
+    -Dalluxio.worker.tieredstore.level{{ tieredstore['level'] }}.dirs.quota={{ tieredstore['quota'] }}
+    -Dalluxio.worker.tieredstore.level{{ tieredstore['level'] }}.watermark.high.ratio={{ tieredstore['watermark_high_ratio'] }}
+    -Dalluxio.worker.tieredstore.level{{ tieredstore['level'] }}.watermark.low.ratio={{ tieredstore['watermark_low_ratio'] }}
+    {% endfor -%}
+    -XX:ActiveProcessorCount={{ cluster_cfg['dshuttle']['worker_active_processor_count'] }}
+    -Xms1G -Xmx{{ cluster_cfg['dshuttle']['worker_max_heap_size'] }}
+  ALLUXIO_JOB_WORKER_JAVA_OPTS: >-
+    -Dalluxio.job.worker.rpc.port={{ cluster_cfg['dshuttle']['job_worker_rpc_port'] }}
+    -Dalluxio.job.worker.data.port={{ cluster_cfg['dshuttle']['job_worker_data_port'] }}
+    -Dalluxio.job.worker.web.port={{ cluster_cfg['dshuttle']['job_worker_web_port'] }}
+    -Dalluxio.worker.hostname=${ALLUXIO_WORKER_HOSTNAME}
+    -XX:ActiveProcessorCount={{ cluster_cfg['dshuttle']['job_worker_active_processor_count'] }}
+    -Xms1G -Xmx{{ cluster_cfg['dshuttle']['job_worker_max_heap_size'] }}
+  ALLUXIO_FUSE_JAVA_OPTS: >-
+    -Xms1G -Xmx{{ cluster_cfg['dshuttle']['fuse_max_heap_size'] }} -XX:MaxDirectMemorySize={{ cluster_cfg['dshuttle']['fuse_max_direct_mem_size'] }}
+    -XX:ActiveProcessorCount={{ cluster_cfg['dshuttle']['fuse_active_processor_count'] }}
+  ALLUXIO_WORKER_TIEREDSTORE_LEVEL0_DIRS_PATH: /dev/shm  # NOTE(review): a level0 dirs.path may also be rendered by the tieredstores loop above; confirm which setting is meant to win
diff --git a/src/dshuttle-master/deploy/dshuttle-master-delete/delete-data.sh b/src/dshuttle-master/deploy/dshuttle-master-delete/delete-data.sh
new file mode 100644
index 0000000000..e4eb139d75
--- /dev/null
+++ b/src/dshuttle-master/deploy/dshuttle-master-delete/delete-data.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Removes the journal, the under-storage contents and the master log files found under /mnt.
+echo "Clean the dshuttle-master's data on the disk"
+
+# Removal errors are discarded on purpose (stderr goes to /dev/null).
+for stale in /mnt/journal/* /mnt/ufs/* /mnt/log/*master*.log*; do
+    rm -rf "$stale" 2>/dev/null
+done
\ No newline at end of file
diff --git a/src/dshuttle-master/deploy/dshuttle-master.yaml.template b/src/dshuttle-master/deploy/dshuttle-master.yaml.template
new file mode 100644
index 0000000000..fb2126afba
--- /dev/null
+++ b/src/dshuttle-master/deploy/dshuttle-master.yaml.template
@@ -0,0 +1,133 @@
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+kind: StatefulSet  # single-replica pod holding the dshuttle master and job-master containers
+apiVersion: apps/v1
+metadata:
+  name: dshuttle-master
+spec:
+  replicas: 1
+  serviceName: dshuttle-master
+  selector:
+    matchLabels:
+      app: dshuttle-master
+  template:
+    metadata:
+      labels:
+        app: dshuttle-master
+    spec:
+      hostNetwork: true
+      dnsPolicy: ClusterFirstWithHostNet
+      containers:
+        - name: dshuttle-master
+          image: dshuttle.azurecr.io/dshuttle/dshuttle:25037dc
+          imagePullPolicy: Always
+          securityContext:
+            runAsUser: 1000
+            runAsGroup: 1000
+          {%- if cluster_cfg['cluster']['common']['qos-switch'] == "true" %}
+          resources:
+            limits:
+              memory: {{ cluster_cfg['dshuttle']['master_limit_mem'] }}
+            requests:  # NOTE(review): the request reuses master_limit_mem while job-master has its own *_request_mem; confirm this is intended
+              memory: {{ cluster_cfg['dshuttle']['master_limit_mem'] }}
+          {%- endif %}
+          command: ["/entrypoint.sh"]
+          args:
+            - master-only
+            - --no-format
+          envFrom:
+            - configMapRef:
+                name: dshuttle-config
+          ports:
+            - name: rpc
+              containerPort: {{ cluster_cfg['dshuttle']['master_rpc_port'] }}
+            - name: web
+              containerPort: {{ cluster_cfg['dshuttle']['master_web_port'] }}
+          volumeMounts:
+            - name: alluxio-journal
+              mountPath: /journal
+            - name: dshuttle-data
+              mountPath: /opt/alluxio/underFSStorage
+            - name: dshuttle-log
+              mountPath: /opt/alluxio/logs
+            - name: dshuttle-log-config
+              mountPath: /opt/alluxio/conf/log4j.properties
+              subPath: log4j.properties
+        - name: dshuttle-job-master
+          image: dshuttle.azurecr.io/dshuttle/dshuttle:25037dc
+          imagePullPolicy: Always
+          securityContext:
+            runAsUser: 1000
+            runAsGroup: 1000
+          {%- if cluster_cfg['cluster']['common']['qos-switch'] == "true" %}
+          resources:
+            limits:
+              memory: {{ cluster_cfg['dshuttle']['job_master_limit_mem'] }}
+            requests:
+              memory: {{ cluster_cfg['dshuttle']['job_master_request_mem'] }}
+          {%- endif %}
+          command: ["/entrypoint.sh"]
+          args:
+            - job-master
+          envFrom:
+            - configMapRef:
+                name: dshuttle-config
+          ports:
+            - containerPort: {{ cluster_cfg['dshuttle']['job_master_rpc_port'] }}
+              name: job-rpc
+            - containerPort: {{ cluster_cfg['dshuttle']['job_master_web_port'] }}
+              name: job-web
+          volumeMounts:
+            - name: dshuttle-log
+              mountPath: /opt/alluxio/logs
+            - name: dshuttle-log-config
+              mountPath: /opt/alluxio/conf/log4j.properties
+              subPath: log4j.properties
+      restartPolicy: Always
+      volumes:
+        - name: alluxio-journal
+          hostPath:
+            path: /mnt/dshuttle/journal
+            type: DirectoryOrCreate
+        - name: dshuttle-data
+          hostPath:
+            path: /mnt/dshuttle/ufs
+            type: DirectoryOrCreate
+        - name: dshuttle-log
+          hostPath:
+            path: /var/log/dshuttle
+            type: DirectoryOrCreate
+        - name: dshuttle-log-config
+          configMap:
+            name: dshuttle-log-config
+      initContainers:
+        - name: journal-chown  # runs as root to hand the hostPath directories to 1000:1000, the uid/gid the containers above run as
+          image: dshuttle.azurecr.io/dshuttle/dshuttle:25037dc
+          imagePullPolicy: Always
+          securityContext:
+            runAsUser: 0
+          command: ["/bin/chown", "-R", "1000:1000", "/journal", "/logs", "/underFSStorage"]  # absolute paths; each one matches a mountPath below
+          volumeMounts:
+            - name: alluxio-journal
+              mountPath: /journal
+            - name: dshuttle-log
+              mountPath: /logs
+            - name: dshuttle-data
+              mountPath: /underFSStorage
+      imagePullSecrets:
+        - name: dshuttle-regcred
diff --git a/src/dshuttle-master/deploy/dshuttle-service.yaml.template b/src/dshuttle-master/deploy/dshuttle-service.yaml.template
new file mode 100644
index 0000000000..2cf52428ec
--- /dev/null
+++ b/src/dshuttle-master/deploy/dshuttle-service.yaml.template
@@ -0,0 +1,38 @@
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +kind: Service +apiVersion: v1 +metadata: + name: dshuttle-service +spec: + type: ClusterIP + ports: + - port: {{ cluster_cfg['dshuttle']['master_rpc_port'] }} + targetPort: {{ cluster_cfg['dshuttle']['master_rpc_port'] }} + name: rpc + - port: {{ cluster_cfg['dshuttle']['master_web_port'] }} + targetPort: {{ cluster_cfg['dshuttle']['master_web_port'] }} + name: web + - port: {{ cluster_cfg['dshuttle']['job_master_rpc_port'] }} + targetPort: {{ cluster_cfg['dshuttle']['job_master_rpc_port'] }} + name: job-rpc + - port: {{ cluster_cfg['dshuttle']['job_master_web_port'] }} + targetPort: {{ cluster_cfg['dshuttle']['job_master_web_port'] }} + name: job-web + selector: + app: dshuttle-master diff --git a/src/dshuttle-master/deploy/log4j.properties.template b/src/dshuttle-master/deploy/log4j.properties.template new file mode 100644 index 0000000000..e2c96d7c0e --- /dev/null +++ b/src/dshuttle-master/deploy/log4j.properties.template @@ -0,0 +1,202 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. +# + +# May get overridden by System Property + +log4j.rootLogger={{ cluster_cfg['dshuttle']['log']['level'] }}, ${alluxio.logger.type}, ${alluxio.remote.logger.type} + +log4j.category.alluxio.logserver=INFO, ${alluxio.logserver.logger.type} +log4j.additivity.alluxio.logserver=false + +log4j.logger.AUDIT_LOG=INFO, ${alluxio.master.audit.logger.type} +log4j.additivity.AUDIT_LOG=false + +# Configures an appender whose name is "" (empty string) to be NullAppender. +# By default, if a Java class does not specify a particular appender, log4j will +# use "" as the appender name, then it will use Null appender. +log4j.appender.=org.apache.log4j.varia.NullAppender + +log4j.appender.Console=org.apache.log4j.ConsoleAppender +log4j.appender.Console.Target=System.out +log4j.appender.Console.layout=org.apache.log4j.PatternLayout +log4j.appender.Console.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n + +# The netty transport has info-level logging on every connection, even successful +# connections. This can result in hundreds of log messages per second. 
+log4j.category.io.atomix.catalyst.transport.netty=WARN + +# The ParquetWriter logs for every row group which is not noisy for large row group size, +# but very noisy for small row group size. +log4j.logger.org.apache.parquet.hadoop.InternalParquetRecordWriter=WARN +log4j.logger.org.apache.parquet.hadoop.InternalParquetRecordReader=WARN + +# Appender for Job Master +log4j.appender.JOB_MASTER_LOGGER=org.apache.log4j.RollingFileAppender +log4j.appender.JOB_MASTER_LOGGER.File=${alluxio.logs.dir}/job_master.log +log4j.appender.JOB_MASTER_LOGGER.MaxFileSize={{ cluster_cfg['dshuttle']['log']['max_file_size'] }} +log4j.appender.JOB_MASTER_LOGGER.MaxBackupIndex={{ cluster_cfg['dshuttle']['log']['max_backup_index'] }} +log4j.appender.JOB_MASTER_LOGGER.layout=org.apache.log4j.PatternLayout +log4j.appender.JOB_MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M) - %m%n + +# Appender for Job Workers +log4j.appender.JOB_WORKER_LOGGER=org.apache.log4j.RollingFileAppender +log4j.appender.JOB_WORKER_LOGGER.File=${alluxio.logs.dir}/job_worker.log +log4j.appender.JOB_WORKER_LOGGER.MaxFileSize={{ cluster_cfg['dshuttle']['log']['max_file_size'] }} +log4j.appender.JOB_WORKER_LOGGER.MaxBackupIndex={{ cluster_cfg['dshuttle']['log']['max_backup_index'] }} +log4j.appender.JOB_WORKER_LOGGER.layout=org.apache.log4j.PatternLayout +log4j.appender.JOB_WORKER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M) - %m%n + +# Appender for Master +log4j.appender.MASTER_LOGGER=org.apache.log4j.RollingFileAppender +log4j.appender.MASTER_LOGGER.File=${alluxio.logs.dir}/master.log +log4j.appender.MASTER_LOGGER.MaxFileSize={{ cluster_cfg['dshuttle']['log']['max_file_size'] }} +log4j.appender.MASTER_LOGGER.MaxBackupIndex={{ cluster_cfg['dshuttle']['log']['max_backup_index'] }} +log4j.appender.MASTER_LOGGER.layout=org.apache.log4j.PatternLayout +log4j.appender.MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n + +# Appender for Secondary Master
+log4j.appender.SECONDARY_MASTER_LOGGER=org.apache.log4j.RollingFileAppender +log4j.appender.SECONDARY_MASTER_LOGGER.File=${alluxio.logs.dir}/secondary_master.log +log4j.appender.SECONDARY_MASTER_LOGGER.MaxFileSize={{ cluster_cfg['dshuttle']['log']['max_file_size'] }} +log4j.appender.SECONDARY_MASTER_LOGGER.MaxBackupIndex={{ cluster_cfg['dshuttle']['log']['max_backup_index'] }} +log4j.appender.SECONDARY_MASTER_LOGGER.layout=org.apache.log4j.PatternLayout +log4j.appender.SECONDARY_MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n + +# Appender for Master audit +log4j.appender.MASTER_AUDIT_LOGGER=org.apache.log4j.RollingFileAppender +log4j.appender.MASTER_AUDIT_LOGGER.File=${alluxio.logs.dir}/master_audit.log +log4j.appender.MASTER_AUDIT_LOGGER.MaxFileSize={{ cluster_cfg['dshuttle']['log']['max_file_size'] }} +log4j.appender.MASTER_AUDIT_LOGGER.MaxBackupIndex={{ cluster_cfg['dshuttle']['log']['max_backup_index'] }} +log4j.appender.MASTER_AUDIT_LOGGER.layout=org.apache.log4j.PatternLayout +log4j.appender.MASTER_AUDIT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M) - %m%n + +# Appender for Proxy +log4j.appender.PROXY_LOGGER=org.apache.log4j.RollingFileAppender +log4j.appender.PROXY_LOGGER.File=${alluxio.logs.dir}/proxy.log +log4j.appender.PROXY_LOGGER.MaxFileSize={{ cluster_cfg['dshuttle']['log']['max_file_size'] }} +log4j.appender.PROXY_LOGGER.MaxBackupIndex={{ cluster_cfg['dshuttle']['log']['max_backup_index'] }} +log4j.appender.PROXY_LOGGER.layout=org.apache.log4j.PatternLayout +log4j.appender.PROXY_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n + +# Appender for Workers +log4j.appender.WORKER_LOGGER=org.apache.log4j.RollingFileAppender +log4j.appender.WORKER_LOGGER.File=${alluxio.logs.dir}/worker.log +log4j.appender.WORKER_LOGGER.MaxFileSize={{ cluster_cfg['dshuttle']['log']['max_file_size'] }} +log4j.appender.WORKER_LOGGER.MaxBackupIndex={{ cluster_cfg['dshuttle']['log']['max_backup_index'] }} 
+log4j.appender.WORKER_LOGGER.layout=org.apache.log4j.PatternLayout +log4j.appender.WORKER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n + +# Remote appender for Job Master +log4j.appender.REMOTE_JOB_MASTER_LOGGER=org.apache.log4j.net.SocketAppender +log4j.appender.REMOTE_JOB_MASTER_LOGGER.Port=${alluxio.logserver.port} +log4j.appender.REMOTE_JOB_MASTER_LOGGER.RemoteHost=${alluxio.logserver.hostname} +log4j.appender.REMOTE_JOB_MASTER_LOGGER.ReconnectionDelay=10000 +log4j.appender.REMOTE_JOB_MASTER_LOGGER.filter.ID=alluxio.AlluxioRemoteLogFilter +log4j.appender.REMOTE_JOB_MASTER_LOGGER.filter.ID.ProcessType=JOB_MASTER +log4j.appender.REMOTE_JOB_MASTER_LOGGER.Threshold=WARN + +# Remote appender for Job Workers +log4j.appender.REMOTE_JOB_WORKER_LOGGER=org.apache.log4j.net.SocketAppender +log4j.appender.REMOTE_JOB_WORKER_LOGGER.Port=${alluxio.logserver.port} +log4j.appender.REMOTE_JOB_WORKER_LOGGER.RemoteHost=${alluxio.logserver.hostname} +log4j.appender.REMOTE_JOB_WORKER_LOGGER.ReconnectionDelay=10000 +log4j.appender.REMOTE_JOB_WORKER_LOGGER.filter.ID=alluxio.AlluxioRemoteLogFilter +log4j.appender.REMOTE_JOB_WORKER_LOGGER.filter.ID.ProcessType=JOB_WORKER +log4j.appender.REMOTE_JOB_WORKER_LOGGER.Threshold=WARN + +# Remote appender for Master +log4j.appender.REMOTE_MASTER_LOGGER=org.apache.log4j.net.SocketAppender +log4j.appender.REMOTE_MASTER_LOGGER.Port=${alluxio.logserver.port} +log4j.appender.REMOTE_MASTER_LOGGER.RemoteHost=${alluxio.logserver.hostname} +log4j.appender.REMOTE_MASTER_LOGGER.ReconnectionDelay=10000 +log4j.appender.REMOTE_MASTER_LOGGER.filter.ID=alluxio.AlluxioRemoteLogFilter +log4j.appender.REMOTE_MASTER_LOGGER.filter.ID.ProcessType=MASTER +log4j.appender.REMOTE_MASTER_LOGGER.Threshold=WARN + +# Remote appender for Secondary Master +log4j.appender.REMOTE_SECONDARY_MASTER_LOGGER=org.apache.log4j.net.SocketAppender +log4j.appender.REMOTE_SECONDARY_MASTER_LOGGER.Port=${alluxio.logserver.port} 
+log4j.appender.REMOTE_SECONDARY_MASTER_LOGGER.RemoteHost=${alluxio.logserver.hostname} +log4j.appender.REMOTE_SECONDARY_MASTER_LOGGER.ReconnectionDelay=10000 +log4j.appender.REMOTE_SECONDARY_MASTER_LOGGER.filter.ID=alluxio.AlluxioRemoteLogFilter +log4j.appender.REMOTE_SECONDARY_MASTER_LOGGER.filter.ID.ProcessType=SECONDARY_MASTER +log4j.appender.REMOTE_SECONDARY_MASTER_LOGGER.Threshold=WARN + +# Remote appender for Proxy +log4j.appender.REMOTE_PROXY_LOGGER=org.apache.log4j.net.SocketAppender +log4j.appender.REMOTE_PROXY_LOGGER.Port=${alluxio.logserver.port} +log4j.appender.REMOTE_PROXY_LOGGER.RemoteHost=${alluxio.logserver.hostname} +log4j.appender.REMOTE_PROXY_LOGGER.ReconnectionDelay=10000 +log4j.appender.REMOTE_PROXY_LOGGER.filter.ID=alluxio.AlluxioRemoteLogFilter +log4j.appender.REMOTE_PROXY_LOGGER.filter.ID.ProcessType=PROXY +log4j.appender.REMOTE_PROXY_LOGGER.Threshold=WARN + +# Remote appender for Workers +log4j.appender.REMOTE_WORKER_LOGGER=org.apache.log4j.net.SocketAppender +log4j.appender.REMOTE_WORKER_LOGGER.Port=${alluxio.logserver.port} +log4j.appender.REMOTE_WORKER_LOGGER.RemoteHost=${alluxio.logserver.hostname} +log4j.appender.REMOTE_WORKER_LOGGER.ReconnectionDelay=10000 +log4j.appender.REMOTE_WORKER_LOGGER.filter.ID=alluxio.AlluxioRemoteLogFilter +log4j.appender.REMOTE_WORKER_LOGGER.filter.ID.ProcessType=WORKER +log4j.appender.REMOTE_WORKER_LOGGER.Threshold=WARN + +# (Local) appender for log server itself +log4j.appender.LOGSERVER_LOGGER=org.apache.log4j.RollingFileAppender +log4j.appender.LOGSERVER_LOGGER.File=${alluxio.logs.dir}/logserver.log +log4j.appender.LOGSERVER_LOGGER.MaxFileSize={{ cluster_cfg['dshuttle']['log']['max_file_size'] }} +log4j.appender.LOGSERVER_LOGGER.MaxBackupIndex={{ cluster_cfg['dshuttle']['log']['max_backup_index'] }} +log4j.appender.LOGSERVER_LOGGER.layout=org.apache.log4j.PatternLayout +log4j.appender.LOGSERVER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n + +# (Local) appender for log server to log on 
behalf of log clients +# No need to configure file path because log server will dynamically +# figure out for each appender. +log4j.appender.LOGSERVER_CLIENT_LOGGER=org.apache.log4j.RollingFileAppender +log4j.appender.LOGSERVER_CLIENT_LOGGER.MaxFileSize={{ cluster_cfg['dshuttle']['log']['max_file_size'] }} +log4j.appender.LOGSERVER_CLIENT_LOGGER.MaxBackupIndex={{ cluster_cfg['dshuttle']['log']['max_backup_index'] }} +log4j.appender.LOGSERVER_CLIENT_LOGGER.layout=org.apache.log4j.PatternLayout +log4j.appender.LOGSERVER_CLIENT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n + +# Appender for User +log4j.appender.USER_LOGGER=org.apache.log4j.RollingFileAppender +log4j.appender.USER_LOGGER.File=${alluxio.user.logs.dir}/user_${user.name}.log +log4j.appender.USER_LOGGER.MaxFileSize={{ cluster_cfg['dshuttle']['log']['max_file_size'] }} +log4j.appender.USER_LOGGER.MaxBackupIndex={{ cluster_cfg['dshuttle']['log']['max_backup_index'] }} +log4j.appender.USER_LOGGER.layout=org.apache.log4j.PatternLayout +log4j.appender.USER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n + +# Appender for Fuse +log4j.appender.FUSE_LOGGER=org.apache.log4j.RollingFileAppender +log4j.appender.FUSE_LOGGER.File=${alluxio.logs.dir}/fuse.log +log4j.appender.FUSE_LOGGER.MaxFileSize={{ cluster_cfg['dshuttle']['log']['max_file_size'] }} +log4j.appender.FUSE_LOGGER.MaxBackupIndex={{ cluster_cfg['dshuttle']['log']['max_backup_index'] }} +log4j.appender.FUSE_LOGGER.layout=org.apache.log4j.PatternLayout +log4j.appender.FUSE_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n + +# Disable noisy DEBUG logs +log4j.logger.io.grpc.netty.NettyServerHandler=OFF diff --git a/src/dshuttle-master/deploy/service.yaml b/src/dshuttle-master/deploy/service.yaml new file mode 100644 index 0000000000..83bfd49194 --- /dev/null +++ b/src/dshuttle-master/deploy/service.yaml @@ -0,0 +1,38 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +cluster-type: + - k8s + +prerequisite: + - cluster-configuration + +template-list: + - dshuttle-service.yaml + - dshuttle-config.yaml + - dshuttle-master.yaml + - log4j.properties + - delete.yaml + - start.sh + - stop.sh + +start-script: start.sh +stop-script: stop.sh +delete-script: delete.sh + +deploy-rules: + - in: pai-master diff --git a/src/dshuttle-master/deploy/start.sh.template b/src/dshuttle-master/deploy/start.sh.template new file mode 100644 index 0000000000..26cc3543bc --- /dev/null +++ b/src/dshuttle-master/deploy/start.sh.template @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +{%- if cluster_cfg['cluster']['common']['dshuttle'] == 'true' %} +pushd $(dirname "$0") > /dev/null + +kubectl create configmap dshuttle-log-config --from-file=log4j.properties --dry-run -o yaml | kubectl apply --overwrite=true -f - || exit $? +kubectl create secret docker-registry dshuttle-regcred --docker-server=dshuttle.azurecr.io \ + --docker-username={{ cluster_cfg['dshuttle']['registry_username'] }} --docker-password={{ cluster_cfg['dshuttle']['registry_password'] }} --dry-run -o yaml \ + | kubectl apply --overwrite=true -f - || exit $? +kubectl apply --overwrite=true -f dshuttle-config.yaml || exit $? +kubectl apply --overwrite=true -f dshuttle-service.yaml || exit $? +kubectl apply --overwrite=true -f dshuttle-master.yaml || exit $? + +sleep 10 +# Wait until the service is ready. 
+PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v dshuttle-master || exit $? + +popd > /dev/null +{%- endif %} diff --git a/src/dshuttle-master/deploy/stop.sh.template b/src/dshuttle-master/deploy/stop.sh.template new file mode 100644 index 0000000000..f59d723c14 --- /dev/null +++ b/src/dshuttle-master/deploy/stop.sh.template @@ -0,0 +1,43 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +{%- if cluster_cfg['cluster']['common']['dshuttle'] == 'true' %} +pushd $(dirname "$0") > /dev/null + +PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.maintaintool.update_resource \ + --operation delete --resource statefulset --name dshuttle-master + +if kubectl get service | grep -q "dshuttle-service"; then + kubectl delete service dshuttle-service || exit $? 
+fi + +if kubectl get configmap | grep -q "dshuttle-config"; then + kubectl delete configmap dshuttle-config || exit $? +fi + +if kubectl get configmap | grep -q "dshuttle-log-config"; then + kubectl delete configmap dshuttle-log-config || exit $? +fi + +if kubectl get secret | grep -q "dshuttle-regcred"; then + kubectl delete secret dshuttle-regcred || exit $? +fi + +popd > /dev/null +{%- endif %} diff --git a/src/dshuttle-worker/deploy/delete.sh b/src/dshuttle-worker/deploy/delete.sh new file mode 100644 index 0000000000..e818bcf090 --- /dev/null +++ b/src/dshuttle-worker/deploy/delete.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +pushd $(dirname "$0") > /dev/null + +echo "Call stop to stop all dshuttle-worker pod first" +/bin/bash stop.sh || exit $? 
+ +echo "Create dshuttle-worker-delete configmap for deleting data on the host" +kubectl create configmap dshuttle-worker-delete --from-file=dshuttle-worker-delete/ --dry-run -o yaml | kubectl apply --overwrite=true -f - || exit $? + +echo "Create cleaner daemon" +kubectl apply --overwrite=true -f delete.yaml || exit $? +sleep 5 + +PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v delete-batch-job-dshuttle-worker || exit $? + +echo "Dshuttle worker clean job is done" +echo "Delete dshuttle worker cleaner daemon and configmap" +if kubectl get daemonset | grep -q "delete-batch-job-dshuttle-worker"; then + kubectl delete ds delete-batch-job-dshuttle-worker || exit $? +fi + +if kubectl get configmap | grep -q "dshuttle-worker-delete"; then + kubectl delete configmap dshuttle-worker-delete || exit $? +fi +sleep 5 + +popd > /dev/null \ No newline at end of file diff --git a/src/dshuttle-worker/deploy/delete.yaml.template b/src/dshuttle-worker/deploy/delete.yaml.template new file mode 100644 index 0000000000..5d5e0283eb --- /dev/null +++ b/src/dshuttle-worker/deploy/delete.yaml.template @@ -0,0 +1,76 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: delete-batch-job-dshuttle-worker +spec: + selector: + matchLabels: + app: delete-batch-job-dshuttle-worker + template: + metadata: + labels: + app: delete-batch-job-dshuttle-worker + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: pai-worker + operator: In + values: + - 'true' + hostNetwork: true + hostPID: false + containers: + - name: cleaning-one-shot + image: {{ cluster_cfg["cluster"]["docker-registry"]["prefix"] }}cleaning-image:{{ cluster_cfg["cluster"]["docker-registry"]["tag"] }} + imagePullPolicy: Always + volumeMounts: + - mountPath: /dshuttle-worker-delete + name: dshuttle-worker-delete-config + - mountPath: /mnt/log + name: log-path + - mountPath: /mnt/ssd + name: ssd + env: + - name: DELETE_CONFIG + value: dshuttle-worker-delete + - name: WORKER_CONFIG + value: delete-data.sh + readinessProbe: + exec: + command: + - cat + - /jobstatus/jobok + initialDelaySeconds: 5 + periodSeconds: 3 + imagePullSecrets: + - name: {{ cluster_cfg["cluster"]["docker-registry"]["secret-name"] }} + volumes: + - name: dshuttle-worker-delete-config + configMap: + name: dshuttle-worker-delete + - name: log-path + hostPath: + path: /var/log/dshuttle + - name: ssd + hostPath: + path: /mnt/dshuttle/ssd diff --git a/src/dshuttle-worker/deploy/dshuttle-worker-delete/delete-data.sh b/src/dshuttle-worker/deploy/dshuttle-worker-delete/delete-data.sh new file mode 100644 
index 0000000000..8907a58b93 --- /dev/null +++ b/src/dshuttle-worker/deploy/dshuttle-worker-delete/delete-data.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +echo "Clean the dshuttle-worker's data on the disk" + + +rm -rf /mnt/ssd/* 2>/dev/null +rm -rf /mnt/log/*worker.log* 2>/dev/null + + diff --git a/src/dshuttle-worker/deploy/dshuttle-worker.yaml.template b/src/dshuttle-worker/deploy/dshuttle-worker.yaml.template new file mode 100644 index 0000000000..51c195635f --- /dev/null +++ b/src/dshuttle-worker/deploy/dshuttle-worker.yaml.template @@ -0,0 +1,156 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved.
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: dshuttle-worker +spec: + selector: + matchLabels: + app: dshuttle-worker + template: + metadata: + labels: + app: dshuttle-worker + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + securityContext: + fsGroup: 1000 + nodeSelector: + initContainers: + - name: socket-chown + securityContext: + runAsUser: 0 + image: dshuttle.azurecr.io/dshuttle/dshuttle:25037dc + command: ["/bin/chown","-R", "1000:1000", "/opt/domain", "/opt/logs", "/mnt/ssd"] + volumeMounts: + - name: dshuttle-domain + mountPath: /opt/domain + - name: worker-logs + mountPath: /opt/logs + - name: ssd + mountPath: /mnt/ssd + containers: + - name: dshuttle-worker + image: dshuttle.azurecr.io/dshuttle/dshuttle:25037dc + imagePullPolicy: Always + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + {%- if cluster_cfg['cluster']['common']['qos-switch'] == "true" %} + resources: + limits: + memory: {{ cluster_cfg['dshuttle']['worker_limit_mem'] }} + requests: + memory: {{ cluster_cfg['dshuttle']['worker_request_mem'] }} + {%- endif %} + command: ["/entrypoint.sh"] + args: + - worker-only + - --no-format + env: + - name: ALLUXIO_WORKER_HOSTNAME + valueFrom: + fieldRef: + fieldPath: status.hostIP + envFrom: + - configMapRef: + name: dshuttle-config + ports: + - containerPort: {{ cluster_cfg['dshuttle']['worker_rpc_port'] }} + name: rpc + - containerPort: {{ cluster_cfg['dshuttle']['worker_web_port'] }} + name: web + volumeMounts: + - name: dshuttle-domain + mountPath: /opt/domain + - mountPath: /dev/shm + name: mem + - mountPath: /mnt/ssd + name: ssd + - name: worker-logs + mountPath: /opt/alluxio/logs + - name: dshuttle-log-config + mountPath: /opt/alluxio/conf/log4j.properties + subPath: log4j.properties + - name: dshuttle-job-worker + image: dshuttle.azurecr.io/dshuttle/dshuttle:25037dc + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + imagePullPolicy: Always + {%- if cluster_cfg['cluster']['common']['qos-switch'] == 
"true" %} + resources: + limits: + memory: {{ cluster_cfg['dshuttle']['job_worker_limit_mem'] }} + requests: + memory: {{ cluster_cfg['dshuttle']['job_worker_request_mem'] }} + {%- endif %} + command: ["/entrypoint.sh"] + args: + - job-worker + env: + - name: ALLUXIO_WORKER_HOSTNAME + valueFrom: + fieldRef: + fieldPath: status.hostIP + envFrom: + - configMapRef: + name: dshuttle-config + ports: + - containerPort: {{ cluster_cfg['dshuttle']['job_worker_rpc_port'] }} + name: job-rpc + - containerPort: {{ cluster_cfg['dshuttle']['job_worker_data_port'] }} + name: job-data + - containerPort: {{ cluster_cfg['dshuttle']['job_worker_web_port'] }} + name: job-web + volumeMounts: + - name: dshuttle-domain + mountPath: /opt/domain + - mountPath: /dev/shm + name: mem + - mountPath: /mnt/ssd + name: ssd + - name: worker-logs + mountPath: /opt/alluxio/logs + - name: dshuttle-log-config + mountPath: /opt/alluxio/conf/log4j.properties + subPath: log4j.properties + restartPolicy: Always + volumes: + - name: dshuttle-domain + hostPath: + path: /tmp/alluxio-domain + type: DirectoryOrCreate + - name: worker-logs + hostPath: + path: /var/log/dshuttle + type: DirectoryOrCreate + - name: ssd + hostPath: + path: /mnt/dshuttle/ssd + type: DirectoryOrCreate + - name: mem + emptyDir: + medium: "Memory" + - name: dshuttle-log-config + configMap: + name: dshuttle-log-config + imagePullSecrets: + - name: dshuttle-regcred diff --git a/src/dshuttle-worker/deploy/service.yaml b/src/dshuttle-worker/deploy/service.yaml new file mode 100644 index 0000000000..3539894a00 --- /dev/null +++ b/src/dshuttle-worker/deploy/service.yaml @@ -0,0 +1,36 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +cluster-type: + - k8s + +prerequisite: + - cluster-configuration + - dshuttle-master + +template-list: + - start.sh + - stop.sh + - dshuttle-worker.yaml + - delete.yaml + +start-script: start.sh +stop-script: stop.sh +delete-script: delete.sh + +deploy-rules: + - in: pai-worker diff --git a/src/dshuttle-worker/deploy/start.sh.template b/src/dshuttle-worker/deploy/start.sh.template new file mode 100644 index 0000000000..3cf2273455 --- /dev/null +++ b/src/dshuttle-worker/deploy/start.sh.template @@ -0,0 +1,30 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +{%- if cluster_cfg['cluster']['common']['dshuttle'] == 'true' %} +pushd $(dirname "$0") > /dev/null + +kubectl apply --overwrite=true -f dshuttle-worker.yaml || exit $? + +sleep 10 +# Wait until the service is ready. +PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v dshuttle-worker || exit $? + +popd > /dev/null +{%- endif %} diff --git a/src/dshuttle-worker/deploy/stop.sh.template b/src/dshuttle-worker/deploy/stop.sh.template new file mode 100644 index 0000000000..4532eeb475 --- /dev/null +++ b/src/dshuttle-worker/deploy/stop.sh.template @@ -0,0 +1,28 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +{%- if cluster_cfg['cluster']['common']['dshuttle'] == 'true' %} +pushd $(dirname "$0") > /dev/null + +if kubectl get daemonset | grep -q "dshuttle-worker"; then + kubectl delete daemonset dshuttle-worker || exit $? 
+fi + +popd > /dev/null +{%- endif %} \ No newline at end of file diff --git a/src/dshuttle/config/dshuttle.md b/src/dshuttle/config/dshuttle.md new file mode 100644 index 0000000000..c6bb8367c6 --- /dev/null +++ b/src/dshuttle/config/dshuttle.md @@ -0,0 +1,217 @@ +# Dshuttle section parser + +- [Dshuttle section parser](#dshuttle-section-parser) + - [Default configuration](#default-configuration) + - [How to configure Dshuttle section in service-configuration.yaml](#how-to-configure-dshuttle-section-in-service-configurationyaml) + - [Generated Configuration](#generated-configuration) + - [Table](#table) + - [Notice](#notice) + - [How to config DShuttle](#how-to-config-dshuttle) + +## Default configuration + +[Dshuttle default configuration](dshuttle.yaml) + +## How to configure Dshuttle section in service-configuration.yaml + +All config fields are optional. We recommend that the admin change the config according to the cluster hardware and job demands. + +- `worker_request_mem: 2G`: The memory requested by dshuttle-worker; it is the total memory request, including data storage and service requirements +- `worker_limit_mem: 8G`: The memory limit for dshuttle-worker. Should be equal to or larger than the request memory and worker_max_heap_size +- `worker_max_heap_size: 4G`: The max memory consumed by java heap +- `worker_rpc_port: 4999`: Dshuttle-worker rpc port +- `worker_web_port: 5000`: Dshuttle-worker web port +- `worker_active_processor_count: 5`: active processor count; the program uses this value to decide how many worker threads to create + +- `job_worker_max_heap_size: 2G`: The max memory consumed by java heap +- `job_worker_request_mem: 1G`: The memory requested by dshuttle-job-worker +- `job_worker_limit_mem: 3G`: The memory limit for dshuttle-job-worker. 
Should be equal to or larger than the request memory and job_worker_max_heap_size +- `job_worker_rpc_port: 5001`: Dshuttle-job-worker rpc port +- `job_worker_data_port: 5002`: Dshuttle job worker data port +- `job_worker_web_port: 5003`: Dshuttle job worker web port +- `job_worker_active_processor_count: 5`: active processor count; the program uses this value to decide how many worker threads to create + +- `master_max_heap_size: 12G`: The max memory consumed by java heap +- `master_limit_mem: 16G`: The memory limit for dshuttle-master +- `master_request_mem: 8G`: The memory requested by dshuttle-master +- `master_rpc_port: 30998`: Master rpc port +- `master_web_port: 30999`: Master web port +- `master_active_processor_count: 10`: active processor count; the program uses this value to decide how many worker threads to create + + +- `job_master_rpc_port: 31001`: Job master rpc port +- `job_master_web_port: 31002`: Job master web port +- `job_master_request_mem: 4G`: The memory requested by dshuttle-job-master +- `job_master_limit_mem: 8G`: The memory limit for dshuttle-job-master +- `job_master_max_heap_size: 6G`: The max memory consumed by java heap +- `job_master_active_processor_count: 10`: active processor count; the program uses this value to decide how many worker threads to create + + +- `fuse_max_direct_mem_size: 3G`: Max java direct memory consumed by fuse. It depends on the workload; if this size is too small, it will cause OOM. +- `fuse_max_heap_size: 5G`: The max memory consumed by java heap +- `fuse_active_processor_count: 10`: active processor count; the program uses this value to decide how many worker threads to create +- `csi_daemon_request_mem: 4G`: The memory requested by dshuttle-csi +- `csi_daemon_limit_mem: 10G`: The memory limit for dshuttle-csi + +- `tieredstores`: The tiered store config. The default is shown below; the admin can change the tiered store level and medium size according to the environment. 
The `watermark_high_ratio` sets the high watermark of the space in the storage tier. `watermark_low_ratio` sets the low watermark of the space in the storage tier. If a worker holds more data than `watermark_high_ratio`, data eviction starts and continues until the used space reaches `watermark_low_ratio`. + ```yaml + - level: 0 + mediumtype: MEM + alias: MEM + path: /dev/shm + quota: 1GB + watermark_low_ratio: 0.7 + watermark_high_ratio: 0.95 + - level: 1 + mediumtype: SSD + alias: SSD + path: /mnt/ssd + quota: 100G + watermark_low_ratio: 0.7 + watermark_high_ratio: 0.95 + ``` + +## Generated Configuration + +After parsing, if you have configured dshuttle, the model will look like: + +```yaml +dshuttle: + worker_request_mem: 2G + worker_limit_mem: 8G + worker_max_heap_size: 4G + worker_rpc_port: 4999 + worker_web_port: 5000 + worker_active_processor_count: 5 + + job_worker_max_heap_size: 2G + job_worker_request_mem: 1G + job_worker_limit_mem: 3G + job_worker_rpc_port: 5001 + job_worker_data_port: 5002 + job_worker_web_port: 5003 + job_worker_active_processor_count: 5 + + master_max_heap_size: 12G + master_limit_mem: 16G + master_request_mem: 8G + master_rpc_port: 30998 + master_web_port: 30999 + master_active_processor_count: 10 + + job_master_rpc_port: 31001 + job_master_web_port: 31002 + job_master_request_mem: 4G + job_master_limit_mem: 8G + job_master_max_heap_size: 6G + job_master_active_processor_count: 10 + + fuse_max_direct_mem_size: 3G + fuse_max_heap_size: 5G + fuse_active_processor_count: 10 + csi_daemon_request_mem: 4G + csi_daemon_limit_mem: 10G + + tieredstores: + - level: 0 + mediumtype: MEM,SSD + alias: MEM + path: /dev/shm,/mnt/ssd + quota: 1GB,100GB + watermark_low_ratio: 0.7 + watermark_high_ratio: 0.95 +``` + +## Table + +| Data in Configuration File | Data in Cluster Object Model | Data in Jinja2 Template | Data type | 
+|----------------------------------------|-------------------------------------------------|----------------------------------------------------------|-----------| +| dshuttle.worker_request_mem | com["dshuttle"]["worker_request_mem"] | cluster_cfg["dshuttle"]["worker_request_mem"] | String | +| dshuttle.worker_limit_mem | com["dshuttle"]["worker_limit_mem"] | cluster_cfg["dshuttle"]["worker_limit_mem"] | String | +| dshuttle.worker_max_heap_size | com["dshuttle"]["worker_max_heap_size"] | cluster_cfg["dshuttle"]["worker_max_heap_size"] | String | +| dshuttle.worker_rpc_port | com["dshuttle"]["worker_rpc_port"] | cluster_cfg["dshuttle"]["worker_rpc_port"] | Number | +| dshuttle.worker_web_port | com["dshuttle"]["worker_web_port"] | cluster_cfg["dshuttle"]["worker_web_port"] | Number | +| dshuttle.worker_active_processor_count | com["dshuttle"]["worker_active_processor_count"]| cluster_cfg["dshuttle"]["worker_active_processor_count"] | Number | +| dshuttle.job_worker_request_mem | com["dshuttle"]["job_worker_request_mem"] | cluster_cfg["dshuttle"]["job_worker_request_mem"] | String | +| dshuttle.job_worker_limit_mem | com["dshuttle"]["job_worker_limit_mem"] | cluster_cfg["dshuttle"]["job_worker_limit_mem"] | String | +| dshuttle.job_worker_max_heap_size | com["dshuttle"]["job_worker_max_heap_size"] | cluster_cfg["dshuttle"]["job_worker_max_heap_size"] | String | +| dshuttle.job_worker_rpc_port | com["dshuttle"]["job_worker_rpc_port"] | cluster_cfg["dshuttle"]["job_worker_rpc_port"] | Number | +| dshuttle.job_worker_data_port | com["dshuttle"]["job_worker_data_port"] | cluster_cfg["dshuttle"]["job_worker_data_port"] | Number | +| dshuttle.job_worker_web_port | com["dshuttle"]["job_worker_web_port"] | cluster_cfg["dshuttle"]["job_worker_web_port"] | Number | +| dshuttle.job_worker_active_processor_count | com["dshuttle"]["job_worker_active_processor_count"]| cluster_cfg["dshuttle"]["job_worker_active_processor_count"] | Number | +| dshuttle.master_request_mem | 
com["dshuttle"]["master_request_mem"] | cluster_cfg["dshuttle"]["master_request_mem"] | String | +| dshuttle.master_limit_mem | com["dshuttle"]["master_limit_mem"] | cluster_cfg["dshuttle"]["master_limit_mem"] | String | +| dshuttle.master_max_heap_size | com["dshuttle"]["master_max_heap_size"] | cluster_cfg["dshuttle"]["master_max_heap_size"] | String | +| dshuttle.master_rpc_port | com["dshuttle"]["master_rpc_port"] | cluster_cfg["dshuttle"]["master_rpc_port"] | Number | +| dshuttle.master_web_port | com["dshuttle"]["master_web_port"] | cluster_cfg["dshuttle"]["master_web_port"] | Number | +| dshuttle.master_active_processor_count | com["dshuttle"]["master_active_processor_count"] | cluster_cfg["dshuttle"]["master_active_processor_count"] | Number | +| dshuttle.job_master_rpc_port | com["dshuttle"]["job_master_rpc_port"] | cluster_cfg["dshuttle"]["job_master_rpc_port"] | Number | +| dshuttle.job_master_web_port | com["dshuttle"]["job_master_web_port"] | cluster_cfg["dshuttle"]["job_master_web_port"] | Number | +| dshuttle.job_master_request_mem | com["dshuttle"]["job_master_request_mem"] | cluster_cfg["dshuttle"]["job_master_request_mem"] | String | +| dshuttle.job_master_limit_mem | com["dshuttle"]["job_master_limit_mem"] | cluster_cfg["dshuttle"]["job_master_limit_mem"] | String | +| dshuttle.job_master_max_heap_size | com["dshuttle"]["job_master_max_heap_size"] | cluster_cfg["dshuttle"]["job_master_max_heap_size"] | String | +| dshuttle.job_master_active_processor_count | com["dshuttle"]["job_master_active_processor_count"]| cluster_cfg["dshuttle"]["job_master_active_processor_count"] | Number | +| dshuttle.tieredstores | com["dshuttle"]["tieredstores"] | cluster_cfg["dshuttle"]["tieredstores"] | Object | +| dshuttle.fuse_max_direct_mem_size | com["dshuttle"]["fuse_max_direct_mem_size"] | cluster_cfg["dshuttle"]["fuse_max_direct_mem_size"] | String | +| dshuttle.fuse_max_heap_size | com["dshuttle"]["fuse_max_heap_size"] | cluster_cfg["dshuttle"]["fuse_max_heap_size"] | String 
| +| dshuttle.fuse_active_processor_count | com["dshuttle"]["fuse_active_processor_count"] | cluster_cfg["dshuttle"]["fuse_active_processor_count"] | Number | +| dshuttle.csi_daemon_request_mem | com["dshuttle"]["csi_daemon_request_mem"] | cluster_cfg["dshuttle"]["csi_daemon_request_mem"] | String | +| dshuttle.csi_daemon_limit_mem | com["dshuttle"]["csi_daemon_limit_mem"] | cluster_cfg["dshuttle"]["csi_daemon_limit_mem"] | String | + +## Notice +For `tieredstores`, the valid medium types are `MEM` and `SSD`, and the valid paths are `/dev/shm` and `/mnt/ssd`. +If you set the medium type to `MEM`, please set the path to `/dev/shm`. If you set the medium type to `SSD`, please set the path to `/mnt/ssd`. +Using any other `mediumtype` or `path` will cause errors. + +## How to config DShuttle +1. Hived configuration changes: + If you use the Hived scheduler as your default scheduler, you need to recalculate the node resources, since DShuttle requests some resources on each worker node. With the default configuration, + DShuttle requests 7GB of memory on each worker node (3GB for the worker and 4GB for the CSI client). So when you calculate the SKU for worker nodes, you need to reserve this 7GB of memory for DShuttle. +2. Using DShuttle as a PAI storage: + 1. Please set `dshuttle: 'true'` in `services-configuration.yaml` under the `cluster:common` field. + 2. DShuttle only supports AzureBlob in the current version. To use DShuttle, please make sure that you already have an AzureBlob storage and that it is mounted in DShuttle. For details, please refer to the [DShuttle Doc](https://github.com/microsoft/DShuttle/blob/dev/deploy/readme.md) + 3. Create a PV & PVC for DShuttle storage. The `dshuttlePath` in the PV should be the path where the AzureBlob storage is mounted in DShuttle. A sample DShuttle PV & PVC is shown below: 
+ ```yaml + apiVersion: v1 + kind: PersistentVolume + metadata: + name: dshuttle-pv + spec: + accessModes: + - ReadWriteMany + capacity: + storage: 100Gi + csi: + driver: dshuttle + volumeHandle: dshuttle + volumeAttributes: + dshuttlePath: /azurene + mountOptions: + - kernel_cache + - allow_other + - entry_timeout=36000 + - attr_timeout=36000 + - max_readahead=0 + persistentVolumeReclaimPolicy: Retain + volumeMode: Filesystem + --- + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: dshuttle-pvc + namespace: default + spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 100Gi + selector: + matchExpressions: + - key: name + operator: In + values: + - dshuttle-pv + volumeMode: Filesystem + volumeName: dshuttle-pv + ``` \ No newline at end of file diff --git a/src/dshuttle/config/dshuttle.py b/src/dshuttle/config/dshuttle.py new file mode 100644 index 0000000000..528537a30d --- /dev/null +++ b/src/dshuttle/config/dshuttle.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import copy +import logging + + +class Dshuttle(object): + def __init__(self, cluster_conf, service_conf, default_service_conf): + self.cluster_conf = cluster_conf + self.service_conf = service_conf + self.default_service_conf = default_service_conf + + def validation_pre(self): + return True, None + + def get_master_ip(self): + for host_conf in self.cluster_conf["machine-list"]: + if "pai-master" in host_conf and host_conf["pai-master"] == "true": + return host_conf["hostip"] + + def run(self): + result = copy.deepcopy(self.default_service_conf) + result.update(self.service_conf) + result["uri"] = "http://{0}:{1}".format(self.get_master_ip(), result["master_web_port"]) + return result + + def validation_post(self, conf): + return True, None diff --git a/src/dshuttle/config/dshuttle.yaml b/src/dshuttle/config/dshuttle.yaml new file mode 100644 index 0000000000..0b01bdf60c --- /dev/null +++ b/src/dshuttle/config/dshuttle.yaml @@ -0,0 +1,70 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +service_type: "k8s" + +worker_request_mem: 2G +worker_limit_mem: 8G +worker_max_heap_size: 4G +worker_rpc_port: 4999 +worker_web_port: 5000 +worker_active_processor_count: 5 + +job_worker_max_heap_size: 2G +job_worker_request_mem: 1G +job_worker_limit_mem: 3G +job_worker_rpc_port: 5001 +job_worker_data_port: 5002 +job_worker_web_port: 5003 +job_worker_active_processor_count: 5 + +master_max_heap_size: 12G +master_limit_mem: 16G +master_request_mem: 8G +master_rpc_port: 30998 +master_web_port: 30999 +master_active_processor_count: 10 + +job_master_rpc_port: 31001 +job_master_web_port: 31002 +job_master_request_mem: 4G +job_master_limit_mem: 8G +job_master_max_heap_size: 6G +job_master_active_processor_count: 10 + +fuse_max_direct_mem_size: 3G +fuse_max_heap_size: 5G +fuse_active_processor_count: 10 +csi_daemon_request_mem: 4G +csi_daemon_limit_mem: 10G + +tieredstores: +- level: 0 + mediumtype: MEM,SSD + alias: MEM + path: /dev/shm,/mnt/ssd + quota: 1GB,100GB + watermark_low_ratio: 0.7 + watermark_high_ratio: 0.95 + +log: + level: INFO + max_file_size: 50MB + max_backup_index: 20 + +registry_username: username +registry_password: password \ No newline at end of file diff --git a/src/pylon/deploy/pylon-config/location.conf.template b/src/pylon/deploy/pylon-config/location.conf.template index 907289e1f9..b1e303203c 100644 --- a/src/pylon/deploy/pylon-config/location.conf.template +++ b/src/pylon/deploy/pylon-config/location.conf.template @@ -253,6 +253,37 @@ location ~ ^/grafana(.*)$$ { 
'appSubUrl+"/grafana/"'; } +{%- if DSHUTTLE_URI %} +# Dshuttle server +location ~ ^/dshuttle$ { + # Add '/' to the end of the URL, otherwise there will be a 404 error. + return 301 $scheme://$http_host$request_uri/; +} +location ~ ^/dshuttle/(.*)$ { + proxy_pass {{DSHUTTLE_URI}}/$1$is_args$args; + sub_filter_types *; + sub_filter_once off; + sub_filter + 'href="/' + 'href="/dshuttle/'; + sub_filter + '