From 60a444f56d2ec5655c33038aabbb96639813f1f7 Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Thu, 7 Nov 2019 20:36:20 +0800 Subject: [PATCH 01/26] add loop storage --- .../build/internal-storage-create.dockerfile | 24 ++++++++ .../build/internal-storage-delete.dockerfile | 24 ++++++++ .../config/internal-storage.yaml | 4 ++ .../config/internal_storage.py | 43 ++++++++++++++ .../deploy/create.yaml.template | 57 +++++++++++++++++++ src/internal-storage/deploy/delete.sh | 29 ++++++++++ .../deploy/delete.yaml.template | 54 ++++++++++++++++++ src/internal-storage/deploy/refresh.sh | 26 +++++++++ src/internal-storage/deploy/service.yaml | 34 +++++++++++ src/internal-storage/deploy/start.sh | 27 +++++++++ src/internal-storage/deploy/stop.sh | 20 +++++++ src/internal-storage/src/create.sh | 42 ++++++++++++++ src/internal-storage/src/delete.sh | 33 +++++++++++ 13 files changed, 417 insertions(+) create mode 100644 src/internal-storage/build/internal-storage-create.dockerfile create mode 100644 src/internal-storage/build/internal-storage-delete.dockerfile create mode 100644 src/internal-storage/config/internal-storage.yaml create mode 100644 src/internal-storage/config/internal_storage.py create mode 100644 src/internal-storage/deploy/create.yaml.template create mode 100644 src/internal-storage/deploy/delete.sh create mode 100644 src/internal-storage/deploy/delete.yaml.template create mode 100644 src/internal-storage/deploy/refresh.sh create mode 100644 src/internal-storage/deploy/service.yaml create mode 100644 src/internal-storage/deploy/start.sh create mode 100644 src/internal-storage/deploy/stop.sh create mode 100644 src/internal-storage/src/create.sh create mode 100644 src/internal-storage/src/delete.sh diff --git a/src/internal-storage/build/internal-storage-create.dockerfile b/src/internal-storage/build/internal-storage-create.dockerfile new file mode 100644 index 0000000000..f4c5f5cfca --- /dev/null +++ 
b/src/internal-storage/build/internal-storage-create.dockerfile @@ -0,0 +1,24 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +FROM ubuntu:16.04 + +RUN mkdir -p /init_scripts + +COPY src/create.sh /init_scripts + +ENTRYPOINT /bin/bash /init_scripts/create.sh diff --git a/src/internal-storage/build/internal-storage-delete.dockerfile b/src/internal-storage/build/internal-storage-delete.dockerfile new file mode 100644 index 0000000000..76f48e4e07 --- /dev/null +++ b/src/internal-storage/build/internal-storage-delete.dockerfile @@ -0,0 +1,24 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +FROM ubuntu:16.04 + +RUN mkdir -p /init_scripts + +COPY src/delete.sh /init_scripts + +ENTRYPOINT /bin/bash /init_scripts/delete.sh diff --git a/src/internal-storage/config/internal-storage.yaml b/src/internal-storage/config/internal-storage.yaml new file mode 100644 index 0000000000..a647018409 --- /dev/null +++ b/src/internal-storage/config/internal-storage.yaml @@ -0,0 +1,4 @@ +enable: true +type: hostPath +path: /paiInternal +quotaGB: 10 \ No newline at end of file diff --git a/src/internal-storage/config/internal_storage.py b/src/internal-storage/config/internal_storage.py new file mode 100644 index 0000000000..b64ebdd9b6 --- /dev/null +++ b/src/internal-storage/config/internal_storage.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +import copy +import logging + + +class InternalStorage(object): + def __init__(self, cluster_conf, service_conf, default_service_conf): + self.cluster_conf = cluster_conf + self.service_conf = self.merge_service_configuration(service_conf, default_service_conf) + self.logger = logging.getLogger(__name__) + + @staticmethod + def merge_service_configuration(overwrite_srv_cfg, default_srv_cfg): + if overwrite_srv_cfg is None: + return default_srv_cfg + srv_cfg = default_srv_cfg.copy() + for k in overwrite_srv_cfg: + srv_cfg[k] = overwrite_srv_cfg[k] + return srv_cfg + + def validation_pre(self): + type_ = self.service_conf.get('type', '') + if type_ == 'hostPath': + machine_list = self.cluster_conf['machine-list'] + if len([host for host in machine_list if host.get('pai-master') == 'true']) != 1: + return False, '1 and only 1 "pai-master=true" machine is required to deploy the rest server' + quotaGB = int(self.cluster_conf['quotaGB']) + assert quotaGB >= 1 + return True, None + else: + return False, 'Unknown internal storage type {}'.format(type_) + + def run(self): + result = copy.deepcopy(self.service_conf) + if result['enable']: + machine_list = self.cluster_conf['machine-list'] + master_ip = [host['hostip'] for host in machine_list 
if host.get('pai-master') == 'true'][0] + result['masterIp'] = master_ip + result['quotaGB'] = int(result['quotaGB']) + return result + + def validation_post(self, conf): + return True, None diff --git a/src/internal-storage/deploy/create.yaml.template b/src/internal-storage/deploy/create.yaml.template new file mode 100644 index 0000000000..430fcf0b52 --- /dev/null +++ b/src/internal-storage/deploy/create.yaml.template @@ -0,0 +1,57 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: internal-storage-create-ds +spec: + selector: + matchLabels: + app: internal-storage-create + template: + metadata: + name: internal-storage-create + labels: + app: internal-storage-create + spec: + hostNetwork: false + containers: + - name: internal-storage-create + image: {{ cluster_cfg["cluster"]["docker-registry"]["prefix"] }}internal-storage-create:{{ cluster_cfg["cluster"]["docker-registry"]["tag"] }} + securityContext: + privileged: true + imagePullPolicy: Always + readinessProbe: + exec: + command: + - ls + - /paiInternal/storage/READY + initialDelaySeconds: 10 + periodSeconds: 3 + env: + - name: QUOTA_BYTES + value: '{{ cluster_cfg["internal-storage"]["quotaGB"] }}' + volumeMounts: + - name: internal-data-dir + mountPath: /paiInternal + volumes: + - name: internal-data-dir + hostPath: + path: {{ cluster_cfg["internal-storage"]["path"] }} + imagePullSecrets: + - name: {{ cluster_cfg["cluster"]["docker-registry"]["secret-name"] }} diff --git a/src/internal-storage/deploy/delete.sh b/src/internal-storage/deploy/delete.sh new file mode 100644 index 0000000000..12bb869010 --- /dev/null +++ b/src/internal-storage/deploy/delete.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +pushd $(dirname "$0") > /dev/null + +kubectl apply --overwrite=true -f delete.yaml || exit $? + +# Wait until the service is ready. +PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v internal-storage-delete || exit $? + +kubectl delete --ignore-not-found --now "daemonset/internal-storage-delete-ds" + +popd > /dev/null diff --git a/src/internal-storage/deploy/delete.yaml.template b/src/internal-storage/deploy/delete.yaml.template new file mode 100644 index 0000000000..e06f2f8ef1 --- /dev/null +++ b/src/internal-storage/deploy/delete.yaml.template @@ -0,0 +1,54 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: internal-storage-delete-ds +spec: + selector: + matchLabels: + app: internal-storage-delete + template: + metadata: + name: internal-storage-delete + labels: + app: internal-storage-delete + spec: + hostNetwork: false + containers: + - name: internal-storage-delete + image: {{ cluster_cfg["cluster"]["docker-registry"]["prefix"] }}internal-storage-delete:{{ cluster_cfg["cluster"]["docker-registry"]["tag"] }} + securityContext: + privileged: true + imagePullPolicy: Always + readinessProbe: + exec: + command: + - ls + - /DELETED + initialDelaySeconds: 10 + periodSeconds: 3 + volumeMounts: + - name: internal-data-dir + mountPath: /paiInternal + volumes: + - name: internal-data-dir + hostPath: + path: {{ cluster_cfg["internal-storage"]["path"] }} + imagePullSecrets: + - name: {{ cluster_cfg["cluster"]["docker-registry"]["secret-name"] }} diff --git a/src/internal-storage/deploy/refresh.sh b/src/internal-storage/deploy/refresh.sh new file mode 100644 index 0000000000..3b6d2ca17d --- /dev/null +++ b/src/internal-storage/deploy/refresh.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +pushd $(dirname "$0") > /dev/null + +bash stop.sh +bash start.sh + +popd > /dev/null diff --git a/src/internal-storage/deploy/service.yaml b/src/internal-storage/deploy/service.yaml new file mode 100644 index 0000000000..283c7b0cc0 --- /dev/null +++ b/src/internal-storage/deploy/service.yaml @@ -0,0 +1,34 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +cluster-type: + - k8s + - yarn + + +template-list: + - create.yaml + - delete.yaml + +start-script: start.sh +stop-script: stop.sh +delete-script: delete.sh +refresh-script: refresh.sh + + +deploy-rules: + - in: pai-master diff --git a/src/internal-storage/deploy/start.sh b/src/internal-storage/deploy/start.sh new file mode 100644 index 0000000000..3cba5f1261 --- /dev/null +++ b/src/internal-storage/deploy/start.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +pushd $(dirname "$0") > /dev/null + +kubectl apply --overwrite=true -f create.yaml || exit $? + +# Wait until the service is ready. +PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v internal-storage-create || exit $? + +popd > /dev/null diff --git a/src/internal-storage/deploy/stop.sh b/src/internal-storage/deploy/stop.sh new file mode 100644 index 0000000000..7fd5885c9e --- /dev/null +++ b/src/internal-storage/deploy/stop.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +kubectl delete --ignore-not-found --now "daemonset/internal-storage-create-ds" diff --git a/src/internal-storage/src/create.sh b/src/internal-storage/src/create.sh new file mode 100644 index 0000000000..709f0c8656 --- /dev/null +++ b/src/internal-storage/src/create.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +cd /paiInternal/ + +if [ -f storage.ext4 ]; then + echo "Skip storage.ext4 creation." +else + echo "Creating storage.ext4 of ${QUOTA_GB}G, please wait..." + fallocate -l ${QUOTA_GB}G storage.ext4 + /sbin/mkfs -t ext4 -q storage.ext4 -F +fi + +ls /paiInternal/storage/READY &> /dev/null + +if [ $? -ne 0 ]; then + if [ -d storage ]; then + umount storage + else + mkdir -p storage + fi + mount -o loop,rw,usrquota,grpquota storage.ext4 storage + touch storage/READY +fi + +sleep infinity + diff --git a/src/internal-storage/src/delete.sh b/src/internal-storage/src/delete.sh new file mode 100644 index 0000000000..632d7344c9 --- /dev/null +++ b/src/internal-storage/src/delete.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +cd /paiInternal/ + +if [ -d storage ]; then + umount storage + rm -rf storage +fi + +if [ -f storage.ext4 ]; then + rm -f storage.ext4; +fi + +touch /DELETED + +sleep infinity \ No newline at end of file From c26090dd6bb3e812927ed6473044737bcb1aa03f Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Thu, 7 Nov 2019 20:40:22 +0800 Subject: [PATCH 02/26] fix --- src/internal-storage/config/internal_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/internal-storage/config/internal_storage.py b/src/internal-storage/config/internal_storage.py index b64ebdd9b6..be995aff02 100644 --- a/src/internal-storage/config/internal_storage.py +++ b/src/internal-storage/config/internal_storage.py @@ -24,7 +24,7 @@ def validation_pre(self): machine_list = self.cluster_conf['machine-list'] if len([host for host in machine_list if host.get('pai-master') == 'true']) != 1: return False, '1 and only 1 "pai-master=true" machine is required to deploy the rest server' - quotaGB = int(self.cluster_conf['quotaGB']) + quotaGB = int(self.service_conf['quotaGB']) assert quotaGB >= 1 return True, None else: From 97c36eb47adc84c646174ae4e60818ed698812d5 Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Thu, 7 Nov 2019 20:44:32 +0800 Subject: [PATCH 03/26] fix --- src/internal-storage/deploy/create.yaml.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/internal-storage/deploy/create.yaml.template b/src/internal-storage/deploy/create.yaml.template index 430fcf0b52..a765e7ba82 100644 --- a/src/internal-storage/deploy/create.yaml.template +++ b/src/internal-storage/deploy/create.yaml.template @@ -44,7 +44,7 @@ spec: initialDelaySeconds: 10 periodSeconds: 3 env: - - name: QUOTA_BYTES + - name: QUOTA_GB value: '{{ cluster_cfg["internal-storage"]["quotaGB"] }}' volumeMounts: - name: internal-data-dir From a401f485d5f6083968c96007de7430bcd0beac30 Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Thu, 7 Nov 
2019 21:16:55 +0800 Subject: [PATCH 04/26] fix mountPropagation --- src/internal-storage/deploy/create.yaml.template | 1 + src/internal-storage/deploy/delete.yaml.template | 1 + 2 files changed, 2 insertions(+) diff --git a/src/internal-storage/deploy/create.yaml.template b/src/internal-storage/deploy/create.yaml.template index a765e7ba82..e66de38111 100644 --- a/src/internal-storage/deploy/create.yaml.template +++ b/src/internal-storage/deploy/create.yaml.template @@ -49,6 +49,7 @@ spec: volumeMounts: - name: internal-data-dir mountPath: /paiInternal + mountPropagation: Bidirectional volumes: - name: internal-data-dir hostPath: diff --git a/src/internal-storage/deploy/delete.yaml.template b/src/internal-storage/deploy/delete.yaml.template index e06f2f8ef1..b9f7067c0f 100644 --- a/src/internal-storage/deploy/delete.yaml.template +++ b/src/internal-storage/deploy/delete.yaml.template @@ -46,6 +46,7 @@ spec: volumeMounts: - name: internal-data-dir mountPath: /paiInternal + mountPropagation: Bidirectional volumes: - name: internal-data-dir hostPath: From d9f6faca60dcc18eeec2a181a3e1cbd0ba36e9ef Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Fri, 8 Nov 2019 10:40:18 +0800 Subject: [PATCH 05/26] add postgresql --- src/postgresql/README.md | 28 +++++++++ src/postgresql/build/postgresql.dockerfile | 22 +++++++ src/postgresql/config/postgresql.py | 38 ++++++++++++ src/postgresql/config/postgresql.yaml | 4 ++ src/postgresql/deploy/delete.sh | 26 ++++++++ .../deploy/postgresql.yaml.template | 59 +++++++++++++++++++ src/postgresql/deploy/refresh.sh | 26 ++++++++ src/postgresql/deploy/service.yaml | 35 +++++++++++ src/postgresql/deploy/start.sh | 27 +++++++++ src/postgresql/deploy/stop.sh | 20 +++++++ src/postgresql/src/init_table.sql | 2 + 11 files changed, 287 insertions(+) create mode 100644 src/postgresql/README.md create mode 100644 src/postgresql/build/postgresql.dockerfile create mode 100644 src/postgresql/config/postgresql.py create mode 100644 
src/postgresql/config/postgresql.yaml create mode 100644 src/postgresql/deploy/delete.sh create mode 100644 src/postgresql/deploy/postgresql.yaml.template create mode 100644 src/postgresql/deploy/refresh.sh create mode 100644 src/postgresql/deploy/service.yaml create mode 100644 src/postgresql/deploy/start.sh create mode 100644 src/postgresql/deploy/stop.sh create mode 100644 src/postgresql/src/init_table.sql diff --git a/src/postgresql/README.md b/src/postgresql/README.md new file mode 100644 index 0000000000..0e00bf5925 --- /dev/null +++ b/src/postgresql/README.md @@ -0,0 +1,28 @@ +## Postgresql + +Postgresql is an internal service for structured information persistence. By default, the service uses the folder `/paiInternal` on the master node for data storage. The actual PostgreSQL data directory is `/paiInternal/storage/pgdata`. The default service configurations are as follows: + +```yaml +postgresql: + user: root + passwd: rootpass + port: 5432 + db: openpai +``` + +One can override these settings by editing `services-configuration.yaml`. + +### Table Initialization + +The first time the service launches, it executes `src/init_table.sql` to initialize the table structure. The initialization is skipped if the service detects existing data. If you want to re-trigger it, please remove `/paiInternal/storage/pgdata` and restart the service manually. + +### How to Access the Database + +The database connection string is written to the cluster configuration object in `config/postgresql.py`. One can use `cluster_cfg['postgresql']['connectionStr']` to retrieve it in any template file. 
+ +In particular, the connection string is exposed as an environment variable in `rest-server`: + +```bash +# in rest-server pod +SQL_CONNECTION_STR=postgresql://root:rootpass@<master-ip>:5432/openpai +``` diff --git a/src/postgresql/build/postgresql.dockerfile b/src/postgresql/build/postgresql.dockerfile new file mode 100644 index 0000000000..372becc9e5 --- /dev/null +++ b/src/postgresql/build/postgresql.dockerfile @@ -0,0 +1,22 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +FROM postgres:12.0 + +RUN mkdir -p /docker-entrypoint-initdb.d + +COPY src/init_table.sql /docker-entrypoint-initdb.d diff --git a/src/postgresql/config/postgresql.py b/src/postgresql/config/postgresql.py new file mode 100644 index 0000000000..996ad97a0e --- /dev/null +++ b/src/postgresql/config/postgresql.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +import copy +import logging + + +class Postgresql(object): + def __init__(self, cluster_conf, service_conf, default_service_conf): + self.cluster_conf = cluster_conf + self.service_conf = self.merge_service_configuration(service_conf, default_service_conf) + self.logger = logging.getLogger(__name__) + + @staticmethod + def merge_service_configuration(overwrite_srv_cfg, default_srv_cfg): + if overwrite_srv_cfg is None: + return default_srv_cfg + srv_cfg = default_srv_cfg.copy() + for k in overwrite_srv_cfg: + srv_cfg[k] = overwrite_srv_cfg[k] + return srv_cfg + + def validation_pre(self): + machine_list = self.cluster_conf['machine-list'] + if len([host for host in machine_list if host.get('pai-master') == 'true']) != 1: + return False, '1 and only 1 "pai-master=true" machine is required to deploy the postgresql service' + return True, None + + def run(self): + result = copy.deepcopy(self.service_conf) + machine_list = self.cluster_conf['machine-list'] + master_ip = [host['hostip'] for host in machine_list if host.get('pai-master') == 'true'][0] + result['host'] = master_ip + result['connectionStr'] = 'postgresql://{}:{}@{}:{}/{}'.format( + result['user'], result['passwd'], result['host'], result['port'], result['db']) + result['configured'] = True + return result + + def validation_post(self, conf): + return True, None \ No newline at end of file diff --git a/src/postgresql/config/postgresql.yaml b/src/postgresql/config/postgresql.yaml new file mode 100644 index 0000000000..c4b865d5dd --- /dev/null +++ b/src/postgresql/config/postgresql.yaml @@ -0,0 +1,4 @@ +user: root +passwd: rootpass +db: openpai +port: 5432 \ No 
newline at end of file diff --git a/src/postgresql/deploy/delete.sh b/src/postgresql/deploy/delete.sh new file mode 100644 index 0000000000..1b952d11cc --- /dev/null +++ b/src/postgresql/deploy/delete.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +pushd $(dirname "$0") > /dev/null + +echo "Call stop script to stop all service first" +/bin/bash stop.sh || exit $? + + +popd > /dev/null \ No newline at end of file diff --git a/src/postgresql/deploy/postgresql.yaml.template b/src/postgresql/deploy/postgresql.yaml.template new file mode 100644 index 0000000000..65ece3f880 --- /dev/null +++ b/src/postgresql/deploy/postgresql.yaml.template @@ -0,0 +1,59 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: postgresql-ds +spec: + selector: + matchLabels: + app: postgresql + template: + metadata: + name: postgresql + labels: + app: postgresql + spec: + hostNetwork: false + containers: + - name: postgresql + image: {{ cluster_cfg["cluster"]["docker-registry"]["prefix"] }}postgresql:{{ cluster_cfg["cluster"]["docker-registry"]["tag"] }} + imagePullPolicy: Always + env: + - name: POSTGRES_USER + value: {{ cluster_cfg["postgresql"]["user"] }} + - name: POSTGRES_PASSWORD + value: {{ cluster_cfg["postgresql"]["passwd"] }} + - name: POSTGRES_DB + value: {{ cluster_cfg["postgresql"]["db"] }} + - name: PGDATA + value: /var/lib/postgresql/data/pgdata + volumeMounts: + - name: internal-data-dir + mountPath: /var/lib/postgresql/data/ + mountPropagation: HostToContainer + ports: + - containerPort: 5432 + hostPort: {{ cluster_cfg["postgresql"]["port"] }} + name: postgresql-port + volumes: + - name: internal-data-dir + hostPath: + path: /paiInternal/storage + imagePullSecrets: + - name: {{ cluster_cfg["cluster"]["docker-registry"]["secret-name"] }} diff --git a/src/postgresql/deploy/refresh.sh b/src/postgresql/deploy/refresh.sh new file mode 100644 index 0000000000..3b6d2ca17d --- /dev/null +++ b/src/postgresql/deploy/refresh.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +pushd $(dirname "$0") > /dev/null + +bash stop.sh +bash start.sh + +popd > /dev/null diff --git a/src/postgresql/deploy/service.yaml b/src/postgresql/deploy/service.yaml new file mode 100644 index 0000000000..437aca27fd --- /dev/null +++ b/src/postgresql/deploy/service.yaml @@ -0,0 +1,35 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +cluster-type: + - k8s + - yarn + +prerequisite: + - internal-storage + +template-list: + - postgresql.yaml + +start-script: start.sh +stop-script: stop.sh +delete-script: delete.sh +refresh-script: refresh.sh + + +deploy-rules: + - in: pai-master diff --git a/src/postgresql/deploy/start.sh b/src/postgresql/deploy/start.sh new file mode 100644 index 0000000000..04f0c52f93 --- /dev/null +++ b/src/postgresql/deploy/start.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +pushd $(dirname "$0") > /dev/null + +kubectl apply --overwrite=true -f postgresql.yaml || exit $? + +# Wait until the service is ready. +PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v postgresql || exit $? 
+ +popd > /dev/null diff --git a/src/postgresql/deploy/stop.sh b/src/postgresql/deploy/stop.sh new file mode 100644 index 0000000000..70da84a7ea --- /dev/null +++ b/src/postgresql/deploy/stop.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +kubectl delete --ignore-not-found --now "daemonset/postgresql-ds" diff --git a/src/postgresql/src/init_table.sql b/src/postgresql/src/init_table.sql new file mode 100644 index 0000000000..8ec13d63d0 --- /dev/null +++ b/src/postgresql/src/init_table.sql @@ -0,0 +1,2 @@ +CREATE TABLE job(name VARCHAR(20), create_date DATE); + From 04d171491430676bbfac75f3fe5e3fb166a6e9d5 Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Fri, 8 Nov 2019 10:45:43 +0800 Subject: [PATCH 06/26] fix --- src/internal-storage/deploy/create.yaml.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/internal-storage/deploy/create.yaml.template b/src/internal-storage/deploy/create.yaml.template index e66de38111..c1fb37fc66 100644 --- a/src/internal-storage/deploy/create.yaml.template +++ b/src/internal-storage/deploy/create.yaml.template @@ -49,7 +49,7 @@ spec: volumeMounts: - name: internal-data-dir mountPath: /paiInternal - mountPropagation: Bidirectional + mountPropagation: Bidirectional volumes: - name: internal-data-dir hostPath: From fd0c52c4f96d9adc2182928da02d459746741550 Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Fri, 8 Nov 2019 10:46:53 +0800 Subject: [PATCH 07/26] fix --- src/internal-storage/deploy/delete.yaml.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/internal-storage/deploy/delete.yaml.template b/src/internal-storage/deploy/delete.yaml.template index b9f7067c0f..b1df9d8d92 100644 --- a/src/internal-storage/deploy/delete.yaml.template +++ b/src/internal-storage/deploy/delete.yaml.template @@ -46,7 +46,7 @@ spec: volumeMounts: - name: internal-data-dir mountPath: /paiInternal - mountPropagation: Bidirectional + mountPropagation: Bidirectional volumes: - name: internal-data-dir hostPath: From c308b357941071b75ce848c79d38d6c376c500a8 Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Fri, 8 Nov 2019 10:53:19 +0800 Subject: [PATCH 08/26] fix node affinity for delete.yaml --- 
src/internal-storage/deploy/delete.yaml.template | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/internal-storage/deploy/delete.yaml.template b/src/internal-storage/deploy/delete.yaml.template index b1df9d8d92..c1789bdada 100644 --- a/src/internal-storage/deploy/delete.yaml.template +++ b/src/internal-storage/deploy/delete.yaml.template @@ -29,6 +29,15 @@ spec: labels: app: internal-storage-delete spec: + affinity: # deploy-rules doesn't take effect during deleting + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: pai-master + operator: In + values: + - 'true' hostNetwork: false containers: - name: internal-storage-delete From e6ffb717b56be5b7388a278685a6f521f2718adf Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Fri, 8 Nov 2019 15:03:23 +0800 Subject: [PATCH 09/26] fix enable for service --- .../config/internal_storage.py | 21 +++++++++++-------- src/internal-storage/deploy/start.sh | 4 ++++ src/postgresql/config/postgresql.py | 21 ++++++++++--------- src/postgresql/config/postgresql.yaml | 1 + src/postgresql/deploy/start.sh | 4 ++++ 5 files changed, 32 insertions(+), 19 deletions(-) diff --git a/src/internal-storage/config/internal_storage.py b/src/internal-storage/config/internal_storage.py index be995aff02..328dac8584 100644 --- a/src/internal-storage/config/internal_storage.py +++ b/src/internal-storage/config/internal_storage.py @@ -19,16 +19,19 @@ def merge_service_configuration(overwrite_srv_cfg, default_srv_cfg): return srv_cfg def validation_pre(self): - type_ = self.service_conf.get('type', '') - if type_ == 'hostPath': - machine_list = self.cluster_conf['machine-list'] - if len([host for host in machine_list if host.get('pai-master') == 'true']) != 1: - return False, '1 and only 1 "pai-master=true" machine is required to deploy the rest server' - quotaGB = int(self.service_conf['quotaGB']) - assert quotaGB >= 1 - return True, None + if 
self.service_conf['enable']: + type_ = self.service_conf.get('type', '') + if type_ == 'hostPath': + machine_list = self.cluster_conf['machine-list'] + if len([host for host in machine_list if host.get('pai-master') == 'true']) != 1: + return False, '1 and only 1 "pai-master=true" machine is required to deploy the rest server' + quotaGB = int(self.service_conf['quotaGB']) + assert quotaGB >= 1 + return True, None + else: + return False, 'Unknown internal storage type {}'.format(type_) else: - return False, 'Unknown internal storage type {}'.format(type_) + return True, None def run(self): result = copy.deepcopy(self.service_conf) diff --git a/src/internal-storage/deploy/start.sh b/src/internal-storage/deploy/start.sh index 3cba5f1261..98f355e065 100644 --- a/src/internal-storage/deploy/start.sh +++ b/src/internal-storage/deploy/start.sh @@ -19,9 +19,13 @@ pushd $(dirname "$0") > /dev/null +{% if cluster_cfg['internal-storage']['enable'] %} + kubectl apply --overwrite=true -f create.yaml || exit $? # Wait until the service is ready. PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v internal-storage-create || exit $? 
+{% endif %} + popd > /dev/null diff --git a/src/postgresql/config/postgresql.py b/src/postgresql/config/postgresql.py index 996ad97a0e..11bf1c8fca 100644 --- a/src/postgresql/config/postgresql.py +++ b/src/postgresql/config/postgresql.py @@ -19,20 +19,21 @@ def merge_service_configuration(overwrite_srv_cfg, default_srv_cfg): return srv_cfg def validation_pre(self): - machine_list = self.cluster_conf['machine-list'] - if len([host for host in machine_list if host.get('pai-master') == 'true']) != 1: - return False, '1 and only 1 "pai-master=true" machine is required to deploy the postgresql service' + if self.service_conf['enable']: + machine_list = self.cluster_conf['machine-list'] + if len([host for host in machine_list if host.get('pai-master') == 'true']) != 1: + return False, '1 and only 1 "pai-master=true" machine is required to deploy the postgresql service' return True, None def run(self): result = copy.deepcopy(self.service_conf) - machine_list = self.cluster_conf['machine-list'] - master_ip = [host['hostip'] for host in machine_list if host.get('pai-master') == 'true'][0] - result['host'] = master_ip - result['connectionStr'] = 'postgresql://{}:{}@{}:{}/{}'.format( - result['user'], result['passwd'], result['host'], result['port'], result['db']) - result['configured'] = True + if self.service_conf['enable']: + machine_list = self.cluster_conf['machine-list'] + master_ip = [host['hostip'] for host in machine_list if host.get('pai-master') == 'true'][0] + result['host'] = master_ip + result['connectionStr'] = 'postgresql://{}:{}@{}:{}/{}'.format( + result['user'], result['passwd'], result['host'], result['port'], result['db']) return result def validation_post(self, conf): - return True, None \ No newline at end of file + return True, None diff --git a/src/postgresql/config/postgresql.yaml b/src/postgresql/config/postgresql.yaml index c4b865d5dd..4c2866fc5c 100644 --- a/src/postgresql/config/postgresql.yaml +++ b/src/postgresql/config/postgresql.yaml @@ -1,3 
+1,4 @@ +enable: true user: root passwd: rootpass db: openpai diff --git a/src/postgresql/deploy/start.sh b/src/postgresql/deploy/start.sh index 04f0c52f93..90b3833616 100644 --- a/src/postgresql/deploy/start.sh +++ b/src/postgresql/deploy/start.sh @@ -19,9 +19,13 @@ pushd $(dirname "$0") > /dev/null +{% if cluster_cfg['postgresql']['enable'] %} + kubectl apply --overwrite=true -f postgresql.yaml || exit $? # Wait until the service is ready. PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v postgresql || exit $? +{% endif %} + popd > /dev/null From 88f7561b9ac5203f66690169231aacf7779c047c Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Fri, 8 Nov 2019 15:12:42 +0800 Subject: [PATCH 10/26] fix --- src/internal-storage/deploy/service.yaml | 1 + src/internal-storage/deploy/{start.sh => start.sh.template} | 0 src/postgresql/deploy/service.yaml | 1 + src/postgresql/deploy/{start.sh => start.sh.template} | 0 4 files changed, 2 insertions(+) rename src/internal-storage/deploy/{start.sh => start.sh.template} (100%) rename src/postgresql/deploy/{start.sh => start.sh.template} (100%) diff --git a/src/internal-storage/deploy/service.yaml b/src/internal-storage/deploy/service.yaml index 283c7b0cc0..a7c50d3249 100644 --- a/src/internal-storage/deploy/service.yaml +++ b/src/internal-storage/deploy/service.yaml @@ -23,6 +23,7 @@ cluster-type: template-list: - create.yaml - delete.yaml + - start.sh start-script: start.sh stop-script: stop.sh diff --git a/src/internal-storage/deploy/start.sh b/src/internal-storage/deploy/start.sh.template similarity index 100% rename from src/internal-storage/deploy/start.sh rename to src/internal-storage/deploy/start.sh.template diff --git a/src/postgresql/deploy/service.yaml b/src/postgresql/deploy/service.yaml index 437aca27fd..11bf78364c 100644 --- a/src/postgresql/deploy/service.yaml +++ b/src/postgresql/deploy/service.yaml @@ -24,6 +24,7 @@ prerequisite: template-list: - 
postgresql.yaml + - start.sh start-script: start.sh stop-script: stop.sh diff --git a/src/postgresql/deploy/start.sh b/src/postgresql/deploy/start.sh.template similarity index 100% rename from src/postgresql/deploy/start.sh rename to src/postgresql/deploy/start.sh.template From 9438a2bfc7e159c5464949843da5c17517b1b989 Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Fri, 8 Nov 2019 16:30:00 +0800 Subject: [PATCH 11/26] add docs --- src/internal-storage/README.md | 58 ++++++++++++++++++++++++++++++++++ src/postgresql/README.md | 3 +- 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 src/internal-storage/README.md diff --git a/src/internal-storage/README.md b/src/internal-storage/README.md new file mode 100644 index 0000000000..58f616f18b --- /dev/null +++ b/src/internal-storage/README.md @@ -0,0 +1,58 @@ +## PAI Internal Storage + +Internal Storage is designed to make database and other stateful applications available in PAI. +It leverages `loop device` in Linux to provide a storage with strictly limited quota. The default service configuration for internal storage is: + +```yaml +internal-storage: + enable: true + type: hostPath + path: /paiInternal + quotaGB: 10 +``` + +User can override these settings in `services-configuration.yaml`. + +## Set up Internal Storage + +For now, `hostPath` is the only supported `type` for internal storage. In summary, it will make a `` folder (The default path is `/paiInternal`) on the `pai-master` node first, then create a loop device in the folder, which is a filesystem inside a file. Please refer to the following commands for details. + +```bash +fallocate -l ${QUOTA_GB}G storage.ext4 +/sbin/mkfs -t ext4 -q storage.ext4 -F +mkdir -p storage +mount -o loop,rw,usrquota,grpquota storage.ext4 storage +``` + +The advantage of using a loop device is that it can limit the disk quota for every user strictly. 
+ +Since the service uses a `mount` inside a container, `mountPropagation` is set to `Bidirectional` to ensure the `mount` behavior propagates to the host. + + +## Use the Internal Storage + +In fact, the internal storage is a disk path on the `pai-master` node, thus only pod on the same node can reference it by using `hostPath` in kubernetes, e.g. + +```yaml +apiVersion: v1 +kind: Pod +... +spec: + containers: + - image: + volumeMounts: + - name: internal-data-dir + mountPath: /data + mountPropagation: HostToContainer + volumes: + - name: internal-data-dir + hostPath: + path: /paiInternal/storage +``` + +Please note that `mountPropagation` should be set to `HostToContainer`, to ensure the `mount` propagates between hosts and pods. + +## References + - [Loop Device](http://man7.org/linux/man-pages/man4/loop.4.html) + - [Linux Quota Tutorial](http://souptonuts.sourceforge.net/quota_tutorial.html) + - [Mount Propagation](https://kubernetes.io/docs/concepts/storage/volumes/#mount-propagation) \ No newline at end of file diff --git a/src/postgresql/README.md b/src/postgresql/README.md index 0e00bf5925..9bd7f691ba 100644 --- a/src/postgresql/README.md +++ b/src/postgresql/README.md @@ -1,9 +1,10 @@ ## Postgresql -Postgresql is an internal service for structured information persistence. By default, the service uses the folder `/paiInternal` on the master node for data storage. The real data directory for the postgresql would be `/paiInternal/pgdata`. The default service configurations are as follows: +Postgresql is an internal service for structured information persistence. By default, the service uses the folder `/paiInternal/storage` on the master node for data storage. The real data directory for the postgresql would be `/paiInternal/storage/pgdata`. 
The default service configurations are as follows: ```yaml postgresql: + enable: true user: root passwd: rootpass port: 5432 From a0f58d88d2ded04a5fa96554d6857a1c163dd74b Mon Sep 17 00:00:00 2001 From: Zhiyuan He <362583303@qq.com> Date: Fri, 8 Nov 2019 16:48:46 +0800 Subject: [PATCH 12/26] Update README.md --- src/internal-storage/README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/internal-storage/README.md b/src/internal-storage/README.md index 58f616f18b..b6e1c4b069 100644 --- a/src/internal-storage/README.md +++ b/src/internal-storage/README.md @@ -1,7 +1,6 @@ ## PAI Internal Storage -Internal Storage is designed to make database and other stateful applications available in PAI. -It leverages `loop device` in Linux to provide a storage with strictly limited quota. The default service configuration for internal storage is: +Internal Storage is designed to create a limited size storage in PAI. The storage can be used by database service or other stateful application internally. It leverages [`loop device`](http://man7.org/linux/man-pages/man4/loop.4.html) in Linux to provide a storage with strictly limited quota. The default service configuration for internal storage is: ```yaml internal-storage: @@ -15,7 +14,7 @@ User can override these settings in `services-configuration.yaml`. ## Set up Internal Storage -For now, `hostPath` is the only supported `type` for internal storage. In summary, it will make a `` folder (The default path is `/paiInternal`) on the `pai-master` node first, then create a loop device in the folder, which is a filesystem inside a file. Please refer to the following commands for details. +For now, `hostPath` is the only supported `type` for internal storage. In summary, it will make a `` folder (The default path is `/paiInternal`) on the `pai-master` node first, then create a loop device in the folder. Please refer to the following commands for details. 
```bash fallocate -l ${QUOTA_GB}G storage.ext4 @@ -55,4 +54,4 @@ Please note that `mountPropagation` should be set to `HostToContainer`, to ensur ## References - [Loop Device](http://man7.org/linux/man-pages/man4/loop.4.html) - [Linux Quota Tutorial](http://souptonuts.sourceforge.net/quota_tutorial.html) - - [Mount Propagation](https://kubernetes.io/docs/concepts/storage/volumes/#mount-propagation) \ No newline at end of file + - [Mount Propagation](https://kubernetes.io/docs/concepts/storage/volumes/#mount-propagation) From 21b4af4b631bc0908ee06b4f82709036ffc0323d Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Fri, 8 Nov 2019 18:08:06 +0800 Subject: [PATCH 13/26] add fault tolerance for internal storage --- src/internal-storage/README.md | 29 +++++++++++++++++++++++++++-- src/internal-storage/src/create.sh | 18 +++++++++++++++--- src/internal-storage/src/delete.sh | 6 +++--- 3 files changed, 45 insertions(+), 8 deletions(-) diff --git a/src/internal-storage/README.md b/src/internal-storage/README.md index 58f616f18b..a5f72a97e3 100644 --- a/src/internal-storage/README.md +++ b/src/internal-storage/README.md @@ -43,14 +43,39 @@ spec: volumeMounts: - name: internal-data-dir mountPath: /data - mountPropagation: HostToContainer + mountPropagation: "None" volumes: - name: internal-data-dir hostPath: path: /paiInternal/storage ``` -Please note that `mountPropagation` should be set to `HostToContainer`, to ensure the `mount` propagates between hosts and pods. +Please note that `mountPropagation` should be set to `None`, to ensure that any unexpected unmount of the data folder will not be propagated to the pod. + +## Assumption of Failure + +1. Failure during setup + +This service uses the readiness probe in k8s to ensure the corresponding loop device is created successfully. Possible errors during setup: + + - Allocation Failure: The storage uses `fallocate` to reserve quota during setup.
If the remaining disk size doesn't meet the need, allocation failure happens. + - Mount Failure: Since the `mount` command needs some privileges of the host, it may also fail during setup. + +If any of the above failures happen, the service will never be ready (because of the readiness probe). See [create.sh](src/create.sh) and [create.yaml.template](deploy/create.yaml.template) for details. + +2. Failure after setup + +Possibility is that users may delete our storage file `storage.ext4` or `storage` folder unexpectedly. The service checks them every 60 seconds: + + - If the `storage` folder is unmounted or deleted, the service will restart to create and mount it again in 60 seconds. Data won't be lost. Since pods are using the internal service with `mountPropagation=None`, nothing will happen to them. + - If the `storage.ext4` file is deleted, the service will restart to create a new `storage.ext4` in 60 seconds. However, in such case, user data will be lost. We cannot prevent it since users can always remove files on their disks. + +3. Failure during deletion + +During service deletion, if we cannot unmount or delete the data, the deletion process won't be successful. There is also a readiness probe for these purposes. See [delete.yaml.template](deploy/delete.yaml.template) for details. + + + ## References - [Loop Device](http://man7.org/linux/man-pages/man4/loop.4.html) diff --git a/src/internal-storage/src/create.sh b/src/internal-storage/src/create.sh index 709f0c8656..029e72c293 100644 --- a/src/internal-storage/src/create.sh +++ b/src/internal-storage/src/create.sh @@ -22,7 +22,7 @@ if [ -f storage.ext4 ]; then echo "Skip storage.ext4 creation." else echo "Creating storage.ext4 of ${QUOTA_GB}G, please wait..." - fallocate -l ${QUOTA_GB}G storage.ext4 + fallocate -l ${QUOTA_GB}G storage.ext4 || { echo "allocation failed!"; sleep infinity; } /sbin/mkfs -t ext4 -q storage.ext4 -F fi @@ -34,9 +34,21 @@ if [ $? 
-ne 0 ]; then else mkdir -p storage fi - mount -o loop,rw,usrquota,grpquota storage.ext4 storage + mount -o loop,rw,usrquota,grpquota storage.ext4 storage || { echo "mount failed!"; sleep infinity; } touch storage/READY fi -sleep infinity +while true; do + ls /paiInternal/storage/READY &> /dev/null + if [ $? -ne 0 ]; then + echo "Cannot find storage/READY! Abort." + exit 1 + fi + if [ ! -f storage.ext4 ]; then + echo "Cannot find storage.ext4! Abort." + exit 1 + fi + sleep 1m +done + diff --git a/src/internal-storage/src/delete.sh b/src/internal-storage/src/delete.sh index 632d7344c9..a7cb7251b3 100644 --- a/src/internal-storage/src/delete.sh +++ b/src/internal-storage/src/delete.sh @@ -20,12 +20,12 @@ cd /paiInternal/ if [ -d storage ]; then - umount storage - rm -rf storage + umount storage || { echo "umount failed!"; sleep infinity; } + rm -rf storage || { echo "rm storage failed!"; sleep infinity; } fi if [ -f storage.ext4 ]; then - rm -f storage.ext4; + rm -f storage.ext4 || { echo "rm storage.ext4 failed!"; sleep infinity; } fi touch /DELETED From 502e72c717c5f398216a1c52b5bbcb9d909d4a27 Mon Sep 17 00:00:00 2001 From: Zhiyuan He <362583303@qq.com> Date: Fri, 8 Nov 2019 18:11:16 +0800 Subject: [PATCH 14/26] minor docs change --- src/internal-storage/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/internal-storage/README.md b/src/internal-storage/README.md index 3625708643..555cc89ff9 100644 --- a/src/internal-storage/README.md +++ b/src/internal-storage/README.md @@ -53,7 +53,7 @@ Please note that `mountPropagation` should be set to `None`, to ensure that any ## Assumption of Failure -1. Failure during setup +### 1. Failure during setup This service uses the readiness probe in k8s to ensure the corresponding loop device is created successfully. 
Possible errors during setup: @@ -62,14 +62,14 @@ This service uses the readiness probe in k8s to ensure the corresponding loop de If any of the above failures happen, the service will never be ready (because of the readiness probe). See [create.sh](src/create.sh) and [create.yaml.template](deploy/create.yaml.template) for details. -2. Failure after setup +### 2. Failure after setup Possibility is that users may delete our storage file `storage.ext4` or `storage` folder unexpectedly. The service checks them every 60 seconds: - If the `storage` folder is unmounted or deleted, the service will restart to create and mount it again in 60 seconds. Data won't be lost. Since pods are using the internal service with `mountPropagation=None`, nothing will happen to them. - If the `storage.ext4` file is deleted, the service will restart to create a new `storage.ext4` in 60 seconds. However, in such case, user data will be lost. We cannot prevent it since users can always remove files on their disks. -3. Failure during deletion +### 3. Failure during deletion During service deletion, if we cannot unmount or delete the data, the deletion process won't be successful. There is also a readiness probe for these purposes. See [delete.yaml.template](deploy/delete.yaml.template) for details. From 75d99ca24afd7d4a77c40e9aa2fa0cc0efb76866 Mon Sep 17 00:00:00 2001 From: Zhiyuan He <362583303@qq.com> Date: Fri, 8 Nov 2019 18:15:54 +0800 Subject: [PATCH 15/26] Update README.md --- src/internal-storage/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/internal-storage/README.md b/src/internal-storage/README.md index 555cc89ff9..5b5b319b9c 100644 --- a/src/internal-storage/README.md +++ b/src/internal-storage/README.md @@ -55,18 +55,18 @@ Please note that `mountPropagation` should be set to `None`, to ensure that any ### 1. Failure during setup -This service uses the readiness probe in k8s to ensure the corresponding loop device is created successfully. 
Possible errors during setup: +This service uses the readiness probe in k8s to ensure the corresponding loop device is created successfully. Possible errors during setup are as follows: - Allocation Failure: The storage uses `fallocate` to reserve quota during setup. If the remaining disk size doesn't meet the need, allocation failure happens. - - Mount Failure: Since the `mount` command needs some privileges of the host, it may also fail during setup. + - Mount Failure: Since the `mount` command needs some privileges from the host to work, it may also fail during setup. -If any of the above failures happen, the service will never be ready (because of the readiness probe). See [create.sh](src/create.sh) and [create.yaml.template](deploy/create.yaml.template) for details. +If any of the above failures happens, the service will never be ready (because of the readiness probe). See [create.sh](src/create.sh) and [create.yaml.template](deploy/create.yaml.template) for details. ### 2. Failure after setup Possibility is that users may delete our storage file `storage.ext4` or `storage` folder unexpectedly. The service checks them every 60 seconds: - - If the `storage` folder is unmounted or deleted, the service will restart to create and mount it again in 60 seconds. Data won't be lost. Since pods are using the internal service with `mountPropagation=None`, nothing will happen to them. + - If the `storage` folder is unmounted or deleted, the service will restart to create and mount it again in 60 seconds. Data won't be lost. Since pods are using the internal storage with `mountPropagation=None`, they won't notice any change. - If the `storage.ext4` file is deleted, the service will restart to create a new `storage.ext4` in 60 seconds. However, in such case, user data will be lost. We cannot prevent it since users can always remove files on their disks. ### 3. 
Failure during deletion From fd689079fb84680ecdf3cbc4dfbd557dc4bb0397 Mon Sep 17 00:00:00 2001 From: Zhiyuan He <362583303@qq.com> Date: Fri, 8 Nov 2019 19:29:35 +0800 Subject: [PATCH 16/26] Update README.md --- src/internal-storage/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/internal-storage/README.md b/src/internal-storage/README.md index 5b5b319b9c..df8aa3760c 100644 --- a/src/internal-storage/README.md +++ b/src/internal-storage/README.md @@ -64,11 +64,14 @@ If any of the above failures happens, the service will never be ready (because o ### 2. Failure after setup +Please note that this storage doesn't have any replica mechanism. If the `pai-master` node crashes with a disk failure or other hardware issues, users will not be able to restore the data. In fact, all the data are stored in a single file `storage.ext4` on the `pai-master` node. + Possibility is that users may delete our storage file `storage.ext4` or `storage` folder unexpectedly. The service checks them every 60 seconds: - If the `storage` folder is unmounted or deleted, the service will restart to create and mount it again in 60 seconds. Data won't be lost. Since pods are using the internal storage with `mountPropagation=None`, they won't notice any change. - If the `storage.ext4` file is deleted, the service will restart to create a new `storage.ext4` in 60 seconds. However, in such case, user data will be lost. We cannot prevent it since users can always remove files on their disks. + ### 3. Failure during deletion During service deletion, if we cannot unmount or delete the data, the deletion process won't be successful. There is also a readiness probe for these purposes. See [delete.yaml.template](deploy/delete.yaml.template) for details. 
From d308cf7afcd097958774506ac965bba5aded7f0b Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Mon, 11 Nov 2019 13:52:43 +0800 Subject: [PATCH 17/26] link db with storage --- src/internal-storage/README.md | 2 +- .../config/internal-storage.yaml | 4 ++-- .../config/internal_storage.py | 8 ++++---- .../deploy/create.yaml.template | 4 ++-- .../deploy/delete.yaml.template | 2 +- src/internal-storage/src/create.sh | 6 +++--- src/internal-storage/src/delete.sh | 2 +- src/postgresql/config/postgresql.py | 20 ++++++++++++++++++- .../deploy/postgresql.yaml.template | 2 +- 9 files changed, 34 insertions(+), 16 deletions(-) diff --git a/src/internal-storage/README.md b/src/internal-storage/README.md index 3625708643..08148f8fab 100644 --- a/src/internal-storage/README.md +++ b/src/internal-storage/README.md @@ -6,7 +6,7 @@ Internal Storage is designed to create a limited size storage in PAI. The storag internal-storage: enable: true type: hostPath - path: /paiInternal + root-path: /paiInternal quotaGB: 10 ``` diff --git a/src/internal-storage/config/internal-storage.yaml b/src/internal-storage/config/internal-storage.yaml index a647018409..761f2bb500 100644 --- a/src/internal-storage/config/internal-storage.yaml +++ b/src/internal-storage/config/internal-storage.yaml @@ -1,4 +1,4 @@ enable: true type: hostPath -path: /paiInternal -quotaGB: 10 \ No newline at end of file +root-path: /paiInternal +quota-gb: 10 \ No newline at end of file diff --git a/src/internal-storage/config/internal_storage.py b/src/internal-storage/config/internal_storage.py index 328dac8584..d6d84f742c 100644 --- a/src/internal-storage/config/internal_storage.py +++ b/src/internal-storage/config/internal_storage.py @@ -6,7 +6,7 @@ class InternalStorage(object): def __init__(self, cluster_conf, service_conf, default_service_conf): self.cluster_conf = cluster_conf - self.service_conf = self.merge_service_configuration(default_service_conf, service_conf) + self.service_conf = 
self.merge_service_configuration(service_conf, default_service_conf) self.logger = logging.getLogger(__name__) @staticmethod @@ -25,7 +25,7 @@ def validation_pre(self): machine_list = self.cluster_conf['machine-list'] if len([host for host in machine_list if host.get('pai-master') == 'true']) != 1: return False, '1 and only 1 "pai-master=true" machine is required to deploy the rest server' - quotaGB = int(self.service_conf['quotaGB']) + quotaGB = int(self.service_conf['quota-gb']) assert quotaGB >= 1 return True, None else: @@ -38,8 +38,8 @@ def run(self): if result['enable']: machine_list = self.cluster_conf['machine-list'] master_ip = [host['hostip'] for host in machine_list if host.get('pai-master') == 'true'][0] - result['masterIp'] = master_ip - result['quotaGB'] = int(result['quotaGB']) + result['master-ip'] = master_ip + result['quota-gb'] = int(result['quota-gb']) return result def validation_post(self, conf): diff --git a/src/internal-storage/deploy/create.yaml.template b/src/internal-storage/deploy/create.yaml.template index c1fb37fc66..5501f3bc7a 100644 --- a/src/internal-storage/deploy/create.yaml.template +++ b/src/internal-storage/deploy/create.yaml.template @@ -45,7 +45,7 @@ spec: periodSeconds: 3 env: - name: QUOTA_GB - value: '{{ cluster_cfg["internal-storage"]["quotaGB"] }}' + value: '{{ cluster_cfg["internal-storage"]["quota-gb"] }}' volumeMounts: - name: internal-data-dir mountPath: /paiInternal @@ -53,6 +53,6 @@ spec: volumes: - name: internal-data-dir hostPath: - path: {{ cluster_cfg["internal-storage"]["path"] }} + path: {{ cluster_cfg["internal-storage"]["root-path"] }} imagePullSecrets: - name: {{ cluster_cfg["cluster"]["docker-registry"]["secret-name"] }} diff --git a/src/internal-storage/deploy/delete.yaml.template b/src/internal-storage/deploy/delete.yaml.template index c1789bdada..1bba474084 100644 --- a/src/internal-storage/deploy/delete.yaml.template +++ b/src/internal-storage/deploy/delete.yaml.template @@ -59,6 +59,6 @@ spec: 
volumes: - name: internal-data-dir hostPath: - path: {{ cluster_cfg["internal-storage"]["path"] }} + path: {{ cluster_cfg["internal-storage"]["root-path"] }} imagePullSecrets: - name: {{ cluster_cfg["cluster"]["docker-registry"]["secret-name"] }} diff --git a/src/internal-storage/src/create.sh b/src/internal-storage/src/create.sh index 029e72c293..0ad9ac6721 100644 --- a/src/internal-storage/src/create.sh +++ b/src/internal-storage/src/create.sh @@ -16,7 +16,7 @@ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -cd /paiInternal/ +cd /paiInternal if [ -f storage.ext4 ]; then echo "Skip storage.ext4 creation." @@ -26,7 +26,7 @@ else /sbin/mkfs -t ext4 -q storage.ext4 -F fi -ls /paiInternal/storage/READY &> /dev/null +ls READY &> /dev/null if [ $? -ne 0 ]; then if [ -d storage ]; then @@ -39,7 +39,7 @@ if [ $? -ne 0 ]; then fi while true; do - ls /paiInternal/storage/READY &> /dev/null + ls READY &> /dev/null if [ $? -ne 0 ]; then echo "Cannot find storage/READY! Abort." exit 1 diff --git a/src/internal-storage/src/delete.sh b/src/internal-storage/src/delete.sh index a7cb7251b3..0e13c0c3ad 100644 --- a/src/internal-storage/src/delete.sh +++ b/src/internal-storage/src/delete.sh @@ -17,7 +17,7 @@ # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-cd /paiInternal/ +cd /paiInternal if [ -d storage ]; then umount storage || { echo "umount failed!"; sleep infinity; } diff --git a/src/postgresql/config/postgresql.py b/src/postgresql/config/postgresql.py index 11bf1c8fca..ae155f636f 100644 --- a/src/postgresql/config/postgresql.py +++ b/src/postgresql/config/postgresql.py @@ -1,4 +1,22 @@ #!/usr/bin/env python +# +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ import copy import logging @@ -6,7 +24,7 @@ class Postgresql(object): def __init__(self, cluster_conf, service_conf, default_service_conf): self.cluster_conf = cluster_conf - self.service_conf = self.merge_service_configuration(default_service_conf, service_conf) + self.service_conf = self.merge_service_configuration(service_conf, default_service_conf) self.logger = logging.getLogger(__name__) @staticmethod diff --git a/src/postgresql/deploy/postgresql.yaml.template b/src/postgresql/deploy/postgresql.yaml.template index 65ece3f880..7f8810a09f 100644 --- a/src/postgresql/deploy/postgresql.yaml.template +++ b/src/postgresql/deploy/postgresql.yaml.template @@ -54,6 +54,6 @@ spec: volumes: - name: internal-data-dir hostPath: - path: /paiInternal/storage + path: '{{ cluster_cfg["internal-storage"]["root-path"] }}/storage' imagePullSecrets: - name: {{ cluster_cfg["cluster"]["docker-registry"]["secret-name"] }} From c5704f2ac7e0a6c7a62cb798fc46ad6f5d92ab89 Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Mon, 11 Nov 2019 13:55:50 +0800 Subject: [PATCH 18/26] fix doc --- src/internal-storage/README.md | 6 +++--- src/postgresql/deploy/postgresql.yaml.template | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/internal-storage/README.md b/src/internal-storage/README.md index a3242a7c23..c8f1ef324d 100644 --- a/src/internal-storage/README.md +++ b/src/internal-storage/README.md @@ -7,14 +7,14 @@ internal-storage: enable: true type: hostPath root-path: /paiInternal - quotaGB: 10 + quota-gB: 10 ``` User can override these settings in `services-configuration.yaml`. ## Set up Internal Storage -For now, `hostPath` is the only supported `type` for internal storage. In summary, it will make a `` folder (The default path is `/paiInternal`) on the `pai-master` node first, then create a loop device in the folder. Please refer to the following commands for details. +For now, `hostPath` is the only supported `type` for internal storage. 
In summary, it will make a `` folder (The default path is `/paiInternal`) on the `pai-master` node first, then create a loop device in the folder. Please refer to the following commands for details. ```bash fallocate -l ${QUOTA_GB}G storage.ext4 @@ -46,7 +46,7 @@ spec: volumes: - name: internal-data-dir hostPath: - path: /paiInternal/storage + path: '{{ cluster_cfg["internal-storage"]["root-path"] }}/storage' ``` Please note that `mountPropagation` should be set to `None`, to ensure that any unexpected unmount of the data folder will not be propagates to the pod. diff --git a/src/postgresql/deploy/postgresql.yaml.template b/src/postgresql/deploy/postgresql.yaml.template index 7f8810a09f..57a3edf230 100644 --- a/src/postgresql/deploy/postgresql.yaml.template +++ b/src/postgresql/deploy/postgresql.yaml.template @@ -46,7 +46,7 @@ spec: volumeMounts: - name: internal-data-dir mountPath: /var/lib/postgresql/data/ - mountPropagation: HostToContainer + mountPropagation: "None" ports: - containerPort: 5432 hostPort: {{ cluster_cfg["postgresql"]["port"] }} From 2c29df475046281900b3fbeff192bdbeb68acc70 Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Mon, 11 Nov 2019 14:02:42 +0800 Subject: [PATCH 19/26] fix doc --- src/internal-storage/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/internal-storage/README.md b/src/internal-storage/README.md index c8f1ef324d..016e96a1b6 100644 --- a/src/internal-storage/README.md +++ b/src/internal-storage/README.md @@ -7,7 +7,7 @@ internal-storage: enable: true type: hostPath root-path: /paiInternal - quota-gB: 10 + quota-gb: 10 ``` User can override these settings in `services-configuration.yaml`. 
From 9610e4165017b6f7528be2711c77ee14cdbf6327 Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Mon, 11 Nov 2019 15:45:27 +0800 Subject: [PATCH 20/26] fix delete --- src/internal-storage/deploy/delete.sh | 2 ++ src/internal-storage/src/delete.sh | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/internal-storage/deploy/delete.sh b/src/internal-storage/deploy/delete.sh index 12bb869010..92e4719258 100644 --- a/src/internal-storage/deploy/delete.sh +++ b/src/internal-storage/deploy/delete.sh @@ -19,6 +19,8 @@ pushd $(dirname "$0") > /dev/null +kubectl delete --ignore-not-found --now "daemonset/internal-storage-create-ds" + kubectl apply --overwrite=true -f delete.yaml || exit $? # Wait until the service is ready. diff --git a/src/internal-storage/src/delete.sh b/src/internal-storage/src/delete.sh index 0e13c0c3ad..9b9199c8ed 100644 --- a/src/internal-storage/src/delete.sh +++ b/src/internal-storage/src/delete.sh @@ -20,7 +20,7 @@ cd /paiInternal if [ -d storage ]; then - umount storage || { echo "umount failed!"; sleep infinity; } + umount storage rm -rf storage || { echo "rm storage failed!"; sleep infinity; } fi From d594c97a64ea8625f377231a4364cb4cba040b33 Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Mon, 11 Nov 2019 16:51:02 +0800 Subject: [PATCH 21/26] fix path --- src/internal-storage/config/internal-storage.yaml | 2 +- src/internal-storage/src/delete.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/internal-storage/config/internal-storage.yaml b/src/internal-storage/config/internal-storage.yaml index 761f2bb500..8e4715ec19 100644 --- a/src/internal-storage/config/internal-storage.yaml +++ b/src/internal-storage/config/internal-storage.yaml @@ -1,4 +1,4 @@ enable: true type: hostPath -root-path: /paiInternal +root-path: /mnt/paiInternal quota-gb: 10 \ No newline at end of file diff --git a/src/internal-storage/src/delete.sh b/src/internal-storage/src/delete.sh index 
9b9199c8ed..84597e3a5d 100644 --- a/src/internal-storage/src/delete.sh +++ b/src/internal-storage/src/delete.sh @@ -21,11 +21,11 @@ cd /paiInternal if [ -d storage ]; then umount storage - rm -rf storage || { echo "rm storage failed!"; sleep infinity; } + rm -rf storage fi if [ -f storage.ext4 ]; then - rm -f storage.ext4 || { echo "rm storage.ext4 failed!"; sleep infinity; } + rm -f storage.ext4 fi touch /DELETED From 1c7fe79d398d6f6b17040a0529a6862de08c795b Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Mon, 11 Nov 2019 17:40:51 +0800 Subject: [PATCH 22/26] fix --- src/internal-storage/src/create.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/internal-storage/src/create.sh b/src/internal-storage/src/create.sh index 0ad9ac6721..7e348dd3c2 100644 --- a/src/internal-storage/src/create.sh +++ b/src/internal-storage/src/create.sh @@ -38,6 +38,8 @@ if [ $? -ne 0 ]; then touch storage/READY fi +sleep 30m + while true; do ls READY &> /dev/null if [ $? -ne 0 ]; then From 6689509f7429d7e5979e5a14c9fc0283c64caceb Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Tue, 12 Nov 2019 14:09:37 +0800 Subject: [PATCH 23/26] fix: allow multiple master; postgresql port; add rest-server env --- .../config/internal_storage.py | 4 ++-- .../deploy/create.yaml.template | 9 +++++++++ .../deploy/{delete.sh => delete.sh.template} | 4 ++++ .../deploy/delete.yaml.template | 6 +++--- src/internal-storage/deploy/service.yaml | 5 +---- src/postgresql/config/postgresql.py | 6 +++--- .../deploy/postgresql.yaml.template | 20 ++++++++++++++----- src/postgresql/deploy/service.yaml | 4 ---- src/postgresql/deploy/start.sh.template | 2 +- .../deploy/rest-server.yaml.template | 4 ++++ src/rest-server/deploy/service.yaml | 1 + 11 files changed, 43 insertions(+), 22 deletions(-) rename src/internal-storage/deploy/{delete.sh => delete.sh.template} (96%) diff --git a/src/internal-storage/config/internal_storage.py b/src/internal-storage/config/internal_storage.py index 
d6d84f742c..c92edc46d9 100644 --- a/src/internal-storage/config/internal_storage.py +++ b/src/internal-storage/config/internal_storage.py @@ -23,8 +23,8 @@ def validation_pre(self): type_ = self.service_conf.get('type', '') if type_ == 'hostPath': machine_list = self.cluster_conf['machine-list'] - if len([host for host in machine_list if host.get('pai-master') == 'true']) != 1: - return False, '1 and only 1 "pai-master=true" machine is required to deploy the rest server' + if len([host for host in machine_list if host.get('pai-master') == 'true']) < 1: + return False, '"pai-master=true" machine is required to deploy the internal storage' quotaGB = int(self.service_conf['quota-gb']) assert quotaGB >= 1 return True, None diff --git a/src/internal-storage/deploy/create.yaml.template b/src/internal-storage/deploy/create.yaml.template index 5501f3bc7a..f53c2dfd3f 100644 --- a/src/internal-storage/deploy/create.yaml.template +++ b/src/internal-storage/deploy/create.yaml.template @@ -29,6 +29,15 @@ spec: labels: app: internal-storage-create spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - {{ cluster_cfg["internal-storage"]["master-ip"] }} hostNetwork: false containers: - name: internal-storage-create diff --git a/src/internal-storage/deploy/delete.sh b/src/internal-storage/deploy/delete.sh.template similarity index 96% rename from src/internal-storage/deploy/delete.sh rename to src/internal-storage/deploy/delete.sh.template index 92e4719258..e4f44aae0b 100644 --- a/src/internal-storage/deploy/delete.sh +++ b/src/internal-storage/deploy/delete.sh.template @@ -21,6 +21,8 @@ pushd $(dirname "$0") > /dev/null kubectl delete --ignore-not-found --now "daemonset/internal-storage-create-ds" +{% if cluster_cfg['internal-storage']['enable'] %} + kubectl apply --overwrite=true -f delete.yaml || exit $? # Wait until the service is ready. 
@@ -28,4 +30,6 @@ PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ kubectl delete --ignore-not-found --now "daemonset/internal-storage-delete-ds" +{% endif %} + popd > /dev/null diff --git a/src/internal-storage/deploy/delete.yaml.template b/src/internal-storage/deploy/delete.yaml.template index 1bba474084..d053183ace 100644 --- a/src/internal-storage/deploy/delete.yaml.template +++ b/src/internal-storage/deploy/delete.yaml.template @@ -29,15 +29,15 @@ spec: labels: app: internal-storage-delete spec: - affinity: # deploy-rules doesn't take effect during deleting + affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: pai-master + - key: kubernetes.io/hostname operator: In values: - - 'true' + - {{ cluster_cfg["internal-storage"]["master-ip"] }} hostNetwork: false containers: - name: internal-storage-delete diff --git a/src/internal-storage/deploy/service.yaml b/src/internal-storage/deploy/service.yaml index a7c50d3249..36dd9aaeda 100644 --- a/src/internal-storage/deploy/service.yaml +++ b/src/internal-storage/deploy/service.yaml @@ -24,12 +24,9 @@ template-list: - create.yaml - delete.yaml - start.sh + - delete.sh start-script: start.sh stop-script: stop.sh delete-script: delete.sh refresh-script: refresh.sh - - -deploy-rules: - - in: pai-master diff --git a/src/postgresql/config/postgresql.py b/src/postgresql/config/postgresql.py index ae155f636f..6033a44462 100644 --- a/src/postgresql/config/postgresql.py +++ b/src/postgresql/config/postgresql.py @@ -39,8 +39,8 @@ def merge_service_configuration(overwrite_srv_cfg, default_srv_cfg): def validation_pre(self): if self.service_conf['enable']: machine_list = self.cluster_conf['machine-list'] - if len([host for host in machine_list if host.get('pai-master') == 'true']) != 1: - return False, '1 and only 1 "pai-master=true" machine is required to deploy the postgresql service' + if len([host for host in machine_list if 
host.get('pai-master') == 'true']) < 1: + return False, '"pai-master=true" machine is required to deploy the postgresql service' return True, None def run(self): @@ -49,7 +49,7 @@ def run(self): machine_list = self.cluster_conf['machine-list'] master_ip = [host['hostip'] for host in machine_list if host.get('pai-master') == 'true'][0] result['host'] = master_ip - result['connectionStr'] = 'postgresql://{}:{}@{}:{}/{}'.format( + result['connection-str'] = 'postgresql://{}:{}@{}:{}/{}'.format( result['user'], result['passwd'], result['host'], result['port'], result['db']) return result diff --git a/src/postgresql/deploy/postgresql.yaml.template b/src/postgresql/deploy/postgresql.yaml.template index 57a3edf230..ddc9fea6cc 100644 --- a/src/postgresql/deploy/postgresql.yaml.template +++ b/src/postgresql/deploy/postgresql.yaml.template @@ -29,7 +29,16 @@ spec: labels: app: postgresql spec: - hostNetwork: false + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - {{ cluster_cfg["postgresql"]["host"] }} + hostNetwork: true containers: - name: postgresql image: {{ cluster_cfg["cluster"]["docker-registry"]["prefix"] }}postgresql:{{ cluster_cfg["cluster"]["docker-registry"]["tag"] }} @@ -43,14 +52,15 @@ spec: value: {{ cluster_cfg["postgresql"]["db"] }} - name: PGDATA value: /var/lib/postgresql/data/pgdata + args: ['-c', 'port={{- cluster_cfg["postgresql"]["port"] }}'] volumeMounts: - name: internal-data-dir mountPath: /var/lib/postgresql/data/ mountPropagation: "None" - ports: - - containerPort: 5432 - hostPort: {{ cluster_cfg["postgresql"]["port"] }} - name: postgresql-port + # ports: + # - containerPort: {{ cluster_cfg["postgresql"]["port"] }} + # hostPort: {{ cluster_cfg["postgresql"]["port"] }} + # name: postgresql-port volumes: - name: internal-data-dir hostPath: diff --git a/src/postgresql/deploy/service.yaml 
b/src/postgresql/deploy/service.yaml index 11bf78364c..cf0d5c721e 100644 --- a/src/postgresql/deploy/service.yaml +++ b/src/postgresql/deploy/service.yaml @@ -30,7 +30,3 @@ start-script: start.sh stop-script: stop.sh delete-script: delete.sh refresh-script: refresh.sh - - -deploy-rules: - - in: pai-master diff --git a/src/postgresql/deploy/start.sh.template b/src/postgresql/deploy/start.sh.template index 90b3833616..e42bfd5f74 100644 --- a/src/postgresql/deploy/start.sh.template +++ b/src/postgresql/deploy/start.sh.template @@ -19,7 +19,7 @@ pushd $(dirname "$0") > /dev/null -{% if cluster_cfg['postgresql']['enable'] %} +{% if (cluster_cfg['internal-storage']['enable']) and (cluster_cfg['postgresql']['enable']) %} kubectl apply --overwrite=true -f postgresql.yaml || exit $? diff --git a/src/rest-server/deploy/rest-server.yaml.template b/src/rest-server/deploy/rest-server.yaml.template index 5326289021..3a4b32f14c 100644 --- a/src/rest-server/deploy/rest-server.yaml.template +++ b/src/rest-server/deploy/rest-server.yaml.template @@ -108,6 +108,10 @@ spec: {% else %} - name: RBAC_IN_CLUSTER value: "false" +{% endif %} +{% if cluster_cfg['postgresql']['enable'] %} + - name: SQL_CONNECTION_STR + value: {{ cluster_cfg['postgresql']['connection-str'] }} {% endif %} ports: - name: rest-server diff --git a/src/rest-server/deploy/service.yaml b/src/rest-server/deploy/service.yaml index c8b0755588..7f22e4eaf9 100644 --- a/src/rest-server/deploy/service.yaml +++ b/src/rest-server/deploy/service.yaml @@ -25,6 +25,7 @@ prerequisite: - frameworkcontroller - hivedscheduler - log-manager + - postgresql template-list: - rest-server.yaml From 57787bf8604be12b015d6a25ba411238200b3ca0 Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Wed, 13 Nov 2019 11:22:35 +0800 Subject: [PATCH 24/26] fix: doc --- src/internal-storage/README.md | 16 ++++++++++++---- src/internal-storage/deploy/create.yaml.template | 1 + src/postgresql/README.md | 8 ++++---- 3 files changed, 17 
insertions(+), 8 deletions(-) diff --git a/src/internal-storage/README.md b/src/internal-storage/README.md index 016e96a1b6..c519688113 100644 --- a/src/internal-storage/README.md +++ b/src/internal-storage/README.md @@ -6,7 +6,7 @@ Internal Storage is designed to create a limited size storage in PAI. The storag internal-storage: enable: true type: hostPath - root-path: /paiInternal + root-path: /mnt/paiInternal quota-gb: 10 ``` @@ -14,7 +14,7 @@ User can override these settings in `services-configuration.yaml`. ## Set up Internal Storage -For now, `hostPath` is the only supported `type` for internal storage. In summary, it will make a `` folder (The default path is `/paiInternal`) on the `pai-master` node first, then create a loop device in the folder. Please refer to the following commands for details. +For now, `hostPath` is the only supported `type` for internal storage. In summary, it will make a `` folder (The default path is `/mnt/paiInternal`) on the `pai-master` node first, then create a loop device in the folder. If the path does not exist, PAI will create it for you. Please refer to the following commands for details of loop device creation. ```bash fallocate -l ${QUOTA_GB}G storage.ext4 @@ -47,6 +47,16 @@ spec: - name: internal-data-dir hostPath: path: '{{ cluster_cfg["internal-storage"]["root-path"] }}/storage' + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - {{ cluster_cfg["internal-storage"]["master-ip"] }} ``` Please note that `mountPropagation` should be set to `None`, to ensure that any unexpected unmount of the data folder will not be propagates to the pod. @@ -77,8 +87,6 @@ Possibility is that users may delete our storage file `storage.ext4` or `storage During service deletion, if we cannot unmount or delete the data, the deletion process won't be successful. 
There is also a readiness probe for these purposes. See [delete.yaml.template](deploy/delete.yaml.template) for details. - - ## References - [Loop Device](http://man7.org/linux/man-pages/man4/loop.4.html) - [Linux Quota Tutorial](http://souptonuts.sourceforge.net/quota_tutorial.html) diff --git a/src/internal-storage/deploy/create.yaml.template b/src/internal-storage/deploy/create.yaml.template index f53c2dfd3f..d6c4037c5a 100644 --- a/src/internal-storage/deploy/create.yaml.template +++ b/src/internal-storage/deploy/create.yaml.template @@ -63,5 +63,6 @@ spec: - name: internal-data-dir hostPath: path: {{ cluster_cfg["internal-storage"]["root-path"] }} + type: DirectoryOrCreate imagePullSecrets: - name: {{ cluster_cfg["cluster"]["docker-registry"]["secret-name"] }} diff --git a/src/postgresql/README.md b/src/postgresql/README.md index 9bd7f691ba..50b2a823ae 100644 --- a/src/postgresql/README.md +++ b/src/postgresql/README.md @@ -1,6 +1,6 @@ ## Postgresql -Postgresql is an internal service for structured information persistence. By default, the service uses the folder `/paiInternal/storage` on the master node for data storage. The real data directory for the postgresql would be `/paiInternal/storage/pgdata`. The default service configurations are as follows: +Postgresql is an internal service for structured information persistence. By default, the service uses the `Internal Storage` on the master node for data storage. The real data directory for the postgresql would be `{{ cluster_cfg["internal-storage"]["root-path"] }}/storage`. The default service configurations are as follows: ```yaml postgresql: @@ -15,15 +15,15 @@ One can override these settings by editing `services-configuration.yaml` . ### Table Initialization -If it is the first time the service launches, it will execute `src/init_table.sql` to initialize the table structure. The initialization won't be fired if the service detects old data. 
If you want to re-trigger it, please remove `Internal Storage` and restart the service manually. ### How to Access the Database -The database connection string is written to the cluster configuration object in `config/postgresql.py`. One can use `cluster_cfg['postgresql']['connectionStr']` to retrieve it in any template files. +The database connection string is written to the cluster configuration object in `config/postgresql.py`. One can use `cluster_cfg['postgresql']['connection-str']` to retrieve it in any template files. Particularly, the connection string is exposed as an environmental variable in `rest-server`: ```bash # in rest-server pod -SQL_CONNECTION_STR=postgresql://root:rootpass@:5432/openpai +SQL_CONNECTION_STR=postgresql://<user>:<passwd>@<host>:<port>/<db> ``` From c376335f01d339291304d047917eb4846d2aa125 Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Wed, 13 Nov 2019 13:40:33 +0800 Subject: [PATCH 25/26] minor doc fix --- src/internal-storage/README.md | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/internal-storage/README.md b/src/internal-storage/README.md index c519688113..ddb374a774 100644 --- a/src/internal-storage/README.md +++ b/src/internal-storage/README.md @@ -37,6 +37,15 @@ apiVersion: v1 kind: Pod ...
spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - {{ cluster_cfg["internal-storage"]["master-ip"] }} containers: - image: volumeMounts: @@ -47,16 +56,6 @@ spec: - name: internal-data-dir hostPath: path: '{{ cluster_cfg["internal-storage"]["root-path"] }}/storage' - spec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/hostname - operator: In - values: - - {{ cluster_cfg["internal-storage"]["master-ip"] }} ``` Please note that `mountPropagation` should be set to `None`, to ensure that any unexpected unmount of the data folder will not be propagates to the pod. From 137db04f95bbe58fef849f22b187fc787ef83e8a Mon Sep 17 00:00:00 2001 From: hzy46 <362583303@qq.com> Date: Wed, 20 Nov 2019 16:37:41 +0800 Subject: [PATCH 26/26] fix: disable --- src/internal-storage/README.md | 2 +- src/internal-storage/config/internal-storage.yaml | 2 +- src/postgresql/README.md | 2 +- src/postgresql/config/postgresql.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/internal-storage/README.md b/src/internal-storage/README.md index ddb374a774..18e679b143 100644 --- a/src/internal-storage/README.md +++ b/src/internal-storage/README.md @@ -4,7 +4,7 @@ Internal Storage is designed to create a limited size storage in PAI. 
The storag ```yaml internal-storage: - enable: true + enable: false type: hostPath root-path: /mnt/paiInternal quota-gb: 10 diff --git a/src/internal-storage/config/internal-storage.yaml b/src/internal-storage/config/internal-storage.yaml index 8e4715ec19..4c6baf2d70 100644 --- a/src/internal-storage/config/internal-storage.yaml +++ b/src/internal-storage/config/internal-storage.yaml @@ -1,4 +1,4 @@ -enable: true +enable: false type: hostPath root-path: /mnt/paiInternal quota-gb: 10 \ No newline at end of file diff --git a/src/postgresql/README.md b/src/postgresql/README.md index 50b2a823ae..99405d64c9 100644 --- a/src/postgresql/README.md +++ b/src/postgresql/README.md @@ -4,7 +4,7 @@ Postgresql is an internal service for structured information persistence. By def ```yaml postgresql: - enable: true + enable: false user: root passwd: rootpass port: 5432 diff --git a/src/postgresql/config/postgresql.yaml b/src/postgresql/config/postgresql.yaml index 4c2866fc5c..edcb1a503e 100644 --- a/src/postgresql/config/postgresql.yaml +++ b/src/postgresql/config/postgresql.yaml @@ -1,4 +1,4 @@ -enable: true +enable: false user: root passwd: rootpass db: openpai