diff --git a/.gitignore b/.gitignore index 91d25dd..7675b11 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ terraform.tfvars # Ignore terraform/examples/io500 active configuration symlink terraform/examples/io500/config/active_config.sh +terraform/examples/io500/login # Ignore other files id_rsa* diff --git a/docs/tutorials/example_daos_cluster.md b/docs/tutorials/example_daos_cluster.md index b55dc8d..748823d 100644 --- a/docs/tutorials/example_daos_cluster.md +++ b/docs/tutorials/example_daos_cluster.md @@ -195,7 +195,8 @@ In order to begin using the storage you must issue a *format* command. To format the storage run ```bash -dmg storage format +sudo dmg storage format +sudo dmg system query -v ``` To learn more see [Storage Formatting](https://docs.daos.io/latest/admin/deployment/#storage-formatting) @@ -209,31 +210,31 @@ Now that the system has been formatted you can create a Pool. First check to see how much free NVMe storage you have. ```bash -dmg storage query usage +sudo dmg storage query usage ``` -This will return something like +This will return storage information for the servers. + +The output looks similar to ``` Hosts SCM-Total SCM-Free SCM-Used NVMe-Total NVMe-Free NVMe-Used ----- --------- -------- -------- ---------- --------- --------- -daos-server-0001 107 GB 107 GB 0 % 3.2 TB 3.2 TB 0 % +daos-server-0001 48 GB 48 GB 0 % 1.6 TB 1.6 TB 0 % +daos-server-0002 48 GB 48 GB 0 % 1.6 TB 1.6 TB 0 % +daos-server-0003 48 GB 48 GB 0 % 1.6 TB 1.6 TB 0 % +daos-server-0004 48 GB 48 GB 0 % 1.6 TB 1.6 TB 0 % ``` -> If the values in the columns are showing zeros, wait for 1-2 minutes and run the command again. -> -> Even though the `dmg storage format` command returned immediately it can sometimes take a few minutes for the storage system to be ready. -> -> You will know it's ready when you no longer see zeros in the output > from the `dmg storage query usage` command. 
-In the example output above there is one server with a total of 3.2TB > of free space. +In the example output above there are 4 servers with a total of 6.4TB of free space. -With that information you know you can create a 3TB pool. +With that information you know you can safely create a 6TB pool. Create the pool. ```bash -dmg pool create -z 3TB -t 3 -u ${USER} --label=daos_pool +sudo dmg pool create -z 6TB -t 3 -u ${USER} --label=daos_pool ``` For more information about pools see diff --git a/images/daos-client-image.pkr.hcl b/images/daos-client-image.pkr.hcl index 0275954..059fdbf 100644 --- a/images/daos-client-image.pkr.hcl +++ b/images/daos-client-image.pkr.hcl @@ -68,4 +68,18 @@ build { ] } + provisioner "file" { + source = "./scripts/cert_gen/sm_get_ca.sh" + destination = "/tmp/" + } + + provisioner "shell" { + inline = [ + "sudo mkdir -p /var/daos/cert_gen", + "sudo mv /tmp/sm_get_ca.sh /var/daos/cert_gen", + "sudo chown -R root:root /var/daos/cert_gen", + "sudo chmod +x /var/daos/cert_gen/*.sh" + ] + } + } diff --git a/images/daos-server-image.pkr.hcl b/images/daos-server-image.pkr.hcl index 21c30c7..b8d8199 100644 --- a/images/daos-server-image.pkr.hcl +++ b/images/daos-server-image.pkr.hcl @@ -59,7 +59,7 @@ build { sources = ["source.googlecompute.daos-server-centos-7"] provisioner "shell" { - environment_vars = ["DAOS_REPO_BASE_URL=${var.daos_repo_base_url}", "DAOS_VERSION=${var.daos_version}", "DAOS_INSTALL_TYPE=server"] + environment_vars = ["DAOS_REPO_BASE_URL=${var.daos_repo_base_url}", "DAOS_VERSION=${var.daos_version}", "DAOS_INSTALL_TYPE=all"] execute_command = "echo 'packer' | sudo -S env {{ .Vars }} {{ .Path }}" pause_before = "5s" scripts = [ @@ -68,4 +68,17 @@ build { ] } + provisioner "file" { + source = "./scripts/cert_gen" + destination = "/tmp/" + } + + provisioner "shell" { + inline = [ + "sudo mkdir -p /var/daos/", + "sudo mv /tmp/cert_gen /var/daos/", + "sudo chown -R root:root /var/daos/cert_gen", + "sudo chmod +x 
/var/daos/cert_gen/*.sh" + ] + } } diff --git a/images/scripts/cert_gen/SConscript b/images/scripts/cert_gen/SConscript new file mode 100644 index 0000000..b1510f0 --- /dev/null +++ b/images/scripts/cert_gen/SConscript @@ -0,0 +1,15 @@ +"""Build DAOS Certificate Generation""" +import os + +def scons(): + """Execute build""" + Import('env') + + env.Install("$PREFIX/lib64/daos/certgen", ['admin.cnf', + 'agent.cnf', + 'ca.cnf', + 'server.cnf', + 'gen_certificates.sh']) + +if __name__ == "SCons.Script": + scons() diff --git a/images/scripts/cert_gen/admin.cnf b/images/scripts/cert_gen/admin.cnf new file mode 100644 index 0000000..f3c0314 --- /dev/null +++ b/images/scripts/cert_gen/admin.cnf @@ -0,0 +1,12 @@ +# OpenSSL client configuration file +[ req ] +prompt=no +distinguished_name = distinguished_name +basicConstraints = CA:FALSE + +[ distinguished_name ] +organizationName = DAOS +commonName = admin + +#In the future we can do username based certs for login +#commonName = diff --git a/images/scripts/cert_gen/agent.cnf b/images/scripts/cert_gen/agent.cnf new file mode 100644 index 0000000..f1dc2eb --- /dev/null +++ b/images/scripts/cert_gen/agent.cnf @@ -0,0 +1,15 @@ +[ req ] +prompt=no +distinguished_name = distinguished_name +basicConstraints = CA:FALSE +#uncomment if you want to use per agent certificates +#req_extensions = extensions + +[ distinguished_name ] +organizationName = DAOS +# Required value for commonName, do not change. 
+commonName = agent + +[ extensions ] +#uncomment if you want to use per agent certificates +#subjectAltName = DNS:,DNS:,IP: \ No newline at end of file diff --git a/images/scripts/cert_gen/ca.cnf b/images/scripts/cert_gen/ca.cnf new file mode 100644 index 0000000..24fac68 --- /dev/null +++ b/images/scripts/cert_gen/ca.cnf @@ -0,0 +1,50 @@ + [ ca ] +default_ca = CA_daos + +[ CA_daos ] +dir = ./daosCA +certs = $dir/certs +database = $dir/index.txt +serial = $dir/serial.txt + +# Key and Certificate for the root +certificate = $dir/daosCA.crt +private_key = $dir/private/daosCA.key + +default_md = sha512 # SAFE Crypto Requires SHA-512 +default_days = 1095 # how long to certify for +copy_extensions = copy +unique_subject = no + +[ req ] +prompt = no +distinguished_name = ca_dn +x509_extensions = ca_ext + +[ ca_dn ] +organizationName = DAOS +commonName = DAOS CA + +[ ca_ext ] +keyUsage = critical,digitalSignature,nonRepudiation,keyEncipherment,keyCertSign +basicConstraints = critical,CA:true,pathlen:1 + +[ signing_policy ] +organizationName = supplied +commonName = supplied + +[ signing_agent ] +keyUsage = critical,digitalSignature,keyEncipherment +extendedKeyUsage = clientAuth + +[ signing_server ] +keyUsage = critical,digitalSignature,keyEncipherment +extendedKeyUsage = serverAuth, clientAuth + +[ signing_admin ] +keyUsage = critical,digitalSignature,keyEncipherment +extendedKeyUsage = clientAuth + +[ signing_test ] +keyUsage = critical,digitalSignature,keyEncipherment +extendedKeyUsage = clientAuth diff --git a/images/scripts/cert_gen/gen_certificates.sh b/images/scripts/cert_gen/gen_certificates.sh new file mode 100755 index 0000000..f220a0b --- /dev/null +++ b/images/scripts/cert_gen/gen_certificates.sh @@ -0,0 +1,220 @@ +#!/bin/bash +# Copyright 2019-2022 Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted for any purpose (including commercial purposes) +# provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or materials provided with the distribution. +# +# 3. In addition, redistributions of modified forms of the source or binary +# code must carry prominent notices stating that the original code was +# changed and the date of the change. +# +# 4. All publications or advertising materials mentioning features or use of +# this software are asked, but not required, to acknowledge that it was +# developed by Intel Corporation and credit the contributors. +# +# 5. Neither the name of Intel Corporation, nor the name of any Contributor +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +__usage=" +Usage: gen_certificates.sh [DIR] +Generate certificates for DAOS deployment in the [DIR]/daosCA. +By default [DIR] is the current directory. +" + +function print_usage () { + >&2 echo "$__usage" +} + +CA_HOME="${1:-.}/daosCA" +PRIVATE="${CA_HOME}/private" +CERTS="${CA_HOME}/certs" +CONFIGS="$(dirname "${BASH_SOURCE}")" + +function setup_directories () { + mkdir -p "${PRIVATE}" + mkdir -p "${CERTS}" +} + +function generate_ca_cnf () { + echo " +[ ca ] +default_ca = CA_daos + +[ CA_daos ] +dir = ${CA_HOME} +certs = \$dir/certs +database = \$dir/index.txt +serial = \$dir/serial.txt + +# Key and Certificate for the root +certificate = \$dir/daosCA.crt +private_key = \$dir/private/daosCA.key + +default_md = sha512 # SAFE Crypto Requires SHA-512 +default_days = 1095 # how long to certify for +copy_extensions = copy +unique_subject = no + +[ req ] +prompt = no +distinguished_name = ca_dn +x509_extensions = ca_ext + +[ ca_dn ] +organizationName = DAOS +commonName = DAOS CA + +[ ca_ext ] +keyUsage = critical,digitalSignature,nonRepudiation,keyEncipherment,keyCertSign +basicConstraints = critical,CA:true,pathlen:1 + +[ signing_policy ] +organizationName = supplied +commonName = supplied + +[ signing_agent ] +keyUsage = critical,digitalSignature,keyEncipherment +extendedKeyUsage = clientAuth + +[ signing_server ] +keyUsage = critical,digitalSignature,keyEncipherment +extendedKeyUsage = serverAuth, clientAuth + +[ signing_admin ] +keyUsage = critical,digitalSignature,keyEncipherment +extendedKeyUsage = clientAuth + +" > "${CA_HOME}/ca.cnf" +} + +function generate_ca_cert () { + echo "Generating Private CA Root Certificate" + # Generate Private key and set permissions + openssl genrsa -out "${PRIVATE}/daosCA.key" 3072 + [[ $EUID -eq 0 ]] && chown root.root "${PRIVATE}/daosCA.key" 2>/dev/null + chmod 0400 "${PRIVATE}/daosCA.key" + # Generate CA Certificate + openssl req -new -x509 -config "${CA_HOME}/ca.cnf" -days 365 -sha512 \ + -key "${PRIVATE}/daosCA.key" \ + 
-out "${CERTS}/daosCA.crt" -batch + [[ $EUID -eq 0 ]] && chown root.root "${CERTS}/daosCA.crt" 2>/dev/null + chmod 0644 "${CERTS}/daosCA.crt" + # Reset the CA index + rm -f "${CA_HOME}/index.txt" "${CA_HOME}/serial.txt" + touch "${CA_HOME}/index.txt" + echo '01' > "${CA_HOME}/serial.txt" + echo "Private CA Root Certificate created in ${CA_HOME}" +} + +function generate_agent_cert () { + echo "Generating Agent Certificate" + # Generate Private key and set its permissions + openssl genrsa -out "${CERTS}/agent.key" 3072 + [[ $EUID -eq 0 ]] \ + && chown daos_agent.daos_agent "${CERTS}/agent.key" 2>/dev/null + chmod 0400 "${CERTS}/agent.key" + # Generate a Certificate Signing Request (CSR) + openssl req -new -config "${CONFIGS}/agent.cnf" -key "${CERTS}/agent.key" \ + -out "${CA_HOME}/agent.csr" -batch + # Create Certificate from request + openssl ca -config "${CA_HOME}/ca.cnf" -keyfile "${PRIVATE}/daosCA.key" \ + -cert "${CERTS}/daosCA.crt" -policy signing_policy \ + -extensions signing_agent -out "${CERTS}/agent.crt" \ + -outdir "${CERTS}" -in "${CA_HOME}/agent.csr" -batch + [[ $EUID -eq 0 ]] \ + && chown daos_agent.daos_agent "${CERTS}/agent.crt" 2>/dev/null + chmod 0644 "${CERTS}/agent.crt" + + echo "Required Agent Certificate Files: + ${CERTS}/daosCA.crt + ${CERTS}/agent.key + ${CERTS}/agent.crt" +} + +function generate_admin_cert () { + echo "Generating Admin Certificate" + # Generate Private key and set its permissions + openssl genrsa -out "${CERTS}/admin.key" 3072 + # TODO [[ $EUID -eq 0 ]] && chown ?.? 
"${CERTS}/admin.key" + chmod 0400 "${CERTS}/admin.key" + # Generate a Certificate Signing Request (CSR) + openssl req -new -config "${CONFIGS}/admin.cnf" -key "${CERTS}/admin.key" \ + -out "${CA_HOME}/admin.csr" -batch + # Create Certificate from request + openssl ca -config "${CA_HOME}/ca.cnf" -keyfile "${PRIVATE}/daosCA.key" \ + -cert "${CERTS}/daosCA.crt" -policy signing_policy \ + -extensions signing_admin -out "${CERTS}/admin.crt" \ + -outdir "${CERTS}" -in "${CA_HOME}/admin.csr" -batch + # TODO [[ $EUID -eq 0 ]] && chown ?.? "${CERTS}/admin.crt" + chmod 0644 "${CERTS}/admin.crt" + + echo "Required Admin Certificate Files: + ${CERTS}/daosCA.crt + ${CERTS}/admin.key + ${CERTS}/admin.crt" +} + +function generate_server_cert () { + echo "Generating Server Certificate" + # Generate Private key and set its permissions + openssl genrsa -out "${CERTS}/server.key" 3072 + [[ $EUID -eq 0 ]] && chown daos_server.daos_server "${CERTS}/server.key" + chmod 0400 "${CERTS}/server.key" + # Generate a Certificate Signing Request (CSR) + openssl req -new -config "${CONFIGS}/server.cnf" \ + -key "${CERTS}/server.key" -out "${CA_HOME}/server.csr" -batch + # Create Certificate from request + openssl ca -config "$CA_HOME/ca.cnf" -keyfile "${PRIVATE}/daosCA.key" \ + -cert "${CERTS}/daosCA.crt" -policy signing_policy \ + -extensions signing_server -out "${CERTS}/server.crt" \ + -outdir "${CERTS}" -in "${CA_HOME}/server.csr" -batch + [[ $EUID -eq 0 ]] && chown daos_server.daos_server "${CERTS}/server.crt" + chmod 0644 "${CERTS}/server.crt" + + echo "Required Server Certificate Files: + ${CERTS}/daosCA.crt + ${CERTS}/server.key + ${CERTS}/server.crt" +} + +function cleanup () { + rm -f "${CERTS}"/*pem + rm -f "${CA_HOME}/agent.csr" + rm -f "${CA_HOME}/admin.csr" + rm -f "${CA_HOME}/server.csr" + rm -f "${CA_HOME}/ca.cnf" +} + +function main () { + setup_directories + generate_ca_cnf + generate_ca_cert + generate_server_cert + generate_agent_cert + generate_admin_cert + cleanup +} + 
+main diff --git a/images/scripts/cert_gen/server.cnf b/images/scripts/cert_gen/server.cnf new file mode 100644 index 0000000..d273a83 --- /dev/null +++ b/images/scripts/cert_gen/server.cnf @@ -0,0 +1,15 @@ +[ req ] +prompt=no +distinguished_name = distinguished_name +basicConstraints = CA:FALSE +#uncomment if you want to use per agent certificates +#req_extensions = extensions + +[ distinguished_name ] +organizationName = DAOS +# Required value for commonName, do not change. +commonName = server + +[ extensions ] +#uncomment if you want to use per agent certificates +#subjectAltName = DNS:,DNS:,IP: \ No newline at end of file diff --git a/images/scripts/cert_gen/sm_get_ca.sh b/images/scripts/cert_gen/sm_get_ca.sh new file mode 100755 index 0000000..10cd5e7 --- /dev/null +++ b/images/scripts/cert_gen/sm_get_ca.sh @@ -0,0 +1,185 @@ +#!/bin/bash +# Copyright 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# PURPOSE / DESCRIPTION +# +# Get certificates from a Secret Manager secret and copy them to /etc/daos/certs +# +# Look for Secret Manager secret that contains the daosCA.tar.gz file. +# The Secret Manager secret is created by Terraform but the certs are generated +# and stored in the secret when the startup script runs on the first DAOS +# server instance. +# +# When the secret is found +# 1. Get the daosCA.tar.gz from the secret version data +# 2. Extract the daosCA.tar.gz to /var/daos/daosCA +# 3. 
Copy the cert and key files to their proper locations in /etc/daos/certs +# 4. Set ownership and permissions on certs and key files +# 5. Clean up +# +# This script only needs to be run once on each DAOS client or server instance. +# It should be called from the startup script of all DAOS client and server +# instances. +# +# In order for this script to access the secret version containing the +# daosCA.tar.gz file, the service account that is running the instance must +# be given the proper permissions on the secret. The daos_server Terraform +# module will create the secret and assign the necessary IAM +# policies to the service account so that it can access the secret. +# +# NOTE +# +# At the time this script was written DAOS services and the dmg command +# required that permissions on some files such as /etc/daos/certs/admin.key +# and /etc/daos/certs/daosCA.crt files have mode 0700. Not 0600 but 0700. +# So when you see the odd mode, that is why it was done. + +set -e +trap 'echo "An unexpected error occurred. Exiting."' ERR + +SECRET_NAME="${SECRET_NAME:-$1}" # Name of secret that was created by Terraform +INSTALL_TYPE="${INSTALL_TYPE:-$2}" # client or server +DAOS_DIR=/var/daos +SCRIPT_NAME=$(basename "$0") + +if [[ -z "${SECRET_NAME}" ]]; then + echo "ERROR: Secret name must be passed as the first parameter. Exiting..." + exit 1 +fi + +if [[ -z "${INSTALL_TYPE}" ]]; then + echo "ERROR: Install type [client|server] must be passed as the second parameter. Exiting..." + exit 1 +fi + +get_ca_from_sm() { + # Get the daosCA.tar.gz file from Secret Manager + # daosCA.tar.gz contains the certs that need to be copied + # to /etc/daos/certs + + if [[ -f "${DAOS_DIR}/daosCA.tar.gz" ]]; then + # Make sure that an old daosCA.tar.gz file doesn't exist before + # we attempt to retrieve the file from Secret Manager. + rm -f "${DAOS_DIR}/daosCA.tar.gz" + fi + + # Loop until the secret exists. + # If the secret is not found in max_secret_wait_time, then exit. 
+ max_secret_wait_time="5 mins" + endtime=$(date -ud "${max_secret_wait_time}" +%s) + until gcloud secrets versions list "${SECRET_NAME}" \ + --filter="NAME:1" \ + --format="value('name')" \ + --verbosity=none | grep -q 1 + do + if [[ $(date -u +%s) -ge ${endtime} ]]; then + echo "ERROR: Secret '${SECRET_NAME}' not found after checking for ${max_secret_wait_time}" + exit 1 + fi + echo "Checking for secret: ${SECRET_NAME}" + sleep 5 + done + + echo "Found secret: ${SECRET_NAME}" + echo "Saving '${SECRET_NAME}' data to ${DAOS_DIR}/daosCA.tar.gz" + + # Always get version 1 of the secret. There should not be other versions. + gcloud secrets versions access 1 --secret="${SECRET_NAME}" \ + --format "value(payload.data.decode(base64).encode(base64))" \ + | base64 --decode > "${DAOS_DIR}/daosCA.tar.gz" + + if [[ ! -f "${DAOS_DIR}/daosCA.tar.gz" ]]; then + echo "ERROR: File not found '${DAOS_DIR}/daosCA.tar.gz'" + exit 1 + fi + + echo "Extracting ${DAOS_DIR}/daosCA.tar.gz" + tar xzf "${DAOS_DIR}/daosCA.tar.gz" -C "${DAOS_DIR}/" + rm -f "${DAOS_DIR}/daosCA.tar.gz" + + # Check to make sure the directory was created before continuing + if [[ ! -d "${DAOS_DIR}/daosCA" ]]; then + echo "ERROR: Directory '${DAOS_DIR}/daosCA' not found. Exiting ..." + exit 1 + fi +} + +echo "BEGIN: ${SCRIPT_NAME}" + +cd "${DAOS_DIR}" + +# Only get the ${DAOS_DIR}/daosCA from Secret Manager +# when the ${DAOS_DIR}/daosCA directory doesn't exist. +# On the first DAOS server instance ${DAOS_DIR}/daosCA will exist because that +# is where the certs were generated. No need to get the daosCA.tar.gz file +# from the secret in that case. +if [[ ! -d "${DAOS_DIR}/daosCA" ]]; then + get_ca_from_sm +fi + +# Cleanup any old certs that may exist. 
+rm -rf /etc/daos/certs +mkdir -p /etc/daos/certs + +echo "Copying certs and setting permissions" + +# CLIENT CERTS +if [[ "${INSTALL_TYPE,,}" == "client" ]]; then + cp ${DAOS_DIR}/daosCA/certs/daosCA.crt /etc/daos/certs/ + cp ${DAOS_DIR}/daosCA/certs/agent.* /etc/daos/certs/ + chown -R daos_agent:daos_agent /etc/daos/certs + chmod 0755 /etc/daos/certs + chmod 0644 /etc/daos/certs/*.crt + chmod 0600 /etc/daos/certs/*.key +fi + +# SERVER CERTS +if [[ "${INSTALL_TYPE,,}" == "server" ]]; then + # On GCP daos_server runs as root because instances don't have IOMMU + # So all certs and keys should be owned by root + cp ${DAOS_DIR}/daosCA/certs/daosCA.crt /etc/daos/certs/ + cp ${DAOS_DIR}/daosCA/certs/server.* /etc/daos/certs/ + + # Server needs a copy of the agent.crt in /etc/daos/certs/clients + mkdir -p /etc/daos/certs/clients + cp "${DAOS_DIR}/daosCA/certs/agent.crt" /etc/daos/certs/clients + + chown -R root:root /etc/daos/certs + chmod 0755 /etc/daos/certs + chmod 0755 /etc/daos/certs/clients + chmod 0644 /etc/daos/certs/*.crt + chmod 0600 /etc/daos/certs/*.key + chmod 0644 /etc/daos/certs/clients/* +fi + +# +# ADMIN CERTS ON CLIENTS AND SERVERS +# + +# As of 2022-05-05 dmg requires mode 0700 admin.key +# Odd that its not 0600 +# dmg must run as root +cp ${DAOS_DIR}/daosCA/certs/admin.* /etc/daos/certs/ + +chown root:root /etc/daos/certs/admin.* +chmod 0644 /etc/daos/certs/admin.crt +chmod 0700 /etc/daos/certs/admin.key + +# Remove the CA dir now that the certs have been copied to /etc/daos/certs +if [[ -d "${DAOS_DIR}/daosCA" ]]; then + rm -rf "${DAOS_DIR}/daosCA" +fi + +echo "END: ${SCRIPT_NAME}" diff --git a/images/scripts/cert_gen/sm_set_ca.sh b/images/scripts/cert_gen/sm_set_ca.sh new file mode 100755 index 0000000..51841d3 --- /dev/null +++ b/images/scripts/cert_gen/sm_set_ca.sh @@ -0,0 +1,58 @@ + +#!/bin/bash +# Copyright 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Generate private CA, certs and keys. +# Add them to a GCP Secret Manager secret. +# +# This script only needs to be run once in a DAOS cluster. +# It is run from a startup script on the first DAOS server. +# +# In order for this script to add a Secret Version to a given secret, the +# service account that is running the instance must be given the proper +# permissions on the secret. Typically the secret is created by Terraform +# and therefore it is owned by the user who is running Terraform. The +# daos_server Terraform module will create the secret and apply the necessary +# policies. +# + +set -ue +trap 'echo "An unexpected error occurred. Exiting."' ERR + +SECRET_NAME="$1" +DAOS_DIR=/var/daos +CERT_GEN_SCRIPTS_DIR="${DAOS_DIR}/cert_gen" + +if [[ -z "${SECRET_NAME}" ]]; then + echo "ERROR: Secret name must be passed as the first parameter. Exiting..." + exit 1 +fi + +if [[ ! 
-f "${CERT_GEN_SCRIPTS_DIR}/gen_certificates.sh" ]]; then + echo "ERROR: File not found '${CERT_GEN_SCRIPTS_DIR}/gen_certificates.sh'" + exit 1 +fi + +cd "${DAOS_DIR}" + +# Generate the daosCA directory that contains the certs and keys +"${CERT_GEN_SCRIPTS_DIR}/gen_certificates.sh" "${DAOS_DIR}" +# Create archive of the daosCA directory +tar -cvzf "${DAOS_DIR}/daosCA.tar.gz" ./daosCA +# Store daosCA.tar.gz in Google Cloud Secret Manager +gcloud secrets versions add ${SECRET_NAME} --data-file="${DAOS_DIR}/daosCA.tar.gz" +# Delete certs archive now that it has been added to Secret Manager +rm -f "${DAOS_DIR}/daosCA.tar.gz" diff --git a/images/scripts/cert_gen/test.cnf b/images/scripts/cert_gen/test.cnf new file mode 100644 index 0000000..f7efe9d --- /dev/null +++ b/images/scripts/cert_gen/test.cnf @@ -0,0 +1,12 @@ +# OpenSSL client configuration file +[ req ] +prompt=no +distinguished_name = distinguished_name +basicConstraints = CA:FALSE + +[ distinguished_name ] +organizationName = DAOS +commonName = test + +#In the future we can do username based certs for login +#commonName = diff --git a/images/scripts/install_daos.sh b/images/scripts/install_daos.sh index 394e85e..6697c6e 100644 --- a/images/scripts/install_daos.sh +++ b/images/scripts/install_daos.sh @@ -212,11 +212,17 @@ install_daos() { if [[ "${DAOS_INSTALL_TYPE,,}" =~ ^(all|client)$ ]]; then echo "Install daos-client and daos-devel packages" yum install -y daos-client daos-devel + # Disable daos_agent service. + # It will be enabled by a startup script after the service has been configured. + systemctl disable daos_agent fi if [[ "${DAOS_INSTALL_TYPE,,}" =~ ^(all|server)$ ]]; then echo "Install daos-server packages" yum install -y daos-server + # Disable daos_server service. + # It will be enabled by a startup script after the service has been configured. 
+ systemctl disable daos_server fi if echo "${DAOS_VERSION}" | grep -q -e '^1\..*'; then diff --git a/terraform/examples/daos_cluster/README.md b/terraform/examples/daos_cluster/README.md index 3e8413c..74fe8d7 100644 --- a/terraform/examples/daos_cluster/README.md +++ b/terraform/examples/daos_cluster/README.md @@ -130,7 +130,8 @@ gcloud compute ssh daos-client-0001 Format the storage system. ```bash -dmg storage format +sudo dmg storage format +sudo dmg system query -v ``` Upon successful format, DAOS Control Servers will start DAOS I/O engines that have been specified in the server config file. @@ -139,30 +140,33 @@ For more information see the [Storage Formatting section in the Administration G ### Create a Pool -Now that the system has been formatted a Pool can be created. - Check free NVMe storage. ```bash -dmg storage query usage +sudo dmg storage query usage ``` -This will return something like +This will return storage information for the servers. + +The output looks similar to ``` Hosts SCM-Total SCM-Free SCM-Used NVMe-Total NVMe-Free NVMe-Used ----- --------- -------- -------- ---------- --------- --------- -daos-server-0001 107 GB 107 GB 0 % 3.2 TB 3.2 TB 0 % +daos-server-0001 48 GB 48 GB 0 % 1.6 TB 1.6 TB 0 % +daos-server-0002 48 GB 48 GB 0 % 1.6 TB 1.6 TB 0 % +daos-server-0003 48 GB 48 GB 0 % 1.6 TB 1.6 TB 0 % +daos-server-0004 48 GB 48 GB 0 % 1.6 TB 1.6 TB 0 % ``` -In the example output above there is one server with a total of 3.2TB of free space. +In the example output above there are 4 servers with a total of 6.4TB of free space. -With that information you know you can create a 3TB pool. +With that information you know you can safely create a 6TB pool. Create the pool. ```bash -dmg pool create -z 3TB -t 3 -u ${USER} --label=daos_pool +sudo dmg pool create -z 6TB -t 3 -u ${USER} --label=daos_pool ``` For more information about pools see @@ -247,6 +251,7 @@ No resources. 
| Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [allow\_insecure](#input\_allow\_insecure) | Sets the allow\_insecure setting in the transport\_config section of the daos\_*.yml files | `bool` | `false` | no | | [client\_gvnic](#input\_client\_gvnic) | Use Google Virtual NIC (gVNIC) network interface on DAOS clients | `bool` | `false` | no | | [client\_instance\_base\_name](#input\_client\_instance\_base\_name) | MIG instance base names to use | `string` | `"daos-client"` | no | | [client\_labels](#input\_client\_labels) | Set of key/value label pairs to assign to daos-client instances | `any` | `{}` | no | @@ -258,7 +263,7 @@ No resources. | [client\_os\_family](#input\_client\_os\_family) | OS GCP image family | `string` | `"daos-client-hpc-centos-7"` | no | | [client\_os\_project](#input\_client\_os\_project) | OS GCP image project name. Defaults to project\_id if null. | `string` | `null` | no | | [client\_preemptible](#input\_client\_preemptible) | If preemptible instances | `string` | `false` | no | -| [client\_service\_account](#input\_client\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append"
]
}
| no | +| [client\_service\_account](#input\_client\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append",
"https://www.googleapis.com/auth/cloud-platform"
]
}
| no | | [client\_template\_name](#input\_client\_template\_name) | MIG template name | `string` | `"daos-client"` | no | | [network\_name](#input\_network\_name) | Name of the GCP network to use | `string` | `"default"` | no | | [project\_id](#input\_project\_id) | The GCP project to use | `string` | n/a | yes | @@ -279,7 +284,7 @@ No resources. | [server\_os\_project](#input\_server\_os\_project) | OS GCP image project name. Defaults to project\_id if null. | `string` | `null` | no | | [server\_pools](#input\_server\_pools) | If provided, this module will generate a script to create a list of pools. pool attributes have to be specified in a format acceptable by [dmg](https://docs.daos.io/v2.0/admin/pool_operations/) and daos. |
list(object({
pool_name = string
pool_size = string
containers = list(string)
})
)
| `[]` | no | | [server\_preemptible](#input\_server\_preemptible) | If preemptible instances | `string` | `false` | no | -| [server\_service\_account](#input\_server\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append"
]
}
| no | +| [server\_service\_account](#input\_server\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append",
"https://www.googleapis.com/auth/cloud-platform"
]
}
| no | | [server\_template\_name](#input\_server\_template\_name) | MIG template name | `string` | `"daos-server"` | no | | [subnetwork\_name](#input\_subnetwork\_name) | Name of the GCP sub-network to use | `string` | `"default"` | no | | [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | `null` | no | diff --git a/terraform/examples/daos_cluster/main.tf b/terraform/examples/daos_cluster/main.tf index af510b6..83276ee 100644 --- a/terraform/examples/daos_cluster/main.tf +++ b/terraform/examples/daos_cluster/main.tf @@ -44,6 +44,7 @@ module "daos_server" { service_account = var.server_service_account pools = var.server_pools gvnic = var.server_gvnic + allow_insecure = var.allow_insecure } module "daos_client" { @@ -69,4 +70,6 @@ module "daos_client" { daos_agent_yml = module.daos_server.daos_agent_yml daos_control_yml = module.daos_server.daos_control_yml gvnic = var.client_gvnic + daos_ca_secret_id = module.daos_server.daos_ca_secret_id + allow_insecure = var.allow_insecure } diff --git a/terraform/examples/daos_cluster/module.json b/terraform/examples/daos_cluster/module.json index d5d6893..18aecc7 100644 --- a/terraform/examples/daos_cluster/module.json +++ b/terraform/examples/daos_cluster/module.json @@ -2,6 +2,13 @@ "header": "Copyright 2022 Intel Corporation\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", "footer": "", "inputs": [ + { + "name": "allow_insecure", + "type": "bool", + "description": "Sets the 
allow_insecure setting in the transport_config section of the daos_*.yml files", + "default": false, + "required": false + }, { "name": "client_gvnic", "type": "bool", @@ -91,7 +98,8 @@ "https://www.googleapis.com/auth/monitoring.write", "https://www.googleapis.com/auth/servicecontrol", "https://www.googleapis.com/auth/service.management.readonly", - "https://www.googleapis.com/auth/trace.append" + "https://www.googleapis.com/auth/trace.append", + "https://www.googleapis.com/auth/cloud-platform" ] }, "required": false @@ -248,7 +256,8 @@ "https://www.googleapis.com/auth/monitoring.write", "https://www.googleapis.com/auth/servicecontrol", "https://www.googleapis.com/auth/service.management.readonly", - "https://www.googleapis.com/auth/trace.append" + "https://www.googleapis.com/auth/trace.append", + "https://www.googleapis.com/auth/cloud-platform" ] }, "required": false diff --git a/terraform/examples/daos_cluster/terraform.tfvars.perf.example b/terraform/examples/daos_cluster/terraform.tfvars.perf.example index 1dc03ec..77d6620 100644 --- a/terraform/examples/daos_cluster/terraform.tfvars.perf.example +++ b/terraform/examples/daos_cluster/terraform.tfvars.perf.example @@ -2,6 +2,7 @@ project_id = "" region = "us-central1" zone = "us-central1-f" +allow_insecure = false server_machine_type = "n2-standard-16" server_os_disk_size_gb = 20 server_daos_disk_count = 4 diff --git a/terraform/examples/daos_cluster/terraform.tfvars.tco.example b/terraform/examples/daos_cluster/terraform.tfvars.tco.example index c8b8b2a..9c324a8 100644 --- a/terraform/examples/daos_cluster/terraform.tfvars.tco.example +++ b/terraform/examples/daos_cluster/terraform.tfvars.tco.example @@ -2,6 +2,7 @@ project_id = "" region = "us-central1" zone = "us-central1-f" +allow_insecure = false server_machine_type = "n2-custom-36-215040" server_os_disk_size_gb = 20 server_daos_disk_count = 16 diff --git a/terraform/examples/daos_cluster/variables.tf b/terraform/examples/daos_cluster/variables.tf index 
f69f788..1f1c71d 100644 --- a/terraform/examples/daos_cluster/variables.tf +++ b/terraform/examples/daos_cluster/variables.tf @@ -47,6 +47,12 @@ variable "subnetwork_project" { default = null } +variable "allow_insecure" { + description = "Sets the allow_insecure setting in the transport_config section of the daos_*.yml files" + default = false + type = bool +} + variable "server_labels" { description = "Set of key/value label pairs to assign to daos-server instances" type = any @@ -134,7 +140,9 @@ variable "server_service_account" { "https://www.googleapis.com/auth/monitoring.write", "https://www.googleapis.com/auth/servicecontrol", "https://www.googleapis.com/auth/service.management.readonly", - "https://www.googleapis.com/auth/trace.append"] + "https://www.googleapis.com/auth/trace.append", + "https://www.googleapis.com/auth/cloud-platform" + ] } } @@ -246,7 +254,9 @@ variable "client_service_account" { "https://www.googleapis.com/auth/monitoring.write", "https://www.googleapis.com/auth/servicecontrol", "https://www.googleapis.com/auth/service.management.readonly", - "https://www.googleapis.com/auth/trace.append"] + "https://www.googleapis.com/auth/trace.append", + "https://www.googleapis.com/auth/cloud-platform" + ] } } diff --git a/terraform/examples/io500/config/config.sh b/terraform/examples/io500/config/config.sh index 1b62563..ac826ef 100644 --- a/terraform/examples/io500/config/config.sh +++ b/terraform/examples/io500/config/config.sh @@ -29,6 +29,7 @@ ID="" # Server and client instances PREEMPTIBLE_INSTANCES="true" SSH_USER="daos-user" +DAOS_ALLOW_INSECURE="false" # Server(s) DAOS_SERVER_INSTANCE_COUNT="1" @@ -70,6 +71,7 @@ export TF_VAR_subnetwork="default" export TF_VAR_subnetwork_project="${TF_VAR_project_id}" export TF_VAR_region="us-central1" export TF_VAR_zone="us-central1-f" +export TF_VAR_allow_insecure="${DAOS_ALLOW_INSECURE}" # Servers export TF_VAR_server_preemptible=${PREEMPTIBLE_INSTANCES} export 
TF_VAR_server_number_of_instances=${DAOS_SERVER_INSTANCE_COUNT} diff --git a/terraform/examples/io500/config/config_1c_1s_8d.sh b/terraform/examples/io500/config/config_1c_1s_8d.sh index bd63e25..cd71114 100644 --- a/terraform/examples/io500/config/config_1c_1s_8d.sh +++ b/terraform/examples/io500/config/config_1c_1s_8d.sh @@ -29,6 +29,7 @@ ID="" # Server and client instances PREEMPTIBLE_INSTANCES="true" SSH_USER="daos-user" +DAOS_ALLOW_INSECURE="false" # Server(s) DAOS_SERVER_INSTANCE_COUNT="1" @@ -71,6 +72,7 @@ export TF_VAR_subnetwork="default" export TF_VAR_subnetwork_project="${TF_VAR_project_id}" export TF_VAR_region="us-central1" export TF_VAR_zone="us-central1-f" +export TF_VAR_allow_insecure="${DAOS_ALLOW_INSECURE}" # Servers export TF_VAR_server_preemptible=${PREEMPTIBLE_INSTANCES} export TF_VAR_server_number_of_instances=${DAOS_SERVER_INSTANCE_COUNT} diff --git a/terraform/examples/io500/config/config_2c_2s_16d.sh b/terraform/examples/io500/config/config_2c_2s_16d.sh index 9fceb7d..8684b7a 100644 --- a/terraform/examples/io500/config/config_2c_2s_16d.sh +++ b/terraform/examples/io500/config/config_2c_2s_16d.sh @@ -29,6 +29,7 @@ ID="" # Server and client instances PREEMPTIBLE_INSTANCES="true" SSH_USER="daos-user" +DAOS_ALLOW_INSECURE="false" # Server(s) DAOS_SERVER_INSTANCE_COUNT="2" @@ -71,6 +72,7 @@ export TF_VAR_subnetwork="default" export TF_VAR_subnetwork_project="${TF_VAR_project_id}" export TF_VAR_region="us-central1" export TF_VAR_zone="us-central1-f" +export TF_VAR_allow_insecure="${DAOS_ALLOW_INSECURE}" # Servers export TF_VAR_server_preemptible=${PREEMPTIBLE_INSTANCES} export TF_VAR_server_number_of_instances=${DAOS_SERVER_INSTANCE_COUNT} diff --git a/terraform/examples/io500/configure_daos.sh b/terraform/examples/io500/configure_daos.sh deleted file mode 100755 index a085f41..0000000 --- a/terraform/examples/io500/configure_daos.sh +++ /dev/null @@ -1,123 +0,0 @@ -#!/bin/bash -# Copyright 2022 Intel Corporation -# -# Licensed under the Apache 
License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -# Configures the /etc/daos/daos_*.yml files on daos-server and daos-client -# instances. -# -# TODO: Move everything in this script to Terraform and/or startup scripts. -# - -SCRIPT_NAME="$(basename "$0")" -SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" -SCRIPT_COMPLETED_FILE="${SCRIPT_DIR}/${SCRIPT_NAME}.completed" -CONFIG_FILE="${SCRIPT_DIR}/config.sh" - -# Source config file to load variables -source "${CONFIG_FILE}" - -log() { - if [[ -t 1 ]]; then tput setaf 14; fi - printf -- "\n%s\n\n" "${1}" - if [[ -t 1 ]]; then tput sgr0; fi -} - -DAOS_FIRST_SERVER=$(head -n 1 ~/hosts_servers) - -check_already_run() { - # Check to see if this script has already run - if [[ -f "${SCRIPT_COMPLETED_FILE}" ]]; then - # This script has already been run and doesn't need to run again - exit 0 - fi -} - -update_ssh_dir() { - # Clear ~/.ssh/known_hosts so we don't run into any issues - clush --hostfile=hosts_all --dsh 'rm -f ~/.ssh/known_hosts' - - # Copy ~/.ssh directory to all instances - pdcp -w^hosts_all -r ~/.ssh ~/ -} - -configure_servers() { - - echo "Getting /etc/daos/daos_server.yml from ${DAOS_FIRST_SERVER}" - - scp ${DAOS_FIRST_SERVER}:/etc/daos/daos_server.yml "${SCRIPT_DIR}/" - - echo "Updating daos_server.yml" - - # Set nr_hugepages value - # nr_hugepages = (targets * 1Gib) / hugepagesize - # Example: for 8 targets and Hugepagesize = 2048 kB: - # Targets = 8 - # 1Gib = 1048576 KiB - # Hugepagesize = 2048kB - # nr_hugepages=(8*1048576) / 2048 - # So 
nr_hugepages value is 4096 - hugepagesize=$(ssh ${DAOS_FIRST_SERVER} "grep Hugepagesize /proc/meminfo | awk '{print \$2}'") - nr_hugepages=$(( (${DAOS_SERVER_DISK_COUNT}*1048576) / ${hugepagesize} )) - sed -i "s/^nr_hugepages:.*/nr_hugepages: ${nr_hugepages}/g" "${SCRIPT_DIR}/daos_server.yml" - sed -i "s/^crt_timeout:.*/crt_timeout: ${DAOS_SERVER_CRT_TIMEOUT}/g" "${SCRIPT_DIR}/daos_server.yml" - sed -i "s/^\(\s*\)targets:.*/\1targets: ${DAOS_SERVER_DISK_COUNT}/g" "${SCRIPT_DIR}/daos_server.yml" - sed -i "s/^\(\s*\)scm_size:.*/\1scm_size: ${DAOS_SERVER_SCM_SIZE}/g" "${SCRIPT_DIR}/daos_server.yml" - - # Copy daos_server.yml to all servers - echo "Stopping daos_server on DAOS servers" - clush --hostfile=hosts_servers --dsh "sudo systemctl stop daos_server" - echo "Copying daos_server.yml to /etc/daos/daos_server.yml on DAOS servers" - clush --hostfile=hosts_servers --dsh --copy "${SCRIPT_DIR}/daos_server.yml" --dest 'daos_server.yml' - clush --hostfile=hosts_servers --dsh 'sudo cp -f daos_server.yml /etc/daos/' - echo "Starting daos_server on on DAOS servers" - clush --hostfile=hosts_servers --dsh 'sudo systemctl start daos_server' -} - -configure_clients() { - echo "Getting /etc/daos/daos_agent.yml and /etc/daos/daos_control.yml from ${DAOS_FIRST_SERVER}" - - scp ${DAOS_FIRST_SERVER}:/etc/daos/daos_agent.yml "${SCRIPT_DIR}/" - scp ${DAOS_FIRST_SERVER}:/etc/daos/daos_control.yml "${SCRIPT_DIR}/" - - echo "Stopping daos_agent on DAOS clients" - clush --hostfile=hosts_clients --dsh "sudo systemctl stop daos_agent" - - echo "Copying ~/daos_agent.yml to /etc/daos/daos_agent.yml on DAOS clients" - clush --hostfile=hosts_clients --dsh --copy "daos_agent.yml" --dest "daos_agent.yml" - clush --hostfile=hosts_clients --dsh "sudo cp -f daos_agent.yml /etc/daos/" - - echo "Copying ~/daos_control.yml to /etc/daos/daos_control.yml on DAOS clients" - clush --hostfile=hosts_clients --dsh --copy "daos_control.yml" --dest "daos_control.yml" - clush --hostfile=hosts_clients --dsh "sudo 
cp -f daos_control.yml /etc/daos/" - - echo "Starting daos_agent on DAOS clients" - clush --hostfile=hosts_clients --dsh "sudo systemctl start daos_agent" -} - -create_completed_file() { - touch "${SCRIPT_COMPLETED_FILE}" -} - -main() { - log "Start configuring DAOS Server and Client instances" - check_already_run - update_ssh_dir - configure_servers - configure_clients - create_completed_file - log "Finished configuring DAOS Server and Client instances" -} - -main diff --git a/terraform/examples/io500/install_scripts/install_intel-oneapi.sh b/terraform/examples/io500/install_scripts/install_intel-oneapi.sh index 57748b6..063083e 100755 --- a/terraform/examples/io500/install_scripts/install_intel-oneapi.sh +++ b/terraform/examples/io500/install_scripts/install_intel-oneapi.sh @@ -49,16 +49,14 @@ name=Intel(R) oneAPI repository baseurl=https://yum.repos.intel.com/oneapi enabled=1 gpgcheck=1 -repo_gpgcheck=0 +repo_gpgcheck=1 gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB EOF # Import GPG Key rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - # Refresh yum cache and install Intel OneAPI - yum clean all - yum makecache + # Install Intel OneAPI MPI yum install -y intel-oneapi-mpi intel-oneapi-mpi-devel } diff --git a/terraform/examples/io500/run_io500-sc21.sh b/terraform/examples/io500/run_io500-sc21.sh index 01d67a6..1de5b62 100755 --- a/terraform/examples/io500/run_io500-sc21.sh +++ b/terraform/examples/io500/run_io500-sc21.sh @@ -105,20 +105,20 @@ cleanup(){ storage_scan() { log "Run DAOS storage scan" - dmg -i -l ${SERVER_LIST} storage scan --verbose + dmg -l ${SERVER_LIST} storage scan --verbose } format_storage() { log_section "Format DAOS storage" - dmg -i -l ${SERVER_LIST} storage format --reformat + dmg -l ${SERVER_LIST} storage format --reformat printf "%s" "Waiting for DAOS storage format to finish" while true do - if [[ $(dmg -i -j system query -v | grep joined | wc -l) -eq 
${DAOS_SERVER_INSTANCE_COUNT} ]]; then + if [[ $(dmg -j system query -v | grep joined | wc -l) -eq ${DAOS_SERVER_INSTANCE_COUNT} ]]; then printf "\n%s\n" "DAOS storage format finished" - dmg -i system query -v + dmg system query -v break fi printf "%s" "." @@ -133,10 +133,10 @@ create_pool() { log_section "Create pool: label=${DAOS_POOL_LABEL} size=${DAOS_POOL_SIZE}" # TODO: Don't hardcode tier-ratio to 3 (-t 3) - dmg -i pool create -z ${DAOS_POOL_SIZE} -t 3 -u ${USER} --label=${DAOS_POOL_LABEL} + dmg pool create -z ${DAOS_POOL_SIZE} -t 3 -u ${USER} --label=${DAOS_POOL_LABEL} echo "Set pool property: reclaim=disabled" - dmg -i pool set-prop ${DAOS_POOL_LABEL} --name=reclaim --value=disabled + dmg pool set-prop ${DAOS_POOL_LABEL} --name=reclaim --value=disabled echo "Pool created successfully" dmg pool query "${DAOS_POOL_LABEL}" diff --git a/terraform/examples/io500/start.sh b/terraform/examples/io500/start.sh index fd8ade0..5f03d99 100755 --- a/terraform/examples/io500/start.sh +++ b/terraform/examples/io500/start.sh @@ -430,6 +430,9 @@ EOF ssh -q -F "${SSH_CONFIG_FILE}" "${FIRST_CLIENT_IP}" \ "chmod -R 600 ~/.ssh/*" + echo "#!/bin/bash + ssh -F ./tmp/ssh_config ${FIRST_CLIENT_IP}" > "${SCRIPT_DIR}/login" + chmod +x "${SCRIPT_DIR}/login" } copy_files_to_first_client() { @@ -447,7 +450,6 @@ copy_files_to_first_client() { "${HOSTS_CLIENTS_FILE}" \ "${HOSTS_SERVERS_FILE}" \ "${HOSTS_ALL_FILE}" \ - ${SCRIPT_DIR}/configure_daos.sh \ ${SCRIPT_DIR}/clean_storage.sh \ ${SCRIPT_DIR}/run_io500-sc21.sh \ "${FIRST_CLIENT_IP}:~/" @@ -467,9 +469,26 @@ propagate_ssh_keys_to_all_nodes () { "clush --hostfile=hosts_all --dsh --copy ~/.ssh --dest ~/" } -configure_daos() { - log "Configure DAOS instances" - ssh -q -F "${SSH_CONFIG_FILE}" ${FIRST_CLIENT_IP} "~/configure_daos.sh" +wait_for_startup_script_to_finish () { + ssh -q -F "${SSH_CONFIG_FILE}" "${FIRST_CLIENT_IP}" \ + "printf 'Waiting for startup script to finish\n' + until sudo journalctl -u google-startup-scripts.service 
--no-pager | grep 'Finished running startup scripts.' + do + printf '.' + sleep 5 + done + printf '\n' + " +} + +set_permissions_on_cert_files () { + if [[ "${DAOS_ALLOW_INSECURE}" == "false" ]]; then + ssh -q -F "${SSH_CONFIG_FILE}" "${FIRST_CLIENT_IP}" \ + "clush --hostfile=hosts_clients --dsh sudo chown ${SSH_USER}:${SSH_USER} /etc/daos/certs/daosCA.crt" + + ssh -q -F "${SSH_CONFIG_FILE}" "${FIRST_CLIENT_IP}" \ + "clush --hostfile=hosts_clients --dsh sudo chown ${SSH_USER}:${SSH_USER} /etc/daos/certs/admin.*" + fi } show_instances() { @@ -500,7 +519,7 @@ show_run_steps() { To run the IO500 benchmark: 1. Log into the first client - ssh -F ./tmp/ssh_config ${FIRST_CLIENT_IP} + ./login 2. Run IO500 ~/run_io500-sc21.sh @@ -520,6 +539,8 @@ main() { configure_ssh copy_files_to_first_client propagate_ssh_keys_to_all_nodes + wait_for_startup_script_to_finish + set_permissions_on_cert_files show_instances check_gvnic show_run_steps diff --git a/terraform/examples/io500/stop.sh b/terraform/examples/io500/stop.sh index a7f4231..e61ef1c 100755 --- a/terraform/examples/io500/stop.sh +++ b/terraform/examples/io500/stop.sh @@ -82,3 +82,7 @@ fi if [[ -d "${IO500_TMP}" ]]; then rm -r "${IO500_TMP}" fi + +if [[ -f "${SCRIPT_DIR}/login" ]]; then + rm -f "${SCRIPT_DIR}/login" +fi diff --git a/terraform/examples/only_daos_client/README.md b/terraform/examples/only_daos_client/README.md index 76815e2..bfd367d 100644 --- a/terraform/examples/only_daos_client/README.md +++ b/terraform/examples/only_daos_client/README.md @@ -125,6 +125,7 @@ No resources. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [allow\_insecure](#input\_allow\_insecure) | Sets the allow\_insecure setting in the transport\_config section of the daos\_*.yml files | `bool` | `false` | no | | [client\_daos\_agent\_yml](#input\_client\_daos\_agent\_yml) | YAML to configure the daos agent. 
| `string` | n/a | yes | | [client\_daos\_control\_yml](#input\_client\_daos\_control\_yml) | YAML configuring DAOS control. | `string` | n/a | yes | | [client\_instance\_base\_name](#input\_client\_instance\_base\_name) | MIG instance base names to use | `string` | `"daos-client"` | no | @@ -139,6 +140,7 @@ No resources. | [client\_preemptible](#input\_client\_preemptible) | If preemptible instances | `string` | `false` | no | | [client\_service\_account](#input\_client\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append"
]
}
| no | | [client\_template\_name](#input\_client\_template\_name) | MIG template name | `string` | `"daos-client"` | no | +| [daos\_ca\_secret\_id](#input\_daos\_ca\_secret\_id) | ID of Secret Manager secret used to store TLS certificates | `string` | n/a | yes | | [network\_name](#input\_network\_name) | Name of the GCP network to use | `string` | `"default"` | no | | [project\_id](#input\_project\_id) | The GCP project to use | `string` | n/a | yes | | [region](#input\_region) | The GCP region to create and test resources in | `string` | n/a | yes | diff --git a/terraform/examples/only_daos_client/main.tf b/terraform/examples/only_daos_client/main.tf index 1990c60..f30f8e5 100644 --- a/terraform/examples/only_daos_client/main.tf +++ b/terraform/examples/only_daos_client/main.tf @@ -40,4 +40,6 @@ module "daos_client" { service_account = var.client_service_account daos_agent_yml = var.client_daos_agent_yml daos_control_yml = var.client_daos_control_yml + daos_ca_secret_id = var.daos_ca_secret_id + allow_insecure = var.allow_insecure } diff --git a/terraform/examples/only_daos_client/module.json b/terraform/examples/only_daos_client/module.json index 3b1b348..71e86a6 100644 --- a/terraform/examples/only_daos_client/module.json +++ b/terraform/examples/only_daos_client/module.json @@ -2,6 +2,13 @@ "header": "Copyright 2022 Intel Corporation\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", "footer": "", "inputs": [ + { + "name": "allow_insecure", + "type": "bool", + "description": "Sets 
the allow_insecure setting in the transport_config section of the daos_*.yml files", + "default": false, + "required": false + }, { "name": "client_daos_agent_yml", "type": "string", @@ -110,6 +117,13 @@ "default": "daos-client", "required": false }, + { + "name": "daos_ca_secret_id", + "type": "string", + "description": "ID of Secret Manager secret used to store TLS certificates", + "default": null, + "required": true + }, { "name": "network_name", "type": "string", diff --git a/terraform/examples/only_daos_client/terraform.tfvars.example b/terraform/examples/only_daos_client/terraform.tfvars.example index 90a9d20..5cc58c8 100644 --- a/terraform/examples/only_daos_client/terraform.tfvars.example +++ b/terraform/examples/only_daos_client/terraform.tfvars.example @@ -1,16 +1,15 @@ -project_id = "" -region = "us-central1" -zone = "us-central1-f" +project_id = "" +region = "us-central1" +zone = "us-central1-f" +# network_name = "default" +# subnetwork_name = "default" +# subnetwork_project = "" -# network_name = "default" -# subnetwork_project = "" -# subnetwork_name = "default" - -# client_number_of_instances = 4 client_labels = { example = "only_daos_client" } +# client_number_of_instances = 4 # client_preemptible = "false" # client_mig_name = "daos-client" # client_template_name = "daos-client" @@ -20,11 +19,7 @@ client_labels = { # client_os_project = "" # client_os_disk_type = "pd-ssd" # client_os_disk_size_gb = 20 - - -# -# DAOS client yaml files -# +daos_ca_secret_id = "daos-server_ca" client_daos_agent_yml = < [allow\_insecure](#input\_allow\_insecure) | Sets the allow\_insecure setting in the transport\_config section of the daos\_*.yml files | `bool` | `false` | no | | [network\_name](#input\_network\_name) | Name of the GCP network to use | `string` | `"default"` | no | | [project\_id](#input\_project\_id) | The GCP project to use | `string` | n/a | yes | | [region](#input\_region) | The GCP region to create and test resources in | `string` | n/a | yes | @@ 
-146,6 +147,7 @@ No resources. | [server\_daos\_disk\_count](#input\_server\_daos\_disk\_count) | Number of local ssd's to use | `number` | `16` | no | | [server\_daos\_disk\_type](#input\_server\_daos\_disk\_type) | Daos disk type to use. For now only suported one is local-ssd | `string` | `"local-ssd"` | no | | [server\_daos\_scm\_size](#input\_server\_daos\_scm\_size) | scm\_size | `number` | `200` | no | +| [server\_gvnic](#input\_server\_gvnic) | Use Google Virtual NIC (gVNIC) network interface on DAOS servers | `bool` | `false` | no | | [server\_instance\_base\_name](#input\_server\_instance\_base\_name) | MIG instance base names to use | `string` | `"daos-server"` | no | | [server\_labels](#input\_server\_labels) | Set of key/value label pairs to assign to daos-server instances | `any` | `{}` | no | | [server\_machine\_type](#input\_server\_machine\_type) | GCP machine type. ie. e2-medium | `string` | `"n2-custom-36-215040"` | no | @@ -157,7 +159,7 @@ No resources. | [server\_os\_project](#input\_server\_os\_project) | OS GCP image project name. Defaults to project\_id if null. | `string` | `null` | no | | [server\_pools](#input\_server\_pools) | If provided, this module will generate a script to create a list of pools. pool attributes have to be specified in a format acceptable by [dmg](https://docs.daos.io/v2.0/admin/pool_operations/) and daos. |
list(object({
pool_name = string
pool_size = string
containers = list(string)
})
)
| `[]` | no | | [server\_preemptible](#input\_server\_preemptible) | If preemptible instances | `string` | `false` | no | -| [server\_service\_account](#input\_server\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append"
]
}
| no | +| [server\_service\_account](#input\_server\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append",
"https://www.googleapis.com/auth/cloud-platform"
]
}
| no | | [server\_template\_name](#input\_server\_template\_name) | MIG template name | `string` | `"daos-server"` | no | | [subnetwork\_name](#input\_subnetwork\_name) | Name of the GCP sub-network to use | `string` | `"default"` | no | | [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | `null` | no | diff --git a/terraform/examples/only_daos_server/main.tf b/terraform/examples/only_daos_server/main.tf index 8f8b707..50cc8b9 100644 --- a/terraform/examples/only_daos_server/main.tf +++ b/terraform/examples/only_daos_server/main.tf @@ -41,7 +41,8 @@ module "daos_server" { daos_disk_type = var.server_daos_disk_type daos_crt_timeout = var.server_daos_crt_timeout daos_scm_size = var.server_daos_scm_size - - service_account = var.server_service_account - pools = var.server_pools + service_account = var.server_service_account + pools = var.server_pools + gvnic = var.server_gvnic + allow_insecure = var.allow_insecure } diff --git a/terraform/examples/only_daos_server/module.json b/terraform/examples/only_daos_server/module.json index a087d4e..1bffb83 100644 --- a/terraform/examples/only_daos_server/module.json +++ b/terraform/examples/only_daos_server/module.json @@ -2,6 +2,13 @@ "header": "Copyright 2022 Intel Corporation\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", "footer": "", "inputs": [ + { + "name": "allow_insecure", + "type": "bool", + "description": "Sets the allow_insecure setting in the transport_config section 
of the daos_*.yml files", + "default": false, + "required": false + }, { "name": "network_name", "type": "string", @@ -51,6 +58,13 @@ "default": 200, "required": false }, + { + "name": "server_gvnic", + "type": "bool", + "description": "Use Google Virtual NIC (gVNIC) network interface on DAOS servers", + "default": false, + "required": false + }, { "name": "server_instance_base_name", "type": "string", @@ -140,7 +154,8 @@ "https://www.googleapis.com/auth/monitoring.write", "https://www.googleapis.com/auth/servicecontrol", "https://www.googleapis.com/auth/service.management.readonly", - "https://www.googleapis.com/auth/trace.append" + "https://www.googleapis.com/auth/trace.append", + "https://www.googleapis.com/auth/cloud-platform" ] }, "required": false diff --git a/terraform/examples/only_daos_server/variables.tf b/terraform/examples/only_daos_server/variables.tf index d19ba46..cbe8626 100644 --- a/terraform/examples/only_daos_server/variables.tf +++ b/terraform/examples/only_daos_server/variables.tf @@ -47,6 +47,12 @@ variable "subnetwork_project" { default = null } +variable "allow_insecure" { + description = "Sets the allow_insecure setting in the transport_config section of the daos_*.yml files" + default = false + type = bool +} + variable "server_labels" { description = "Set of key/value label pairs to assign to daos-server instances" type = any @@ -134,7 +140,8 @@ variable "server_service_account" { "https://www.googleapis.com/auth/monitoring.write", "https://www.googleapis.com/auth/servicecontrol", "https://www.googleapis.com/auth/service.management.readonly", - "https://www.googleapis.com/auth/trace.append"] + "https://www.googleapis.com/auth/trace.append", + "https://www.googleapis.com/auth/cloud-platform"] } } @@ -166,3 +173,9 @@ variable "server_daos_crt_timeout" { default = 300 type = number } + +variable "server_gvnic" { + description = "Use Google Virtual NIC (gVNIC) network interface on DAOS servers" + default = false + type = bool +} diff --git
a/terraform/modules/daos_client/README.md b/terraform/modules/daos_client/README.md index cae343b..4ca42c0 100644 --- a/terraform/modules/daos_client/README.md +++ b/terraform/modules/daos_client/README.md @@ -55,7 +55,9 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [allow\_insecure](#input\_allow\_insecure) | Sets the allow\_insecure setting in the transport\_config section of the daos\_*.yml files | `bool` | `false` | no | | [daos\_agent\_yml](#input\_daos\_agent\_yml) | YAML to configure the daos agent. | `string` | n/a | yes | +| [daos\_ca\_secret\_id](#input\_daos\_ca\_secret\_id) | ID of Secret Manager secret used to store TLS certificates | `string` | n/a | yes | | [daos\_control\_yml](#input\_daos\_control\_yml) | YAML configuring DAOS control. | `string` | n/a | yes | | [gvnic](#input\_gvnic) | Use Google Virtual NIC (gVNIC) network interface | `bool` | `false` | no | | [instance\_base\_name](#input\_instance\_base\_name) | MIG instance base names to use | `string` | `"daos-client"` | no | @@ -71,7 +73,7 @@ No modules. | [preemptible](#input\_preemptible) | If preemptible instances | `string` | `false` | no | | [project\_id](#input\_project\_id) | The GCP project to use | `string` | n/a | yes | | [region](#input\_region) | The GCP region to create and test resources in | `string` | n/a | yes | -| [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append"
]
}
| no | +| [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append",
"https://www.googleapis.com/auth/cloud-platform"
]
}
| no | | [subnetwork\_name](#input\_subnetwork\_name) | Name of the GCP sub-network to use | `string` | `"default"` | no | | [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | `null` | no | | [template\_name](#input\_template\_name) | MIG template name | `string` | `"daos-client"` | no | diff --git a/terraform/modules/daos_client/main.tf b/terraform/modules/daos_client/main.tf index 13dacb1..4327391 100644 --- a/terraform/modules/daos_client/main.tf +++ b/terraform/modules/daos_client/main.tf @@ -17,11 +17,19 @@ locals { os_project = var.os_project != null ? var.os_project : var.project_id subnetwork_project = var.subnetwork_project != null ? var.subnetwork_project : var.project_id - client_startup_script = file( - "${path.module}/templates/daos_startup_script.tftpl") # Google Virtual NIC (gVNIC) network interface nic_type = var.gvnic ? "GVNIC" : "VIRTIO_NET" total_egress_bandwidth_tier = var.gvnic ? "TIER_1" : "DEFAULT" + daos_ca_secret_id = basename(var.daos_ca_secret_id) + allow_insecure = var.allow_insecure + + client_startup_script = templatefile( + "${path.module}/templates/daos_startup_script.tftpl", + { + daos_ca_secret_id = local.daos_ca_secret_id + allow_insecure = local.allow_insecure + } + ) } data "google_compute_image" "os_image" { diff --git a/terraform/modules/daos_client/module.json b/terraform/modules/daos_client/module.json index 06b36ed..6424835 100644 --- a/terraform/modules/daos_client/module.json +++ b/terraform/modules/daos_client/module.json @@ -2,6 +2,13 @@ "header": "Copyright 2021 Google LLC\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR 
CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", "footer": "", "inputs": [ + { + "name": "allow_insecure", + "type": "bool", + "description": "Sets the allow_insecure setting in the transport_config section of the daos_*.yml files", + "default": false, + "required": false + }, { "name": "daos_agent_yml", "type": "string", @@ -9,6 +16,13 @@ "default": null, "required": true }, + { + "name": "daos_ca_secret_id", + "type": "string", + "description": "ID of Secret Manager secret used to store TLS certificates", + "default": null, + "required": true + }, { "name": "daos_control_yml", "type": "string", @@ -126,7 +140,8 @@ "https://www.googleapis.com/auth/monitoring.write", "https://www.googleapis.com/auth/servicecontrol", "https://www.googleapis.com/auth/service.management.readonly", - "https://www.googleapis.com/auth/trace.append" + "https://www.googleapis.com/auth/trace.append", + "https://www.googleapis.com/auth/cloud-platform" ] }, "required": false diff --git a/terraform/modules/daos_client/templates/daos_agent.yml.tftpl b/terraform/modules/daos_client/templates/daos_agent.yml.tftpl deleted file mode 100644 index c5c3517..0000000 --- a/terraform/modules/daos_client/templates/daos_agent.yml.tftpl +++ /dev/null @@ -1,10 +0,0 @@ -# -# DAOS agent configuration file -# - -# Management server access points -# Must have the same value for all agents and servers in a system. 
-access_points: ${jsonencode(access_points)} - -transport_config: - allow_insecure: true diff --git a/terraform/modules/daos_client/templates/daos_control.yml.tftpl b/terraform/modules/daos_client/templates/daos_control.yml.tftpl deleted file mode 100644 index 6c9b686..0000000 --- a/terraform/modules/daos_client/templates/daos_control.yml.tftpl +++ /dev/null @@ -1,7 +0,0 @@ -# -# DAOS manager (dmg) configuration file -# - -hostlist: ${jsonencode(access_points)} -transport_config: - allow_insecure: true diff --git a/terraform/modules/daos_client/templates/daos_startup_script.tftpl b/terraform/modules/daos_client/templates/daos_startup_script.tftpl index 8fc5016..9cd60b9 100644 --- a/terraform/modules/daos_client/templates/daos_startup_script.tftpl +++ b/terraform/modules/daos_client/templates/daos_startup_script.tftpl @@ -2,26 +2,31 @@ METADATA_URL="http://metadata.google.internal/computeMetadata/v1/instance/attributes" DAOS_CONFIG_DIR="/etc/daos" +DAOS_DIR="/var/daos" fetch_attr() { local attr_name=$* - curl -s ${METADATA_URL}/${attr_name} -H "Metadata-Flavor: Google" + curl -s $${METADATA_URL}/$${attr_name} -H "Metadata-Flavor: Google" } -echo "BEGIN: Setting up DAOS Client" +echo "BEGIN: DAOS Client Startup Script" systemctl stop daos_agent -# Create agent config files -mkdir -p "${DAOS_CONFIG_DIR}" -cd "${DAOS_CONFIG_DIR}" +# Create agent config files from metadata +mkdir -p "$${DAOS_CONFIG_DIR}" +cd "$${DAOS_CONFIG_DIR}" fetch_attr "daos_control_yaml_content" > daos_control.yml fetch_attr "daos_agent_yaml_content" > daos_agent.yml +chown -R daos_agent:daos_agent /etc/daos/ -# enable daos_agent in systemd (will be started automatically at boot time) -systemctl enable daos_agent +%{ if !allow_insecure } +# Get certs from Secret Manager and deploy them +$${DAOS_DIR}/cert_gen/sm_get_ca.sh "${daos_ca_secret_id}" "client" +%{ endif } +systemctl enable daos_agent systemctl start daos_agent -echo "END: Setting up DAOS Client" +echo "END: DAOS Client Startup Script" 
diff --git a/terraform/modules/daos_client/variables.tf b/terraform/modules/daos_client/variables.tf index 2657d26..6c553c4 100644 --- a/terraform/modules/daos_client/variables.tf +++ b/terraform/modules/daos_client/variables.tf @@ -117,7 +117,8 @@ variable "service_account" { "https://www.googleapis.com/auth/monitoring.write", "https://www.googleapis.com/auth/servicecontrol", "https://www.googleapis.com/auth/service.management.readonly", - "https://www.googleapis.com/auth/trace.append"] + "https://www.googleapis.com/auth/trace.append", + "https://www.googleapis.com/auth/cloud-platform"] } } @@ -142,3 +143,14 @@ variable "gvnic" { default = false type = bool } + +variable "daos_ca_secret_id" { + description = "ID of Secret Manager secret used to store TLS certificates" + type = string +} + +variable "allow_insecure" { + description = "Sets the allow_insecure setting in the transport_config section of the daos_*.yml files" + default = false + type = bool +} diff --git a/terraform/modules/daos_server/README.md b/terraform/modules/daos_server/README.md index cb46e47..e870658 100644 --- a/terraform/modules/daos_server/README.md +++ b/terraform/modules/daos_server/README.md @@ -49,12 +49,17 @@ No modules. 
| [google-beta_google_compute_instance_template.daos_sig_template](https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/google_compute_instance_template) | resource | | [google_compute_instance_group_manager.daos_sig](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance_group_manager) | resource | | [google_compute_per_instance_config.named_instances](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_per_instance_config) | resource | +| [google_secret_manager_secret.daos_ca](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/secret_manager_secret) | resource | +| [google_secret_manager_secret_iam_policy.daos_ca_secret_policy](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/secret_manager_secret_iam_policy) | resource | +| [google_compute_default_service_account.default](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_default_service_account) | data source | | [google_compute_image.os_image](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_image) | data source | +| [google_iam_policy.daos_ca_secret_version_manager](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/iam_policy) | data source | ## Inputs | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [allow\_insecure](#input\_allow\_insecure) | Sets the allow\_insecure setting in the transport\_config section of the daos\_*.yml files | `bool` | `false` | no | | [daos\_crt\_timeout](#input\_daos\_crt\_timeout) | crt\_timeout | `number` | `300` | no | | [daos\_disk\_count](#input\_daos\_disk\_count) | Number of local ssd's to use | `number` | `16` | no | | [daos\_disk\_type](#input\_daos\_disk\_type) | Daos disk type to use. 
For now the only supported one is local-ssd | `string` | `"local-ssd"` | no | @@ -74,7 +79,7 @@ No modules. | [preemptible](#input\_preemptible) | If preemptible instances | `string` | `false` | no | | [project\_id](#input\_project\_id) | The GCP project to use | `string` | n/a | yes | | [region](#input\_region) | The GCP region to create and test resources in | `string` | n/a | yes | -| [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append"
]
}
| no | +| [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append",
"https://www.googleapis.com/auth/cloud-platform"
]
}
| no | | [subnetwork\_name](#input\_subnetwork\_name) | Name of the GCP sub-network to use | `string` | `"default"` | no | | [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | `null` | no | | [template\_name](#input\_template\_name) | MIG template name | `string` | `"daos-server"` | no | @@ -86,7 +91,9 @@ No modules. |------|-------------| | [access\_points](#output\_access\_points) | List of DAOS servers to use as access points | | [daos\_agent\_yml](#output\_daos\_agent\_yml) | YAML to configure the daos agent. This is typically saved in /etc/daos/daos\_agent.yml | +| [daos\_ca\_secret\_id](#output\_daos\_ca\_secret\_id) | ID of Secret Manager secret used to store daosCA.tar.gz file generated on first DAOS server instance | | [daos\_client\_install\_script](#output\_daos\_client\_install\_script) | Script to install the DAOS client package. | | [daos\_config\_script](#output\_daos\_config\_script) | Script to configure the DAOS system. This will format the sytem with dmg -l and optionally create the specified pools. | | [daos\_control\_yml](#output\_daos\_control\_yml) | YAML configuring DAOS control. This is typically saved in /etc/daos/daos\_control.yml | +| [default\_service\_account\_email](#output\_default\_service\_account\_email) | Default service account email | diff --git a/terraform/modules/daos_server/main.tf b/terraform/modules/daos_server/main.tf index da2a85d..8b657a4 100644 --- a/terraform/modules/daos_server/main.tf +++ b/terraform/modules/daos_server/main.tf @@ -18,37 +18,53 @@ locals { os_project = var.os_project != null ? var.os_project : var.project_id subnetwork_project = var.subnetwork_project != null ? var.subnetwork_project : var.project_id servers = format("%s-[%04s-%04s]", var.instance_base_name, 1, var.number_of_instances) + first_server = format("%s-%04s", var.instance_base_name, 1) max_aps = var.number_of_instances > 5 ? 5 : (var.number_of_instances % 2) == 1 ? 
var.number_of_instances : var.number_of_instances - 1 access_points = formatlist("%s-%04s", var.instance_base_name, range(1, local.max_aps + 1)) scm_size = var.daos_scm_size # To get nr_hugepages value: (targets * 1Gib) / hugepagesize - huge_pages = (var.daos_disk_count * 1048576) / 2048 - targets = var.daos_disk_count - crt_timeout = var.daos_crt_timeout + huge_pages = (var.daos_disk_count * 1048576) / 2048 + targets = var.daos_disk_count + crt_timeout = var.daos_crt_timeout + daos_ca_secret_id = basename(google_secret_manager_secret.daos_ca.id) + allow_insecure = var.allow_insecure + daos_server_yaml_content = templatefile( "${path.module}/templates/daos_server.yml.tftpl", { - access_points = local.access_points - nr_hugepages = local.huge_pages - targets = local.targets - scm_size = local.scm_size - crt_timeout = local.crt_timeout + access_points = local.access_points + nr_hugepages = local.huge_pages + targets = local.targets + scm_size = local.scm_size + crt_timeout = local.crt_timeout + allow_insecure = local.allow_insecure } ) + daos_control_yaml_content = templatefile( "${path.module}/templates/daos_control.yml.tftpl", { - servers = [local.servers] + servers = [local.servers] + allow_insecure = local.allow_insecure } ) + daos_agent_yaml_content = templatefile( "${path.module}/templates/daos_agent.yml.tftpl", { - access_points = local.access_points + access_points = local.access_points + allow_insecure = local.allow_insecure + } + ) + + server_startup_script = templatefile( + "${path.module}/templates/daos_startup_script.tftpl", + { + first_server = local.first_server + daos_ca_secret_id = local.daos_ca_secret_id + allow_insecure = local.allow_insecure } ) - server_startup_script = file( - "${path.module}/templates/daos_startup_script.tftpl") configure_daos_content = templatefile( "${path.module}/templates/configure_daos.tftpl", @@ -162,3 +178,47 @@ resource "google_compute_per_instance_config" "named_instances" { } } } + +resource 
"google_secret_manager_secret" "daos_ca" { + secret_id = format("%s_ca", var.instance_base_name) + project = var.project_id + + replication { + user_managed { + replicas { + location = var.region + } + } + } +} + +data "google_compute_default_service_account" "default" { + project = var.project_id +} + +data "google_iam_policy" "daos_ca_secret_version_manager" { + binding { + role = "roles/secretmanager.secretVersionManager" + members = [ + format("serviceAccount:%s", var.service_account.email == null ? data.google_compute_default_service_account.default.email : var.service_account.email) + ] + } + binding { + role = "roles/secretmanager.viewer" + members = [ + format("serviceAccount:%s", var.service_account.email == null ? data.google_compute_default_service_account.default.email : var.service_account.email) + ] + } + binding { + role = "roles/secretmanager.secretAccessor" + members = [ + format("serviceAccount:%s", var.service_account.email == null ? data.google_compute_default_service_account.default.email : var.service_account.email) + ] + } +} + +resource "google_secret_manager_secret_iam_policy" "daos_ca_secret_policy" { + project = var.project_id + secret_id = google_secret_manager_secret.daos_ca.secret_id + policy_data = data.google_iam_policy.daos_ca_secret_version_manager.policy_data +} diff --git a/terraform/modules/daos_server/module.json b/terraform/modules/daos_server/module.json index d0974e6..938170f 100644 --- a/terraform/modules/daos_server/module.json +++ b/terraform/modules/daos_server/module.json @@ -2,6 +2,13 @@ "header": "Copyright 2021 Google LLC\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF 
ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", "footer": "", "inputs": [ + { + "name": "allow_insecure", + "type": "bool", + "description": "Sets the allow_insecure setting in the transport_config section of the daos_*.yml files", + "default": false, + "required": false + }, { "name": "daos_crt_timeout", "type": "number", @@ -147,7 +154,8 @@ "https://www.googleapis.com/auth/monitoring.write", "https://www.googleapis.com/auth/servicecontrol", "https://www.googleapis.com/auth/service.management.readonly", - "https://www.googleapis.com/auth/trace.append" + "https://www.googleapis.com/auth/trace.append", + "https://www.googleapis.com/auth/cloud-platform" ] }, "required": false @@ -191,6 +199,10 @@ "name": "daos_agent_yml", "description": "YAML to configure the daos agent. This is typically saved in /etc/daos/daos_agent.yml" }, + { + "name": "daos_ca_secret_id", + "description": "ID of Secret Manager secret used to store daosCA.tar.gz file generated on first DAOS server instance" + }, { "name": "daos_client_install_script", "description": "Script to install the DAOS client package." @@ -202,6 +214,10 @@ { "name": "daos_control_yml", "description": "YAML configuring DAOS control. 
This is typically saved in /etc/daos/daos_control.yml" + }, + { + "name": "default_service_account_email", + "description": "Default service account email" } ], "providers": [ @@ -258,6 +274,33 @@ "version": "latest", "description": null }, + { + "type": "secret_manager_secret", + "name": "daos_ca", + "provider": "google", + "source": "hashicorp/google", + "mode": "managed", + "version": "latest", + "description": null + }, + { + "type": "secret_manager_secret_iam_policy", + "name": "daos_ca_secret_policy", + "provider": "google", + "source": "hashicorp/google", + "mode": "managed", + "version": "latest", + "description": null + }, + { + "type": "compute_default_service_account", + "name": "default", + "provider": "google", + "source": "hashicorp/google", + "mode": "data", + "version": "latest", + "description": null + }, { "type": "compute_image", "name": "os_image", @@ -266,6 +309,15 @@ "mode": "data", "version": "latest", "description": null + }, + { + "type": "iam_policy", + "name": "daos_ca_secret_version_manager", + "provider": "google", + "source": "hashicorp/google", + "mode": "data", + "version": "latest", + "description": null } ] } diff --git a/terraform/modules/daos_server/outputs.tf b/terraform/modules/daos_server/outputs.tf index ddcd733..03e68b6 100644 --- a/terraform/modules/daos_server/outputs.tf +++ b/terraform/modules/daos_server/outputs.tf @@ -42,3 +42,13 @@ output "daos_client_install_script" { description = "Script to install the DAOS client package." 
value = local.daos_client_install_script_content } + +output "daos_ca_secret_id" { + description = "ID of Secret Manager secret used to store daosCA.tar.gz file generated on first DAOS server instance" + value = google_secret_manager_secret.daos_ca.name +} + +output "default_service_account_email" { + description = "Default service account email" + value = data.google_compute_default_service_account.default.email +} diff --git a/terraform/modules/daos_server/templates/daos_agent.yml.tftpl b/terraform/modules/daos_server/templates/daos_agent.yml.tftpl index c5c3517..fd4cf5f 100644 --- a/terraform/modules/daos_server/templates/daos_agent.yml.tftpl +++ b/terraform/modules/daos_server/templates/daos_agent.yml.tftpl @@ -7,4 +7,9 @@ access_points: ${jsonencode(access_points)} transport_config: - allow_insecure: true + allow_insecure: ${allow_insecure} + %{ if !allow_insecure } + ca_cert: /etc/daos/certs/daosCA.crt + cert: /etc/daos/certs/agent.crt + key: /etc/daos/certs/agent.key + %{ endif } diff --git a/terraform/modules/daos_server/templates/daos_control.yml.tftpl b/terraform/modules/daos_server/templates/daos_control.yml.tftpl index 071a5e1..f67a470 100644 --- a/terraform/modules/daos_server/templates/daos_control.yml.tftpl +++ b/terraform/modules/daos_server/templates/daos_control.yml.tftpl @@ -3,5 +3,11 @@ # hostlist: ${jsonencode(servers)} + transport_config: - allow_insecure: true + allow_insecure: ${allow_insecure} + %{ if !allow_insecure } + ca_cert: /etc/daos/certs/daosCA.crt + cert: /etc/daos/certs/admin.crt + key: /etc/daos/certs/admin.key + %{ endif } diff --git a/terraform/modules/daos_server/templates/daos_server.yml.tftpl b/terraform/modules/daos_server/templates/daos_server.yml.tftpl index b9937b9..ee41a68 100644 --- a/terraform/modules/daos_server/templates/daos_server.yml.tftpl +++ b/terraform/modules/daos_server/templates/daos_server.yml.tftpl @@ -3,8 +3,16 @@ # access_points: ${jsonencode(access_points)} + transport_config: - allow_insecure: true + 
allow_insecure: ${allow_insecure} + %{ if !allow_insecure } + client_cert_dir: /etc/daos/certs/clients + ca_cert: /etc/daos/certs/daosCA.crt + cert: /etc/daos/certs/server.crt + key: /etc/daos/certs/server.key + %{ endif } + provider: ofi+tcp;ofi_rxm disable_vfio: true crt_timeout: ${crt_timeout} diff --git a/terraform/modules/daos_server/templates/daos_startup_script.tftpl b/terraform/modules/daos_server/templates/daos_startup_script.tftpl index be000f2..3da01ec 100644 --- a/terraform/modules/daos_server/templates/daos_startup_script.tftpl +++ b/terraform/modules/daos_server/templates/daos_startup_script.tftpl @@ -3,35 +3,50 @@ METADATA_URL="http://metadata.google.internal/computeMetadata/v1/instance/attributes" DAOS_SERVER_SYSTEMD_FILE="/usr/lib/systemd/system/daos_server.service" DAOS_CONFIG_DIR="/etc/daos" -DAOS_MOUNT_DIR="/var/daos" +DAOS_DIR="/var/daos" +DAOS_MOUNT_DIR="$${DAOS_DIR}" +FIRST_DAOS_SERVER_HOSTNAME="${first_server}" fetch_attr() { local attr_name=$* - curl -s ${METADATA_URL}/${attr_name} -H "Metadata-Flavor: Google" + curl -s $${METADATA_URL}/$${attr_name} -H "Metadata-Flavor: Google" } -echo "BEGIN: Setting up DAOS server" +echo "BEGIN: DAOS Server Startup Script" systemctl stop daos_server -# Create server config files -mkdir -p "${DAOS_CONFIG_DIR}" -cd "${DAOS_CONFIG_DIR}" +# Create server config files from metadata +mkdir -p "$${DAOS_CONFIG_DIR}" +cd "$${DAOS_CONFIG_DIR}" fetch_attr "daos_server_yaml_content" > daos_server.yml fetch_attr "daos_control_yaml_content" > daos_control.yml fetch_attr "daos_agent_yaml_content" > daos_agent.yml +chown -R root:root /etc/daos # Create directory for engine logs and tmpfs mount point -mkdir -p "${DAOS_MOUNT_DIR}" +mkdir -p "$${DAOS_MOUNT_DIR}" # Modify systemd script for GCP -# First, run daos_server as root since GCP does not support VFIO -sed -i "s/User=daos_server/User=root/; s/Group=daos_server/Group=root/" ${DAOS_SERVER_SYSTEMD_FILE} +# GCP does not support VFIO so daos_server must run as root +sed 
-i "s/User=daos_server/User=root/; s/Group=daos_server/Group=root/" $${DAOS_SERVER_SYSTEMD_FILE} + +if [[ "$${HOSTNAME}" == "$${FIRST_DAOS_SERVER_HOSTNAME}" ]]; then + # Only run on the first DAOS server instance + echo "Running startup script on first DAOS server" + %{ if !allow_insecure } + # Generate CA and certs. Store in Secret Manager. + $${DAOS_DIR}/cert_gen/sm_set_ca.sh "${daos_ca_secret_id}" + %{ endif } +fi + +%{ if !allow_insecure ~} +# Get certs from Secret Manager and deploy them +$${DAOS_DIR}/cert_gen/sm_get_ca.sh "${daos_ca_secret_id}" "server" +%{ endif ~} -# enable daos_server in systemd (will be started automatically at boot time) systemctl enable daos_server - systemctl start daos_server -echo "END: Setting up DAOS server" +echo "END: DAOS Server Startup Script" diff --git a/terraform/modules/daos_server/variables.tf b/terraform/modules/daos_server/variables.tf index b0ff4d4..5ace393 100644 --- a/terraform/modules/daos_server/variables.tf +++ b/terraform/modules/daos_server/variables.tf @@ -131,7 +131,8 @@ variable "service_account" { "https://www.googleapis.com/auth/monitoring.write", "https://www.googleapis.com/auth/servicecontrol", "https://www.googleapis.com/auth/service.management.readonly", - "https://www.googleapis.com/auth/trace.append"] + "https://www.googleapis.com/auth/trace.append", + "https://www.googleapis.com/auth/cloud-platform"] } } @@ -169,3 +170,9 @@ variable "gvnic" { default = false type = bool } + +variable "allow_insecure" { + description = "Sets the allow_insecure setting in the transport_config section of the daos_*.yml files" + default = false + type = bool +}