diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..5754143 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,27 @@ +--- +repos: + - repo: https://github.com/antonbabenko/pre-commit-terraform + rev: v1.64.0 + hooks: + - id: terraform_fmt + - id: terraform_tflint + args: + - --args=--config=__GIT_WORKING_DIR__/.tflint.hcl + - id: terraform_validate + - repo: local + hooks: + - id: terraform-readme + name: terraform-readme + entry: tools/autodoc/terraform_docs.sh + language: script + types: ['terraform'] + exclude: \.terraform\/.*$ + pass_filenames: true + require_serial: true + - id: packer-readme + name: packer-readme + entry: tools/autodoc/terraform_docs.sh + language: script + files: ^.*\.pkr\.hcl$ + pass_filenames: true + require_serial: true diff --git a/.tfdocs-json.yaml b/.tfdocs-json.yaml new file mode 100644 index 0000000..d38f0e1 --- /dev/null +++ b/.tfdocs-json.yaml @@ -0,0 +1,25 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +formatter: json + +# do not use lockfile to determine version ranges placed into docs +settings: + lockfile: false + +output: + file: module.json + mode: replace + template: | + {{ .Content }} diff --git a/.tfdocs-markdown.yaml b/.tfdocs-markdown.yaml new file mode 100644 index 0000000..aa55c34 --- /dev/null +++ b/.tfdocs-markdown.yaml @@ -0,0 +1,27 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +formatter: markdown + +# do not use lockfile to determine version ranges placed into docs +settings: + lockfile: false + +output: + file: README.md + mode: inject + template: |- + + {{ .Content }} + diff --git a/.tflint.hcl b/.tflint.hcl new file mode 100644 index 0000000..fcaf9cd --- /dev/null +++ b/.tflint.hcl @@ -0,0 +1,38 @@ +plugin "google" { + enabled = true + version = "0.12.1" + source = "github.com/terraform-linters/tflint-ruleset-google" +} +rule "terraform_deprecated_index" { + enabled = true +} +rule "terraform_unused_declarations" { + enabled = true +} +rule "terraform_documented_variables" { + enabled = true +} +rule "terraform_comment_syntax" { + enabled = true +} +rule "terraform_documented_outputs" { + enabled = true +} +rule "terraform_documented_variables" { + enabled = true +} +rule "terraform_typed_variables" { + enabled = true +} +rule "terraform_naming_convention" { + enabled = true +} +rule "terraform_required_version" { + enabled = true +} +rule "terraform_required_providers" { + enabled = true +} +rule 
"terraform_unused_required_providers" { + enabled = true +} diff --git a/README.md b/README.md index 29f7f31..333f140 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,98 @@ -# Distributed Asynchronous Object Storage (DAOS) on Google Cloud Platform (GCP) +# DAOS on GCP -This repository contains scripts to deploy DAOS on GCP. +Distributed Asynchronous Object Storage ([DAOS](https://docs.daos.io/)) on Google Cloud Platform ([GCP](https://cloud.google.com/)) -It consists of the directories: -- [images](images) - which contains scripts to prepare DAOS images for GCP used by Terraform code -- [terraform](terraform) - which contains Terraform code used to deploy DAOS on GCP +This repository contains: + +- [Packer](https://www.packer.io/) scripts used to build DAOS images with [Google Cloud Build](https://cloud.google.com/build) +- [Terraform](https://www.terraform.io/) modules that can be used to deploy DAOS Server and Client instances +- [Terraform](https://www.terraform.io/) examples that demonstrate how to use the Terraform modules + +**Directory structure** + +``` +. +├── docs Miscellaneous documentation and Cloud Shell tutorials +├── images Cloud Build config files and Packer templates +│ └── scripts Scripts that Packer runs to configure images +├── terraform Terraform content +│ ├── examples Examples that demonstrate how to use the DAOS Terraform modules +│ └── modules Terraform modules for deploying DAOS server and client instances +└── tools Tools used by pre-commit +``` + +### Prerequisites + +In order to deploy DAOS on GCP you will need + +- **Access to the Google Cloud Platform (GCP)** + + See [Get Started with Google Cloud](https://cloud.google.com/docs/get-started) + +- **A GCP Project** + + See [Creating and managing projects](https://cloud.google.com/resource-manager/docs/creating-managing-projects) + +- **Required Software** + + The documentation in this repository assumes that you will use [Cloud Shell](https://cloud.google.com/shell). 
+ + If you use [Cloud Shell](https://cloud.google.com/shell), you do not need to install any software on your system. + + If you do not want to use Cloud Shell, you will need to install + - [Git](https://git-scm.com/) + - [Google Cloud CLI](https://cloud.google.com/sdk/docs/install) + - [Terraform](https://learn.hashicorp.com/tutorials/terraform/install-cli) + +### Deploy DAOS on GCP + +Steps to deploy DAOS on GCP + +1. **Set defaults for Google Cloud CLI (```gcloud```)** + + Only needs to be done once in your shell (Cloud Shell or local shell). + +2. **Create a Packer image in your GCP project** + + In order to build DAOS images with Cloud Build your GCP project must contain a Packer image. + + Building the Packer images only needs to be done once for a GCP project. + +3. **Build DAOS Server and Client images** + + DAOS Server and Client instances are deployed using images that have DAOS pre-installed. + + Therefore, the images need to be built prior to running Terraform. + + > Click the button to open an interactive walk-through in Cloud Shell which will guide you through the steps listed above. + > + > [![DAOS on GCP Setup](http://gstatic.com/cloudssh/images/open-btn.png)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/daos-stack/google-cloud-daos&cloudshell_git_branch=main&shellonly=true&tutorial=docs/tutorials/daosgcp_setup.md) + +4. **Use DAOS Terraform modules in your Terraform code** + + You will need to write your own Terraform code for your particular use case. + + Your Terraform code can use the modules in ```terraform/modules``` to deploy DAOS server and client instances. + + The example Terraform configurations provided in ```terraform/examples``` can be used as a reference. + + See the [DAOS Cluster](terraform/examples/daos_cluster/README.md) example to learn more about how to use the ```terraform/modules```. 
+ +## Links + +- [Distributed Asynchronous Object Storage (DAOS)](https://docs.daos.io/) +- [Google Cloud Platform (GCP)](https://cloud.google.com/) +- [Google Cloud CLI (gcloud)](https://cloud.google.com/cli) +- [Google Cloud Build](https://cloud.google.com/build) +- [Cloud Shell](https://cloud.google.com/shell) +- [Packer](https://www.packer.io/) +- [Terraform](https://www.terraform.io/) + + +## Development + +If you are contributing to the code in this repo, see [Development](docs/development.md) + +## License + +[Apache License Version 2.0](LICENSE) diff --git a/docs/development.md b/docs/development.md new file mode 100644 index 0000000..fbc201d --- /dev/null +++ b/docs/development.md @@ -0,0 +1,139 @@ +# Development + +Please use the [pre-commit](https://pre-commit.com/) hooks configured in this repository to ensure that all Terraform modules are validated and properly documented before pushing code changes. + + +## Install pre-commit and dependencies + +In order to use [pre-commit](https://pre-commit.com/) you will need to install it on your system. + +You will also need to install the dependencies that are required for the pre-commit plugins used in this repository. + +1. Install [pre-commit](https://pre-commit.com/). + + [pre-commit](https://pre-commit.com/) can be installed using standard package managers. + + Instructions can be found at the [pre-commit website](https://pre-commit.com/#install). + + +2. Install [TFLint](https://github.com/terraform-linters/tflint) + + See the [installation instructions](https://github.com/terraform-linters/tflint#installation) + + After installing tflint change into the root of the locally cloned git repo and run the `init` command. + + ```shell + cd + tflint --init + ``` + +3. Install terraform-docs + + See [https://github.com/terraform-docs/terraform-docs](https://github.com/terraform-docs/terraform-docs) + + +4. MacOS only + + MacOS users will need to install `findutils` and `coreutils`. 
+ + Before installing coreutils read the + [gotchas about coreutils](https://www.pixelbeat.org/docs/coreutils-gotchas.html) + to ensure that the installation will not negatively impact your + system. + + **Homebrew** + + ```shell + brew install findutils + brew install coreutils + ``` + + **Conda** + + ```shell + brew install findutils + conda install coreutils + ``` + + Update your PATH in your `~/.bashrc` or `~/.bash_profile` + ```shell + PATH="/usr/local/opt/coreutils/libexec/gnubin:$PATH" + ``` + +## Install the pre-commit hook + +After you have installed [pre-commit](https://pre-commit.com/) and its dependencies on your system you need to install the pre-commit hook in +your local clone of the google-cloud-daos git repository. + +```shell +cd +pre-commit install +``` + +## Running pre-commit + +[pre-commit](https://pre-commit.com/) will now run on any files that are staged when you run `git commit -s`. + +To run [pre-commit](https://pre-commit.com/) on all files prior to staging them + +```shell +pre-commit run --all-files +``` + +## Updating Cloud Shell urls in documentation + +Several of the README.md files in this repository contain links that open tutorials in Cloud Shell. + +In order for these links to work properly during development the URLs must be changed to point to the correct branch. + +Currently Cloud Shell tutorials do not have an automatic way to detect a branch. Therefore, the branch parameter in the URL must be updated manually. + +The `tools/autodoc/cloudshell_urls.sh` script should be used to update the branch parameter in all Cloud Shell URLs that are present in *.md files in this repo. + +### Update Cloud Shell URLs when submitting a PR + +If your PR changes README.md files that contain Cloud Shell URLs, then prior to requesting a review you should run the following command and push any changes to your dev branch. 
+ +```bash +tools/autodoc/cloudshell_urls.sh --repo-url --branch +``` + +This will allow the reviewers to run Cloud Shell tutorials from your PR branch. + +### Update Cloud Shell URLs before merging to a branch + +If you are merging changes to `*.md` files with Cloud Shell URLs in them you need to ensure that the URLs are updated with the name of the target branch before you merge. + +This is not ideal but it's the only way we can think of doing things for now. + +Let's say that you have a PR that has been approved and you want to merge it to the `develop` branch. + +Prior to merging you need to run + +```bash +tools/autodoc/cloudshell_urls.sh --repo-url --branch develop +``` + +And then commit the changes in your dev branch. + +Once that is done you can then merge to the `develop` branch. + +Now let's say that you want to merge the `develop` branch into the `main` branch. + +You will need to check out the https://github.com/daos-stack/google-cloud-daos `develop` branch and run + +```bash +tools/autodoc/cloudshell_urls.sh --repo-url https://github.com/daos-stack/google-cloud-daos --branch main +``` + +Commit the changes and push them to the develop branch. After doing this you can merge the `develop` branch to `main`. + +Now you will need to set the URLs back to the develop branch. + +```bash +tools/autodoc/cloudshell_urls.sh --repo-url https://github.com/daos-stack/google-cloud-daos --branch develop +``` + +Commit the changes and push them to the develop branch. + +This is very tedious. We will continue to seek out a better solution for maintaining the Cloud Shell URLs. diff --git a/docs/tutorials/daosgcp_setup.md b/docs/tutorials/daosgcp_setup.md new file mode 100644 index 0000000..c932b60 --- /dev/null +++ b/docs/tutorials/daosgcp_setup.md @@ -0,0 +1,159 @@ +# DAOS GCP Setup + +In this walkthrough you will + +1. Set defaults for Google Cloud CLI (```gcloud```) +2. Create a Packer image in your GCP project +3. 
Build DAOS Server and Client images with Packer in Cloud Build + +After completing this walkthrough you will be able to run Terraform to deploy DAOS Server and Client instances. + +## Project Selection + +Select the project that you would like to use for deploying DAOS in GCP. + + + +> Note that when running scripts and examples from the [google-cloud-daos](https://github.com/daos-stack/google-cloud-daos) repository, charges will be +> incurred within the selected project. +> +> Always be sure to run ```terraform destroy``` when you no longer need your instances. + +Click **Start** to continue + +## Set ```gcloud``` defaults + +Many of the scripts and examples in the [google-cloud-daos](https://github.com/daos-stack/google-cloud-daos) repository use three default configuration settings in your Google Cloud CLI (```gcloud```) configuration. + +The default settings are + +1. project +2. region +3. zone + + +### Set Default Project + +To set the default project run + +```bash +gcloud config set project {{project-id}} +``` + +### Set Default Region + +To set the default region run + +```bash +gcloud config set compute/region us-central1 +``` + +The ```us-central1``` region is recommended but feel free to change as needed. + +### Set Default Zone + +To set the default zone run + +```bash +gcloud config set compute/zone us-central1-f +``` + +The ```us-central1-f``` zone is recommended but feel free to change as needed. + +### Defaults Set! + +You have now set the necessary defaults required for the scripts and examples in the [google-cloud-daos](https://github.com/daos-stack/google-cloud-daos) repository. + +Click **Next** to continue + +## Create Packer Image + +DAOS images are built using [Packer](https://www.packer.io/) in [Cloud Build](https://cloud.google.com/build). + +In order to run Packer in Cloud Build you need to provision an instance from an image that has Packer installed. 
+ +Therefore, in order to build DAOS images with Packer in Cloud Build, your GCP project must contain a Packer image. + +Creating the Packer image only needs to be done once in the GCP project. + +### Enable APIs + +To enable the necessary APIs for Cloud Build run: + +```bash +gcloud services enable sourcerepo.googleapis.com +gcloud services enable compute.googleapis.com +gcloud services enable servicemanagement.googleapis.com +gcloud services enable storage-api.googleapis.com +``` + +### Required IAM permissions + +The Cloud Build service account requires the Compute Instance Admin role (`roles/compute.instanceAdmin`). + +To grant this role to the service account run: + +```bash +CLOUD_BUILD_ACCOUNT=$(gcloud projects get-iam-policy {{project-id}} --filter="(bindings.role:roles/cloudbuild.builds.builder)" --flatten="bindings[].members" --format="value(bindings.members[])") + +gcloud projects add-iam-policy-binding {{project-id}} \ + --member "${CLOUD_BUILD_ACCOUNT}" \ + --role roles/compute.instanceAdmin +``` + +### Create the Packer Image + +Cloud Build provides the [Packer community builder image](https://github.com/GoogleCloudPlatform/cloud-builders-community/tree/master/packer). + +To build the Packer image run: + +```bash +pushd . +cd ~/ +git clone https://github.com/GoogleCloudPlatform/cloud-builders-community.git +cd cloud-builders-community/packer +gcloud builds submit . +rm -rf ~/cloud-builders-community +popd +``` + +
+ +You have completed the necessary steps to create your Packer image which will be used to build DAOS images with Packer in Cloud Build. + +Click **Next** to continue + +## Build DAOS Server and Client images + +In order to use Terraform to provision DAOS Server and Client instances you need to build images that have DAOS pre-installed. + +To build the DAOS Server and Client images run: + +```bash +pushd . +cd images +./build_images.sh --type all +popd +``` + +It will take a few minutes for the images to build. + +Wait for the image build to complete. + +Click **Next** to continue + +## DAOS GCP Setup Complete + + + +You can now begin using Terraform to provision DAOS Server and Client instances in the **{{project-id}}** project! + +**Next Steps** + +- Read the terraform/modules/daos_client/README.md file +- Read the terraform/modules/daos_server/README.md file +- View the files in the ```terraform/examples/daos_cluster``` directory +- Open a tutorial that walks you through the process of deploying a DAOS cluster using the ```terraform/examples/daos_cluster``` example. + ```bash + cloudshell launch-tutorial ./docs/tutorials/example_daos_cluster.md + ``` diff --git a/docs/tutorials/example_daos_cluster.md b/docs/tutorials/example_daos_cluster.md new file mode 100644 index 0000000..b55dc8d --- /dev/null +++ b/docs/tutorials/example_daos_cluster.md @@ -0,0 +1,306 @@ +# DAOS GCP Full Cluster Example + +## Overview + +In this tutorial you will + +1. Use Terraform to deploy a DAOS cluster using the example configuration in ```terraform/examples/daos_cluster``` +2. Perform the following DAOS administration tasks + - Format Storage + - Create a Pool + - Create a Container + - Mount the storage on the clients +3. Copy files to DAOS mounted storage +4. Tear down the DAOS cluster deployment + +Click on **Start** + +## Setup and Requirements + +Before continuing, it is assumed that you have completed the following steps in your GCP project. + +1. 
Set defaults for Google Cloud CLI (```gcloud```) in Cloud Shell +2. Create a Packer image in your GCP project +3. Build DAOS Server and Client images with Packer in Cloud Build + +If you have not yet completed these steps, you can open a tutorial in Cloud Shell that will guide you through each step. + +If you are not sure if you have completed these steps run + +```bash +gcloud compute images list --filter="name:daos" --format="value(name)" +``` + +If you see `daos-server-*` and `daos-client-*` images, click **Next** to continue to the next step. + +Otherwise, run another tutorial that walks you through the steps listed above. + +```bash +teachme docs/tutorials/daosgcp_setup.md +``` + +## The daos_cluster example + +The example Terraform configuration in [terraform/examples/daos_cluster](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/examples/daos_cluster) demonstrates how the [DAOS Terraform Modules](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules) can be used in your own Terraform code. + +Change into the example directory now + +```bash +cd terraform/examples/daos_cluster +``` + +This will be our working directory for the rest of the tutorial. + +Click **Next** to continue + +## Create a `terraform.tfvars` file + +You need to create a `terraform.tfvars` file that contains variable values for Terraform. + +There are many variables to configure DAOS server and client configurations. Changes to certain variable values often require corresponding changes in other variable values. Depending on your use case this can become a complex topic. + +To simplify the task of setting the proper variable values for a working DAOS cluster, there are example tfvars files that can be copied to create a `terraform.tfvars` file. + +Select one of the example files to copy to `terraform.tfvars`. + +The example tfvars files are: + +1. `terraform.tfvars.tco.example` + + 16 DAOS Clients, 4 DAOS Servers with 16 375GB NVMe SSDs per server. 
+ + To use this configuration run + ```bash + cp terraform.tfvars.tco.example terraform.tfvars + ``` + +2. `terraform.tfvars.perf.example` + + 16 DAOS Clients, 4 DAOS Servers with 4 375GB NVMe SSDs per server. + + To use this configuration run + ```bash + cp terraform.tfvars.perf.example terraform.tfvars + ``` + +Click **Next** to continue + +## Modify `terraform.tfvars` + +Now that you have created a `terraform.tfvars` file, there is one change that needs to be made in the file. + +You need to replace the `` placeholder with your project id. + +To replace the `` placeholder run + +```bash +PROJECT_ID=$(gcloud config list --format 'value(core.project)') +sed -i "s//${PROJECT_ID}/g" terraform.tfvars +``` + +To view the `terraform.tfvars` file in the Cloud Shell editor run + +```bash +cloudshell edit terraform.tfvars +``` + +Notice that the `terraform.tfvars` file contains the values for variables that are defined in `variables.tf` + +```bash +cloudshell edit variables.tf +``` + +Click **Next** to continue + +## View `main.tf` + +Open the `main.tf` file + +```bash +cloudshell edit main.tf +``` + +Notice how the `main.tf` uses the modules in [terraform/modules](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules). + +The `terraform/examples/daos_cluster` Terraform configuration only needed to define the variables that are passed to the `daos_server` and `daos_client` modules in `main.tf`. + +The variable definitions are in `variables.tf`. + +The variable values are set in `terraform.tfvars`. + +Click **Next** to continue + +## Run Terraform to Deploy the DAOS cluster + +You can now deploy a DAOS cluster using the `terraform/examples/daos_cluster` example configuration. + +Initialize the working directory. + +```bash +terraform init -input=false +``` + +Create an execution plan. + +```bash +terraform plan -out=tfplan -input=false +``` + +Execute the actions in the plan. 
+ +```bash +terraform apply -input=false tfplan +``` + +List the instances that were created. + +Terraform will create 2 [managed instance groups (MIGs)](https://cloud.google.com/compute/docs/instance-groups) that will create the DAOS server and client instances. + +It may take some time for the instances to become available. + +To see the list of instances run + +```bash +gcloud compute instances list --filter="name ~ daos.*" --format="value(name,INTERNAL_IP)" +``` + +Click **Next** to continue + +## Prepare storage + +When the DAOS server and client instances are deployed the DAOS services are started but the DAOS storage is not ready to use yet. + +There are a few administrative tasks that must be performed before the DAOS storage can be used. + +The DAOS Management Tool (`dmg`) is installed on all DAOS client instances and can be used to perform administrative tasks. + +You can use `dmg` on any of the DAOS client instances. + +Log into the first DAOS client instance + +```bash +gcloud compute ssh daos-client-0001 +``` + +If you are prompted to create an SSH key pair for gcloud, follow the prompts. + +Click **Next** to continue + +## Storage Format + +When the DAOS server instances are created the `daos_server` service will be started but will be in "maintenance mode". + +In order to begin using the storage you must issue a *format* command. + +To format the storage run + +```bash +dmg storage format +``` + +To learn more see [Storage Formatting](https://docs.daos.io/latest/admin/deployment/#storage-formatting) + +Click **Next** to continue + +## Create pool + +Now that the system has been formatted you can create a Pool. + +First check to see how much free NVMe storage you have. 
+ +```bash +dmg storage query usage +``` + +This will return something like + +``` +Hosts SCM-Total SCM-Free SCM-Used NVMe-Total NVMe-Free NVMe-Used +----- --------- -------- -------- ---------- --------- --------- +daos-server-0001 107 GB 107 GB 0 % 3.2 TB 3.2 TB 0 % +``` + +> If the values in the columns are showing zeros, wait for 1-2 minutes and run the command again. +> +> Even though the `dmg storage format` command returned immediately it can sometimes take a few minutes for the storage system to be ready. +> +> You will know it's ready when you no longer see zeros in the output from the `dmg storage query usage` command. + +In the example output above there is one server with a total of 3.2TB of free space. + +With that information you know you can create a 3TB pool. + +Create the pool. + +```bash +dmg pool create -z 3TB -t 3 -u ${USER} --label=daos_pool +``` + +For more information about pools see + +- [https://docs.daos.io/latest/overview/storage/#daos-pool](https://docs.daos.io/latest/overview/storage/#daos-pool) +- [https://docs.daos.io/latest/admin/pool_operations/](https://docs.daos.io/latest/admin/pool_operations/) + +Click **Next** to continue + +## Create container + +Now that a pool has been created, create a container in that pool + +```bash +daos container create --type=POSIX --properties=rf:0 --label=daos_cont daos_pool +``` + +For more information about containers see [https://docs.daos.io/latest/overview/storage/#daos-container](https://docs.daos.io/latest/overview/storage/#daos-container) + +Click **Next** to continue + +## Mount container + +The container now needs to be mounted. + +To mount the container run + +```bash +MOUNT_DIR="/tmp/daos_test1" +mkdir -p "${MOUNT_DIR}" +dfuse --singlethread --pool=daos_pool --container=daos_cont --mountpoint="${MOUNT_DIR}" +df -h -t fuse.daos +``` + +Your DAOS storage is now ready! 
+ +You can now store files in `/tmp/daos_test1` + +Click **Next** to continue + +## Shutting it down + +If you are still logged into the first DAOS client instance, log out now. + +To shut down the DAOS cluster run + +```bash +terraform destroy +``` + +Click **Next** to continue + +## Congratulations! + + + +You have completed a DAOS cluster deployment on GCP! + +In this tutorial you used the Terraform example configuration in `terraform/examples/daos_cluster` to deploy a DAOS cluster. + +You then performed the following administration tasks: + +1. Formatted storage +2. Created a pool +3. Created a container +4. Mounted the container + +What's next? + +See [https://docs.daos.io](https://docs.daos.io) to learn more about DAOS! diff --git a/images/configs/daos_agent.yml b/images/configs/daos_agent.yml deleted file mode 100644 index 9064545..0000000 --- a/images/configs/daos_agent.yml +++ /dev/null @@ -1,3 +0,0 @@ -access_points: [changeap] -transport_config: - allow_insecure: true diff --git a/images/configs/daos_control.yml b/images/configs/daos_control.yml deleted file mode 100644 index 7658cee..0000000 --- a/images/configs/daos_control.yml +++ /dev/null @@ -1,3 +0,0 @@ -hostlist: [changehosts] -transport_config: - allow_insecure: true diff --git a/images/daos-server-image.json b/images/daos-server-image.json index 0b0d788..7105463 100644 --- a/images/daos-server-image.json +++ b/images/daos-server-image.json @@ -40,24 +40,6 @@ "./scripts/tune.sh", "./scripts/install_daos.sh" ] - }, - { - "type": "file", - "source": "./configs", - "destination": "/tmp/" - }, - { - "type": "file", - "source": "./scripts/gcp_metadata.sh", - "destination": "/tmp/" - }, - { - "type": "shell", - "execute_command": "echo 'packer' | sudo -S env {{ .Vars }} {{ .Path }}", - "environment_vars": [ - "DAOS_VERSION={{user `daos_version`}}" - ], - "script": "./scripts/setup_server.sh" } ] } diff --git a/images/daos_version.sh b/images/daos_version.sh index 5401660..6205524 100644 --- 
a/images/daos_version.sh +++ b/images/daos_version.sh @@ -1,7 +1,7 @@ #!/bin/bash # Default DAOS version to be installed in images -export DEFAULT_DAOS_VERSION="2.0.1" +export DEFAULT_DAOS_VERSION="2.0" # Default DAOS packages repo export DEFAULT_DAOS_REPO_BASE_URL="https://packages.daos.io" diff --git a/images/scripts/gcp_metadata.sh b/images/scripts/gcp_metadata.sh deleted file mode 100644 index db68c97..0000000 --- a/images/scripts/gcp_metadata.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -readonly srv_yaml="/etc/daos/daos_server.yml" -readonly agt_yaml="/etc/daos/daos_agent.yml" -readonly ctl_yaml="/etc/daos/daos_control.yml" - -readonly URL="http://metadata.google.internal/computeMetadata/v1/instance/attributes" - -fetch_attr() -{ - local name=$* - echo `curl -s ${URL}/${name} -H "Metadata-Flavor: Google"` -} - -# Update access points only once -if ! grep -q "changeap" "$srv_yaml" "$agt_yaml"; then - echo "Access points already populated, skipping" - exit 0 -fi - -echo "Extracting instance metadata ..." - -inst_type=`fetch_attr inst_type` -if [[ ! "$inst_type" == "daos-server" ]]; then - echo "Instance type is ${inst_type} and not daos-server, skipping" - exit 1 -fi - -base_name=`fetch_attr inst_base_name` -inst_nr=`fetch_attr inst_nr` -# generate list of hosts -hosts=\'`printf "%s-[%04d-%04d]" "${base_name}" 1 ${inst_nr}`\' - -echo "Selecting access points among ${hosts}..." 
- -if [[ ${inst_nr} -ge 5 ]]; then - # Support up to 2 instance failures - apnr=5 -elif [[ ${inst_nr} -ge 3 ]]; then - # Support single instance failure - apnr=3 -else - # single-replica, no failure supported - apnr=1 -fi -# choose contiguous access points until we know more about fault domains -# host range not supported in the yaml file yet -# ap=\'`printf "%s-[%04d-%04d]" "${base_name}" 0 $((apnr-1))`\' -# so list each node individually -ap="" -for i in `seq 1 ${apnr}`; do - name=`printf "%s-%04d" "${base_name}" ${i}` - if [[ "$ap" == "" ]]; then - ap=\'${name}\' - else - ap=$ap,\'${name}\' - fi -done -echo "${ap} selected as access points" - -echo "Updating yaml files ..." -sed -i "s/hostlist.*/hostlist: [${hosts}]/g" ${ctl_yaml} -sed -i "s/access_points.*/access_points: [${ap}]/g" ${srv_yaml} -sed -i "s/access_points.*/access_points: [${ap}]/g" ${agt_yaml} - -echo "All done" diff --git a/images/scripts/setup_server.sh b/images/scripts/setup_server.sh deleted file mode 100644 index 86989ff..0000000 --- a/images/scripts/setup_server.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# TODO: Move this setup to terraform - -readonly yaml_path="/etc/daos" -readonly meta_path="/usr/share/daos/gcp_metadata.sh" -readonly systemd_file="/usr/lib/systemd/system/daos_server.service" - -echo "Setting up DAOS server" - -# Template config files have been copied by packer to /tmp/daos_configs -cp -f /tmp/configs/* ${yaml_path} -chown -f daos_server.daos_server ${yaml_path}/*.yml -rm -rf /tmp/configs - -# Copy script parsing instance metadata -cp -f /tmp/gcp_metadata.sh ${meta_path} -chown -f daos_server.daos_server ${meta_path} -chmod +x ${meta_path} - -# Create directory for engine logs and tmpfs mount point -mkdir -p /var/daos -chown -f daos_server.daos_server /var/daos - -# Modify systemd script for GCP -# First, run daos_server as root since GCP does not support VFIO -sed -i "s/User=daos_server/User=root/; s/Group=daos_server/Group=root/" ${systemd_file} -# Then, run gcp_metadata.sh before starting the service -# by using ExecStartPre in systemd unit file -sed -i "/^ExecStart.*/a ExecStartPre=${meta_path}" ${systemd_file} - -# enable daos_server in systemd (will be started automatically at boot time) -systemctl enable daos_server diff --git a/terraform/README.md b/terraform/README.md index 80fe914..8302e7a 100644 --- a/terraform/README.md +++ b/terraform/README.md @@ -1,19 +1,8 @@ -# Terraform deployment of Distributed Asynchronous Object Storage (DAOS) on Google Cloud Platform (GCP) +# Terraform -This directory contains Terraform code to deploy DAOS on GCP. +This directory contains [Terraform](https://www.terraform.io/) code for deploying [DAOS](https://docs.daos.io/) on [GCP](https://cloud.google.com/). -This module consists of a collection of Terraform submodules to deploy DAOS client and server instances on GCP. -Below is the list of available submodules: - -* [DAOS Server](modules/daos_server) -* [DAOS Client](modules/daos_client) - -To deploy full DAOS cluster use [full_cluster_setup](examples/full_cluster_setup) example. 
- -## Compatibility - -This module is meant to use with Terraform 0.14. - -## Examples - -[examples](examples) directory contains Terraform code of how to use these particular submodules. +| Subdirectory | Description | +| ------------ | ----------- | +| [examples](examples) | Contains example Terraform configurations for [DAOS](https://docs.daos.io/) deployment. | +| [modules](modules) | Contains Terraform modules that can be used in your own Terraform code to deploy DAOS server and DAOS client instances on GCP.| diff --git a/terraform/examples/README.md b/terraform/examples/README.md new file mode 100644 index 0000000..d93da8c --- /dev/null +++ b/terraform/examples/README.md @@ -0,0 +1,10 @@ +# Terraform Examples + +This directory includes examples of Terraform configurations for different types of [DAOS](https://docs.daos.io/) deployments in GCP. + +| Subdirectory | Description | +| --------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [daos_cluster](daos_cluster) | Example Terraform configuration for a DAOS cluster consisting of servers and clients | +| [io500](io500) | Example that uses custom client images that have [IO500](https://github.com/IO500/io500) installed. Uses the daos_cluster example to deploy a DAOS cluster with the IO500 client images. | +| [only_daos_client](only_daos_client) | Example Terraform configuration for DAOS clients only | +| [only_daos_server](./only_daos_server/) | Example Terraform configuration for DAOS servers only | diff --git a/terraform/examples/daos_client_mig/README.md b/terraform/examples/daos_client_mig/README.md deleted file mode 100644 index fb69e62..0000000 --- a/terraform/examples/daos_client_mig/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# DAOS Client MIG - -Creates a managed instance group running ```number_of_instances``` DAOS clients. 
- -A DAOS client is simply an instance based on an image containing all DAOS packages and dependencies (can be created with the image scripts). - -## Requirements - -Please make sure you go through the [Requirements section](../../modules/daos_client/README.md) of the DAOS client module. - -| Name | Version | -|------|---------| -| [google](#requirement\_google) | ~> 3.54 | - -## Setup - -1. Create ```terraform.tfvars``` in this directory or the directory where you're running this example. -2. Copy the ```terraform.tfvars.example``` content into ```terraform.tfvars``` file and update the contents to match your environment. - -## Modules - -| Name | Source | Version | -|------|--------|---------| -| [daos\_client](#module\_daos\_client) | ../../modules/daos_client | n/a | - -## Resources - -No resources. - -## Inputs - -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:--------:| -| [instance\_base\_name](#input\_instance\_base\_name) | MIG instance base names to use | `string` | `"daos-client"` | no | -| [machine\_type](#input\_machine\_type) | GCP machine type. ie. e2-medium | `string` | `"n2-highmem-16"` | no | -| [mig\_name](#input\_mig\_name) | MIG name | `string` | `"daos-client"` | no | -| [labels](#input\_labels) | Set of key/value label pairs to assign to daos-client instances | `any` | n/a | no | -| [network](#input\_network) | GCP network to use | `string` | n/a | yes | -| [number\_of\_instances](#input\_number\_of\_instances) | Number of daos clients to bring up | `number` | `2` | no | -| [os\_disk\_size\_gb](#input\_os\_disk\_size\_gb) | OS disk size in GB | `number` | `20` | no | -| [os\_disk\_type](#input\_os\_disk\_type) | OS disk type ie. 
pd-ssd, pd-standard | `string` | `"pd-ssd"` | no | -| [os\_family](#input\_os\_family) | OS GCP image family | `any` | `null` | no | -| [os\_project](#input\_os\_project) | OS GCP image project name | `any` | `null` | no | -| [project\_id](#input\_project\_id) | The GCP project to use | `string` | n/a | yes | -| [region](#input\_region) | The GCP region to create and test resources in | `string` | n/a | yes | -| [subnetwork](#input\_subnetwork) | GCP sub-network to use | `string` | n/a | yes | -| [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | n/a | yes | -| [template\_name](#input\_template\_name) | MIG template name | `string` | `"daos-client"` | no | -| [zone](#input\_zone) | The GCP zone to create and test resources in | `string` | n/a | yes | - -## Outputs - -No outputs. diff --git a/terraform/examples/daos_client_mig/main.tf b/terraform/examples/daos_client_mig/main.tf deleted file mode 100644 index b34dd68..0000000 --- a/terraform/examples/daos_client_mig/main.tf +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright 2021 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -provider "google" { - region = var.region -} - -module "daos_client" { - source = "../../modules/daos_client" - project_id = var.project_id - network = var.network - subnetwork = var.subnetwork - subnetwork_project = var.subnetwork_project - region = var.region - zone = var.zone - labels = var.labels - - number_of_instances = var.number_of_instances - - instance_base_name = var.instance_base_name - os_disk_size_gb = var.os_disk_size_gb - os_disk_type = var.os_disk_type - template_name = var.template_name - mig_name = var.mig_name - machine_type = var.machine_type - os_project = var.os_project - os_family = var.os_family -} diff --git a/terraform/examples/daos_client_mig/terraform.tfvars.example b/terraform/examples/daos_client_mig/terraform.tfvars.example deleted file mode 100644 index 9f4a7eb..0000000 --- a/terraform/examples/daos_client_mig/terraform.tfvars.example +++ /dev/null @@ -1,22 +0,0 @@ -project_id="" -network = "" -subnetwork = "" -subnetwork_project = "" -region = "europe-west4" -zone = "europe-west4-a" -labels = { - example="daos_client_mig" -} - -number_of_instances = 2 - -instance_base_name = "daos-client" -os_disk_size_gb = 20 -os_disk_type = "pd-ssd" -template_name = "daos-client" -mig_name = "daos-client" -machine_type = "n2-highmem-16" # 10x physical cores, using 8x targets - -# the image must be built before (see build folder) -os_project = "" -os_family = "daos-client-hpc-centos-7" diff --git a/terraform/examples/daos_cluster/README.md b/terraform/examples/daos_cluster/README.md new file mode 100644 index 0000000..0d78059 --- /dev/null +++ b/terraform/examples/daos_cluster/README.md @@ -0,0 +1,271 @@ +# DAOS Cluster Example + +This example Terraform configuration demonstrates how to use the [DAOS Terraform Modules](../../modules) to deploy a DAOS cluster consisting of servers and clients. 
+ +> +> The current version of the [daos_server](../../modules/daos_server) Terraform module does not yet support automation of the following administration tasks +> +> - storage format +> - pool creation +> - container creation +> - mounting container +> +> These steps must be performed manually by an administrator after the DAOS Server and Client instances have been deployed with Terraform. +> +> Instructions for performing the manual steps will be provided in the documentation for this example. + +## Setup + +The following steps must be performed prior to running this example. + +1. Set defaults for Google Cloud CLI (```gcloud```) +2. Create a Packer image in your GCP project +3. Build DAOS Server and Client images + +If you have not completed these steps yet, click the button below to open an interactive walkthrough in [Cloud Shell](https://cloud.google.com/shell). After completing the walkthrough your GCP project will contain the images required to run this Terraform example. + +[![DAOS on GCP Setup](http://gstatic.com/cloudssh/images/open-btn.png)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/daos-stack/google-cloud-daos&cloudshell_git_branch=main&shellonly=true&tutorial=docs/tutorials/daosgcp_setup.md) + +## Deploy a DAOS cluster with this example + +Click the button below to open a [Cloud Shell](https://cloud.google.com/shell) tutorial that uses this example to deploy a DAOS Cluster in GCP. + +After completing the tutorial you will have a basic understanding of how to use the [DAOS Terraform Modules](../../modules) in your own Terraform configurations as well as how to perform basic administration steps on the DAOS instances after they are deployed. 
+ +[![DAOS on GCP Setup](http://gstatic.com/cloudssh/images/open-btn.png)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/daos-stack/google-cloud-daos&cloudshell_git_branch=main&shellonly=true&tutorial=docs/tutorials/example_daos_cluster.md) + +## Terraform Files + +List of Terraform files in this example + +| Filename | Description | +| ----------------------------- | ------------------------------------------------------------------------------- | +| main.tf | Main Terraform configuration file containing resource definitions | +| variables.tf | Variable definitions for variables used in main.tf | +| versions.tf | Provider definitions | +| terraform.tfvars.perf.example | Pre-Configured set of variables focused on performance | +| terraform.tfvars.tco.example | Pre-Configured set of variables focused on lower total cost of ownership | + +## Create a `terraform.tfvars` file + +Before you run `terraform apply` to deploy a DAOS cluster with this example you need to create a `terraform.tfvars` file in the `terraform/examples/daos_cluster` directory. + +The `terraform.tfvars` file will contain the variable values that are used by the `main.tf` configuration file. + +To ensure a successful deployment of a DAOS cluster there are two `terraform.tfvars.*.example` files that you can choose from. + +You will need to decide which of these files you will copy to `terraform.tfvars`. + +### The terraform.tfvars.tco.example file + +The `terraform.tfvars.tco.example` contains variables for a cluster deployment with 16 DAOS Clients, 4 DAOS Servers with 16 375GB NVMe SSDs per server. + +To use the `terraform.tfvars.tco.example` file run + +```bash +cp terraform.tfvars.tco.example terraform.tfvars +``` + +### The terraform.tfvars.perf.example file + +The `terraform.tfvars.perf.example` contains variables for a cluster deployment with 16 DAOS Clients, 4 DAOS Servers with 4 375GB NVMe SSDs per server. 
+ +To use the ```terraform.tfvars.perf.example``` file run + +```bash +cp terraform.tfvars.perf.example terraform.tfvars +``` + +### Update `terraform.tfvars` with your project id + +Now that you have a `terraform.tfvars` file you need to replace the `<project_id>` placeholder in the file with your project id. + +To update the project id in `terraform.tfvars` run + +```bash +PROJECT_ID=$(gcloud config list --format 'value(core.project)') +sed -i "s/<project_id>/${PROJECT_ID}/g" terraform.tfvars +``` + +## Deploy the DAOS cluster with the example Terraform configuration + +> **Billing Notification!** +> +> Running this example will incur charges in your project. +> +> To avoid surprises, be sure to monitor your costs associated with running this example. +> +> Don't forget to shut down the DAOS cluster with `terraform destroy` when you are finished. + +To deploy the DAOS cluster + +```bash +cd terraform/examples/daos_cluster +terraform init -input=false +terraform plan -out=tfplan -input=false +terraform apply -input=false tfplan +``` + +## Perform DAOS administration tasks + +After your DAOS cluster has been deployed you can log into the first DAOS client instance to perform administrative tasks. + +### Log into the first DAOS client instance + +Find the name and IP of the first client instance + +```bash +gcloud compute instances list --filter="name ~ daos-client.*-0001" --format="value(name,INTERNAL_IP)" +``` +Let's assume the name of the first client is `daos-client-0001` + +Log into the first client instance + +```bash +gcloud compute ssh daos-client-0001 +``` + +### Format Storage + +Format the storage system. + +```bash +dmg storage format +``` + +Upon successful format, DAOS Control Servers will start DAOS I/O engines that have been specified in the server config file. 
+ +For more information see the [Storage Formatting section in the Administration Guide](https://docs.daos.io/latest/admin/deployment/#storage-formatting) + +### Create a Pool + +Now that the system has been formatted a Pool can be created. + +Check free NVMe storage. + +```bash +dmg storage query usage +``` + +This will return something like + +``` +Hosts SCM-Total SCM-Free SCM-Used NVMe-Total NVMe-Free NVMe-Used +----- --------- -------- -------- ---------- --------- --------- +daos-server-0001 107 GB 107 GB 0 % 3.2 TB 3.2 TB 0 % +``` + +In the example output above there is one server with a total of 3.2TB of free space. + +With that information you know you can create a 3TB pool. + +Create the pool. + +```bash +dmg pool create -z 3TB -t 3 -u ${USER} --label=daos_pool +``` + +For more information about pools see + +- https://docs.daos.io/latest/overview/storage/#daos-pool +- https://docs.daos.io/latest/admin/pool_operations/ + + +### Create a Container + +Create a container in the pool + +```bash +daos container create --type=POSIX --properties=rf:0 --label=daos_cont daos_pool +``` + +For more information about containers see https://docs.daos.io/latest/overview/storage/#daos-container + +### Mount + +Mount the storage with `dfuse` + +```bash +MOUNT_DIR="/tmp/daos_test1" +mkdir -p "${MOUNT_DIR}" +dfuse --singlethread --pool=daos_pool --container=daos_cont --mountpoint="${MOUNT_DIR}" +df -h -t fuse.daos +``` + +You can now store files in the DAOS container mounted on `/tmp/daos_test1`. + +## Remove DAOS cluster deployment + +To destroy the DAOS cluster run + +```bash +terraform destroy +``` + +This will shut down all DAOS server and client instances. + +# Terraform Documentation for this Example + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.14.5 | +| [google](#requirement\_google) | >= 3.54.0 | + +## Providers + +No providers. 
+ +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [daos\_client](#module\_daos\_client) | ../../modules/daos_client | n/a | +| [daos\_server](#module\_daos\_server) | ../../modules/daos_server | n/a | + +## Resources + +No resources. + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [client\_instance\_base\_name](#input\_client\_instance\_base\_name) | MIG instance base names to use | `string` | `null` | no | +| [client\_labels](#input\_client\_labels) | Set of key/value label pairs to assign to daos-client instances | `any` | `{}` | no | +| [client\_machine\_type](#input\_client\_machine\_type) | GCP machine type. e.g. e2-medium | `string` | `null` | no | +| [client\_mig\_name](#input\_client\_mig\_name) | MIG name | `string` | `null` | no | +| [client\_number\_of\_instances](#input\_client\_number\_of\_instances) | Number of daos servers to bring up | `number` | `null` | no | +| [client\_os\_disk\_size\_gb](#input\_client\_os\_disk\_size\_gb) | OS disk size in GB | `number` | `20` | no | +| [client\_os\_disk\_type](#input\_client\_os\_disk\_type) | OS disk type e.g. 
pd-ssd, pd-standard | `string` | `"pd-ssd"` | no | +| [client\_os\_family](#input\_client\_os\_family) | OS GCP image family | `string` | `null` | no | +| [client\_os\_project](#input\_client\_os\_project) | OS GCP image project name | `string` | `null` | no | +| [client\_preemptible](#input\_client\_preemptible) | If preemptible client instances | `string` | `true` | no | +| [client\_template\_name](#input\_client\_template\_name) | MIG template name | `string` | `null` | no | +| [network](#input\_network) | GCP network to use | `string` | `"default"` | no | +| [project\_id](#input\_project\_id) | The GCP project to use | `string` | `null` | no | +| [region](#input\_region) | The GCP region to create and test resources in | `string` | `null` | no | +| [server\_daos\_crt\_timeout](#input\_server\_daos\_crt\_timeout) | crt\_timeout | `number` | `null` | no | +| [server\_daos\_disk\_count](#input\_server\_daos\_disk\_count) | Number of local ssd's to use | `number` | `null` | no | +| [server\_daos\_scm\_size](#input\_server\_daos\_scm\_size) | scm\_size | `number` | `null` | no | +| [server\_instance\_base\_name](#input\_server\_instance\_base\_name) | MIG instance base names to use | `string` | `null` | no | +| [server\_labels](#input\_server\_labels) | Set of key/value label pairs to assign to daos-server instances | `any` | `{}` | no | +| [server\_machine\_type](#input\_server\_machine\_type) | GCP machine type. e.g. e2-medium | `string` | `null` | no | +| [server\_mig\_name](#input\_server\_mig\_name) | MIG name | `string` | `null` | no | +| [server\_number\_of\_instances](#input\_server\_number\_of\_instances) | Number of daos servers to bring up | `number` | `null` | no | +| [server\_os\_disk\_size\_gb](#input\_server\_os\_disk\_size\_gb) | OS disk size in GB | `number` | `20` | no | +| [server\_os\_disk\_type](#input\_server\_os\_disk\_type) | OS disk type e.g. 
pd-ssd, pd-standard | `string` | `"pd-ssd"` | no | +| [server\_os\_family](#input\_server\_os\_family) | OS GCP image family | `string` | `null` | no | +| [server\_os\_project](#input\_server\_os\_project) | OS GCP image project name | `string` | `null` | no | +| [server\_preemptible](#input\_server\_preemptible) | If preemptible server instances | `string` | `true` | no | +| [server\_template\_name](#input\_server\_template\_name) | MIG template name | `string` | `null` | no | +| [subnetwork](#input\_subnetwork) | GCP sub-network to use | `string` | `"default"` | no | +| [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | `null` | no | +| [zone](#input\_zone) | The GCP zone to create and test resources in | `string` | `null` | no | + +## Outputs + +No outputs. + diff --git a/terraform/examples/daos_cluster/main.tf b/terraform/examples/daos_cluster/main.tf new file mode 100644 index 0000000..111d330 --- /dev/null +++ b/terraform/examples/daos_cluster/main.tf @@ -0,0 +1,49 @@ +provider "google" { + region = var.region +} + +module "daos_server" { + source = "../../modules/daos_server" + project_id = var.project_id + region = var.region + zone = var.zone + network = var.network + subnetwork_project = var.subnetwork_project + subnetwork = var.subnetwork + number_of_instances = var.server_number_of_instances + labels = var.server_labels + preemptible = var.server_preemptible + mig_name = var.server_mig_name + template_name = var.server_template_name + instance_base_name = var.server_instance_base_name + machine_type = var.server_machine_type + os_family = var.server_os_family + os_project = var.server_os_project + os_disk_type = var.server_os_disk_type + os_disk_size_gb = var.server_os_disk_size_gb + daos_disk_count = var.server_daos_disk_count + daos_crt_timeout = var.server_daos_crt_timeout + daos_scm_size = var.server_daos_scm_size +} + +module "daos_client" { + source = "../../modules/daos_client" + 
project_id = var.project_id + region = var.region + zone = var.zone + network = var.network + subnetwork_project = var.subnetwork_project + subnetwork = var.subnetwork + number_of_instances = var.client_number_of_instances + labels = var.client_labels + preemptible = var.client_preemptible + mig_name = var.client_mig_name + template_name = var.client_template_name + instance_base_name = var.client_instance_base_name + machine_type = var.client_machine_type + os_family = var.client_os_family + os_project = var.client_os_project + os_disk_type = var.client_os_disk_type + os_disk_size_gb = var.client_os_disk_size_gb + access_points = module.daos_server.access_points +} diff --git a/terraform/examples/daos_cluster/module.json b/terraform/examples/daos_cluster/module.json new file mode 100644 index 0000000..2cdbb9c --- /dev/null +++ b/terraform/examples/daos_cluster/module.json @@ -0,0 +1,250 @@ +{ + "header": "", + "footer": "", + "inputs": [ + { + "name": "client_instance_base_name", + "type": "string", + "description": "MIG instance base names to use", + "default": null, + "required": false + }, + { + "name": "client_labels", + "type": "any", + "description": "Set of key/value label pairs to assign to daos-client instances", + "default": {}, + "required": false + }, + { + "name": "client_machine_type", + "type": "string", + "description": "GCP machine type. e.g. e2-medium", + "default": null, + "required": false + }, + { + "name": "client_mig_name", + "type": "string", + "description": "MIG name ", + "default": null, + "required": false + }, + { + "name": "client_number_of_instances", + "type": "number", + "description": "Number of daos servers to bring up", + "default": null, + "required": false + }, + { + "name": "client_os_disk_size_gb", + "type": "number", + "description": "OS disk size in GB", + "default": 20, + "required": false + }, + { + "name": "client_os_disk_type", + "type": "string", + "description": "OS disk type e.g. 
pd-ssd, pd-standard", + "default": "pd-ssd", + "required": false + }, + { + "name": "client_os_family", + "type": "string", + "description": "OS GCP image family", + "default": null, + "required": false + }, + { + "name": "client_os_project", + "type": "string", + "description": "OS GCP image project name", + "default": null, + "required": false + }, + { + "name": "client_preemptible", + "type": "string", + "description": "If preemptible client instances", + "default": true, + "required": false + }, + { + "name": "client_template_name", + "type": "string", + "description": "MIG template name", + "default": null, + "required": false + }, + { + "name": "network", + "type": "string", + "description": "GCP network to use", + "default": "default", + "required": false + }, + { + "name": "project_id", + "type": "string", + "description": "The GCP project to use ", + "default": null, + "required": false + }, + { + "name": "region", + "type": "string", + "description": "The GCP region to create and test resources in", + "default": null, + "required": false + }, + { + "name": "server_daos_crt_timeout", + "type": "number", + "description": "crt_timeout", + "default": null, + "required": false + }, + { + "name": "server_daos_disk_count", + "type": "number", + "description": "Number of local ssd's to use", + "default": null, + "required": false + }, + { + "name": "server_daos_scm_size", + "type": "number", + "description": "scm_size", + "default": null, + "required": false + }, + { + "name": "server_instance_base_name", + "type": "string", + "description": "MIG instance base names to use", + "default": null, + "required": false + }, + { + "name": "server_labels", + "type": "any", + "description": "Set of key/value label pairs to assign to daos-server instances", + "default": {}, + "required": false + }, + { + "name": "server_machine_type", + "type": "string", + "description": "GCP machine type. e.g. 
e2-medium", + "default": null, + "required": false + }, + { + "name": "server_mig_name", + "type": "string", + "description": "MIG name ", + "default": null, + "required": false + }, + { + "name": "server_number_of_instances", + "type": "number", + "description": "Number of daos servers to bring up", + "default": null, + "required": false + }, + { + "name": "server_os_disk_size_gb", + "type": "number", + "description": "OS disk size in GB", + "default": 20, + "required": false + }, + { + "name": "server_os_disk_type", + "type": "string", + "description": "OS disk type e.g. pd-ssd, pd-standard", + "default": "pd-ssd", + "required": false + }, + { + "name": "server_os_family", + "type": "string", + "description": "OS GCP image family", + "default": null, + "required": false + }, + { + "name": "server_os_project", + "type": "string", + "description": "OS GCP image project name", + "default": null, + "required": false + }, + { + "name": "server_preemptible", + "type": "string", + "description": "If preemptible server instances", + "default": true, + "required": false + }, + { + "name": "server_template_name", + "type": "string", + "description": "MIG template name", + "default": null, + "required": false + }, + { + "name": "subnetwork", + "type": "string", + "description": "GCP sub-network to use", + "default": "default", + "required": false + }, + { + "name": "subnetwork_project", + "type": "string", + "description": "The GCP project where the subnetwork is defined", + "default": null, + "required": false + }, + { + "name": "zone", + "type": "string", + "description": "The GCP zone to create and test resources in", + "default": null, + "required": false + } + ], + "modules": [ + { + "name": "daos_client", + "source": "../../modules/daos_client", + "version": "", + "description": null + }, + { + "name": "daos_server", + "source": "../../modules/daos_server", + "version": "", + "description": null + } + ], + "outputs": [], + "providers": [], + "requirements": [ + { + 
"name": "terraform", + "version": "\u003e= 0.14.5" + }, + { + "name": "google", + "version": "\u003e= 3.54.0" + } + ], + "resources": [] +} diff --git a/terraform/examples/daos_cluster/terraform.tfvars.perf.example b/terraform/examples/daos_cluster/terraform.tfvars.perf.example new file mode 100644 index 0000000..ce1a83c --- /dev/null +++ b/terraform/examples/daos_cluster/terraform.tfvars.perf.example @@ -0,0 +1,39 @@ +project_id = "" +region = "us-central1" +zone = "us-central1-f" +network = "default" +subnetwork_project = "" +subnetwork = "default" + +# Server +server_number_of_instances = 4 +server_labels = { + example = "daos_cluster" +} +server_preemptible = "false" +server_mig_name = "daos-server" +server_template_name = "daos-server" +server_instance_base_name = "daos-server" +server_machine_type = "n2-standard-16" +server_os_family = "daos-server-centos-7" +server_os_project = "" +server_os_disk_type = "pd-ssd" +server_os_disk_size_gb = 20 +server_daos_disk_count = 4 +server_daos_crt_timeout = 300 +server_daos_scm_size = 45 + +# Client +client_number_of_instances = 16 +client_labels = { + example = "daos_cluster" +} +client_preemptible = "false" +client_mig_name = "daos-client" +client_template_name = "daos-client" +client_instance_base_name = "daos-client" +client_machine_type = "c2-standard-16" +client_os_family = "daos-client-hpc-centos-7" +client_os_project = "" +client_os_disk_type = "pd-ssd" +client_os_disk_size_gb = 20 diff --git a/terraform/examples/full_cluster_setup/terraform.tfvars.example b/terraform/examples/daos_cluster/terraform.tfvars.tco.example similarity index 54% rename from terraform/examples/full_cluster_setup/terraform.tfvars.example rename to terraform/examples/daos_cluster/terraform.tfvars.tco.example index 5471fc5..63de587 100644 --- a/terraform/examples/full_cluster_setup/terraform.tfvars.example +++ b/terraform/examples/daos_cluster/terraform.tfvars.tco.example @@ -1,34 +1,39 @@ -project_id = "" +project_id = "" +region = 
"us-central1" +zone = "us-central1-f" network = "default" +subnetwork_project = "" subnetwork = "default" -subnetwork_project = "" -region = "us-central1" -zone = "us-central1-a" -preemptible = "false" + # Server -server_number_of_instances = 1 -server_daos_disk_count = 8 -server_instance_base_name = "daos-server" -server_os_disk_size_gb = 20 -server_os_disk_type = "pd-ssd" -server_template_name = "daos-server" +server_number_of_instances = 16 +server_labels = { + example = "daos_cluster" +} +server_preemptible = "false" server_mig_name = "daos-server" -server_machine_type = "n2-highmem-32" -server_os_project = "" +server_template_name = "daos-server" +server_instance_base_name = "daos-server" +server_machine_type = "n2-custom-36-215040" server_os_family = "daos-server-centos-7" -server_labels = { - example = "full_cluster_setup" -} +server_os_project = "" +server_os_disk_type = "pd-ssd" +server_os_disk_size_gb = 20 +server_daos_disk_count = 16 +server_daos_crt_timeout = 300 +server_daos_scm_size = 180 + # Client -client_number_of_instances = 1 -client_instance_base_name = "daos-client" -client_os_disk_size_gb = 20 -client_os_disk_type = "pd-ssd" -client_template_name = "daos-client" +client_number_of_instances = 2 +client_labels = { + example = "daos_cluster" +} +client_preemptible = "false" client_mig_name = "daos-client" +client_template_name = "daos-client" +client_instance_base_name = "daos-client" client_machine_type = "c2-standard-16" -client_os_project = "" client_os_family = "daos-client-hpc-centos-7" -client_labels = { - example = "full_cluster_setup" -} \ No newline at end of file +client_os_project = "" +client_os_disk_type = "pd-ssd" +client_os_disk_size_gb = 20 diff --git a/terraform/examples/full_cluster_setup/variables.tf b/terraform/examples/daos_cluster/variables.tf similarity index 88% rename from terraform/examples/full_cluster_setup/variables.tf rename to terraform/examples/daos_cluster/variables.tf index 11489e5..61c5ec8 100644 --- 
a/terraform/examples/full_cluster_setup/variables.tf +++ b/terraform/examples/daos_cluster/variables.tf @@ -14,53 +14,76 @@ variable "zone" { default = null } +variable "network" { + description = "GCP network to use" + default = "default" + type = string +} + +variable "subnetwork_project" { + description = "The GCP project where the subnetwork is defined" + type = string + default = null +} + +variable "subnetwork" { + description = "GCP sub-network to use" + default = "default" + type = string +} + +variable "server_number_of_instances" { + description = "Number of daos servers to bring up" + default = null + type = number +} + variable "server_labels" { description = "Set of key/value label pairs to assign to daos-server instances" type = any default = {} } -variable "client_labels" { - description = "Set of key/value label pairs to assign to daos-client instances" - type = any - default = {} +variable "server_preemptible" { + description = "If preemptible server instances" + default = true + type = string } - -variable "server_os_family" { - description = "OS GCP image family" +variable "server_mig_name" { + description = "MIG name " default = null type = string } -variable "client_os_family" { - description = "OS GCP image family" +variable "server_template_name" { + description = "MIG template name" default = null type = string } -variable "server_os_project" { - description = "OS GCP image project name" +variable "server_instance_base_name" { + description = "MIG instance base names to use" default = null type = string } -variable "client_os_project" { - description = "OS GCP image project name" +variable "server_machine_type" { + description = "GCP machine type. e.g. 
e2-medium" default = null type = string } -variable "server_os_disk_size_gb" { - description = "OS disk size in GB" - default = 20 - type = number +variable "server_os_family" { + description = "OS GCP image family" + default = null + type = string } -variable "client_os_disk_size_gb" { - description = "OS disk size in GB" - default = 20 - type = number +variable "server_os_project" { + description = "OS GCP image project name" + default = null + type = string } variable "server_os_disk_type" { @@ -69,114 +92,92 @@ variable "server_os_disk_type" { type = string } -variable "client_os_disk_type" { - description = "OS disk type e.g. pd-ssd, pd-standard" - default = "pd-ssd" - type = string +variable "server_os_disk_size_gb" { + description = "OS disk size in GB" + default = 20 + type = number } -variable "server_template_name" { - description = "MIG template name" +variable "server_daos_disk_count" { + description = "Number of local ssd's to use" default = null - type = string + type = number } -variable "client_template_name" { - description = "MIG template name" +variable "server_daos_crt_timeout" { + description = "crt_timeout" default = null - type = string + type = number } -variable "server_mig_name" { - description = "MIG name " +variable "server_daos_scm_size" { + description = "scm_size" default = null - type = string + type = number } -variable "client_mig_name" { - description = "MIG name " +variable "client_number_of_instances" { + description = "Number of daos servers to bring up" default = null - type = string + type = number } -variable "server_machine_type" { - description = "GCP machine type. e.g. e2-medium" - default = null - type = string +variable "client_labels" { + description = "Set of key/value label pairs to assign to daos-client instances" + type = any + default = {} } -variable "client_machine_type" { - description = "GCP machine type. e.g. 
e2-medium" - default = null +variable "client_preemptible" { + description = "If preemptible client instances" + default = true type = string } -variable "network" { - description = "GCP network to use" +variable "client_mig_name" { + description = "MIG name " default = null type = string } -variable "subnetwork" { - description = "GCP sub-network to use" +variable "client_template_name" { + description = "MIG template name" default = null type = string } -variable "subnetwork_project" { - description = "The GCP project where the subnetwork is defined" - type = string - default = null -} - -variable "server_instance_base_name" { +variable "client_instance_base_name" { description = "MIG instance base names to use" default = null type = string } -variable "client_instance_base_name" { - description = "MIG instance base names to use" +variable "client_machine_type" { + description = "GCP machine type. e.g. e2-medium" default = null type = string } -variable "server_number_of_instances" { - description = "Number of daos servers to bring up" +variable "client_os_family" { + description = "OS GCP image family" default = null - type = number + type = string } -variable "client_number_of_instances" { - description = "Number of daos servers to bring up" +variable "client_os_project" { + description = "OS GCP image project name" default = null - type = number + type = string } -variable "daos_disk_type" { - description = "Daos disk type to use. For now only suported one is local-ssd" - default = "local-ssd" +variable "client_os_disk_type" { + description = "OS disk type e.g. 
pd-ssd, pd-standard" + default = "pd-ssd" type = string } -variable "server_daos_disk_count" { - description = "Number of local ssd's to use" - default = null +variable "client_os_disk_size_gb" { + description = "OS disk size in GB" + default = 20 type = number } - -variable "daos_service_account_scopes" { - description = "Scopes for the DAOS server service account" - default = [ - "userinfo-email", - "compute-ro", - "storage-ro" - ] - type = list(string) -} - -variable "preemptible" { - description = "If preemptible instances" - default = true - type = string -} diff --git a/terraform/examples/daos_cluster/versions.tf b/terraform/examples/daos_cluster/versions.tf new file mode 100644 index 0000000..7caeb30 --- /dev/null +++ b/terraform/examples/daos_cluster/versions.tf @@ -0,0 +1,6 @@ +terraform { + required_version = ">= 0.14.5" + required_providers { + google = ">= 3.54.0" + } +} diff --git a/terraform/examples/full_cluster_setup/README.md b/terraform/examples/full_cluster_setup/README.md deleted file mode 100644 index d9fe4b5..0000000 --- a/terraform/examples/full_cluster_setup/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# Full DAOS cluster setup - -Deploys full DAOS cluster with servers and clients. - -## Usage - -1. Create ```terraform.tfvars``` in this directory or the directory where you're running this example. -2. Copy the ```terraform.tfvars.example``` content into ```terraform.tfvars``` file and update the contents to match your environment. -3. 
Run below commands to deploy DAOS cluster: - -``` -terraform init -input=false -terraform plan -out=tfplan -input=false -terraform apply -input=false tfplan -``` - -To destroy DAOS environment, use below command: - -``` -terraform destroy -``` diff --git a/terraform/examples/full_cluster_setup/main.tf b/terraform/examples/full_cluster_setup/main.tf deleted file mode 100644 index 9721e48..0000000 --- a/terraform/examples/full_cluster_setup/main.tf +++ /dev/null @@ -1,50 +0,0 @@ -provider "google" { - region = var.region -} - -module "daos_server" { - source = "../../modules/daos_server" - project_id = var.project_id - network = var.network - subnetwork = var.subnetwork - subnetwork_project = var.subnetwork_project - region = var.region - zone = var.zone - labels = var.server_labels - - number_of_instances = var.server_number_of_instances - daos_disk_count = var.server_daos_disk_count - - instance_base_name = var.server_instance_base_name - os_disk_size_gb = var.server_os_disk_size_gb - os_disk_type = var.server_os_disk_type - template_name = var.server_template_name - mig_name = var.server_mig_name - machine_type = var.server_machine_type - os_project = var.server_os_project - os_family = var.server_os_family - preemptible = var.preemptible -} - -module "daos_client" { - source = "../../modules/daos_client" - project_id = var.project_id - network = var.network - subnetwork = var.subnetwork - subnetwork_project = var.subnetwork_project - region = var.region - zone = var.zone - labels = var.client_labels - - number_of_instances = var.client_number_of_instances - - instance_base_name = var.client_instance_base_name - os_disk_size_gb = var.client_os_disk_size_gb - os_disk_type = var.client_os_disk_type - template_name = var.client_template_name - mig_name = var.client_mig_name - machine_type = var.client_machine_type - os_project = var.client_os_project - os_family = var.client_os_family - preemptible = var.preemptible -} diff --git a/terraform/examples/io500/README.md 
b/terraform/examples/io500/README.md index 885f46e..df950cd 100644 --- a/terraform/examples/io500/README.md +++ b/terraform/examples/io500/README.md @@ -1,14 +1,14 @@ # IO500 Example -This example leverages another example `terraform/examples/full_cluster_setup` +This example leverages another example [daos_cluster](../daos_cluster) to provision a DAOS cluster and configure the clients so that an IO500 benchmark may be run. -The default configuration is a very small DAOS cluster consisiting of 1 DAOS +The default configuration is a very small DAOS cluster consisting of 1 DAOS server and 1 DAOS client. Different configurations can be used to deploy larger DAOS clusters. -See Configuration below. +See [Configuration](#configuration) below. ## Dependencies @@ -25,7 +25,7 @@ The example is intended to be run from a Linux system that has access to GCP. ### Deploying the DAOS Cluster -Run start.sh +Run [start.sh](start.sh) script: ```bash cd terraform/examples/io500 @@ -39,7 +39,7 @@ logging into the first daos-client instance and running the IO500 benchmark. ### Shutting Down the DAOS Cluster -To destroy the DAOS instances run the `stop.sh` script. +To destroy the DAOS instances run the [stop.sh](stop.sh) script. 
```bash cd terraform/examples/io500 @@ -106,4 +106,3 @@ These are called by other scripts and used for - building custom images with the IO500 - configuring instances - cleaning DAOS storage before each IO500 run - diff --git a/terraform/examples/io500/config/config.sh b/terraform/examples/io500/config/config.sh index eaae24e..9793e09 100644 --- a/terraform/examples/io500/config/config.sh +++ b/terraform/examples/io500/config/config.sh @@ -28,7 +28,7 @@ DAOS_CLIENT_INSTANCE_COUNT="1" DAOS_CLIENT_MACHINE_TYPE=c2-standard-16 # c2-standard-16 n2-standard-2 # Storage -DAOS_POOL_SIZE="$(( 375 * ${DAOS_SERVER_DISK_COUNT} * ${DAOS_SERVER_INSTANCE_COUNT} / 1000 ))TB" +DAOS_POOL_SIZE="$(awk -v disk_count=${DAOS_SERVER_DISK_COUNT} -v server_count=${DAOS_SERVER_INSTANCE_COUNT} 'BEGIN {pool_size = 375 * disk_count * server_count / 1000; print pool_size"TB"}')" DAOS_CONT_REPLICATION_FACTOR="rf:0" # IO500 @@ -58,6 +58,8 @@ export TF_VAR_preemptible="${PREEMPTIBLE_INSTANCES}" # Servers export TF_VAR_server_number_of_instances=${DAOS_SERVER_INSTANCE_COUNT} export TF_VAR_server_daos_disk_count=${DAOS_SERVER_DISK_COUNT} +export TF_VAR_server_daos_crt_timeout=${DAOS_SERVER_CRT_TIMEOUT} +export TF_VAR_server_daos_scm_size=${DAOS_SERVER_SCM_SIZE} export TF_VAR_server_instance_base_name="${DAOS_SERVER_BASE_NAME}" export TF_VAR_server_os_disk_size_gb=20 export TF_VAR_server_os_disk_type="pd-ssd" diff --git a/terraform/examples/io500/config/config_1c_1s_8d.sh b/terraform/examples/io500/config/config_1c_1s_8d.sh index 81da0b3..9c8cc38 100644 --- a/terraform/examples/io500/config/config_1c_1s_8d.sh +++ b/terraform/examples/io500/config/config_1c_1s_8d.sh @@ -28,7 +28,7 @@ DAOS_CLIENT_INSTANCE_COUNT="1" DAOS_CLIENT_MACHINE_TYPE=c2-standard-16 # Storage -DAOS_POOL_SIZE="$(( 375 * ${DAOS_SERVER_DISK_COUNT} * ${DAOS_SERVER_INSTANCE_COUNT} / 1000 ))TB" +DAOS_POOL_SIZE="$(awk -v disk_count=${DAOS_SERVER_DISK_COUNT} -v server_count=${DAOS_SERVER_INSTANCE_COUNT} 'BEGIN {pool_size = 375 * disk_count 
* server_count / 1000; print pool_size"TB"}')" DAOS_CONT_REPLICATION_FACTOR="rf:0" # IO500 @@ -59,6 +59,8 @@ export TF_VAR_preemptible="${PREEMPTIBLE_INSTANCES}" # Servers export TF_VAR_server_number_of_instances=${DAOS_SERVER_INSTANCE_COUNT} export TF_VAR_server_daos_disk_count=${DAOS_SERVER_DISK_COUNT} +export TF_VAR_server_daos_crt_timeout=${DAOS_SERVER_CRT_TIMEOUT} +export TF_VAR_server_daos_scm_size=${DAOS_SERVER_SCM_SIZE} export TF_VAR_server_instance_base_name="${DAOS_SERVER_BASE_NAME}" export TF_VAR_server_os_disk_size_gb=20 export TF_VAR_server_os_disk_type="pd-ssd" diff --git a/terraform/examples/io500/config/config_2c_2s_16d.sh b/terraform/examples/io500/config/config_2c_2s_16d.sh index 217e3e1..3723115 100644 --- a/terraform/examples/io500/config/config_2c_2s_16d.sh +++ b/terraform/examples/io500/config/config_2c_2s_16d.sh @@ -28,7 +28,7 @@ DAOS_CLIENT_INSTANCE_COUNT="2" DAOS_CLIENT_MACHINE_TYPE=c2-standard-16 # Storage -DAOS_POOL_SIZE="$(( 375 * ${DAOS_SERVER_DISK_COUNT} * ${DAOS_SERVER_INSTANCE_COUNT} / 1000 ))TB" +DAOS_POOL_SIZE="$(awk -v disk_count=${DAOS_SERVER_DISK_COUNT} -v server_count=${DAOS_SERVER_INSTANCE_COUNT} 'BEGIN {pool_size = 375 * disk_count * server_count / 1000; print pool_size"TB"}')" DAOS_CONT_REPLICATION_FACTOR="rf:0" # IO500 @@ -59,6 +59,8 @@ export TF_VAR_preemptible="${PREEMPTIBLE_INSTANCES}" # Servers export TF_VAR_server_number_of_instances=${DAOS_SERVER_INSTANCE_COUNT} export TF_VAR_server_daos_disk_count=${DAOS_SERVER_DISK_COUNT} +export TF_VAR_server_daos_crt_timeout=${DAOS_SERVER_CRT_TIMEOUT} +export TF_VAR_server_daos_scm_size=${DAOS_SERVER_SCM_SIZE} export TF_VAR_server_instance_base_name="${DAOS_SERVER_BASE_NAME}" export TF_VAR_server_os_disk_size_gb=20 export TF_VAR_server_os_disk_type="pd-ssd" diff --git a/terraform/examples/io500/start.sh b/terraform/examples/io500/start.sh index 45e8c69..8a0b1d7 100755 --- a/terraform/examples/io500/start.sh +++ b/terraform/examples/io500/start.sh @@ -443,6 +443,16 @@ 
copy_files_to_first_client() { } +propagate_ssh_keys_to_all_nodes () { + # Clear ~/.ssh/known_hosts so we don't run into any issues + ssh -q -F "${SSH_CONFIG_FILE}" "${FIRST_CLIENT_IP}" \ + "clush --hostfile=hosts_all --dsh 'rm -f ~/.ssh/known_hosts'" + + # Copy ~/.ssh directory to all instances + ssh -q -F "${SSH_CONFIG_FILE}" "${FIRST_CLIENT_IP}" \ + "clush --hostfile=hosts_all --dsh --copy ~/.ssh --dest ~/" +} + configure_daos() { log "Configure DAOS instances" ssh -q -F "${SSH_CONFIG_FILE}" ${FIRST_CLIENT_IP} "~/configure_daos.sh" @@ -485,7 +495,7 @@ main() { configure_first_client_nat_ip configure_ssh copy_files_to_first_client - configure_daos + propagate_ssh_keys_to_all_nodes show_instances show_run_steps } diff --git a/terraform/examples/only_daos_client/README.md b/terraform/examples/only_daos_client/README.md new file mode 100644 index 0000000..e28763b --- /dev/null +++ b/terraform/examples/only_daos_client/README.md @@ -0,0 +1,140 @@ +# DAOS Client Example + +This example Terraform configuration demonstrates how to use the [DAOS Client Terraform Module](../../modules/daos_client) in your own Terraform code to deploy a group of DAOS clients. + +## Setup + +The following steps must be performed prior to deploying DAOS clients. + +1. Set defaults for Google Cloud CLI (```gcloud```) +2. Create a Packer image in your GCP project +3. Build DAOS client images + +If you have not completed these steps yet, click the button below to open an interactive walkthrough in [Cloud Shell](https://cloud.google.com/shell). After completing the walkthrough your GCP project will contain the images required to run this Terraform example. 
+ +[![DAOS on GCP Setup](http://gstatic.com/cloudssh/images/open-btn.png)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/daos-stack/google-cloud-daos&cloudshell_git_branch=main&shellonly=true&tutorial=docs/tutorials/daosgcp_setup.md) + +## Terraform Files + +List of Terraform files in this example + +| Filename | Description | +| ----------------------------- | ------------------------------------------------------------------------------- | +| main.tf | Main Terraform configuration file containing resource definitions | +| variables.tf | Variable definitions for variables used in main.tf | +| versions.tf | Provider definitions | +| terraform.tfvars.example | Pre-Configured set of variables | + +## Create a terraform.tfvars file + +Before you run `terraform apply` with this example you need to create a `terraform.tfvars` file in the `terraform/examples/only_daos_client` directory. + +The `terraform.tfvars` file will contain the variable values that are used by the `main.tf` configuration file. + +Copy the `terraform.tfvars.example` to `terraform.tfvars` and then modify it to contain your GCP project info. + + +```bash +cp terraform.tfvars.example terraform.tfvars +GCP_PROJECT=$(gcloud config list --format='value(core.project)') +sed -i "s/<project_id>/${GCP_PROJECT}/g" terraform.tfvars +``` + +### Update the `client_access_points` variable + +This example assumes there is an existing group of DAOS server instances that the clients will connect to. + +The `client_access_points` variable in the `terraform.tfvars` file should contain a comma delimited list of DAOS server names or IP addresses. + +For example, if the existing DAOS server names are + +- daos-server-0001 +- daos-server-0002 +- daos-server-0003 + +the `client_access_points` variable should be set to + +``` +client_access_points = ["daos-server-0001","daos-server-0002","daos-server-0003"] +``` + +The `client_access_points` variable does not need to contain every server in the DAOS cluster. 
+ +It only needs enough entries so that if a server is not available there are others to connect to. + +## Deploy DAOS Client Instances + +> **Billing Notification!** +> +> Running this example will incur charges in your project. +> +> To avoid surprises, be sure to monitor your costs associated with running this example. +> +> Don't forget to shut down the DAOS clients with `terraform destroy` when you are finished. + +To deploy the DAOS client instances + +```bash +cd terraform/examples/only_daos_client +terraform init -input=false +terraform plan -out=tfplan -input=false +terraform apply -input=false tfplan +``` + +## Remove DAOS client deployment + +To destroy the DAOS client instances run + +```bash +terraform destroy +``` + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.14.5 | +| [google](#requirement\_google) | >= 3.54.0 | + +## Providers + +No providers. + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [daos\_client](#module\_daos\_client) | ../../modules/daos_client | n/a | + +## Resources + +No resources. + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [client\_access\_points](#input\_client\_access\_points) | List of servers to add to client .yml files | `list(string)` | `null` | no | +| [client\_instance\_base\_name](#input\_client\_instance\_base\_name) | MIG instance base names to use | `string` | `null` | no | +| [client\_labels](#input\_client\_labels) | Set of key/value label pairs to assign to daos-client instances | `any` | `{}` | no | +| [client\_machine\_type](#input\_client\_machine\_type) | GCP machine type. e.g. 
e2-medium | `string` | `null` | no | +| [client\_mig\_name](#input\_client\_mig\_name) | MIG name | `string` | `null` | no | +| [client\_number\_of\_instances](#input\_client\_number\_of\_instances) | Number of daos servers to bring up | `number` | `null` | no | +| [client\_os\_disk\_size\_gb](#input\_client\_os\_disk\_size\_gb) | OS disk size in GB | `number` | `20` | no | +| [client\_os\_disk\_type](#input\_client\_os\_disk\_type) | OS disk type e.g. pd-ssd, pd-standard | `string` | `"pd-ssd"` | no | +| [client\_os\_family](#input\_client\_os\_family) | OS GCP image family | `string` | `null` | no | +| [client\_os\_project](#input\_client\_os\_project) | OS GCP image project name | `string` | `null` | no | +| [client\_preemptible](#input\_client\_preemptible) | If preemptible client instances | `string` | `true` | no | +| [client\_template\_name](#input\_client\_template\_name) | MIG template name | `string` | `null` | no | +| [network](#input\_network) | GCP network to use | `string` | `"default"` | no | +| [project\_id](#input\_project\_id) | The GCP project to use | `string` | `null` | no | +| [region](#input\_region) | The GCP region to create and test resources in | `string` | `null` | no | +| [subnetwork](#input\_subnetwork) | GCP sub-network to use | `string` | `"default"` | no | +| [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | `null` | no | +| [zone](#input\_zone) | The GCP zone to create and test resources in | `string` | `null` | no | + +## Outputs + +No outputs. 
+ diff --git a/terraform/examples/only_daos_client/main.tf b/terraform/examples/only_daos_client/main.tf new file mode 100644 index 0000000..0a7c5d8 --- /dev/null +++ b/terraform/examples/only_daos_client/main.tf @@ -0,0 +1,25 @@ +provider "google" { + region = var.region +} + +module "daos_client" { + source = "../../modules/daos_client" + project_id = var.project_id + region = var.region + zone = var.zone + network = var.network + subnetwork_project = var.subnetwork_project + subnetwork = var.subnetwork + number_of_instances = var.client_number_of_instances + labels = var.client_labels + preemptible = var.client_preemptible + mig_name = var.client_mig_name + template_name = var.client_template_name + instance_base_name = var.client_instance_base_name + machine_type = var.client_machine_type + os_family = var.client_os_family + os_project = var.client_os_project + os_disk_type = var.client_os_disk_type + os_disk_size_gb = var.client_os_disk_size_gb + access_points = var.client_access_points +} diff --git a/terraform/examples/only_daos_client/module.json b/terraform/examples/only_daos_client/module.json new file mode 100644 index 0000000..dbbc8e0 --- /dev/null +++ b/terraform/examples/only_daos_client/module.json @@ -0,0 +1,153 @@ +{ + "header": "", + "footer": "", + "inputs": [ + { + "name": "client_access_points", + "type": "list(string)", + "description": "List of servers to add to client .yml files", + "default": null, + "required": false + }, + { + "name": "client_instance_base_name", + "type": "string", + "description": "MIG instance base names to use", + "default": null, + "required": false + }, + { + "name": "client_labels", + "type": "any", + "description": "Set of key/value label pairs to assign to daos-client instances", + "default": {}, + "required": false + }, + { + "name": "client_machine_type", + "type": "string", + "description": "GCP machine type. e.g. 
e2-medium", + "default": null, + "required": false + }, + { + "name": "client_mig_name", + "type": "string", + "description": "MIG name ", + "default": null, + "required": false + }, + { + "name": "client_number_of_instances", + "type": "number", + "description": "Number of daos servers to bring up", + "default": null, + "required": false + }, + { + "name": "client_os_disk_size_gb", + "type": "number", + "description": "OS disk size in GB", + "default": 20, + "required": false + }, + { + "name": "client_os_disk_type", + "type": "string", + "description": "OS disk type e.g. pd-ssd, pd-standard", + "default": "pd-ssd", + "required": false + }, + { + "name": "client_os_family", + "type": "string", + "description": "OS GCP image family", + "default": null, + "required": false + }, + { + "name": "client_os_project", + "type": "string", + "description": "OS GCP image project name", + "default": null, + "required": false + }, + { + "name": "client_preemptible", + "type": "string", + "description": "If preemptible client instances", + "default": true, + "required": false + }, + { + "name": "client_template_name", + "type": "string", + "description": "MIG template name", + "default": null, + "required": false + }, + { + "name": "network", + "type": "string", + "description": "GCP network to use", + "default": "default", + "required": false + }, + { + "name": "project_id", + "type": "string", + "description": "The GCP project to use ", + "default": null, + "required": false + }, + { + "name": "region", + "type": "string", + "description": "The GCP region to create and test resources in", + "default": null, + "required": false + }, + { + "name": "subnetwork", + "type": "string", + "description": "GCP sub-network to use", + "default": "default", + "required": false + }, + { + "name": "subnetwork_project", + "type": "string", + "description": "The GCP project where the subnetwork is defined", + "default": null, + "required": false + }, + { + "name": "zone", + "type": "string", 
+ "description": "The GCP zone to create and test resources in", + "default": null, + "required": false + } + ], + "modules": [ + { + "name": "daos_client", + "source": "../../modules/daos_client", + "version": "", + "description": null + } + ], + "outputs": [], + "providers": [], + "requirements": [ + { + "name": "terraform", + "version": "\u003e= 0.14.5" + }, + { + "name": "google", + "version": "\u003e= 3.54.0" + } + ], + "resources": [] +} diff --git a/terraform/examples/only_daos_client/terraform.tfvars.example b/terraform/examples/only_daos_client/terraform.tfvars.example new file mode 100644 index 0000000..0e18d07 --- /dev/null +++ b/terraform/examples/only_daos_client/terraform.tfvars.example @@ -0,0 +1,19 @@ +project_id = "" +region = "us-central1" +zone = "us-central1-f" +network = "default" +subnetwork_project = "" +subnetwork = "default" +client_number_of_instances = 16 +client_labels = { + example = "only_daos_client" +} +client_preemptible = "false" +client_mig_name = "daos-client" +client_template_name = "daos-client" +client_instance_base_name = "daos-client" +client_machine_type = "c2-standard-16" +client_os_family = "daos-client-hpc-centos-7" +client_os_project = "" +client_os_disk_type = "pd-ssd" +client_os_disk_size_gb = 20 diff --git a/terraform/examples/daos_client_mig/variables.tf b/terraform/examples/only_daos_client/variables.tf similarity index 51% rename from terraform/examples/daos_client_mig/variables.tf rename to terraform/examples/only_daos_client/variables.tf index fe91179..d1a04c1 100644 --- a/terraform/examples/daos_client_mig/variables.tf +++ b/terraform/examples/only_daos_client/variables.tf @@ -1,101 +1,107 @@ -/** - * Copyright 2021 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - variable "project_id" { description = "The GCP project to use " type = string + default = null } + variable "region" { description = "The GCP region to create and test resources in" type = string + default = null } + variable "zone" { description = "The GCP zone to create and test resources in" type = string + default = null } -variable "labels" { - description = "Set of key/value label pairs to assign to daos-client instances" - type = any - default = {} +variable "network" { + description = "GCP network to use" + default = "default" + type = string } -variable "os_family" { - description = "OS GCP image family" +variable "subnetwork_project" { + description = "The GCP project where the subnetwork is defined" + type = string default = null } -variable "os_project" { - description = "OS GCP image project name" - default = null +variable "subnetwork" { + description = "GCP sub-network to use" + default = "default" + type = string } -variable "os_disk_size_gb" { - description = "OS disk size in GB" - default = 20 +variable "client_number_of_instances" { + description = "Number of daos servers to bring up" + default = null type = number } -variable "os_disk_type" { - description = "OS disk type ie. 
pd-ssd, pd-standard" - default = "pd-ssd" - type = string +variable "client_labels" { + description = "Set of key/value label pairs to assign to daos-client instances" + type = any + default = {} } -variable "template_name" { - description = "MIG template name" - default = "daos-client" +variable "client_preemptible" { + description = "If preemptible client instances" + default = true type = string } -variable "mig_name" { +variable "client_mig_name" { description = "MIG name " - default = "daos-client" + default = null type = string } -variable "machine_type" { - description = "GCP machine type. ie. e2-medium" - default = "n2-highmem-16" +variable "client_template_name" { + description = "MIG template name" + default = null type = string } -variable "network" { - description = "GCP network to use" +variable "client_instance_base_name" { + description = "MIG instance base names to use" + default = null type = string } -variable "subnetwork" { - description = "GCP sub-network to use" +variable "client_machine_type" { + description = "GCP machine type. e.g. e2-medium" + default = null type = string } -variable "subnetwork_project" { - description = "The GCP project where the subnetwork is defined" +variable "client_os_family" { + description = "OS GCP image family" + default = null type = string } -variable "instance_base_name" { - description = "MIG instance base names to use" - default = "daos-client" +variable "client_os_project" { + description = "OS GCP image project name" + default = null + type = string +} + +variable "client_os_disk_type" { + description = "OS disk type e.g. 
pd-ssd, pd-standard" + default = "pd-ssd" type = string } -variable "number_of_instances" { - description = "Number of daos clients to bring up" - default = 2 +variable "client_os_disk_size_gb" { + description = "OS disk size in GB" + default = 20 type = number } + +variable "client_access_points" { + description = "List of servers to add to client .yml files" + default = null + type = list(string) +} diff --git a/terraform/examples/only_daos_client/versions.tf b/terraform/examples/only_daos_client/versions.tf new file mode 100644 index 0000000..7caeb30 --- /dev/null +++ b/terraform/examples/only_daos_client/versions.tf @@ -0,0 +1,6 @@ +terraform { + required_version = ">= 0.14.5" + required_providers { + google = ">= 3.54.0" + } +} diff --git a/terraform/examples/only_daos_server/README.md b/terraform/examples/only_daos_server/README.md new file mode 100644 index 0000000..98b7883 --- /dev/null +++ b/terraform/examples/only_daos_server/README.md @@ -0,0 +1,152 @@ +# DAOS Server Example + +This example Terraform configuration demonstrates how to use the [DAOS Server Terraform Module](../../modules/daos_server) in your own Terraform code to deploy a group of DAOS servers. + +## Setup + +The following steps must be performed prior to deploying DAOS servers. + +1. Set defaults for Google Cloud CLI (```gcloud```) +2. Create a Packer image in your GCP project +3. Build DAOS server images + +If you have not completed these steps yet, click the button below to open an interactive walkthrough in [Cloud Shell](https://cloud.google.com/shell). After completing the walkthrough your GCP project will contain the images required to run this Terraform example. 
+ +[![DAOS on GCP Setup](http://gstatic.com/cloudssh/images/open-btn.png)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/daos-stack/google-cloud-daos&cloudshell_git_branch=main&shellonly=true&tutorial=docs/tutorials/daosgcp_setup.md) + +## Terraform Files + +List of Terraform files in this example + +| Filename | Description | +| ----------------------------- | ------------------------------------------------------------------------------- | +| main.tf | Main Terraform configuration file containing resource definitions | +| variables.tf | Variable definitions for variables used in main.tf | +| versions.tf | Provider definitions | +| terraform.tfvars.perf.example | Pre-Configured set of variables focused on performance | +| terraform.tfvars.tco.example | Pre-Configured set of variables focused on lower total cost of ownership | + +## Create a terraform.tfvars file + +Before you run `terraform apply` with this example you need to create a `terraform.tfvars` file in the `terraform/examples/only_daos_server` directory. + +The `terraform.tfvars` file will contain the variable values that are used by the `main.tf` configuration file. + +To ensure a successful deployment of a DAOS cluster there are two `terraform.tfvars.*.example` files that you can choose from. + +You will need to decide which of these files you will copy to `terraform.tfvars` and then modify to contain your GCP project info. + + +### The terraform.tfvars.tco.example file + +The `terraform.tfvars.tco.example` configuration will result in a smaller cluster deployment that is more affordable to operate due to fewer instances. + +This is a good choice if you just want to deploy a DAOS cluster to learn how to use DAOS. 
+ +To use the `terraform.tfvars.tco.example` file run + +```bash +cp terraform.tfvars.tco.example terraform.tfvars +``` + +### The terraform.tfvars.perf.example file + +The `terraform.tfvars.perf.example` configuration will result in a larger cluster deployment that costs more to operate but will have better performance due to more instances being deployed. + +To use the `terraform.tfvars.perf.example` file run + +```bash +cp terraform.tfvars.perf.example terraform.tfvars +``` + +### Update `terraform.tfvars` with your project id + +Now that you have a `terraform.tfvars` file you need to replace the `<project_id>` placeholder in the file with your project id. + +To update the project id in `terraform.tfvars` run + +```bash +PROJECT_ID=$(gcloud config list --format 'value(core.project)') +sed -i "s/<project_id>/${PROJECT_ID}/g" terraform.tfvars +``` + +## Deploy DAOS Server Instances + +> **Billing Notification!** +> +> Running this example will incur charges in your project. +> +> To avoid surprises, be sure to monitor your costs associated with running this example. +> +> Don't forget to shut down the DAOS servers with `terraform destroy` when you are finished. + +To deploy the DAOS server instances + +```bash +cd terraform/examples/only_daos_server +terraform init -input=false +terraform plan -out=tfplan -input=false +terraform apply -input=false tfplan +``` + +## Remove DAOS server deployment + +To destroy the DAOS server instances run + +```bash +terraform destroy +``` + +# Terraform Documentation for this Example + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.14.5 | +| [google](#requirement\_google) | >= 3.54.0 | + +## Providers + +No providers. + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [daos\_server](#module\_daos\_server) | ../../modules/daos_server | n/a | + +## Resources + +No resources. 
+ +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [network](#input\_network) | GCP network to use | `string` | `"default"` | no | +| [project\_id](#input\_project\_id) | The GCP project to use | `string` | `null` | no | +| [region](#input\_region) | The GCP region to create and test resources in | `string` | `null` | no | +| [server\_daos\_crt\_timeout](#input\_server\_daos\_crt\_timeout) | crt\_timeout | `number` | `null` | no | +| [server\_daos\_disk\_count](#input\_server\_daos\_disk\_count) | Number of local ssd's to use | `number` | `null` | no | +| [server\_daos\_scm\_size](#input\_server\_daos\_scm\_size) | scm\_size | `number` | `null` | no | +| [server\_instance\_base\_name](#input\_server\_instance\_base\_name) | MIG instance base names to use | `string` | `null` | no | +| [server\_labels](#input\_server\_labels) | Set of key/value label pairs to assign to daos-server instances | `any` | `{}` | no | +| [server\_machine\_type](#input\_server\_machine\_type) | GCP machine type. e.g. e2-medium | `string` | `null` | no | +| [server\_mig\_name](#input\_server\_mig\_name) | MIG name | `string` | `null` | no | +| [server\_number\_of\_instances](#input\_server\_number\_of\_instances) | Number of daos servers to bring up | `number` | `null` | no | +| [server\_os\_disk\_size\_gb](#input\_server\_os\_disk\_size\_gb) | OS disk size in GB | `number` | `20` | no | +| [server\_os\_disk\_type](#input\_server\_os\_disk\_type) | OS disk type e.g. 
pd-ssd, pd-standard | `string` | `"pd-ssd"` | no | +| [server\_os\_family](#input\_server\_os\_family) | OS GCP image family | `string` | `null` | no | +| [server\_os\_project](#input\_server\_os\_project) | OS GCP image project name | `string` | `null` | no | +| [server\_preemptible](#input\_server\_preemptible) | If preemptible server instances | `string` | `true` | no | +| [server\_template\_name](#input\_server\_template\_name) | MIG template name | `string` | `null` | no | +| [subnetwork](#input\_subnetwork) | GCP sub-network to use | `string` | `"default"` | no | +| [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | `null` | no | +| [zone](#input\_zone) | The GCP zone to create and test resources in | `string` | `null` | no | + +## Outputs + +No outputs. + diff --git a/terraform/examples/only_daos_server/main.tf b/terraform/examples/only_daos_server/main.tf new file mode 100644 index 0000000..36e981b --- /dev/null +++ b/terraform/examples/only_daos_server/main.tf @@ -0,0 +1,27 @@ +provider "google" { + region = var.region +} + +module "daos_server" { + source = "../../modules/daos_server" + project_id = var.project_id + region = var.region + zone = var.zone + network = var.network + subnetwork_project = var.subnetwork_project + subnetwork = var.subnetwork + number_of_instances = var.server_number_of_instances + labels = var.server_labels + preemptible = var.server_preemptible + mig_name = var.server_mig_name + template_name = var.server_template_name + instance_base_name = var.server_instance_base_name + machine_type = var.server_machine_type + os_family = var.server_os_family + os_project = var.server_os_project + os_disk_type = var.server_os_disk_type + os_disk_size_gb = var.server_os_disk_size_gb + daos_disk_count = var.server_daos_disk_count + daos_crt_timeout = var.server_daos_crt_timeout + daos_scm_size = var.server_daos_scm_size +} diff --git 
a/terraform/examples/only_daos_server/module.json b/terraform/examples/only_daos_server/module.json new file mode 100644 index 0000000..eab95ed --- /dev/null +++ b/terraform/examples/only_daos_server/module.json @@ -0,0 +1,167 @@ +{ + "header": "", + "footer": "", + "inputs": [ + { + "name": "network", + "type": "string", + "description": "GCP network to use", + "default": "default", + "required": false + }, + { + "name": "project_id", + "type": "string", + "description": "The GCP project to use ", + "default": null, + "required": false + }, + { + "name": "region", + "type": "string", + "description": "The GCP region to create and test resources in", + "default": null, + "required": false + }, + { + "name": "server_daos_crt_timeout", + "type": "number", + "description": "crt_timeout", + "default": null, + "required": false + }, + { + "name": "server_daos_disk_count", + "type": "number", + "description": "Number of local ssd's to use", + "default": null, + "required": false + }, + { + "name": "server_daos_scm_size", + "type": "number", + "description": "scm_size", + "default": null, + "required": false + }, + { + "name": "server_instance_base_name", + "type": "string", + "description": "MIG instance base names to use", + "default": null, + "required": false + }, + { + "name": "server_labels", + "type": "any", + "description": "Set of key/value label pairs to assign to daos-server instances", + "default": {}, + "required": false + }, + { + "name": "server_machine_type", + "type": "string", + "description": "GCP machine type. e.g. 
e2-medium", + "default": null, + "required": false + }, + { + "name": "server_mig_name", + "type": "string", + "description": "MIG name ", + "default": null, + "required": false + }, + { + "name": "server_number_of_instances", + "type": "number", + "description": "Number of daos servers to bring up", + "default": null, + "required": false + }, + { + "name": "server_os_disk_size_gb", + "type": "number", + "description": "OS disk size in GB", + "default": 20, + "required": false + }, + { + "name": "server_os_disk_type", + "type": "string", + "description": "OS disk type e.g. pd-ssd, pd-standard", + "default": "pd-ssd", + "required": false + }, + { + "name": "server_os_family", + "type": "string", + "description": "OS GCP image family", + "default": null, + "required": false + }, + { + "name": "server_os_project", + "type": "string", + "description": "OS GCP image project name", + "default": null, + "required": false + }, + { + "name": "server_preemptible", + "type": "string", + "description": "If preemptible server instances", + "default": true, + "required": false + }, + { + "name": "server_template_name", + "type": "string", + "description": "MIG template name", + "default": null, + "required": false + }, + { + "name": "subnetwork", + "type": "string", + "description": "GCP sub-network to use", + "default": "default", + "required": false + }, + { + "name": "subnetwork_project", + "type": "string", + "description": "The GCP project where the subnetwork is defined", + "default": null, + "required": false + }, + { + "name": "zone", + "type": "string", + "description": "The GCP zone to create and test resources in", + "default": null, + "required": false + } + ], + "modules": [ + { + "name": "daos_server", + "source": "../../modules/daos_server", + "version": "", + "description": null + } + ], + "outputs": [], + "providers": [], + "requirements": [ + { + "name": "terraform", + "version": "\u003e= 0.14.5" + }, + { + "name": "google", + "version": "\u003e= 3.54.0" + } + 
], + "resources": [] +} diff --git a/terraform/examples/only_daos_server/terraform.tfvars.perf.example b/terraform/examples/only_daos_server/terraform.tfvars.perf.example new file mode 100644 index 0000000..bee739b --- /dev/null +++ b/terraform/examples/only_daos_server/terraform.tfvars.perf.example @@ -0,0 +1,22 @@ +project_id = "" +region = "us-central1" +zone = "us-central1-f" +network = "default" +subnetwork_project = "" +subnetwork = "default" +server_number_of_instances = 4 +server_labels = { + example = "only_daos_server" +} +server_preemptible = "false" +server_mig_name = "daos-server" +server_template_name = "daos-server" +server_instance_base_name = "daos-server" +server_machine_type = "n2-standard-16" +server_os_family = "daos-server-centos-7" +server_os_project = "" +server_os_disk_type = "pd-ssd" +server_os_disk_size_gb = 20 +server_daos_disk_count = 4 +server_daos_crt_timeout = 300 +server_daos_scm_size = 45 diff --git a/terraform/examples/only_daos_server/terraform.tfvars.tco.example b/terraform/examples/only_daos_server/terraform.tfvars.tco.example new file mode 100644 index 0000000..b31f457 --- /dev/null +++ b/terraform/examples/only_daos_server/terraform.tfvars.tco.example @@ -0,0 +1,22 @@ +project_id = "" +region = "us-central1" +zone = "us-central1-f" +network = "default" +subnetwork_project = "" +subnetwork = "default" +server_number_of_instances = 16 +server_labels = { + example = "only_daos_server" +} +server_preemptible = "false" +server_mig_name = "daos-server" +server_template_name = "daos-server" +server_instance_base_name = "daos-server" +server_machine_type = "n2-custom-36-215040" +server_os_family = "daos-server-centos-7" +server_os_project = "" +server_os_disk_type = "pd-ssd" +server_os_disk_size_gb = 20 +server_daos_disk_count = 16 +server_daos_crt_timeout = 300 +server_daos_scm_size = 180 diff --git a/terraform/examples/simple_daos_server_example/variables.tf b/terraform/examples/only_daos_server/variables.tf similarity index 54% 
rename from terraform/examples/simple_daos_server_example/variables.tf rename to terraform/examples/only_daos_server/variables.tf index 2d5f0cf..9f21e7b 100644 --- a/terraform/examples/simple_daos_server_example/variables.tf +++ b/terraform/examples/only_daos_server/variables.tf @@ -1,107 +1,117 @@ -/** - * Copyright 2021 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - variable "project_id" { description = "The GCP project to use " type = string + default = null } variable "region" { description = "The GCP region to create and test resources in" type = string + default = null } variable "zone" { description = "The GCP zone to create and test resources in" type = string + default = null } -variable "labels" { - description = "Set of key/value label pairs to assign to daos-server instances" - type = any - default = {} +variable "network" { + description = "GCP network to use" + default = "default" + type = string } -variable "os_family" { - description = "OS GCP image family" +variable "subnetwork_project" { + description = "The GCP project where the subnetwork is defined" + type = string default = null } -variable "os_project" { - description = "OS GCP image project name" - default = null +variable "subnetwork" { + description = "GCP sub-network to use" + default = "default" + type = string } -variable "os_disk_size_gb" { - description = "OS disk size in GB" - default = 20 +variable "server_number_of_instances" { + description = "Number of 
daos servers to bring up" + default = null type = number } -variable "os_disk_type" { - description = "OS disk type ie. pd-ssd, pd-standard" - default = "pd-ssd" - type = string +variable "server_labels" { + description = "Set of key/value label pairs to assign to daos-server instances" + type = any + default = {} } -variable "template_name" { - description = "MIG template name" - default = "daos-server" +variable "server_preemptible" { + description = "If preemptible server instances" + default = true type = string } -variable "mig_name" { +variable "server_mig_name" { description = "MIG name " - default = "daos-server" + default = null + type = string +} + +variable "server_template_name" { + description = "MIG template name" + default = null type = string } -variable "machine_type" { - description = "GCP machine type. ie. e2-medium" - default = "n2-custom-20-131072" +variable "server_instance_base_name" { + description = "MIG instance base names to use" + default = null type = string } -variable "network" { - description = "GCP network to use" +variable "server_machine_type" { + description = "GCP machine type. e.g. e2-medium" + default = null type = string } -variable "subnetwork" { - description = "GCP sub-network to use" +variable "server_os_family" { + description = "OS GCP image family" + default = null type = string } -variable "subnetwork_project" { - description = "The GCP project where the subnetwork is defined" +variable "server_os_project" { + description = "OS GCP image project name" + default = null type = string } -variable "instance_base_name" { - description = "MIG instance base names to use" - default = "daos-server" +variable "server_os_disk_type" { + description = "OS disk type e.g. 
pd-ssd, pd-standard" + default = "pd-ssd" type = string } -variable "number_of_instances" { - description = "Number of daos servers to bring up" - default = 4 +variable "server_os_disk_size_gb" { + description = "OS disk size in GB" + default = 20 type = number } -variable "daos_disk_count" { +variable "server_daos_disk_count" { description = "Number of local ssd's to use" - default = 16 + default = null + type = number +} + +variable "server_daos_crt_timeout" { + description = "crt_timeout" + default = null + type = number +} + +variable "server_daos_scm_size" { + description = "scm_size" + default = null type = number } diff --git a/terraform/examples/only_daos_server/versions.tf b/terraform/examples/only_daos_server/versions.tf new file mode 100644 index 0000000..7caeb30 --- /dev/null +++ b/terraform/examples/only_daos_server/versions.tf @@ -0,0 +1,6 @@ +terraform { + required_version = ">= 0.14.5" + required_providers { + google = ">= 3.54.0" + } +} diff --git a/terraform/examples/simple_daos_server_example/README.md b/terraform/examples/simple_daos_server_example/README.md deleted file mode 100644 index e0419e5..0000000 --- a/terraform/examples/simple_daos_server_example/README.md +++ /dev/null @@ -1,52 +0,0 @@ -# DAOS Server Simple Example - -Creates a managed instance group running ```number_of_instances``` DAOS servers. - -## Requirements - -Please make sure you go through the [Requirements section](../../modules/daos_server/README.md) of the DAOS server module. - -| Name | Version | -|------|---------| -| [google](#requirement\_google) | ~> 3.54 | - -## Setup - -1. Create ```terraform.tfvars``` in this directory or the directory where you're running this example. -2. Copy the ```terraform.tfvars.example``` content into ```terraform.tfvars``` file and update the contents to match your environment. 
- -## Modules - -| Name | Source | Version | -|------|--------|---------| -| [daos\_server](#module\_daos\_server) | ../../modules/daos_server | n/a | - -## Resources - -No resources. - -## Inputs - -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:--------:| -| [daos\_disk\_count](#input\_daos\_disk\_count) | Number of local ssd's to use | `number` | `16` | no | -| [instance\_base\_name](#input\_instance\_base\_name) | MIG instance base names to use | `string` | `"daos-server"` | no | -| [labels](#input\_labels) | Set of key/value label pairs to assign to daos-server instances | `any` | n/a | no | -| [machine\_type](#input\_machine\_type) | GCP machine type. ie. e2-medium | `string` | `"n2-custom-20-131072"` | no | -| [mig\_name](#input\_mig\_name) | MIG name | `string` | `"daos-server"` | no | -| [network](#input\_network) | GCP network to use | `string` | n/a | yes | -| [number\_of\_instances](#input\_number\_of\_instances) | Number of daos servers to bring up | `number` | `4` | no | -| [os\_disk\_size\_gb](#input\_os\_disk\_size\_gb) | OS disk size in GB | `number` | `20` | no | -| [os\_disk\_type](#input\_os\_disk\_type) | OS disk type ie. 
pd-ssd, pd-standard | `string` | `"pd-ssd"` | no | -| [os\_family](#input\_os\_family) | OS GCP image family | `any` | `null` | no | -| [os\_project](#input\_os\_project) | OS GCP image project name | `any` | `null` | no | -| [project\_id](#input\_project\_id) | The GCP project to use | `string` | n/a | yes | -| [region](#input\_region) | The GCP region to create and test resources in | `string` | n/a | yes | -| [subnetwork](#input\_subnetwork) | GCP sub-network to use | `string` | n/a | yes | -| [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | n/a | yes | -| [template\_name](#input\_template\_name) | MIG template name | `string` | `"daos-server"` | no | -| [zone](#input\_zone) | The GCP zone to create and test resources in | `string` | n/a | yes | - -## Outputs - -No outputs. diff --git a/terraform/examples/simple_daos_server_example/main.tf b/terraform/examples/simple_daos_server_example/main.tf deleted file mode 100644 index b89a2a9..0000000 --- a/terraform/examples/simple_daos_server_example/main.tf +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Copyright 2021 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -provider "google" { - region = var.region -} - -module "daos_server" { - source = "../../modules/daos_server" - project_id = var.project_id - network = var.network - subnetwork = var.subnetwork - subnetwork_project = var.subnetwork_project - region = var.region - zone = var.zone - labels = var.labels - - number_of_instances = var.number_of_instances - daos_disk_count = var.daos_disk_count - - instance_base_name = var.instance_base_name - os_disk_size_gb = var.os_disk_size_gb - os_disk_type = var.os_disk_type - template_name = var.template_name - mig_name = var.mig_name - machine_type = var.machine_type - os_project = var.os_project - os_family = var.os_family -} diff --git a/terraform/examples/simple_daos_server_example/terraform.tfvars.example b/terraform/examples/simple_daos_server_example/terraform.tfvars.example deleted file mode 100644 index 502237d..0000000 --- a/terraform/examples/simple_daos_server_example/terraform.tfvars.example +++ /dev/null @@ -1,26 +0,0 @@ -project_id = "" -network = "" -subnetwork = "" -subnetwork_project = "" -region = "europe-west4" -zone = "europe-west4-a" -labels = { - example="simple_daos_server_example" -} - -number_of_instances = 4 # Allow 3-way replication with extra spare for rebuild -daos_disk_count = 16 - -instance_base_name = "daos-server" -os_disk_size_gb = 20 -os_disk_type = "pd-ssd" -template_name = "daos-server" #TODO add some sort of unique postfix here -mig_name = "daos-server" #TODO add some sort of unique postfix here -machine_type = "n2-custom-20-131072" # 10x physical cores, using 8x targets - -# the image must be built before (see build folder) -os_project = "" -os_family = "daos-server-centos-7" - - - diff --git a/terraform/modules/daos_client/README.md b/terraform/modules/daos_client/README.md index a9135cb..4ad83c3 100644 --- a/terraform/modules/daos_client/README.md +++ b/terraform/modules/daos_client/README.md @@ -8,6 +8,21 @@ The resources/services/activations/deletions that this module will 
create/trigge - Create an instance tempate for DAOS clients - Create a stateful instance group for DAOS clients + +Copyright 2021 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + ## Requirements | Name | Version | @@ -40,7 +55,7 @@ No modules. |------|-------------|------|---------|:--------:| | [daos\_service\_account\_scopes](#input\_daos\_service\_account\_scopes) | Scopes for the DAOS client service account | `list(string)` |
[
"userinfo-email",
"compute-ro",
"storage-ro"
]
| no | | [instance\_base\_name](#input\_instance\_base\_name) | MIG instance base names to use | `string` | `null` | no | -| [labels](#input\_labels) | Set of key/value label pairs to assign to daos-client instances | `any` | n/a | no | +| [labels](#input\_labels) | Set of key/value label pairs to assign to daos-client instances | `any` | `{}` | no | | [machine\_type](#input\_machine\_type) | GCP machine type. ie. e2-medium | `string` | `null` | no | | [mig\_name](#input\_mig\_name) | MIG name | `string` | `null` | no | | [network](#input\_network) | GCP network to use | `string` | `null` | no | @@ -49,6 +64,7 @@ No modules. | [os\_disk\_type](#input\_os\_disk\_type) | OS disk type ie. pd-ssd, pd-standard | `string` | `"pd-ssd"` | no | | [os\_family](#input\_os\_family) | OS GCP image family | `string` | `null` | no | | [os\_project](#input\_os\_project) | OS GCP image project name | `string` | `null` | no | +| [preemptible](#input\_preemptible) | If preemptible instances | `string` | `false` | no | | [project\_id](#input\_project\_id) | The GCP project to use | `string` | `null` | no | | [region](#input\_region) | The GCP region to create and test resources in | `string` | `null` | no | | [subnetwork](#input\_subnetwork) | GCP sub-network to use | `string` | `null` | no | @@ -59,3 +75,4 @@ No modules. ## Outputs No outputs. + \ No newline at end of file diff --git a/terraform/modules/daos_client/main.tf b/terraform/modules/daos_client/main.tf index bbd90de..d38101e 100644 --- a/terraform/modules/daos_client/main.tf +++ b/terraform/modules/daos_client/main.tf @@ -14,6 +14,23 @@ * limitations under the License. 
*/ +locals { + daos_agent_yaml_content = templatefile( + "${path.module}/templates/daos_agent.yml.tftpl", + { + access_points = var.access_points + } + ) + daos_control_yaml_content = templatefile( + "${path.module}/templates/daos_control.yml.tftpl", + { + access_points = var.access_points + } + ) + client_startup_script = file( + "${path.module}/templates/daos_startup_script.tftpl") +} + data "google_compute_image" "os_image" { family = var.os_family project = var.os_project @@ -47,7 +64,7 @@ resource "google_compute_instance_template" "daos_sig_template" { } scheduling { - preemptible = var.preemptible + preemptible = var.preemptible automatic_restart = false } } @@ -74,11 +91,14 @@ resource "google_compute_per_instance_config" "named_instances" { name = format("%s-%04d", var.instance_base_name, sum([count.index, 1])) preserved_state { metadata = { - inst_type = "daos-client" - enable-oslogin = "true" - // Adding a reference to the instance template used causes the stateful instance to update - // if the instance template changes. Otherwise there is no explicit dependency and template - // changes may not occur on the stateful instance + inst_type = "daos-client" + enable-oslogin = "true" + daos_control_yaml_content = local.daos_control_yaml_content + daos_agent_yaml_content = local.daos_agent_yaml_content + startup-script = local.client_startup_script + # Adding a reference to the instance template used causes the stateful instance to update + # if the instance template changes. 
Otherwise there is no explicit dependency and template + # changes may not occur on the stateful instance instance_template = google_compute_instance_template.daos_sig_template.self_link } } diff --git a/terraform/modules/daos_client/module.json b/terraform/modules/daos_client/module.json new file mode 100644 index 0000000..da940a5 --- /dev/null +++ b/terraform/modules/daos_client/module.json @@ -0,0 +1,193 @@ +{ + "header": "Copyright 2021 Google LLC\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", + "footer": "", + "inputs": [ + { + "name": "daos_service_account_scopes", + "type": "list(string)", + "description": "Scopes for the DAOS client service account", + "default": [ + "userinfo-email", + "compute-ro", + "storage-ro" + ], + "required": false + }, + { + "name": "instance_base_name", + "type": "string", + "description": "MIG instance base names to use", + "default": null, + "required": false + }, + { + "name": "labels", + "type": "any", + "description": "Set of key/value label pairs to assign to daos-client instances", + "default": {}, + "required": false + }, + { + "name": "machine_type", + "type": "string", + "description": "GCP machine type. ie. 
e2-medium", + "default": null, + "required": false + }, + { + "name": "mig_name", + "type": "string", + "description": "MIG name ", + "default": null, + "required": false + }, + { + "name": "network", + "type": "string", + "description": "GCP network to use", + "default": null, + "required": false + }, + { + "name": "number_of_instances", + "type": "number", + "description": "Number of daos clients to bring up", + "default": null, + "required": false + }, + { + "name": "os_disk_size_gb", + "type": "number", + "description": "OS disk size in GB", + "default": 20, + "required": false + }, + { + "name": "os_disk_type", + "type": "string", + "description": "OS disk type ie. pd-ssd, pd-standard", + "default": "pd-ssd", + "required": false + }, + { + "name": "os_family", + "type": "string", + "description": "OS GCP image family", + "default": null, + "required": false + }, + { + "name": "os_project", + "type": "string", + "description": "OS GCP image project name", + "default": null, + "required": false + }, + { + "name": "preemptible", + "type": "string", + "description": "If preemptible instances", + "default": false, + "required": false + }, + { + "name": "project_id", + "type": "string", + "description": "The GCP project to use ", + "default": null, + "required": false + }, + { + "name": "region", + "type": "string", + "description": "The GCP region to create and test resources in", + "default": null, + "required": false + }, + { + "name": "subnetwork", + "type": "string", + "description": "GCP sub-network to use", + "default": null, + "required": false + }, + { + "name": "subnetwork_project", + "type": "string", + "description": "The GCP project where the subnetwork is defined", + "default": null, + "required": false + }, + { + "name": "template_name", + "type": "string", + "description": "MIG template name", + "default": null, + "required": false + }, + { + "name": "zone", + "type": "string", + "description": "The GCP zone to create and test resources in", + 
"default": null, + "required": false + } + ], + "modules": [], + "outputs": [], + "providers": [ + { + "name": "google", + "alias": null, + "version": "\u003e= 3.54.0" + } + ], + "requirements": [ + { + "name": "terraform", + "version": "\u003e= 0.14.5" + }, + { + "name": "google", + "version": "\u003e= 3.54.0" + } + ], + "resources": [ + { + "type": "compute_instance_group_manager", + "name": "daos_sig", + "provider": "google", + "source": "hashicorp/google", + "mode": "managed", + "version": "latest", + "description": null + }, + { + "type": "compute_instance_template", + "name": "daos_sig_template", + "provider": "google", + "source": "hashicorp/google", + "mode": "managed", + "version": "latest", + "description": null + }, + { + "type": "compute_per_instance_config", + "name": "named_instances", + "provider": "google", + "source": "hashicorp/google", + "mode": "managed", + "version": "latest", + "description": null + }, + { + "type": "compute_image", + "name": "os_image", + "provider": "google", + "source": "hashicorp/google", + "mode": "data", + "version": "latest", + "description": null + } + ] +} diff --git a/terraform/modules/daos_client/templates/daos_agent.yml.tftpl b/terraform/modules/daos_client/templates/daos_agent.yml.tftpl new file mode 100644 index 0000000..c5c3517 --- /dev/null +++ b/terraform/modules/daos_client/templates/daos_agent.yml.tftpl @@ -0,0 +1,10 @@ +# +# DAOS agent configuration file +# + +# Management server access points +# Must have the same value for all agents and servers in a system. 
+access_points: ${jsonencode(access_points)} + +transport_config: + allow_insecure: true diff --git a/terraform/modules/daos_client/templates/daos_control.yml.tftpl b/terraform/modules/daos_client/templates/daos_control.yml.tftpl new file mode 100644 index 0000000..6c9b686 --- /dev/null +++ b/terraform/modules/daos_client/templates/daos_control.yml.tftpl @@ -0,0 +1,7 @@ +# +# DAOS manager (dmg) configuration file +# + +hostlist: ${jsonencode(access_points)} +transport_config: + allow_insecure: true diff --git a/terraform/modules/daos_client/templates/daos_startup_script.tftpl b/terraform/modules/daos_client/templates/daos_startup_script.tftpl new file mode 100644 index 0000000..8fc5016 --- /dev/null +++ b/terraform/modules/daos_client/templates/daos_startup_script.tftpl @@ -0,0 +1,27 @@ +#!/bin/bash + +METADATA_URL="http://metadata.google.internal/computeMetadata/v1/instance/attributes" +DAOS_CONFIG_DIR="/etc/daos" + +fetch_attr() +{ + local attr_name=$* + curl -s ${METADATA_URL}/${attr_name} -H "Metadata-Flavor: Google" +} + +echo "BEGIN: Setting up DAOS Client" + +systemctl stop daos_agent + +# Create agent config files +mkdir -p "${DAOS_CONFIG_DIR}" +cd "${DAOS_CONFIG_DIR}" +fetch_attr "daos_control_yaml_content" > daos_control.yml +fetch_attr "daos_agent_yaml_content" > daos_agent.yml + +# enable daos_agent in systemd (will be started automatically at boot time) +systemctl enable daos_agent + +systemctl start daos_agent + +echo "END: Setting up DAOS Client" diff --git a/terraform/modules/daos_client/variables.tf b/terraform/modules/daos_client/variables.tf index 8a4445f..871eee4 100644 --- a/terraform/modules/daos_client/variables.tf +++ b/terraform/modules/daos_client/variables.tf @@ -119,6 +119,12 @@ variable "daos_service_account_scopes" { variable "preemptible" { description = "If preemptible instances" - default = false - type = string + default = false + type = string +} + +variable "access_points" { + description = "List of servers to add to client .yml 
files" + default = null + type = list(string) } diff --git a/terraform/modules/daos_server/README.md b/terraform/modules/daos_server/README.md index db64002..5bbbfeb 100644 --- a/terraform/modules/daos_server/README.md +++ b/terraform/modules/daos_server/README.md @@ -8,6 +8,21 @@ The resources/services/activations/deletions that this module will create/trigge - Create an instance tempate for DAOS servers - Create a stateful instance group for DAOS servers + +Copyright 2021 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + ## Requirements | Name | Version | @@ -21,6 +36,9 @@ The resources/services/activations/deletions that this module will create/trigge |------|---------| | [google](#provider\_google) | >= 3.54.0 | +## Modules + +No modules. ## Resources @@ -35,26 +53,28 @@ The resources/services/activations/deletions that this module will create/trigge | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [daos\_disk\_count](#input\_daos\_disk\_count) | Number of local ssd's to use | `number` | `16` | no | +| [daos\_disk\_count](#input\_daos\_disk\_count) | Number of local ssd's to use | `number` | `null` | no | | [daos\_disk\_type](#input\_daos\_disk\_type) | Daos disk type to use. For now only suported one is local-ssd | `string` | `"local-ssd"` | no | | [daos\_service\_account\_scopes](#input\_daos\_service\_account\_scopes) | Scopes for the DAOS server service account | `list(string)` |
[
"userinfo-email",
"compute-ro",
"storage-ro"
]
| no | -| [instance\_base\_name](#input\_instance\_base\_name) | MIG instance base names to use | `string` | `"daos-server"` | no | -| [labels](#input\_labels) | Set of key/value label pairs to assign to daos-server instances | `any` | n/a | no | -| [machine\_type](#input\_machine\_type) | GCP machine type. ie. e2-medium | `string` | `"n2-custom-20-131072"` | no | -| [mig\_name](#input\_mig\_name) | MIG name | `string` | `"daos-server"` | no | -| [network](#input\_network) | GCP network to use | `string` | n/a | yes | -| [number\_of\_instances](#input\_number\_of\_instances) | Number of daos servers to bring up | `number` | `4` | no | +| [instance\_base\_name](#input\_instance\_base\_name) | MIG instance base names to use | `string` | `null` | no | +| [labels](#input\_labels) | Set of key/value label pairs to assign to daos-server instances | `any` | `{}` | no | +| [machine\_type](#input\_machine\_type) | GCP machine type. ie. e2-medium | `string` | `null` | no | +| [mig\_name](#input\_mig\_name) | MIG name | `string` | `null` | no | +| [network](#input\_network) | GCP network to use | `string` | `null` | no | +| [number\_of\_instances](#input\_number\_of\_instances) | Number of daos servers to bring up | `number` | `null` | no | | [os\_disk\_size\_gb](#input\_os\_disk\_size\_gb) | OS disk size in GB | `number` | `20` | no | | [os\_disk\_type](#input\_os\_disk\_type) | OS disk type ie. 
pd-ssd, pd-standard | `string` | `"pd-ssd"` | no | -| [os\_family](#input\_os\_family) | OS GCP image family | `any` | `null` | no | -| [os\_project](#input\_os\_project) | OS GCP image project name | `any` | `null` | no | -| [project\_id](#input\_project\_id) | The GCP project to use | `string` | n/a | yes | -| [region](#input\_region) | The GCP region to create and test resources in | `string` | n/a | yes | -| [subnetwork](#input\_subnetwork) | GCP sub-network to use | `string` | n/a | yes | -| [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | n/a | yes | -| [template\_name](#input\_template\_name) | MIG template name | `string` | `"daos-server"` | no | -| [zone](#input\_zone) | The GCP zone to create and test resources in | `string` | n/a | yes | +| [os\_family](#input\_os\_family) | OS GCP image family | `string` | `null` | no | +| [os\_project](#input\_os\_project) | OS GCP image project name | `string` | `null` | no | +| [preemptible](#input\_preemptible) | If preemptible instances | `string` | `false` | no | +| [project\_id](#input\_project\_id) | The GCP project to use | `string` | `null` | no | +| [region](#input\_region) | The GCP region to create and test resources in | `string` | `null` | no | +| [subnetwork](#input\_subnetwork) | GCP sub-network to use | `string` | `null` | no | +| [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | `null` | no | +| [template\_name](#input\_template\_name) | MIG template name | `string` | `null` | no | +| [zone](#input\_zone) | The GCP zone to create and test resources in | `string` | `null` | no | ## Outputs No outputs. 
+ \ No newline at end of file diff --git a/terraform/modules/daos_server/main.tf b/terraform/modules/daos_server/main.tf index bdfc5f5..de12dc6 100644 --- a/terraform/modules/daos_server/main.tf +++ b/terraform/modules/daos_server/main.tf @@ -14,6 +14,40 @@ * limitations under the License. */ +locals { + max_aps = var.number_of_instances > 5 ? 5 : (var.number_of_instances % 2) == 1 ? var.number_of_instances : var.number_of_instances - 1 + access_points = formatlist("%s-%04s", var.instance_base_name, range(1, local.max_aps+1)) + scm_size = var.daos_scm_size + # To get nr_hugepages value: (targets * 1Gib) / hugepagesize + huge_pages = (var.daos_disk_count * 1048576) / 2048 + targets = var.daos_disk_count + crt_timeout = var.daos_crt_timeout + daos_server_yaml_content = templatefile( + "${path.module}/templates/daos_server.yml.tftpl", + { + access_points = local.access_points + nr_hugepages = local.huge_pages + targets = local.targets + scm_size = local.scm_size + crt_timeout = local.crt_timeout + } + ) + daos_control_yaml_content = templatefile( + "${path.module}/templates/daos_control.yml.tftpl", + { + access_points = local.access_points + } + ) + daos_agent_yaml_content = templatefile( + "${path.module}/templates/daos_agent.yml.tftpl", + { + access_points = local.access_points + } + ) + server_startup_script = file( + "${path.module}/templates/daos_startup_script.tftpl") +} + data "google_compute_image" "os_image" { family = var.os_family project = var.os_project @@ -58,7 +92,7 @@ resource "google_compute_instance_template" "daos_sig_template" { } scheduling { - preemptible = var.preemptible + preemptible = var.preemptible automatic_restart = false } } @@ -85,13 +119,17 @@ resource "google_compute_per_instance_config" "named_instances" { name = format("%s-%04d", var.instance_base_name, sum([count.index, 1])) preserved_state { metadata = { - inst_type = "daos-server" - enable-oslogin = "true" - inst_nr = var.number_of_instances - inst_base_name = 
var.instance_base_name - // Adding a reference to the instance template used causes the stateful instance to update - // if the instance template changes. Otherwise there is no explicit dependency and template - // changes may not occur on the stateful instance + inst_type = "daos-server" + enable-oslogin = "true" + inst_nr = var.number_of_instances + inst_base_name = var.instance_base_name + daos_server_yaml_content = local.daos_server_yaml_content + daos_control_yaml_content = local.daos_control_yaml_content + daos_agent_yaml_content = local.daos_agent_yaml_content + startup-script = local.server_startup_script + # Adding a reference to the instance template used causes the stateful instance to update + # if the instance template changes. Otherwise there is no explicit dependency and template + # changes may not occur on the stateful instance instance_template = google_compute_instance_template.daos_sig_template.self_link } } diff --git a/terraform/modules/daos_server/module.json b/terraform/modules/daos_server/module.json new file mode 100644 index 0000000..4e74d20 --- /dev/null +++ b/terraform/modules/daos_server/module.json @@ -0,0 +1,207 @@ +{ + "header": "Copyright 2021 Google LLC\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", + "footer": "", + "inputs": [ + { + "name": "daos_disk_count", + "type": "number", + "description": "Number of local ssd's to use", + "default": null, + "required": false + }, + { + "name": "daos_disk_type", + "type": "string", + "description": "Daos 
disk type to use. For now only suported one is local-ssd", + "default": "local-ssd", + "required": false + }, + { + "name": "daos_service_account_scopes", + "type": "list(string)", + "description": "Scopes for the DAOS server service account", + "default": [ + "userinfo-email", + "compute-ro", + "storage-ro" + ], + "required": false + }, + { + "name": "instance_base_name", + "type": "string", + "description": "MIG instance base names to use", + "default": null, + "required": false + }, + { + "name": "labels", + "type": "any", + "description": "Set of key/value label pairs to assign to daos-server instances", + "default": {}, + "required": false + }, + { + "name": "machine_type", + "type": "string", + "description": "GCP machine type. ie. e2-medium", + "default": null, + "required": false + }, + { + "name": "mig_name", + "type": "string", + "description": "MIG name ", + "default": null, + "required": false + }, + { + "name": "network", + "type": "string", + "description": "GCP network to use", + "default": null, + "required": false + }, + { + "name": "number_of_instances", + "type": "number", + "description": "Number of daos servers to bring up", + "default": null, + "required": false + }, + { + "name": "os_disk_size_gb", + "type": "number", + "description": "OS disk size in GB", + "default": 20, + "required": false + }, + { + "name": "os_disk_type", + "type": "string", + "description": "OS disk type ie. 
pd-ssd, pd-standard", + "default": "pd-ssd", + "required": false + }, + { + "name": "os_family", + "type": "string", + "description": "OS GCP image family", + "default": null, + "required": false + }, + { + "name": "os_project", + "type": "string", + "description": "OS GCP image project name", + "default": null, + "required": false + }, + { + "name": "preemptible", + "type": "string", + "description": "If preemptible instances", + "default": false, + "required": false + }, + { + "name": "project_id", + "type": "string", + "description": "The GCP project to use ", + "default": null, + "required": false + }, + { + "name": "region", + "type": "string", + "description": "The GCP region to create and test resources in", + "default": null, + "required": false + }, + { + "name": "subnetwork", + "type": "string", + "description": "GCP sub-network to use", + "default": null, + "required": false + }, + { + "name": "subnetwork_project", + "type": "string", + "description": "The GCP project where the subnetwork is defined", + "default": null, + "required": false + }, + { + "name": "template_name", + "type": "string", + "description": "MIG template name", + "default": null, + "required": false + }, + { + "name": "zone", + "type": "string", + "description": "The GCP zone to create and test resources in", + "default": null, + "required": false + } + ], + "modules": [], + "outputs": [], + "providers": [ + { + "name": "google", + "alias": null, + "version": "\u003e= 3.54.0" + } + ], + "requirements": [ + { + "name": "terraform", + "version": "\u003e= 0.14.5" + }, + { + "name": "google", + "version": "\u003e= 3.54.0" + } + ], + "resources": [ + { + "type": "compute_instance_group_manager", + "name": "daos_sig", + "provider": "google", + "source": "hashicorp/google", + "mode": "managed", + "version": "latest", + "description": null + }, + { + "type": "compute_instance_template", + "name": "daos_sig_template", + "provider": "google", + "source": "hashicorp/google", + "mode": 
"managed", + "version": "latest", + "description": null + }, + { + "type": "compute_per_instance_config", + "name": "named_instances", + "provider": "google", + "source": "hashicorp/google", + "mode": "managed", + "version": "latest", + "description": null + }, + { + "type": "compute_image", + "name": "os_image", + "provider": "google", + "source": "hashicorp/google", + "mode": "data", + "version": "latest", + "description": null + } + ] +} diff --git a/terraform/modules/daos_server/outputs.tf b/terraform/modules/daos_server/outputs.tf new file mode 100644 index 0000000..8f5bb20 --- /dev/null +++ b/terraform/modules/daos_server/outputs.tf @@ -0,0 +1,8 @@ + +output "access_points" { + description = "List of DAOS servers to use as access points" + value = local.access_points + depends_on = [ + local.access_points + ] +} diff --git a/terraform/modules/daos_server/templates/daos_agent.yml.tftpl b/terraform/modules/daos_server/templates/daos_agent.yml.tftpl new file mode 100644 index 0000000..c5c3517 --- /dev/null +++ b/terraform/modules/daos_server/templates/daos_agent.yml.tftpl @@ -0,0 +1,10 @@ +# +# DAOS agent configuration file +# + +# Management server access points +# Must have the same value for all agents and servers in a system. 
+access_points: ${jsonencode(access_points)} + +transport_config: + allow_insecure: true diff --git a/terraform/modules/daos_server/templates/daos_control.yml.tftpl b/terraform/modules/daos_server/templates/daos_control.yml.tftpl new file mode 100644 index 0000000..6c9b686 --- /dev/null +++ b/terraform/modules/daos_server/templates/daos_control.yml.tftpl @@ -0,0 +1,7 @@ +# +# DAOS manager (dmg) configuration file +# + +hostlist: ${jsonencode(access_points)} +transport_config: + allow_insecure: true diff --git a/images/configs/daos_server.yml b/terraform/modules/daos_server/templates/daos_server.yml.tftpl similarity index 73% rename from images/configs/daos_server.yml rename to terraform/modules/daos_server/templates/daos_server.yml.tftpl index e3bbcdc..b9937b9 100644 --- a/images/configs/daos_server.yml +++ b/terraform/modules/daos_server/templates/daos_server.yml.tftpl @@ -1,15 +1,19 @@ -access_points: [changeap] +# +# DAOS server configuration file +# + +access_points: ${jsonencode(access_points)} transport_config: allow_insecure: true provider: ofi+tcp;ofi_rxm disable_vfio: true -crt_timeout: 300 -nr_hugepages: 4096 +crt_timeout: ${crt_timeout} +nr_hugepages: ${nr_hugepages} control_log_file: /var/daos/server.log engines: - - targets: 8 + targets: ${targets} nr_xs_helpers: 0 first_core: 0 bypass_health_chk: true @@ -27,7 +31,7 @@ engines: - scm_mount: /var/daos/ram class: ram - scm_size: 100 + scm_size: ${scm_size} - class: nvme bdev_list: ["0000:00:04.0"]
diff --git a/terraform/modules/daos_server/templates/daos_startup_script.tftpl b/terraform/modules/daos_server/templates/daos_startup_script.tftpl
new file mode 100644
index 0000000..be000f2
--- /dev/null
+++ b/terraform/modules/daos_server/templates/daos_startup_script.tftpl
@@ -0,0 +1,60 @@
+#!/bin/bash
+#
+# Startup script for DAOS server instances.
+#
+# Writes the DAOS server, control and agent configuration files from the
+# daos_*_yaml_content instance metadata attributes, switches the daos_server
+# systemd unit to run as root, then enables and starts the service.
+
+METADATA_URL="http://metadata.google.internal/computeMetadata/v1/instance/attributes"
+DAOS_SERVER_SYSTEMD_FILE="/usr/lib/systemd/system/daos_server.service"
+DAOS_CONFIG_DIR="/etc/daos"
+DAOS_MOUNT_DIR="/var/daos"
+
+# Print an error message to stderr and abort the startup script.
+die()
+{
+  echo "ERROR: $*" >&2
+  exit 1
+}
+
+# Print the value of the instance metadata attribute named $1 to stdout.
+# curl -f turns an HTTP error (for example a missing attribute) into a
+# non-zero exit status instead of printing the error page, so that page can
+# never be written into a config file.
+fetch_attr()
+{
+  local attr_name="$1"
+  curl -sf "${METADATA_URL}/${attr_name}" -H "Metadata-Flavor: Google"
+}
+
+# Write metadata attribute $1 to file $2, aborting if it cannot be fetched.
+write_attr_file()
+{
+  fetch_attr "$1" > "$2" || die "Unable to fetch metadata attribute '$1'"
+}
+
+echo "BEGIN: Setting up DAOS server"
+
+systemctl stop daos_server
+
+# Create server config files
+mkdir -p "${DAOS_CONFIG_DIR}"
+cd "${DAOS_CONFIG_DIR}" || die "Unable to change directory to ${DAOS_CONFIG_DIR}"
+write_attr_file "daos_server_yaml_content" daos_server.yml
+write_attr_file "daos_control_yaml_content" daos_control.yml
+write_attr_file "daos_agent_yaml_content" daos_agent.yml
+
+# Create directory for engine logs and tmpfs mount point
+mkdir -p "${DAOS_MOUNT_DIR}"
+
+# Modify systemd script for GCP
+# First, run daos_server as root since GCP does not support VFIO
+sed -i "s/User=daos_server/User=root/; s/Group=daos_server/Group=root/" "${DAOS_SERVER_SYSTEMD_FILE}"
+
+# enable daos_server in systemd (will be started automatically at boot time)
+systemctl enable daos_server
+
+systemctl start daos_server
+
+echo "END: Setting up DAOS server"
diff --git a/terraform/modules/daos_server/variables.tf b/terraform/modules/daos_server/variables.tf index c544f3c..1a678b4 100644 --- a/terraform/modules/daos_server/variables.tf +++ b/terraform/modules/daos_server/variables.tf @@ -133,6 +133,18 @@ variable "daos_service_account_scopes" { variable "preemptible" { description = "If preemptible instances" - default = false - type = string + default = false + type = string +} + +variable "daos_scm_size" { + description = "scm_size" + default = null + type = number +} + +variable "daos_crt_timeout" { + description = "crt_timeout" + default = null + type = number } diff --git a/tools/autodoc/cloudshell_urls.sh b/tools/autodoc/cloudshell_urls.sh new file mode 100755 index 0000000..48137a1 --- /dev/null +++ b/tools/autodoc/cloudshell_urls.sh @@ -0,0 +1,137 @@ +#!/bin/bash +# +# File: cloudshell_urls.sh +# +# Description: +# This script will update "Open in Google Cloud Shell" in all *.md files.
+# Before merging from the develop branch to main run
+#
+#   ./cloudshell_urls.sh --branch main
+#
+set -e
+trap 'echo "Unexpected and unchecked error. Exiting."' ERR
+
+SCRIPT_NAME=$(basename "$0")
+SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
+
+CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
+
+# Name of the remote the current branch tracks (the part of the upstream
+# ref before the first "/").
+CURRENT_REMOTE=$(git for-each-ref --format='%(upstream:short)' "$(git symbolic-ref -q HEAD)" | cut -d/ -f1)
+
+# URL of that remote with the SSH form rewritten to https://github.com/ and
+# a trailing ".git" removed. The suffix match is anchored so that ".git"
+# inside a name such as "example.github.io" is left untouched.
+CURRENT_REMOTE_URL=$(git remote get-url "${CURRENT_REMOTE}" | sed -e 's|git@github.com:|https://github.com/|g' -e 's|\.git$||')
+
+# Print usage information to stdout. The defaults shown are the values that
+# opts() falls back to when an option is not given.
+show_help() {
+  cat <<EOF
+
+Usage:
+
+  ${SCRIPT_NAME} <options>
+
+  Update all "Open in Google Cloud Shell" links in *.md files
+
+Options:
+
+  [-b --branch]    The branch that is set in the link.
+                   If not provided the default branch "${BRANCH:-${CURRENT_BRANCH}}" is used.
+
+  [-r --repo-url]  The repository URL that is set in the link.
+                   If not provided the URL of the origin repo "${REMOTE_URL:-${CURRENT_REMOTE_URL}}" is used.
+
+  [ -h --help ]    Show help
+
+Examples:
+
+  Set "Open in Google Cloud Shell" links before merging to main
+
+    ${SCRIPT_NAME} --branch main --repo-url https://github.com/daos-stack/google-cloud-daos
+
+  Set "Open in Google Cloud Shell" links when submitting a PR to the develop branch
+
+    ${SCRIPT_NAME} --branch develop --repo-url https://github.com/daos-stack/google-cloud-daos
+
+EOF
+}
+
+
+# Parse the command line into the BRANCH and REMOTE_URL globals, falling back
+# to the current branch and the URL of the remote it tracks.
+# Exits 0 after --help. Exits 1 on an unrecognized argument, on an option
+# that is missing its value, or when no repository URL can be determined.
+opts() {
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      --branch|-b)
+        # Check the value is present: a failing "shift 2" would leave the
+        # arguments untouched and this loop would never terminate.
+        if [[ $# -lt 2 ]]; then
+          log_error "ERROR: Option '${1}' requires a value"
+          show_help
+          exit 1
+        fi
+        BRANCH="$2"
+        shift 2
+        ;;
+      --repo-url|-r)
+        if [[ $# -lt 2 ]]; then
+          log_error "ERROR: Option '${1}' requires a value"
+          show_help
+          exit 1
+        fi
+        REMOTE_URL="${2}"
+        shift 2
+        ;;
+      --help|-h)
+        show_help
+        exit 0
+        ;;
+      --)
+        break
+        ;;
+      *)
+        # Unknown options and stray positional arguments are both fatal, so
+        # the links are never rewritten with values the caller did not ask for.
+        log_error "ERROR: Unrecognized option '${1}'"
+        show_help
+        exit 1
+        ;;
+    esac
+  done
+
+  if [[ -z "${BRANCH}" ]]; then
+    BRANCH="${CURRENT_BRANCH}"
+  fi
+
+  if [[ -z "${REMOTE_URL}" ]]; then
+    REMOTE_URL="${CURRENT_REMOTE_URL}"
+  fi
+
+  # An empty URL would rewrite every link to "git_repo=&".
+  if [[ -z "${REMOTE_URL}" ]]; then
+    log_error "ERROR: Unable to determine the repository URL. Use --repo-url."
+    exit 1
+  fi
+}
+
+# Escape the characters that are special in the replacement part of a
+# sed "s|...|...|" command: backslash, "&" and the "|" delimiter.
+sed_escape_replacement() {
+  printf '%s' "${1}" | sed -e 's/[\\&|]/\\&/g'
+}
+
+# Rewrite the git_repo and cloudshell_git_branch parameters of every
+# "Open in Google Cloud Shell" link found in *.md files below the directory
+# two levels above this script, then print the lines containing the links.
+update_cloud_shell_urls() {
+  local url_marker="https://console.cloud.google.com/cloudshell/open"
+  local search_root="${SCRIPT_DIR}/../.."
+  local repo_url branch mdf
+
+  repo_url=$(sed_escape_replacement "${REMOTE_URL}")
+  branch=$(sed_escape_replacement "${BRANCH}")
+
+  # Read one file name per line so paths containing spaces are not split.
+  while IFS= read -r mdf; do
+    log "Updating file: ${mdf}"
+
+    sed -r -i \
+      -e "s|git_repo=[^\&]*\&|git_repo=${repo_url}\&|g" \
+      -e "s|cloudshell_git_branch=[^\&]*\&|cloudshell_git_branch=${branch}\&|g" \
+      "${mdf}"
+  done < <(grep -R -l --include "*.md" "${url_marker}" "${search_root}")
+
+  # Show the resulting links. grep exits with 1 when no file contains a link;
+  # that is not a failure, so only a real grep error (status > 1) may abort.
+  grep -R --include "*.md" "${url_marker}" "${search_root}" || [[ $? -eq 1 ]]
+}
+
+# Print a banner with the repository URL and branch that will be written into
+# the links. Colour is only used when stdout is a terminal.
+log_start() {
+  local line
+  line=$(printf "%80s" "" | tr " " "-")
+  if [[ -t 1 ]]; then tput setaf 14; fi
+  printf -- "\n%s\n" "${line}"
+  printf "%-78s\n\n" "Updating 'Open in Google Cloud Shell' links in *.md files"
+  printf "git_repo=%-78s\n" "${REMOTE_URL}"
+  printf "cloudshell_git_branch=%-78s\n" "${BRANCH}"
+  printf -- "%s\n" "${line}"
+  if [[ -t 1 ]]; then tput sgr0; fi
+}
+
+# Print one message line to stdout.
+log() {
+  if [[ -t 1 ]]; then tput setaf 15; fi
+  printf "%-78s\n" "${1}"
+  if [[ -t 1 ]]; then tput sgr0; fi
+}
+
+# Print one message line to stderr.
+log_error() {
+  printf "%s\n" "${1}" >&2
+}
+
+
+
+main() {
+  opts "$@"
+  log_start
+  update_cloud_shell_urls
+}
+
+main "$@"
diff --git a/tools/autodoc/terraform_docs.sh b/tools/autodoc/terraform_docs.sh
new file mode 100755
index 0000000..1e5feca
--- /dev/null
+++ b/tools/autodoc/terraform_docs.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Regenerate the terraform-docs output for every directory that contains one
+# of the files passed as arguments (pre-commit passes the changed files).
+# Must be run from the directory holding the .tfdocs-*.yaml config files.
+
+set -euo pipefail
+
+# Directory of each file passed on the command line.
+declare -a paths=()
+for file in "$@"; do
+  paths+=("$(dirname -- "${file}")")
+done
+
+# Nothing to regenerate when no files were passed.
+if [[ ${#paths[@]} -eq 0 ]]; then
+  exit 0
+fi
+
+# Visit each directory once. The list is read on file descriptor 3 so that
+# terraform-docs cannot consume it through stdin.
+while IFS= read -r -u 3 path; do
+  terraform-docs markdown --config .tfdocs-markdown.yaml "${path}"
+  terraform-docs json --config .tfdocs-json.yaml "${path}"
+done 3< <(printf '%s\n' "${paths[@]}" | sort -u)