From ffed030a0def3d596fc110f7c2f40a2c4a1ded55 Mon Sep 17 00:00:00 2001 From: sakethanne Date: Mon, 30 Sep 2024 16:10:25 -0700 Subject: [PATCH 1/3] fix(lint): fixed the lint errors Signed-off-by: sakethanne --- ansible/bios.yml | 82 ++++++++------- ansible/f5certs.yml | 44 ++++---- ansible/firmware.yml | 243 +++++++++++++++++++++++++++++++------------ ansible/setup.yml | 153 +++++++++++++++++++++------ ansible/site.yml | 20 ++-- ansible/switches.yml | 13 ++- 6 files changed, 380 insertions(+), 175 deletions(-) diff --git a/ansible/bios.yml b/ansible/bios.yml index ccf15e4..d908049 100644 --- a/ansible/bios.yml +++ b/ansible/bios.yml @@ -1,10 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright (c) 2022 Dell Inc, or its subsidiaries. --- - -- name: +- name: Run BIOS Configuration hosts: hostbmcs - become: yes + become: true tasks: - name: Get Firmware Inventory community.general.redfish_info: @@ -16,7 +15,8 @@ register: result - name: Debug print first firmware entry version - ansible.builtin.debug: var=result.redfish_facts.firmware.entries[0].Version + ansible.builtin.debug: + var: result.redfish_facts.firmware.entries[0].Version - name: Get BIOS attributes community.general.redfish_info: @@ -28,64 +28,66 @@ register: result - name: Debug print bios serial number - ansible.builtin.debug: msg={{ result.redfish_facts.bios_attribute.entries[0][1].SerialNumber | default(result.redfish_facts.bios_attribute.entries[0][1].SystemServiceTag) }} - + ansible.builtin.debug: + msg: > + {{ result.redfish_facts.bios_attribute.entries[0][1].SerialNumber + | default(result.redfish_facts.bios_attribute.entries[0][1].SystemServiceTag) }} # TODO: configre BIOS to be always on ( see lab/hardware/dh123) and any virtualization or hyper threading settings we might need - - name: Debug print bios attributes - ansible.builtin.debug: msg={{ result.redfish_facts.bios_attribute.entries[0][1].SystemManufacturer }} + ansible.builtin.debug: + msg: "{{ 
result.redfish_facts.bios_attribute.entries[0][1].SystemManufacturer }}" when: result.redfish_facts.bios_attribute.entries[0][1].SystemManufacturer is defined # Updating Bios attributes in host BMCs - name: Set BIOS attributes community.general.redfish_config: - category: Systems - resource_id: "{{ resource_id }}" - command: SetBiosAttributes - bios_attributes: "{{ bios_attributes }}" - baseuri: "{{ ansible_host }}" - username: "{{ ansible_user }}" - password: "{{ ansible_password }}" + category: Systems + resource_id: "{{ resource_id }}" + command: SetBiosAttributes + bios_attributes: "{{ bios_attributes }}" + baseuri: "{{ ansible_host }}" + username: "{{ ansible_user }}" + password: "{{ ansible_password }}" register: bios_attribute # DELL iDRAC ONLY: Updating BIOS settings requires creating a configuration job # to schedule the BIOS update, so comment out below for non-Dell systems. - name: Create BIOS configuration job (schedule BIOS setting update) - when: - - result.redfish_facts.bios_attribute.entries[0][1].SystemManufacturer is defined - - result.redfish_facts.bios_attribute.entries[0][1].SystemManufacturer == "Dell Inc." + when: + - result.redfish_facts.bios_attribute.entries[0][1].SystemManufacturer is defined + - result.redfish_facts.bios_attribute.entries[0][1].SystemManufacturer == "Dell Inc." - bios_attribute.changed community.general.idrac_redfish_command: - category: Systems - command: CreateBiosConfigJob - baseuri: "{{ ansible_host }}" - username: "{{ ansible_user }}" - password: "{{ ansible_password }}" + category: Systems + command: CreateBiosConfigJob + baseuri: "{{ ansible_host }}" + username: "{{ ansible_user }}" + password: "{{ ansible_password }}" register: bios_config_job - name: Reboot iDRAC systems to apply new BIOS settings - when: - - result.redfish_facts.bios_attribute.entries[0][1].SystemManufacturer is defined - - result.redfish_facts.bios_attribute.entries[0][1].SystemManufacturer == "Dell Inc." 
+ when: + - result.redfish_facts.bios_attribute.entries[0][1].SystemManufacturer is defined + - result.redfish_facts.bios_attribute.entries[0][1].SystemManufacturer == "Dell Inc." - bios_config_job.changed community.general.redfish_command: - category: Systems - command: PowerReboot - resource_id: "{{ resource_id }}" - baseuri: "{{ ansible_host }}" - username: "{{ ansible_user }}" - password: "{{ ansible_password }}" + category: Systems + command: PowerReboot + resource_id: "{{ resource_id }}" + baseuri: "{{ ansible_host }}" + username: "{{ ansible_user }}" + password: "{{ ansible_password }}" - # TODO: Merge two reboot tasks into one. find identifier for iLO in redfish bios attributes + # TODO: Merge two reboot tasks into one. find identifier for iLO in redfish bios attributes - name: Reboot iLO systems to apply new BIOS settings - when: + when: - bios_attribute.changed - inventory_hostname == 'dh2bmc' or inventory_hostname == 'dh3bmc' community.general.redfish_command: - category: Systems - command: PowerReboot - resource_id: "{{ resource_id }}" - baseuri: "{{ ansible_host }}" - username: "{{ ansible_user }}" - password: "{{ ansible_password }}" + category: Systems + command: PowerReboot + resource_id: "{{ resource_id }}" + baseuri: "{{ ansible_host }}" + username: "{{ ansible_user }}" + password: "{{ ansible_password }}" diff --git a/ansible/f5certs.yml b/ansible/f5certs.yml index ed5063b..92c86a6 100644 --- a/ansible/f5certs.yml +++ b/ansible/f5certs.yml @@ -11,27 +11,27 @@ ssl_key_location: "/etc/letsencrypt/live/{{domain_name}}/privkey.pem" tasks: - - name: Setup provider - ansible.builtin.set_fact: - provider: - server: "{{ ansible_host }}" - user: "{{ ansible_user }}" - password: "{{ ansible_password }}" - server_port: "{{ server_port }}" - no_f5_teem: yes - validate_certs: "no" + - name: Setup provider + ansible.builtin.set_fact: + provider: + server: "{{ ansible_host }}" + user: "{{ ansible_user }}" + password: "{{ ansible_password }}" + server_port: "{{ 
server_port }}" + no_f5_teem: true + validate_certs: "no" -#SSL Upload and Modification of VIP to use New Certificate - - name: Upload New SSL cert upload - f5networks.f5_modules.bigip_ssl_certificate: - provider: "{{ provider }}" - name: "{{ cert_list_name }}" - content: "{{ lookup('file', ssl_cert_location ) }}" - delegate_to: localhost + # SSL Upload and Modification of VIP to use New Certificate + - name: Upload New SSL cert upload + f5networks.f5_modules.bigip_ssl_certificate: + provider: "{{ provider }}" + name: "{{ cert_list_name }}" + content: "{{ lookup('file', ssl_cert_location) }}" + delegate_to: localhost - - name: Upload New SSL key upload - f5networks.f5_modules.bigip_ssl_key: - provider: "{{ provider }}" - name: "{{ cert_list_name }}" - content: "{{ lookup('file', ssl_key_location ) }}" - delegate_to: localhost + - name: Upload New SSL key upload + f5networks.f5_modules.bigip_ssl_key: + provider: "{{ provider }}" + name: "{{ cert_list_name }}" + content: "{{ lookup('file', ssl_key_location) }}" + delegate_to: localhost diff --git a/ansible/firmware.yml b/ansible/firmware.yml index 5fef9fd..f986736 100644 --- a/ansible/firmware.yml +++ b/ansible/firmware.yml @@ -1,40 +1,68 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright (c) 2022 Dell Inc, or its subsidiaries. 
---- - name: Intel | Upgrade Intel Mev IMC and ACC FWs hosts: mev - become: yes + become: true vars: - imc_version: 1.4.0.8469 - imc_local_file: /root/intel-ipu-pldm-image-{{ imc_version }}.tar.gz - imc_remote_file: /work/intel-ipu-pldm-image-{{ imc_version }}/intel-ipu-pldm-{{ imc_version }}.bin - acc_version: "{{ imc_version }}" - acc_local_file: /root/intel-ipu-acc-eval-image-{{ acc_version }}.tar.gz - acc_remote_file: /work/intel-ipu-acc-eval-image-{{ acc_version }}/ACC/OS/acc-os-kernel.bin - ansible_remote_tmp: /tmp + imc_version: 1.4.0.8469 + imc_local_file: /root/intel-ipu-pldm-image-{{ imc_version }}.tar.gz + imc_remote_file: /work/intel-ipu-pldm-image-{{ imc_version }}/intel-ipu-pldm-{{ imc_version }}.bin + acc_version: "{{ imc_version }}" + acc_local_file: /root/intel-ipu-acc-eval-image-{{ acc_version }}.tar.gz + acc_remote_file: /work/intel-ipu-acc-eval-image-{{ acc_version }}/ACC/OS/acc-os-kernel.bin + ansible_remote_tmp: /tmp tasks: - - name: Fetch Intel Mev ACC running version block: - - ansible.builtin.shell: cat /etc/issue - - ansible.builtin.shell: cat /etc/issue.net + - name: Retrieve /etc/issue + ansible.builtin.command: cat /etc/issue + changed_when: false + + - name: Retrieve /etc/issue.net + ansible.builtin.command: cat /etc/issue.net register: result - - ansible.builtin.set_fact: acc_run_version={{ result.stdout | trim }} - - ansible.builtin.debug: var=acc_run_version - - ansible.builtin.fail: msg="Could not find ACC string in the running {{ acc_run_version }}" + changed_when: false + + - name: Set ACC running version + ansible.builtin.set_fact: + acc_run_version: "{{ result.stdout | trim }}" + - name: Debug ACC running version + ansible.builtin.debug: + var: acc_run_version + + - name: Fail if ACC string not found + ansible.builtin.fail: + msg: "Could not find ACC string in the running {{ acc_run_version }}" when: not '"ACC" in acc_run_version' - name: Fetch Intel Mev IMC running version delegate_to: mevbmc block: - - ansible.builtin.shell: 
cat /etc/issue - - ansible.builtin.shell: cat /etc/issue.net + - name: Retrieve /etc/issue + ansible.builtin.command: cat /etc/issue + changed_when: false + + - name: Retrieve /etc/issue.net + ansible.builtin.command: cat /etc/issue.net register: result - - ansible.builtin.set_fact: imc_run_version={{ result.stdout | trim }} - - ansible.builtin.debug: var=imc_run_version - - ansible.builtin.shell: /usr/bin/ipu-update -i - - ansible.builtin.fail: msg="Could not find IMC string in the running {{ imc_run_version }}" + changed_when: false + + - name: Set IMC running version + ansible.builtin.set_fact: + imc_run_version: "{{ result.stdout | trim }}" + + - name: Debug IMC running version + ansible.builtin.debug: + var: imc_run_version + + - name: Update IMC using ipu-update + ansible.builtin.command: /usr/bin/ipu-update -i + changed_when: false + + - name: Fail if IMC string not found + ansible.builtin.fail: + msg: "Could not find IMC string in the running {{ imc_run_version }}" when: not '"IMC" in imc_run_version' - name: Upgrade Intel Mev IMC FW to {{ imc_version }} @@ -42,25 +70,30 @@ delegate_to: mevbmc block: - name: Check if firmware image exists remotely {{ imc_remote_file }} - ansible.builtin.stat: path={{ imc_remote_file }} + ansible.builtin.stat: + path: "{{ imc_remote_file }}" register: imc_remote_file_check # FW file doesn't exist, copy and unpack it - - name: Copy and Extract {{ imc_local_file }} into remote /work folder + - name: Copy and Extract the version into remote /work folder {{ imc_local_file }} when: not imc_remote_file_check.stat.exists block: - name: Check if firmware image exists locally {{ imc_local_file }} delegate_to: localhost - ansible.builtin.stat: path={{ imc_local_file }} + ansible.builtin.stat: + path: "{{ imc_local_file }}" register: imc_local_file_check - - name: Copy and Unpack {{ imc_local_file }} into remote /work folder + - name: Copy and Unpack into remote /work folder {{ imc_local_file }} + ansible.builtin.unarchive: + src: "{{ 
imc_local_file }}" + dest: /work when: imc_local_file_check.stat.exists - ansible.builtin.unarchive: src={{ imc_local_file }} dest=/work - - name: Check again if firmware image exists remotely {{ imc_remote_file }} after copy and unpack - ansible.builtin.stat: path={{ imc_remote_file }} + - name: Check again if firmware image exists remotely after copy and unpack {{ imc_remote_file }} + ansible.builtin.stat: + path: "{{ imc_remote_file }}" register: imc_remote_file_check # FW file exists, use it to start upgrade @@ -68,10 +101,17 @@ - name: Start upgrade Intel Mev IMC FW using existing {{ imc_remote_file }} when: imc_remote_file_check.stat.exists block: - - ansible.builtin.shell: /usr/bin/ipu-update -i {{ imc_remote_file }} + - name: Execute ipu-update with IMC firmware + ansible.builtin.command: /usr/bin/ipu-update -i "{{ imc_remote_file }}" + changed_when: false # TODO: remove echo below - - ansible.builtin.shell: echo /usr/bin/ipu-update -u {{ imc_remote_file }} - - ansible.builtin.shell: echo reboot + - name: Show output of Execute ipu-update with IMC firmware + ansible.builtin.command: echo /usr/bin/ipu-update -u "{{ imc_remote_file }}" + changed_when: false + + - name: Reboot device after upgrade (simulate) + ansible.builtin.command: echo reboot + changed_when: false # TODO: now update ACC using /usr/bin/imc-scripts/acc_os_partition_provision.sh @@ -80,25 +120,30 @@ delegate_to: mevbmc block: - name: Check if firmware image exists remotely {{ acc_remote_file }} - ansible.builtin.stat: path={{ acc_remote_file }} + ansible.builtin.stat: + path: "{{ acc_remote_file }}" register: acc_remote_file_check # FW file doesn't exist, copy and unpack it - - name: Copy and Extract {{ acc_local_file }} into remote /work folder + - name: Copy and Extract into remote /work folder {{ acc_local_file }} when: not acc_remote_file_check.stat.exists block: - name: Check if firmware image exists locally {{ acc_local_file }} delegate_to: localhost - ansible.builtin.stat: path={{ 
acc_local_file }} + ansible.builtin.stat: + path: "{{ acc_local_file }}" register: acc_local_file_check - - name: Copy and Unpack {{ acc_local_file }} into remote /work folder + - name: Copy and Unpack into remote /work folder {{ acc_local_file }} when: acc_local_file_check.stat.exists - ansible.builtin.unarchive: src={{ acc_local_file }} dest=/work + ansible.builtin.unarchive: + src: "{{ acc_local_file }}" + dest: /work - - name: Check again if firmware image exists remotely {{ acc_remote_file }} after copy and unpack - ansible.builtin.stat: path={{ acc_remote_file }} + - name: Check again if firmware image exists remotely after copy and unpack {{ acc_remote_file }} + ansible.builtin.stat: + path: "{{ acc_remote_file }}" register: acc_remote_file_check # FW file exists, use it to start upgrade @@ -107,73 +152,137 @@ when: acc_remote_file_check.stat.exists block: # TODO: remove echo below - - ansible.builtin.shell: ls /usr/bin/imc-scripts/acc_os_partition_provision.sh {{ acc_remote_file }} + - name: List the files in the below directory + ansible.builtin.command: ls /usr/bin/imc-scripts/acc_os_partition_provision.sh {{ acc_remote_file }} + changed_when: false -- name: +- name: Nvidia | Upgrade Nvidia BF2 FWs hosts: bf2 - become: yes + become: true vars: - bfb_local_file: /root/bf-bundle-2.7.0-33_24.04_ubuntu-22.04_prod.bfb - bfb_url: https://content.mellanox.com/BlueField/BFBs/Ubuntu22.04/{{ bfb_local_file | basename }} + bfb_local_file: /root/bf-bundle-2.7.0-33_24.04_ubuntu-22.04_prod.bfb + bfb_url: https://content.mellanox.com/BlueField/BFBs/Ubuntu22.04/{{ bfb_local_file | basename }} tasks: - name: Nvidia | Fetch BlueField runnikng version block: - - ansible.builtin.shell: cat /etc/mlnx-release + - name: Fetch Nvidia BlueField running version + ansible.builtin.command: cat /etc/mlnx-release register: result - - ansible.builtin.set_fact: bf_version={{ result.stdout | trim }} - - ansible.builtin.debug: var=bf_version + changed_when: false + + - name: Extract 
Version from the result variable + ansible.builtin.set_fact: + bf_version: "{{ result.stdout | trim }}" + + - name: Display BlueField running version + ansible.builtin.debug: + var: bf_version - name: Nvidia | Update BlueField using BFB image from the Host delegate_to: dh2 block: - - ansible.builtin.shell: lspci | grep BlueField + # lint: disable=risky-shell-pipe + - name: Check if BlueField PCI device is present + ansible.builtin.shell: + cmd: set -o pipefail && lspci | grep BlueField + executable: /bin/bash + changed_when: false # TODO: for f in $( dpkg --list | grep doca | awk '{print $2}' ); do echo $f ; apt remove --purge $f -y ; done # TODO: sudo apt-get autoremove # TODO: wget https://developer.nvidia.com/networking/secure/doca-sdk/doca_2.7/doca-host_2.7.0-204000-24.04-ubuntu2404_amd64.deb # TODO: sudo dpkg -i doca-host-repo-ubuntu_amd64.deb # TODO: sudo apt-get update # TODO: sudo apt install -y doca-runtime doca-sdk - - ansible.builtin.service: name=rshim enabled=yes state=started - - ansible.builtin.shell: cat /dev/rshim0/misc - - ansible.builtin.shell: ifconfig tmfifo_net0 192.168.100.1/30 up + - name: Start rshim service + ansible.builtin.service: + name: rshim + enabled: true + state: started + + - name: Show the rshim settings + ansible.builtin.command: cat /dev/rshim0/misc + changed_when: false + + - name: Configure network for BlueField device + ansible.builtin.command: ifconfig tmfifo_net0 192.168.100.1/30 up + changed_when: false - name: Check if firmware image exists locally {{ bfb_local_file }} - ansible.builtin.stat: path={{ bfb_local_file }} + ansible.builtin.stat: + path: "{{ bfb_local_file }}" register: bfb_local_file_check - name: Download firmware image {{ bfb_url }} - ansible.builtin.get_url: url={{ bfb_url }} dest={{ bfb_local_file }} mode='0440' + ansible.builtin.get_url: + url: "{{ bfb_url }}" + dest: "{{ bfb_local_file }}" + mode: "0440" when: not bfb_local_file_check.stat.exists # TODO: handle ubuntu bfb cfg password - # TODO: 
ansible.builtin.shell: "bfb-install --bfb {{ local_bfb }} --config /tmp/bf.cfg --rshim {{ rshim.dev }}" - - name: Flush BFB image and reboot BF card - ansible.builtin.shell: cat {{ bfb_local_file }} > /dev/rshim0/boot + ansible.builtin.shell: cat {{ bfb_local_file }} > /dev/rshim0/boot # shell is required here: command does not interpret the > redirect when: not hostvars.bf2.bf_version in bfb_local_file + changed_when: false # from https://www.kernel.org/doc/html/v5.8/networking/device_drivers/pensando/ionic.html - name: Pensando DSC installation-update hosts: dh1 - become: yes + become: true tasks: - - ansible.builtin.shell: lspci -d 1dd8:1002 - - ansible.builtin.shell: lspci | grep Pensando - - ansible.builtin.shell: dmesg | grep ionic - - ansible.builtin.shell: ls -l /sys/class/net/*/device - - ansible.builtin.shell: ls -l /sys/class/net/*/device/driver - - ansible.builtin.shell: ethtool -i enp25s0np0 - - ansible.builtin.shell: devlink dev info pci/0000:19:00.0 + - name: Check for Pensando device in PCI list + ansible.builtin.command: lspci -d 1dd8:1002 + changed_when: false + + - name: Grep for Pensando devices list + ansible.builtin.shell: + cmd: set -o pipefail && lspci | grep Pensando + executable: /bin/bash + changed_when: false + + - name: Check for Pensando device in kernel messages + ansible.builtin.shell: + cmd: set -o pipefail && dmesg | grep ionic + executable: /bin/bash + changed_when: false + + - name: Check for Pensando device in network devices1 + ansible.builtin.shell: ls -l /sys/class/net/*/device + changed_when: false + + - name: Check for Pensando device in network devices2 + ansible.builtin.shell: ls -l /sys/class/net/*/device/driver + changed_when: false + + - name: Fetch network device driver details1 + ansible.builtin.command: ethtool -i enp25s0np0 + changed_when: false + + - name: Fetch network device driver details2 + ansible.builtin.command: devlink dev info pci/0000:19:00.0 + changed_when: false # TODO: update FW for Pensando DSC - name: Marvell CN106 installation-update hosts: dh3 - become: yes + 
become: true tasks: - - ansible.builtin.shell: lspci | grep Cavium - - ansible.builtin.shell: lsusb | grep CP2105 - - ansible.builtin.shell: ls -l /dev/ttyUSB* + - name: Check for Marvell device in PCI list + ansible.builtin.shell: + cmd: set -o pipefail && lspci | grep Cavium + executable: /bin/bash + changed_when: false + + - name: Check for Marvell device in USB list + ansible.builtin.shell: ls -l /dev/ttyUSB* + changed_when: false + + - name: Check for Marvell device in PCI list2 + ansible.builtin.shell: + cmd: set -o pipefail && lsusb | grep CP2105 + executable: /bin/bash + changed_when: false # TODO: update FW for Marvell CN106 diff --git a/ansible/setup.yml b/ansible/setup.yml index 4d2624a..2e54299 100644 --- a/ansible/setup.yml +++ b/ansible/setup.yml @@ -1,65 +1,149 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright (c) 2022 Dell Inc, or its subsidiaries. --- - -- name: +- name: Sync all clocks hosts: all gather_facts: false tasks: - - name: Sync all clocks - ansible.builtin.shell: "date -s '{{ now() }}'" - - - # TODO: consider flushing OS image (we use ubuntu currently) on the Host x86 servers, since we do it manually will now + - name: Set the system time + ansible.builtin.command: "date -s '{{ now() }}'" + changed_when: false + # TODO: Consider automating the OS image flushing process for Host x86 servers (currently done manually). 
- name: Intel MEV | Enable Proxy and Port Forwarding hosts: mevbmc - become: yes + become: true gather_facts: false tasks: - - ansible.builtin.package: name=socat,squid state=present - - ansible.builtin.copy: src=../hardware/dh4/fs/etc/systemd/system/socat-otel.service dest=/etc/systemd/system/socat-otel.service - - ansible.builtin.service: name=socat-otel enabled=yes state=started daemon_reload=yes - - ansible.builtin.shell: sed -i 's/http_access deny all/http_access allow all/g' /etc/squid/squid.conf - - ansible.builtin.service: name=squid enabled=yes state=started + - name: Ensure socat and squid are installed + ansible.builtin.package: + name: + - socat + - squid + state: present + + - name: Copy socat-otel service file + ansible.builtin.copy: + src: ../hardware/dh4/fs/etc/systemd/system/socat-otel.service + dest: /etc/systemd/system/socat-otel.service + mode: "0755" -- name: Install docker and other packages + - name: Enable and start socat-otel service + ansible.builtin.service: + name: socat-otel + enabled: true + state: started + daemon_reload: true + + - name: Update squid configuration to allow all access + ansible.builtin.lineinfile: + path: /etc/squid/squid.conf + regexp: "^http_access deny all$" + line: "http_access allow all" + backup: true + + - name: Enable and start squid service + ansible.builtin.service: + name: squid + enabled: true + state: started + +- name: Install Docker and other packages hosts: hostservers,tgens,DPUs,Management - become: yes + become: true environment: "{{ proxy_env | default({}) }}" tasks: - - ansible.builtin.package: state=present name=python3-pip,sshpass,git - - when: ansible_os_family == 'Debian' + - name: Ensure required packages are installed + ansible.builtin.package: + state: present + name: + - python3-pip + - sshpass + - git + + - name: Add Docker GPG key and repository for Debian-based systems + when: ansible_os_family == 'Debian' block: - - ansible.builtin.get_url: 
url=https://download.docker.com/linux/ubuntu/gpg dest=/etc/apt/keyrings/docker.asc mode='0644' force=true - - ansible.builtin.shell: dpkg --print-architecture + - name: Download Docker GPG key + ansible.builtin.get_url: + url: https://download.docker.com/linux/ubuntu/gpg + dest: /etc/apt/keyrings/docker.asc + mode: "0644" + force: true + + - name: Get architecture + ansible.builtin.command: dpkg --print-architecture register: deb_architecture - - ansible.builtin.apt_repository: state=present repo="deb [arch={{ deb_architecture.stdout}} signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" - - ansible.builtin.package: state=absent name=docker.io,docker-doc,docker-compose,docker-compose-v2,podman-docker,containerd,runc - - ansible.builtin.package: state=present name=docker-ce,docker-ce-cli,containerd.io,docker-buildx-plugin,docker-compose-plugin - - ansible.builtin.systemd: state=started name=docker + changed_when: false + + - name: Add Docker repository + ansible.builtin.apt_repository: + state: present + repo: > + deb [arch={{ deb_architecture.stdout }} signed-by=/etc/apt/keyrings/docker.asc] + https://download.docker.com/linux/ubuntu + {{ ansible_distribution_release }} stable + + - name: Remove conflicting Docker packages + ansible.builtin.package: + state: absent + name: + - docker.io + - docker-doc + - docker-compose + - docker-compose-v2 + - podman-docker + - containerd + - runc + + - name: Install Docker and related packages + ansible.builtin.package: + state: present + name: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-buildx-plugin + - docker-compose-plugin -- name: + - name: Ensure Docker service is started + ansible.builtin.systemd: + state: started + name: docker + +- name: Run Monitoring OTEL, Prometheus, Grafana hosts: Management - become: yes + become: true tasks: - - name: Run Monitoring OTEL, Prometheus, Grafana + - name: Start Docker Compose project 
community.docker.docker_compose_v2: project_src: /root/opi-poc/lab register: output - name: Secure Zero Touch Provisioning hosts: DPUs - become: yes + become: true tasks: - # TODO: enable this - - name: Nvidia | Fix dhcp client config and then run sztp agent container + # TODO: Enable this when ready + - name: Nvidia | Fix DHCP client config and run SZTP agent container when: inventory_hostname == 'bf123' block: - - ansible.builtin.lineinfile: backup=true path=/etc/dhcp/dhclient.conf insertbefore='^option(.*)code(.*)$' line='option sztp-redirect-urls code 143 = text;' - - ansible.builtin.lineinfile: backup=true path=/etc/dhcp/dhclient.conf regexp='^(request .*)' line='\g<1> sztp-redirect-urls,' - - name: Nvidia | Run sztp agent container on Nvidia BF + - name: Update dhclient configuration + ansible.builtin.lineinfile: + backup: true + path: /etc/dhcp/dhclient.conf + insertbefore: "^option(.*)code(.*)$" + line: "option sztp-redirect-urls code 143 = text;" + + - name: Add sztp-redirect-urls to DHCP request + ansible.builtin.lineinfile: + backup: true + path: /etc/dhcp/dhclient.conf + regexp: "^(request .*)" + line: '\g<1> sztp-redirect-urls,' + + - name: Run SZTP agent container on Nvidia BF community.docker.docker_container: name: sztp image: ghcr.io/opiproject/opi-sztp-client:main @@ -73,4 +157,7 @@ source: /var/lib/NetworkManager/dhclient-aa93b667-6aac-3804-91e9-4958e07fdb2f-oob_net0.lease target: /var/lib/dhclient/dhclient.leases read_only: true - command: /opi-sztp-agent daemon --bootstrap-trust-anchor-cert /mnt/opi.pem --device-end-entity-cert /mnt/opi_cert.pem --device-private-key /mnt/opi_private_key.pem + command: > + /opi-sztp-agent daemon --bootstrap-trust-anchor-cert /mnt/opi.pem + --device-end-entity-cert /mnt/opi_cert.pem + --device-private-key /mnt/opi_private_key.pem diff --git a/ansible/site.yml b/ansible/site.yml index f5221a5..7575c3b 100644 --- a/ansible/site.yml +++ b/ansible/site.yml @@ -1,10 +1,18 @@ # SPDX-License-Identifier: Apache-2.0 # 
Copyright (c) 2022 Dell Inc, or its subsidiaries. --- - # file: site.yml -- import_playbook: bios.yml -- import_playbook: setup.yml -- import_playbook: switches.yml -- import_playbook: monitoring.yml -- import_playbook: firmware.yml +- name: Import BIOS Playbook + import_playbook: bios.yml + +- name: Import Setup Playbook + import_playbook: setup.yml + +- name: Import Switches Playbook + import_playbook: switches.yml + +- name: Import Monitoring Playbook + import_playbook: monitoring.yml + +- name: Import Firmware Playbook + import_playbook: firmware.yml diff --git a/ansible/switches.yml b/ansible/switches.yml index dcc8ca4..43eec85 100644 --- a/ansible/switches.yml +++ b/ansible/switches.yml @@ -1,24 +1,23 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright (c) 2022 Dell Inc, or its subsidiaries. --- - -- name: +- name: Get the switches configuration hosts: switches gather_facts: false tasks: - - name: run show version on remote devices - #become: yes + - name: Run show version on remote devices + # become: true arista.eos.eos_command: commands: - show version - show interfaces - #- show running-config + # - show running-config # TODO: send lab/hardware/A100G/arista.config to the switch # TODO: send lab/hardware/ToR/arista.config to the switch - #- name: load config file onto an Arista switch - # become: yes + # - name: load config file onto an Arista switch + # become: true # arista.eos.eos_config: # backup: true # src: ../hardware/{{ inventory_hostname }}/arista.config From e4d241f9b9f05a5b40939974f9133d0ade2aa042 Mon Sep 17 00:00:00 2001 From: sakethanne Date: Mon, 30 Sep 2024 16:28:18 -0700 Subject: [PATCH 2/3] fix(telegraf): merged telegraf code for all hosts Signed-off-by: sakethanne --- ansible/monitoring.yml | 117 ++++++++++++++++++++--------------------- 1 file changed, 58 insertions(+), 59 deletions(-) diff --git a/ansible/monitoring.yml b/ansible/monitoring.yml index 0322588..b2c44c7 100644 --- a/ansible/monitoring.yml +++ b/ansible/monitoring.yml @@ 
-1,78 +1,51 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright (c) 2022 Dell Inc, or its subsidiaries. ---- - name: Monitoring - # Management server runs it via compose, see below. So skip it here hosts: hostservers,tgens,DPUs - become: yes + become: true vars: - bmc_vars: "{{ hostvars[inventory_hostname+'bmc'] }}" + bmc_vars: "{{ hostvars[inventory_hostname+'bmc'] }}" tasks: - - - name: Copy telegraf folder to remote - ansible.builtin.copy: src=../telegraf.d dest=/root + - name: Copy telegraf folder to remote folder + ansible.builtin.copy: + src: ../telegraf.d + dest: /root + mode: "0755" - name: Remove arista config file - ansible.builtin.file: state=absent path=/root/telegraf.d/arista.conf - - # TODO: create new telegraf container or use same for Marvell card + ansible.builtin.file: + path: /root/telegraf.d/arista.conf + state: absent - # TODO: see if there is an opportunity to consolidate and code dup removal - - - name: Nvidia | telegraf otel monitoring + - name: Nvidia | Run additional Nvidia specific tasks when: inventory_hostname == 'bf2' block: - name: Nvidia | make sure emulation is running for temperature - ansible.builtin.systemd: state=started name=set_emu_param - - ansible.builtin.systemd: state=stopped name=mlnx_snap - - ansible.builtin.systemd: state=started name=spdk_tgt - - name: Nvidia | Run telegraf container on Nvidia BF - community.docker.docker_container: - name: telegraf - image: docker.io/library/telegraf:1.31 + ansible.builtin.systemd: + name: set_emu_param state: started - restart: true - detach: true - network_mode: host - restart_policy: always - mounts: - - type: bind - source: /root/telegraf.d/telegraf.conf.bf2 - target: /etc/telegraf/telegraf.conf - read_only: true - - type: bind - source: /run/emu_param - target: /run/emu_param - read_only: true - # TODO: see if there is an opportunity to consolidate and code dup removal + - name: Nvidia | Stop mlx_snap service + ansible.builtin.systemd: + name: mlnx_snap + state: stopped + + - 
name: Nvidia | Start spdk_tgt service + ansible.builtin.systemd: + name: spdk_tgt + state: started - - name: Intel | telegraf otel monitoring + - name: Intel | Set proxy environment and downgrade requests package due to bug when: inventory_hostname == 'mev' environment: "{{ proxy_env | default({}) }}" block: - name: Intel | Downgrade requests package due to bug https://github.com/ansible-collections/community.docker/issues/868 - ansible.builtin.pip: name=requests<2.32 - - name: Intel | Run telegraf container on Intel MEV - community.docker.docker_container: - name: telegraf - image: docker.io/library/telegraf:1.31 - state: started - restart: true - detach: true - network_mode: host - restart_policy: always - mounts: - - type: bind - source: /root/telegraf.d/telegraf.conf.mev - target: /etc/telegraf/telegraf.conf - read_only: true + ansible.builtin.pip: + name: requests + version: "<2.32" - - name: Run telegraf container on others - when: - - inventory_hostname != 'mev' - - inventory_hostname != 'bf2' + - name: Run telegraf container on all hosts community.docker.docker_container: name: telegraf image: docker.io/library/telegraf:1.31 @@ -81,11 +54,37 @@ detach: true network_mode: host restart_policy: always - mounts: - - type: bind - source: /root/telegraf.d - target: /etc/telegraf/telegraf.d - read_only: true + mounts: > + {{ + [ + { + 'type': 'bind', + 'source': inventory_hostname in ['bf2', 'mev'] and ('/root/telegraf.d/telegraf.conf.' ~ inventory_hostname) or + '/root/telegraf.d', + 'target': inventory_hostname in ['bf2', 'mev'] and '/etc/telegraf/telegraf.conf' or + '/etc/telegraf/telegraf.d', + 'read_only': True + } + ] + + ( + [ + { + 'type': 'bind', + 'source': '/run/emu_param', + 'target': '/run/emu_param', + 'read_only': True + } + ] + if inventory_hostname == 'bf2' else [] + ) + }} + + - name: Set telegraf container parameters on dh1, dh2, dh3, dh4 + when: + - inventory_hostname != 'mev' + - inventory_hostname != 'bf2' + community.docker.docker_container: + 
name: telegraf env: REDFISH_HOST: "{{ bmc_vars.ansible_host }}" REDFISH_USER: "{{ bmc_vars.ansible_user }}" From ac81233748fb99f23c3804877c35e84d713d61a4 Mon Sep 17 00:00:00 2001 From: sakethanne Date: Mon, 30 Sep 2024 16:46:22 -0700 Subject: [PATCH 3/3] docs(ansible): added ansible coding guidelines and best practices Signed-off-by: sakethanne --- ansible/Ansible_Guidelines.md | 55 +++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 ansible/Ansible_Guidelines.md diff --git a/ansible/Ansible_Guidelines.md b/ansible/Ansible_Guidelines.md new file mode 100644 index 0000000..60b5428 --- /dev/null +++ b/ansible/Ansible_Guidelines.md @@ -0,0 +1,55 @@ +# Ansible Best Practices and Guidelines + +===================================== + +This file describes the guidelines and best practices to follow when working on the OPI Ansible scripts. Also read the contributing guidelines for the OPI Lab repository for a general overview and the associated GitHub process. + +## Table of Contents + +--- + +1. [General Best Practices](#general-best-practices) +2. [Project Structure](#project-structure) +3. [Playbooks](#playbooks) +4. [Code Readability and Style](#code-readability-and-style) +5. [Additional Tips](#additional-tips) + +## General Best Practices + +--- + +Always test your code before raising a Pull Request, use version control, and follow coding standards. Avoid unnecessary complexity and use Ansible's built-in features. + +## Project Structure + +--- + +This section will be updated soon with the new folder structure that we will be implementing. + +- **Organize playbooks**: Use a clear directory structure for playbooks and roles. +- **Use top-level playbooks**: Create top-level playbooks that orchestrate other playbooks, for example `site.yml` (which currently orchestrates all the other playbooks in the repository). 
+- **Separate configuration and deployment**: Use separate playbooks for configuration and deployment tasks. + +## Playbooks + +--- + +- **Use descriptive variable names**: Use clear and descriptive names for variables. +- **Avoid duplicating code**: Try to re-use the same code with different conditional variables like `when` to avoid code duplication. +- **Follow module development guidelines**: Adhere to Ansible's module development best practices. +- **Avoid Ansible-lint errors**: Once you have completed the development of a script, please run `ansible-lint` on your playbook and make sure there are no linting errors. +- **Use native modules**: Prefer native Ansible modules over the `command` module. +- **Avoid complex module options**: Keep module options simple and focused, and always check the official [Ansible documentation](https://docs.ansible.com/ansible/latest/index.html) for help on the module options. + +## Code Readability and Style + +--- + +- **Add comments**: Include comments to explain the purpose of tasks and variables. + +## Additional Tips + +--- + +- **Use blocks for critical changes**: Use block syntax to enable rollbacks and output data for critical changes. +- **Check and validate**: Use `--check` and `--diff` to validate changes before applying them, including when running the playbooks on Semaphore.