diff --git a/.github/workflows-source/schedule-daily.yml b/.github/workflows-source/schedule-daily.yml
index 8f7b1bca5b6..710315506f9 100644
--- a/.github/workflows-source/schedule-daily.yml
+++ b/.github/workflows-source/schedule-daily.yml
@@ -83,6 +83,7 @@ jobs:
             --file_share_ssh_key "$(realpath file2)" \
             --inject_image_pub_key "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIK3gjE/2K5nxIBbk3ohgs8J5LW+XiObwA+kGtSaF5+4c" \
             --file_share_username ci_interim \
+            --hsm \
             --ci_mode
 
       # Run bare metal node performance benchmarks
diff --git a/.github/workflows/schedule-daily.yml b/.github/workflows/schedule-daily.yml
index 0eb36e09d05..2e6b7de6672 100644
--- a/.github/workflows/schedule-daily.yml
+++ b/.github/workflows/schedule-daily.yml
@@ -49,6 +49,7 @@ jobs:
             --file_share_ssh_key "$(realpath file2)" \
             --inject_image_pub_key "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIK3gjE/2K5nxIBbk3ohgs8J5LW+XiObwA+kGtSaF5+4c" \
             --file_share_username ci_interim \
+            --hsm \
             --ci_mode
 
       # Run bare metal node performance benchmarks
diff --git a/ic-os/dev-tools/bare_metal_deployment/deploy.py b/ic-os/dev-tools/bare_metal_deployment/deploy.py
index 00255039e48..eec705da060 100755
--- a/ic-os/dev-tools/bare_metal_deployment/deploy.py
+++ b/ic-os/dev-tools/bare_metal_deployment/deploy.py
@@ -115,6 +115,9 @@ class Args:
     # Run benchmarks if True
     benchmark: bool = flag(default=False)
 
+    # Check HSM capability if True
+    hsm: bool = flag(default=False)
+
     # Path to the benchmark_driver script.
     benchmark_driver_script: Optional[str] = "./benchmark_driver.sh"
 
@@ -266,6 +269,37 @@ def check_guestos_metrics_version(ip_address: IPv6Address, timeout_secs: int) ->
     return True
 
 
+def check_guestos_hsm_capability(
+    ip_address: IPv6Address, ssh_key_file: Optional[str] = None, timeout_secs: int = 10
+) -> bool:
+    """Check over SSH that the node can attach and list the HSM.
+
+    Returns True only when the remote command chain exits successfully, i.e.
+    `pkcs11-tool --list-slots` on the node reports a "Nitrokey HSM" slot.
+    """
+    # Function-scope import so this hunk does not depend on the file's import block.
+    import shlex
+
+    # Quote the key path so spaces or shell metacharacters cannot split or alter the command.
+    ssh_key_arg = f"-i {shlex.quote(ssh_key_file)}" if ssh_key_file else ""
+    # BatchMode and ConnectTimeout make ssh fail instead of waiting on a password
+    # prompt or an unreachable node.
+    ssh_opts = (
+        "-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "
+        f"-o BatchMode=yes -o ConnectTimeout={timeout_secs}"
+    )
+    result = invoke.run(
+        f"ssh {ssh_opts} {ssh_key_arg} admin@{ip_address} '/opt/ic/bin/vsock_guest --attach-hsm && sleep 5 && pkcs11-tool --list-slots | grep \"Nitrokey HSM\"'",
+        warn=True,
+    )
+    if not result or not result.ok:
+        log.warning("HSM check failed.")
+        return False
+
+    log.info("HSM check success.")
+    return True
+
+
 def wait(wait_secs: int) -> bool:
     time.sleep(wait_secs)
     return False
@@ -323,7 +357,13 @@ def configure_process_local_log(server_id: str):
     log.add(sys.stderr, format=logger_format)
 
 
-def deploy_server(bmc_info: BMCInfo, wait_time_mins: int, idrac_script_dir: Path):
+def deploy_server(
+    bmc_info: BMCInfo,
+    wait_time_mins: int,
+    idrac_script_dir: Path,
+    file_share_ssh_key: Optional[str] = None,
+    check_hsm: bool = False,
+):
     # Partially applied function for brevity
     run_func = functools.partial(run_script, idrac_script_dir, bmc_info)
 
@@ -382,9 +422,14 @@ def wait_func() -> bool:
 
         def check_connectivity_func() -> bool:
             assert bmc_info.guestos_ipv6_address is not None, "Logic error"
-            return check_guestos_ping_connectivity(
-                bmc_info.guestos_ipv6_address, timeout_secs
-            ) and check_guestos_metrics_version(bmc_info.guestos_ipv6_address, timeout_secs)
+
+            result = check_guestos_ping_connectivity(bmc_info.guestos_ipv6_address, timeout_secs)
+            result = result and check_guestos_metrics_version(bmc_info.guestos_ipv6_address, timeout_secs)
+
+            if check_hsm:
+                result = result and check_guestos_hsm_capability(bmc_info.guestos_ipv6_address, file_share_ssh_key)
+
+            return result
 
         iterate_func = check_connectivity_func if bmc_info.guestos_ipv6_address else wait_func
 
@@ -420,10 +465,17 @@ def check_connectivity_func() -> bool:
         return e.args[0]
 
 
-def boot_images(bmc_infos: List[BMCInfo], parallelism: int, wait_time_mins: int, idrac_script_dir: Path):
+def boot_images(
+    bmc_infos: List[BMCInfo],
+    parallelism: int,
+    wait_time_mins: int,
+    idrac_script_dir: Path,
+    file_share_ssh_key: Optional[str] = None,
+    check_hsm: bool = False,
+):
     results: List[OperationResult] = []
 
-    arg_tuples = ((bmc_info, wait_time_mins, idrac_script_dir) for bmc_info in bmc_infos)
+    arg_tuples = ((bmc_info, wait_time_mins, idrac_script_dir, file_share_ssh_key, check_hsm) for bmc_info in bmc_infos)
 
     with Pool(parallelism) as p:
         results = p.starmap(deploy_server, arg_tuples)
@@ -682,7 +734,12 @@ def main():
     wait_time_mins = args.wait_time
     parallelism = args.parallel
     success = boot_images(
-        bmc_infos=bmc_infos, parallelism=parallelism, wait_time_mins=wait_time_mins, idrac_script_dir=idrac_script_dir
+        bmc_infos=bmc_infos,
+        parallelism=parallelism,
+        wait_time_mins=wait_time_mins,
+        idrac_script_dir=idrac_script_dir,
+        file_share_ssh_key=args.file_share_ssh_key,
+        check_hsm=args.hsm,
     )
 
     if not success: