Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add infrastructure to simulate the OpenHCL test failures #742

Merged
merged 13 commits into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion openhcl/underhill_core/src/dispatch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use self::vtl2_settings_worker::DeviceInterfaces;
use crate::emuplat::netvsp::RuntimeSavedState;
use crate::emuplat::EmuplatServicing;
use crate::nvme_manager::NvmeManager;
use crate::options::TestScenarioConfig;
use crate::reference_time::ReferenceTime;
use crate::servicing;
use crate::servicing::NvmeSavedState;
Expand Down Expand Up @@ -179,6 +180,7 @@ pub(crate) struct LoadedVm {
pub shared_vis_pool: Option<PagePool>,
pub private_pool: Option<PagePool>,
pub nvme_keep_alive: bool,
pub test_configuration: Option<TestScenarioConfig>,
}

pub struct LoadedVmState<T> {
Expand Down Expand Up @@ -433,11 +435,24 @@ impl LoadedVm {
deadline: std::time::Instant,
capabilities_flags: SaveGuestVtl2StateFlags,
) -> anyhow::Result<bool> {
if let Some(TestScenarioConfig::SaveStuck) = self.test_configuration {
tracing::info!("Test configuration SERVICING_SAVE_STUCK is set. Waiting indefinitely.");
std::future::pending::<()>().await;
}

let running = self.state_units.is_running();
let success = match self
.handle_servicing_inner(correlation_id, deadline, capabilities_flags)
.await
{
.and_then(|state| {
if let Some(TestScenarioConfig::SaveFail) = self.test_configuration {
tracing::info!(
"Test configuration SERVICING_SAVE_FAIL is set. Failing the save."
);
return Err(anyhow::anyhow!("Simulated servicing save failure"));
}
Ok(state)
}) {
Ok(state) => {
self.get_client
.send_servicing_state(mesh::payload::encode(state))
Expand Down
1 change: 1 addition & 0 deletions openhcl/underhill_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ async fn launch_workers(
gdbstub: opt.gdbstub,
hide_isolation: opt.hide_isolation,
nvme_keep_alive: opt.nvme_keep_alive,
test_configuration: opt.test_configuration,
};

let (mut remote_console_cfg, framebuffer_access) =
Expand Down
42 changes: 42 additions & 0 deletions openhcl/underhill_core/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,32 @@

use anyhow::bail;
use anyhow::Context;
use mesh::MeshPayload;
use std::collections::BTreeMap;
use std::ffi::OsStr;
use std::ffi::OsString;
use std::path::PathBuf;

/// Failure scenarios that can be injected for testing.
///
/// A scenario is selected through the `OPENHCL_TEST_CONFIG` environment
/// variable; the accepted string for each variant is listed below.
#[derive(Clone, Debug, MeshPayload)]
pub enum TestScenarioConfig {
/// (`SERVICING_SAVE_FAIL`) Fail the servicing save with a simulated error.
SaveFail,
/// (`SERVICING_RESTORE_STUCK`) Wait indefinitely during servicing restore.
RestoreStuck,
/// (`SERVICING_SAVE_STUCK`) Wait indefinitely during servicing save.
SaveStuck,
}

impl std::str::FromStr for TestScenarioConfig {
    type Err = anyhow::Error;

    /// Converts the textual name of a test scenario into its enum value.
    ///
    /// The comparison is an exact string match. Any value other than the
    /// three recognized names produces an "Invalid test config" error that
    /// echoes the rejected input.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let scenario = match s {
            "SERVICING_SAVE_FAIL" => Self::SaveFail,
            "SERVICING_RESTORE_STUCK" => Self::RestoreStuck,
            "SERVICING_SAVE_STUCK" => Self::SaveStuck,
            unknown => return Err(anyhow::anyhow!("Invalid test config: {}", unknown)),
        };
        Ok(scenario)
    }
}

// We've made our own parser here instead of using something like clap in order
// to save on compiled file size. We don't need all the features a crate can provide.
/// underhill core command-line and environment variable options.
Expand Down Expand Up @@ -119,6 +140,11 @@ pub struct Options {

/// (OPENHCL_NVME_KEEP_ALIVE=1) Enable nvme keep alive when servicing.
pub nvme_keep_alive: bool,

/// (OPENHCL_TEST_CONFIG=\<TestScenarioConfig\>)
/// Test configurations are designed to replicate specific behaviors and
/// conditions in order to simulate various test scenarios.
pub test_configuration: Option<TestScenarioConfig>,
}

impl Options {
Expand Down Expand Up @@ -149,6 +175,10 @@ impl Options {
})
};

// Reads an environment variable as a raw string, if it is set.
let parse_env_string =
|name: &str| -> Option<&OsString> { env.get::<OsStr>(name.as_ref()) };

fn parse_bool(value: Option<&OsString>) -> bool {
value
.map(|v| v.eq_ignore_ascii_case("true") || v == "1")
Expand Down Expand Up @@ -203,6 +233,17 @@ impl Options {
let gdbstub = parse_legacy_env_bool("OPENHCL_GDBSTUB");
let gdbstub_port = parse_legacy_env_number("OPENHCL_GDBSTUB_PORT")?.map(|x| x as u32);
let nvme_keep_alive = parse_env_bool("OPENHCL_NVME_KEEP_ALIVE");
let test_configuration = parse_env_string("OPENHCL_TEST_CONFIG").and_then(|x| {
x.to_string_lossy()
.parse::<TestScenarioConfig>()
.map_err(|e| {
tracing::warn!(
"Failed to parse OPENHCL_TEST_CONFIG: {}. No test will be simulated.",
e
)
})
.ok()
});

let mut args = std::env::args().chain(extra_args);
// Skip our own filename.
Expand Down Expand Up @@ -257,6 +298,7 @@ impl Options {
halt_on_guest_halt,
no_sidecar_hotplug,
nvme_keep_alive,
test_configuration,
})
}

Expand Down
13 changes: 13 additions & 0 deletions openhcl/underhill_core/src/worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ use crate::loader::LoadKind;
use crate::nvme_manager::NvmeDiskConfig;
use crate::nvme_manager::NvmeDiskResolver;
use crate::nvme_manager::NvmeManager;
use crate::options::TestScenarioConfig;
use crate::reference_time::ReferenceTime;
use crate::servicing;
use crate::servicing::transposed::OptionServicingInitState;
Expand Down Expand Up @@ -104,6 +105,7 @@ use state_unit::SpawnedUnit;
use state_unit::StateUnits;
use std::collections::HashMap;
use std::ffi::CString;
use std::future;
use std::sync::Arc;
use std::thread::JoinHandle;
use std::time::Duration;
Expand Down Expand Up @@ -300,6 +302,9 @@ pub struct UnderhillEnvCfg {
pub hide_isolation: bool,
/// Enable nvme keep alive.
pub nvme_keep_alive: bool,

/// Test scenario to simulate, if one is configured.
pub test_configuration: Option<TestScenarioConfig>,
}

/// Bundle of config + runtime objects for hooking into the underhill remote
Expand Down Expand Up @@ -494,6 +499,13 @@ impl UnderhillVmWorker {
"cannot have saved state from two different sources"
);

if let Some(TestScenarioConfig::RestoreStuck) = params.env_cfg.test_configuration {
tracing::info!(
"Test configuration SERVICING_RESTORE_STUCK is set. Waiting indefinitely in restore."
);
future::pending::<()>().await;
}

tracing::info!("VTL2 restart, getting servicing state from the host");

let saved_state_buf = get_client
Expand Down Expand Up @@ -3066,6 +3078,7 @@ async fn new_underhill_vm(
shared_vis_pool: shared_vis_pages_pool,
private_pool,
nvme_keep_alive: env_cfg.nvme_keep_alive,
test_configuration: env_cfg.test_configuration,
};

Ok(loaded_vm)
Expand Down