Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add infrastructure to simulate the OpenHCL test failures #742

Merged
merged 13 commits into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions openhcl/underhill_core/src/dispatch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ use net_packet_capture::PacketCaptureParams;
use page_pool_alloc::PagePool;
use pal_async::task::Spawn;
use pal_async::task::Task;
use pal_async::timer::PolledTimer;
use parking_lot::Mutex;
use socket2::Socket;
use state_unit::SavedStateUnit;
Expand Down Expand Up @@ -70,6 +71,8 @@ use vtl2_settings_worker::handle_vtl2_config_rpc;
use vtl2_settings_worker::Vtl2ConfigNicRpc;
use vtl2_settings_worker::Vtl2SettingsWorker;

use crate::options::TestScenarioConfig;
smalis-msft marked this conversation as resolved.
Show resolved Hide resolved

#[derive(MeshPayload)]
pub enum UhVmRpc {
Pause(Rpc<(), bool>),
Expand Down Expand Up @@ -179,6 +182,7 @@ pub(crate) struct LoadedVm {
pub shared_vis_pool: Option<PagePool>,
pub private_pool: Option<PagePool>,
pub nvme_keep_alive: bool,
pub test_configuration: Option<TestScenarioConfig>,
}

pub struct LoadedVmState<T> {
Expand Down Expand Up @@ -357,7 +361,12 @@ impl LoadedVm {
capabilities_flags,
} = message;
match self
.handle_servicing_request(correlation_id, deadline, capabilities_flags)
.handle_servicing_request(
correlation_id,
deadline,
capabilities_flags,
threadpool,
)
.await
{
Ok(true) => {
Expand Down Expand Up @@ -432,12 +441,31 @@ impl LoadedVm {
correlation_id: Guid,
deadline: std::time::Instant,
capabilities_flags: SaveGuestVtl2StateFlags,
threadpool: &AffinitizedThreadpool,
) -> anyhow::Result<bool> {
if let Some(TestScenarioConfig::TestScenarioServicingSaveStuck) = self.test_configuration {
tracing::info!("Test configuration SERVICING_SAVE_STUCK is set. Waiting indefinitely.");
let mut timer = PolledTimer::new(threadpool.current_driver());
bhargavshah1988 marked this conversation as resolved.
Show resolved Hide resolved
loop {
timer.sleep(Duration::from_secs(1)).await;
}
}

let running = self.state_units.is_running();
let success = match self
.handle_servicing_inner(correlation_id, deadline, capabilities_flags)
.await
{
.and_then(|state| {
if let Some(TestScenarioConfig::TestScenarioServicingSaveFail) =
self.test_configuration
{
tracing::info!(
"Test configuration SERVICING_SAVE_FAIL is set. Failing the save."
);
return Err(anyhow::anyhow!("Simulated servicing save failure"));
}
Ok(state)
}) {
Ok(state) => {
self.get_client
.send_servicing_state(mesh::payload::encode(state))
Expand Down
1 change: 1 addition & 0 deletions openhcl/underhill_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ async fn launch_workers(
gdbstub: opt.gdbstub,
hide_isolation: opt.hide_isolation,
nvme_keep_alive: opt.nvme_keep_alive,
test_configuration: opt.test_configuration,
};

let (mut remote_console_cfg, framebuffer_access) =
Expand Down
42 changes: 42 additions & 0 deletions openhcl/underhill_core/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,32 @@

use anyhow::bail;
use anyhow::Context;
use mesh::MeshPayload;
use std::collections::BTreeMap;
use std::ffi::OsStr;
use std::ffi::OsString;
use std::path::PathBuf;

/// Test scenarios that can be selected through the `OPENHCL_TEST_CONFIG`
/// environment variable to simulate servicing failures.
///
/// The accepted string for each variant is defined by the `FromStr`
/// implementation for this type.
#[derive(Copy, Clone, Debug, PartialEq, MeshPayload)]
bhargavshah1988 marked this conversation as resolved.
Show resolved Hide resolved
pub enum TestScenarioConfig {
/// Selected by `SERVICING_SAVE_FAIL`: the servicing save is reported as
/// failed with a simulated error.
TestScenarioServicingSaveFail,
smalis-msft marked this conversation as resolved.
Show resolved Hide resolved
/// Selected by `SERVICING_RESTORE_STUCK`: the servicing restore path waits
/// indefinitely.
TestScenarioServicingRestoreStuck,
/// Selected by `SERVICING_SAVE_STUCK`: the servicing save request waits
/// indefinitely.
TestScenarioServicingSaveStuck,
}

/// Parses a test scenario name (as supplied via `OPENHCL_TEST_CONFIG`) into a
/// [`TestScenarioConfig`].
impl std::str::FromStr for TestScenarioConfig {
    type Err = anyhow::Error;

    /// Maps a scenario name to its variant.
    ///
    /// The comparison is exact and case-sensitive.
    ///
    /// # Errors
    ///
    /// Returns an error that echoes the rejected input when `s` is not one of
    /// the recognized scenario names.
    fn from_str(s: &str) -> Result<TestScenarioConfig, anyhow::Error> {
        let scenario = match s {
            "SERVICING_SAVE_FAIL" => Self::TestScenarioServicingSaveFail,
            "SERVICING_RESTORE_STUCK" => Self::TestScenarioServicingRestoreStuck,
            "SERVICING_SAVE_STUCK" => Self::TestScenarioServicingSaveStuck,
            unrecognized => anyhow::bail!("Invalid test config: {}", unrecognized),
        };
        Ok(scenario)
    }
}

// We've made our own parser here instead of using something like clap in order
// to save on compiled file size. We don't need all the features a crate can provide.
/// underhill core command-line and environment variable options.
Expand Down Expand Up @@ -119,6 +140,11 @@ pub struct Options {

/// (OPENHCL_NVME_KEEP_ALIVE=1) Enable nvme keep alive when servicing.
pub nvme_keep_alive: bool,

/// (OPENHCL_TEST_CONFIG=\<TestScenarioConfig\>)
/// Test configurations are designed to replicate specific behaviors and
/// conditions in order to simulate various test scenarios.
pub test_configuration: Option<TestScenarioConfig>,
}

impl Options {
Expand Down Expand Up @@ -149,6 +175,10 @@ impl Options {
})
};

// Reads an environment variable's raw string value, if the variable is set.
let parse_env_string =
|name: &str| -> Option<&OsString> { env.get::<OsStr>(name.as_ref()) };

fn parse_bool(value: Option<&OsString>) -> bool {
value
.map(|v| v.eq_ignore_ascii_case("true") || v == "1")
Expand Down Expand Up @@ -203,6 +233,17 @@ impl Options {
let gdbstub = parse_legacy_env_bool("OPENHCL_GDBSTUB");
let gdbstub_port = parse_legacy_env_number("OPENHCL_GDBSTUB_PORT")?.map(|x| x as u32);
let nvme_keep_alive = parse_env_bool("OPENHCL_NVME_KEEP_ALIVE");
let test_configuration = parse_env_string("OPENHCL_TEST_CONFIG").and_then(|x| {
x.to_string_lossy()
.parse::<TestScenarioConfig>()
.inspect_err(|e| {
bhargavshah1988 marked this conversation as resolved.
Show resolved Hide resolved
tracing::warn!(
"Failed to parse OPENHCL_TEST_CONFIG: {}. No test will be simulated.",
e
)
})
.ok()
});

let mut args = std::env::args().chain(extra_args);
// Skip our own filename.
Expand Down Expand Up @@ -257,6 +298,7 @@ impl Options {
halt_on_guest_halt,
no_sidecar_hotplug,
nvme_keep_alive,
test_configuration,
})
}

Expand Down
19 changes: 19 additions & 0 deletions openhcl/underhill_core/src/worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ use openhcl_attestation_protocol::igvm_attest::get::runtime_claims::AttestationV
use page_pool_alloc::PagePool;
use pal_async::local::LocalDriver;
use pal_async::task::Spawn;
use pal_async::timer::PolledTimer;
use pal_async::DefaultDriver;
use pal_async::DefaultPool;
use parking_lot::Mutex;
Expand Down Expand Up @@ -169,6 +170,8 @@ use vmotherboard::BaseChipsetBuilderOutput;
use vmotherboard::ChipsetDeviceHandle;
use zerocopy::FromZeroes;

use crate::options::TestScenarioConfig;

pub(crate) const PM_BASE: u16 = 0x400;
pub(crate) const SYSTEM_IRQ_ACPI: u32 = 9;
pub(crate) const WDAT_PORT: u16 = 0x30;
Expand Down Expand Up @@ -300,6 +303,9 @@ pub struct UnderhillEnvCfg {
pub hide_isolation: bool,
/// Enable nvme keep alive.
pub nvme_keep_alive: bool,

/// Test scenario to simulate, if one is configured.
pub test_configuration: Option<TestScenarioConfig>,
}

/// Bundle of config + runtime objects for hooking into the underhill remote
Expand Down Expand Up @@ -494,6 +500,18 @@ impl UnderhillVmWorker {
"cannot have saved state from two different sources"
);

if let Some(TestScenarioConfig::TestScenarioServicingRestoreStuck) =
params.env_cfg.test_configuration
{
tracing::info!(
"Test configuration SERVICING_RESTORE_STUCK is set. Waiting indefinitely in restore"
bhargavshah1988 marked this conversation as resolved.
Show resolved Hide resolved
);
let mut timer = PolledTimer::new(&early_init_driver);
bhargavshah1988 marked this conversation as resolved.
Show resolved Hide resolved
loop {
timer.sleep(Duration::from_secs(1)).await;
}
}

tracing::info!("VTL2 restart, getting servicing state from the host");

let saved_state_buf = get_client
Expand Down Expand Up @@ -3066,6 +3084,7 @@ async fn new_underhill_vm(
shared_vis_pool: shared_vis_pages_pool,
private_pool,
nvme_keep_alive: env_cfg.nvme_keep_alive,
test_configuration: env_cfg.test_configuration,
};

Ok(loaded_vm)
Expand Down
Loading