Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(qualification): implementing multiple starting versions #743

Merged
merged 19 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions .github/workflows/qualify.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ on:
description: "The version that should be qualified"
type: string
default: ""

# Run one qualification per commit.
# This means we can have multiple qualifications of different versions
# in parallel but only one qualification of each commit
Expand All @@ -16,14 +17,33 @@ concurrency:
cancel-in-progress: true

jobs:
setup:
runs-on:
labels: dre-runner-custom
container: ghcr.io/dfinity/dre/actions-runner:7efd87b0eac3ebd255be7efe00a3b39b0f9e9fc1
outputs:
matrix: ${{ steps.generate.outputs.output }}
steps:
- id: generate
shell: bash
run: |
sudo apt-get install -y jq
UNIQUE_VERSIONS=$(curl https://rollout-dashboard.ch1-rel1.dfinity.network/api/v1/rollouts | jq -r '.[] | select (.state != "failed") | select (.state != "complete") | .batches | to_entries[] | "\(.value)"' | jq '.subnets[].git_revision' | sort | uniq | jq -s )
echo "Will qualify starting from versions: ${UNIQUE_VERSIONS}"
echo "output=$(jq -cn --argjson versions "$UNIQUE_VERSIONS" '{version: $versions}')" >> $GITHUB_OUTPUT

qualify:
name: Qualifying ${{ matrix.version }} -> ${{ inputs.version }}
needs: setup
strategy:
matrix: ${{ fromJson(needs.setup.outputs.matrix) }}
runs-on:
labels: dre-runner-custom
container: ghcr.io/dfinity/dre/actions-runner:7efd87b0eac3ebd255be7efe00a3b39b0f9e9fc1
steps:
- uses: actions/checkout@v4
with:
repository: 'dfinity/dre' # this needs to be specified so it can be kicked off from the ic repo
repository: "dfinity/dre" # this needs to be specified so it can be kicked off from the ic repo

- name: "🔍 Check if the version is set"
shell: bash
Expand All @@ -49,4 +69,4 @@ jobs:
run: |
mkdir -p ~/.config/dfx/identity/xnet-testing/
echo "${{ secrets.XNET_PRINCIPAL_KEY }}" > ~/.config/dfx/identity/xnet-testing/identity.pem
bazel run //rs/qualifier -- "${{ inputs.version }}"
bazel run //rs/qualifier -- "${{ inputs.version }}" --initial-versions ${{ matrix.version }}
6 changes: 5 additions & 1 deletion Cargo.Bazel.lock
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"checksum": "b595f31cbc3e94b284e51ec8b54de93639b9409006890414e0c0d18f4b917606",
"checksum": "ea5dd38d60356ea002de789a5eb67763d68cb6b2e016ed921f20e9c61a9b1d9d",
"crates": {
"actix-codec 0.5.2": {
"name": "actix-codec",
Expand Down Expand Up @@ -35361,6 +35361,10 @@
"id": "dirs 5.0.1",
"target": "dirs"
},
{
"id": "futures 0.3.30",
"target": "futures"
},
{
"id": "ic-nervous-system-common-test-keys 0.9.0",
"target": "ic_nervous_system_common_test_keys"
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion rs/cli/src/qualification/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,13 @@ impl QualificationExecutor {
step: s,
})
.collect_vec(),
step_ctx: StepCtx::new(ctx.dre_ctx, ctx.artifacts, ctx.grafana_endpoint)?,
step_ctx: StepCtx::new(
ctx.dre_ctx,
ctx.artifacts,
ctx.grafana_endpoint,
ctx.from_version.clone(),
ctx.to_version.clone(),
)?,
from_version: ctx.from_version,
to_version: ctx.to_version,
})
Expand Down
16 changes: 14 additions & 2 deletions rs/cli/src/qualification/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,18 @@ pub struct StepCtx {
log_path: Option<PathBuf>,
client: Client,
grafana_url: Option<String>,
from_version: String,
to_version: String,
}

impl StepCtx {
pub fn new(dre_ctx: DreContext, artifacts: Option<PathBuf>, grafana_url: Option<String>) -> anyhow::Result<Self> {
pub fn new(
dre_ctx: DreContext,
artifacts: Option<PathBuf>,
grafana_url: Option<String>,
from_version: String,
to_version: String,
) -> anyhow::Result<Self> {
let artifacts_of_run = artifacts.as_ref().map(|t| {
if let Err(e) = std::fs::create_dir_all(t) {
panic!("Couldn't create dir {}: {:?}", t.display(), e)
Expand All @@ -53,6 +61,8 @@ impl StepCtx {
artifacts: artifacts_of_run,
client: ClientBuilder::new().timeout(REQWEST_TIMEOUT).build()?,
grafana_url,
from_version: from_version[..6].to_string(),
to_version: to_version[..6].to_string(),
})
}

Expand Down Expand Up @@ -186,8 +196,10 @@ impl StepCtx {
fn _print_with_time(&self, message: String, add_new_line: bool) {
let current_time = Utc::now();
let formatted = format!(
"[{}]{}{}",
"[{} {} -> {}]{}{}",
current_time,
self.from_version,
self.to_version,
match add_new_line {
true => '\n',
false => ' ',
Expand Down
1 change: 1 addition & 0 deletions rs/qualifier/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ backon = { workspace = true }
chrono.workspace = true
indexmap.workspace = true
strum.workspace = true
futures.workspace = true
41 changes: 37 additions & 4 deletions rs/qualifier/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::{path::PathBuf, process::Stdio, str::FromStr};
use clap::Parser;

use ic_nervous_system_common_test_keys::TEST_NEURON_1_OWNER_KEYPAIR;
use strum::Display;
use tokio::process::Command;
const TEST_NEURON_1_IDENTITY_PATH: &str = ".config/dfx/identity/test_neuron_1/identity.pem";
const XNET_TESTING_IDENTITY_PATH: &str = ".config/dfx/identity/xnet-testing/identity.pem";
Expand All @@ -13,11 +14,14 @@ pub struct Args {
/// Version to qualify
pub version_to_qualify: String,

/// Specify a version from which the qualification
/// should start. The default will be the same
/// version as the NNS
/// Specify a list of versions from which the qualification
/// should start. The default will be the same forecasted
/// versions that will end up on mainnet after the active
/// rollout is finished.
///
/// The information is gathered from https://rollout-dashboard.ch1-rel1.dfinity.network/api/v1/rollouts
#[clap(long)]
pub initial_version: Option<String>,
pub initial_versions: Option<Vec<String>>,

/// Path which contains the layout of the network to
/// be deployed. The default value will be a network
Expand All @@ -41,6 +45,35 @@ pub struct Args {
/// A range can be: `4`, `3..`, `..3`, `1..3`
#[clap(long)]
pub step_range: Option<String>,

/// If there are multiple forecasted versions on the network at
/// the end of an active rollout this controls how the qualification
/// will run.
#[clap(long, default_value_t = QualificationMode::Sequential)]
pub mode: QualificationMode,
}

/// Strategy used to run the qualification when there is more than one
/// starting version to qualify from.
///
/// The `strum` attribute below makes the `Display` output snake_case
/// (`sequential` / `parallel`), which is what the `--mode` default value
/// and the "Qualification will run in ... mode" log line print.
// NOTE(review): the variant doc comments below are presumably surfaced by
// clap as the help text of each possible `--mode` value, so treat edits to
// them as user-facing changes — confirm against the clap version in use.
#[derive(Display, Clone, clap::ValueEnum)]
#[strum(serialize_all = "snake_case")]
pub enum QualificationMode {
/// Less invasive towards farm, but slower.
///
/// If default config is used this means 16 vm's
/// Each qualification is run in sequence and
/// observed time for one qualification is roughly
/// 1h 30mins, meaning that if there is more than
/// 2 beginning versions qualification can take up
/// to 5 hours to complete.
Sequential,
/// More invasive towards farm, but faster.
///
/// If the default config is used this means that
/// qualifier will spin up N amount of networks
/// where N is the number of start versions for
/// qualification. Each network (for the default config)
/// will take 16 vm's meaning that in total qualifier
/// will take 16 * N vm's.
Parallel,
}

impl Args {
Expand Down
7 changes: 3 additions & 4 deletions rs/qualifier/src/ict_util.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{path::PathBuf, process::Stdio, str::FromStr, time::Duration};
use std::{path::PathBuf, process::Stdio, time::Duration};

use itertools::Itertools;
use log::info;
Expand All @@ -19,9 +19,8 @@ const KEEPALIVE_PERIOD: Duration = Duration::from_secs(30);
const KEEPALIVE_PERIOD_ERROR: Duration = Duration::from_secs(5);
pub const FARM_BASE_URL: &str = "https://farm.dfinity.systems";

pub async fn ict(ic_git: PathBuf, config: String, token: CancellationToken, sender: Sender<Message>) -> anyhow::Result<()> {
let ic_config = PathBuf::from_str("/tmp/ic_config.json")?;
std::fs::write(&ic_config, &config)?;
pub async fn ict(ic_git: PathBuf, token: CancellationToken, sender: Sender<Message>, artifacts: PathBuf) -> anyhow::Result<()> {
let ic_config = artifacts.join("ic-config.json");

let command = "gitlab-ci/container/container-run.sh";
let args = &[
Expand Down
100 changes: 70 additions & 30 deletions rs/qualifier/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
use std::{fmt::Display, path::PathBuf, str::FromStr, time::Duration};
use std::{
fmt::Display,
path::{Path, PathBuf},
str::FromStr,
time::Duration,
};

use clap::Parser;
use cli::Args;
use futures::future::join_all;
use ict_util::ict;
use log::info;
use qualify_util::qualify;
Expand Down Expand Up @@ -33,26 +39,70 @@ async fn main() -> anyhow::Result<()> {
info!("Principal key created");

args.ensure_xnet_test_key()?;
// Take in one version and figure out what is the base version
//
// To find the initial version we could take NNS version?
let initial_version = if let Some(ref v) = args.initial_version {
v.to_string()

let initial_versions = if let Some(ref v) = args.initial_versions {
v
} else {
info!("Fetching the forcasted version of NNS which will be used as starting point");
info!("Fetching the forecasted versions from mainnet which will be used as starting point");
// Fetch the starter versions
let start_version_selector = StartVersionSelectorBuilder::new()
.with_client(ClientBuilder::new().connect_timeout(Duration::from_secs(30)))
.build()
.await?;

start_version_selector.get_forcasted_version_for_mainnet_nns()?
&start_version_selector.get_forecasted_versions_from_mainnet()?
};

info!("Initial versions that will be used: {}", initial_versions.join(","));

args.ensure_git().await?;

let artifacts = PathBuf::from_str("/tmp/qualifier-artifacts")?.join(&args.version_to_qualify);
info!("Will store artifacts in: {}", artifacts.display());
std::fs::create_dir_all(&artifacts)?;
if artifacts.exists() {
info!("Making sure artifact store is empty");
std::fs::remove_dir_all(&artifacts)?;
std::fs::create_dir(&artifacts)?;
}

info!("Qualification will run in {} mode", args.mode);
let outcomes = match args.mode {
cli::QualificationMode::Sequential => {
let mut outcomes = vec![];
for iv in initial_versions {
let current_path = &artifacts.join(format!("from-{}", iv));
if let Err(e) = std::fs::create_dir(current_path) {
outcomes.push(Err(anyhow::anyhow!(e)))
}
outcomes.push(run_qualification(&args, iv.clone(), current_path, neuron_id, &private_key_pem).await)
}
outcomes
}
cli::QualificationMode::Parallel => {
join_all(initial_versions.iter().map(|iv| async {
let current_path = &artifacts.join(format!("from-{}", iv.clone()));
if let Err(e) = std::fs::create_dir(current_path) {
return Err(anyhow::anyhow!(e));
};
run_qualification(&args, iv.clone(), current_path, neuron_id, &private_key_pem).await
}))
.await
}
};

let errs = outcomes.iter().filter(|o| o.is_err()).collect::<Vec<_>>();
if !errs.is_empty() {
anyhow::bail!("Overall qualification failed due to one or more sub-qualifications failing:\n{:?}", errs)
}

Ok(())
}

async fn run_qualification(args: &Args, initial_version: String, artifacts: &Path, neuron_id: u64, private_key_pem: &Path) -> anyhow::Result<()> {
if initial_version == args.version_to_qualify {
anyhow::bail!("Initial version and version to qualify are the same")
anyhow::bail!("Starting version and version being qualified are the same: {}", args.version_to_qualify)
}
info!("Initial version that will be used: {}", initial_version);

// Generate configuration for `ict` including the initial version
//
Expand Down Expand Up @@ -83,15 +133,13 @@ async fn main() -> anyhow::Result<()> {
"num_unassigned_nodes": 4,
"initial_version": "{}"
}}"#,
&initial_version
initial_version
);

// Validate that the string is valid json
serde_json::to_string_pretty(&serde_json::from_str::<Value>(&config)?)?
};
info!("Using configuration: \n{}", config);

args.ensure_git().await?;
info!("[{} -> {}]: Using configuration: \n{}", initial_version, args.version_to_qualify, config);

// Run ict and capture its output
//
Expand All @@ -103,33 +151,25 @@ async fn main() -> anyhow::Result<()> {
let token = CancellationToken::new();
let (sender, mut receiver) = mpsc::channel(2);

let artifacts = PathBuf::from_str("/tmp/qualifier-artifacts")?.join(&args.version_to_qualify);
info!("Will store artifacts in: {}", artifacts.display());
std::fs::create_dir_all(&artifacts)?;
if artifacts.exists() {
info!("Making sure artifact store is empty");
std::fs::remove_dir_all(&artifacts)?;
std::fs::create_dir(&artifacts)?;
}

let mut file = std::fs::File::create_new(artifacts.join("ic-config.json"))?;
writeln!(file, "{}", &config)?;
let current_network_name = format!("{}-{}", NETWORK_NAME, initial_version);

tokio::select! {
res = ict(args.ic_repo_path.clone(), config, token.clone(), sender) => res?,
res = ict(args.ic_repo_path.clone(), token.clone(), sender, artifacts.to_path_buf()) => res?,
res = qualify(
&mut receiver,
private_key_pem,
private_key_pem.to_path_buf(),
neuron_id,
NETWORK_NAME,
initial_version,
current_network_name.as_str(),
initial_version.to_owned(),
args.version_to_qualify.to_string(),
artifacts,
args.step_range
artifacts.to_path_buf(),
args.step_range.clone()
) => res?
};

info!("Finished qualifier run for: {}", args.version_to_qualify);
info!("Finished qualifier run for: {} -> {}", initial_version, args.version_to_qualify);

token.cancel();
Ok(())
Expand Down
Loading
Loading