Skip to content

Commit

Permalink
feat(qualification): implementing multiple starting versions (#743)
Browse files Browse the repository at this point in the history
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: sa-github-api <138766536+sa-github-api@users.noreply.github.com>
  • Loading branch information
3 people committed Aug 20, 2024
1 parent a948a92 commit 6b7626b
Show file tree
Hide file tree
Showing 10 changed files with 180 additions and 49 deletions.
24 changes: 22 additions & 2 deletions .github/workflows/qualify.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ on:
description: "The version that should be qualified"
type: string
default: ""

# Run one qualification per commit.
# This means we can have multiple qualifications of different versions
# in parallel but only one qualification of each commit
Expand All @@ -16,14 +17,33 @@ concurrency:
cancel-in-progress: true

jobs:
setup:
runs-on:
labels: dre-runner-custom
container: ghcr.io/dfinity/dre/actions-runner:7efd87b0eac3ebd255be7efe00a3b39b0f9e9fc1
outputs:
matrix: ${{ steps.generate.outputs.output }}
steps:
- id: generate
shell: bash
run: |
sudo apt-get install -y jq
UNIQUE_VERSIONS=$(curl https://rollout-dashboard.ch1-rel1.dfinity.network/api/v1/rollouts | jq -r '.[] | select (.state != "failed") | select (.state != "complete") | .batches | to_entries[] | "\(.value)"' | jq '.subnets[].git_revision' | sort | uniq | jq -s )
echo "Will qualify starting from versions: ${UNIQUE_VERSIONS}"
echo "output=$(jq -cn --argjson versions "$UNIQUE_VERSIONS" '{version: $versions}')" >> $GITHUB_OUTPUT
qualify:
name: Qualifying ${{ matrix.version }} -> ${{ inputs.version }}
needs: setup
strategy:
matrix: ${{ fromJson(needs.setup.outputs.matrix) }}
runs-on:
labels: dre-runner-custom
container: ghcr.io/dfinity/dre/actions-runner:7efd87b0eac3ebd255be7efe00a3b39b0f9e9fc1
steps:
- uses: actions/checkout@v4
with:
repository: 'dfinity/dre' # this needs to be specified so it can be kicked off from the ic repo
repository: "dfinity/dre" # this needs to be specified so it can be kicked off from the ic repo

- name: "🔍 Check if the version is set"
shell: bash
Expand All @@ -49,4 +69,4 @@ jobs:
run: |
mkdir -p ~/.config/dfx/identity/xnet-testing/
echo "${{ secrets.XNET_PRINCIPAL_KEY }}" > ~/.config/dfx/identity/xnet-testing/identity.pem
bazel run //rs/qualifier -- "${{ inputs.version }}"
bazel run //rs/qualifier -- "${{ inputs.version }}" --initial-versions ${{ matrix.version }}
6 changes: 5 additions & 1 deletion Cargo.Bazel.lock
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"checksum": "b595f31cbc3e94b284e51ec8b54de93639b9409006890414e0c0d18f4b917606",
"checksum": "ea5dd38d60356ea002de789a5eb67763d68cb6b2e016ed921f20e9c61a9b1d9d",
"crates": {
"actix-codec 0.5.2": {
"name": "actix-codec",
Expand Down Expand Up @@ -35361,6 +35361,10 @@
"id": "dirs 5.0.1",
"target": "dirs"
},
{
"id": "futures 0.3.30",
"target": "futures"
},
{
"id": "ic-nervous-system-common-test-keys 0.9.0",
"target": "ic_nervous_system_common_test_keys"
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion rs/cli/src/qualification/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,13 @@ impl QualificationExecutor {
step: s,
})
.collect_vec(),
step_ctx: StepCtx::new(ctx.dre_ctx, ctx.artifacts, ctx.grafana_endpoint)?,
step_ctx: StepCtx::new(
ctx.dre_ctx,
ctx.artifacts,
ctx.grafana_endpoint,
ctx.from_version.clone(),
ctx.to_version.clone(),
)?,
from_version: ctx.from_version,
to_version: ctx.to_version,
})
Expand Down
16 changes: 14 additions & 2 deletions rs/cli/src/qualification/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,18 @@ pub struct StepCtx {
log_path: Option<PathBuf>,
client: Client,
grafana_url: Option<String>,
from_version: String,
to_version: String,
}

impl StepCtx {
pub fn new(dre_ctx: DreContext, artifacts: Option<PathBuf>, grafana_url: Option<String>) -> anyhow::Result<Self> {
pub fn new(
dre_ctx: DreContext,
artifacts: Option<PathBuf>,
grafana_url: Option<String>,
from_version: String,
to_version: String,
) -> anyhow::Result<Self> {
let artifacts_of_run = artifacts.as_ref().map(|t| {
if let Err(e) = std::fs::create_dir_all(t) {
panic!("Couldn't create dir {}: {:?}", t.display(), e)
Expand All @@ -53,6 +61,8 @@ impl StepCtx {
artifacts: artifacts_of_run,
client: ClientBuilder::new().timeout(REQWEST_TIMEOUT).build()?,
grafana_url,
from_version: from_version[..6].to_string(),
to_version: to_version[..6].to_string(),
})
}

Expand Down Expand Up @@ -186,8 +196,10 @@ impl StepCtx {
fn _print_with_time(&self, message: String, add_new_line: bool) {
let current_time = Utc::now();
let formatted = format!(
"[{}]{}{}",
"[{} {} -> {}]{}{}",
current_time,
self.from_version,
self.to_version,
match add_new_line {
true => '\n',
false => ' ',
Expand Down
1 change: 1 addition & 0 deletions rs/qualifier/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ backon = { workspace = true }
chrono.workspace = true
indexmap.workspace = true
strum.workspace = true
futures.workspace = true
41 changes: 37 additions & 4 deletions rs/qualifier/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::{path::PathBuf, process::Stdio, str::FromStr};
use clap::Parser;

use ic_nervous_system_common_test_keys::TEST_NEURON_1_OWNER_KEYPAIR;
use strum::Display;
use tokio::process::Command;
const TEST_NEURON_1_IDENTITY_PATH: &str = ".config/dfx/identity/test_neuron_1/identity.pem";
const XNET_TESTING_IDENTITY_PATH: &str = ".config/dfx/identity/xnet-testing/identity.pem";
Expand All @@ -13,11 +14,14 @@ pub struct Args {
/// Version to qualify
pub version_to_qualify: String,

/// Specify a version from which the qualification
/// should start. The default will be the same
/// version as the NNS
/// Specify a list of versions from which the qualification
/// should start. The default will be the same forecasted
/// versions that will end up on mainnet after the active
/// rollout is finished.
///
/// The information is gathered from https://rollout-dashboard.ch1-rel1.dfinity.network/api/v1/rollouts
#[clap(long)]
pub initial_version: Option<String>,
pub initial_versions: Option<Vec<String>>,

/// Path which contains the layout of the network to
/// be deployed. The default value will be a network
Expand All @@ -41,6 +45,35 @@ pub struct Args {
/// A range can be: `4`, `3..`, `..3`, `1..3`
#[clap(long)]
pub step_range: Option<String>,

/// If there are multiple forecasted versions on the network at
/// the end of an active rollout this controls how the qualification
/// will run.
#[clap(long, default_value_t = QualificationMode::Sequential)]
pub mode: QualificationMode,
}

/// Strategy used to schedule the individual qualification runs when
/// there is more than one starting version to qualify from.
// `serialize_all = "snake_case"` makes the strum-derived `Display` render the
// variants in snake case (`sequential`, `parallel`).
#[derive(Display, Clone, clap::ValueEnum)]
#[strum(serialize_all = "snake_case")]
pub enum QualificationMode {
/// Less invasive towards farm, but slower.
///
/// If the default config is used this means 16 VMs.
/// Each qualification is run in sequence and the
/// observed time for one qualification is roughly
/// 1h 30mins, meaning that if there are more than
/// 2 starting versions the qualification can take up
/// to 5 hours to complete.
Sequential,
/// More invasive towards farm, but faster.
///
/// If the default config is used this means that the
/// qualifier will spin up N networks, where N is the
/// number of starting versions for the qualification.
/// Each network (for the default config) will take
/// 16 VMs, meaning that in total the qualifier will
/// take 16 * N VMs.
Parallel,
}

impl Args {
Expand Down
7 changes: 3 additions & 4 deletions rs/qualifier/src/ict_util.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{path::PathBuf, process::Stdio, str::FromStr, time::Duration};
use std::{path::PathBuf, process::Stdio, time::Duration};

use itertools::Itertools;
use log::info;
Expand All @@ -19,9 +19,8 @@ const KEEPALIVE_PERIOD: Duration = Duration::from_secs(30);
const KEEPALIVE_PERIOD_ERROR: Duration = Duration::from_secs(5);
pub const FARM_BASE_URL: &str = "https://farm.dfinity.systems";

pub async fn ict(ic_git: PathBuf, config: String, token: CancellationToken, sender: Sender<Message>) -> anyhow::Result<()> {
let ic_config = PathBuf::from_str("/tmp/ic_config.json")?;
std::fs::write(&ic_config, &config)?;
pub async fn ict(ic_git: PathBuf, token: CancellationToken, sender: Sender<Message>, artifacts: PathBuf) -> anyhow::Result<()> {
let ic_config = artifacts.join("ic-config.json");

let command = "gitlab-ci/container/container-run.sh";
let args = &[
Expand Down
100 changes: 70 additions & 30 deletions rs/qualifier/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
use std::{fmt::Display, path::PathBuf, str::FromStr, time::Duration};
use std::{
fmt::Display,
path::{Path, PathBuf},
str::FromStr,
time::Duration,
};

use clap::Parser;
use cli::Args;
use futures::future::join_all;
use ict_util::ict;
use log::info;
use qualify_util::qualify;
Expand Down Expand Up @@ -33,26 +39,70 @@ async fn main() -> anyhow::Result<()> {
info!("Principal key created");

args.ensure_xnet_test_key()?;
// Take in one version and figure out what is the base version
//
// To find the initial version we could take NNS version?
let initial_version = if let Some(ref v) = args.initial_version {
v.to_string()

let initial_versions = if let Some(ref v) = args.initial_versions {
v
} else {
info!("Fetching the forcasted version of NNS which will be used as starting point");
info!("Fetching the forecasted versions from mainnet which will be used as starting point");
// Fetch the starter versions
let start_version_selector = StartVersionSelectorBuilder::new()
.with_client(ClientBuilder::new().connect_timeout(Duration::from_secs(30)))
.build()
.await?;

start_version_selector.get_forcasted_version_for_mainnet_nns()?
&start_version_selector.get_forecasted_versions_from_mainnet()?
};

info!("Initial versions that will be used: {}", initial_versions.join(","));

args.ensure_git().await?;

let artifacts = PathBuf::from_str("/tmp/qualifier-artifacts")?.join(&args.version_to_qualify);
info!("Will store artifacts in: {}", artifacts.display());
std::fs::create_dir_all(&artifacts)?;
if artifacts.exists() {
info!("Making sure artifact store is empty");
std::fs::remove_dir_all(&artifacts)?;
std::fs::create_dir(&artifacts)?;
}

info!("Qualification will run in {} mode", args.mode);
let outcomes = match args.mode {
cli::QualificationMode::Sequential => {
let mut outcomes = vec![];
for iv in initial_versions {
let current_path = &artifacts.join(format!("from-{}", iv));
if let Err(e) = std::fs::create_dir(current_path) {
outcomes.push(Err(anyhow::anyhow!(e)))
}
outcomes.push(run_qualification(&args, iv.clone(), current_path, neuron_id, &private_key_pem).await)
}
outcomes
}
cli::QualificationMode::Parallel => {
join_all(initial_versions.iter().map(|iv| async {
let current_path = &artifacts.join(format!("from-{}", iv.clone()));
if let Err(e) = std::fs::create_dir(current_path) {
return Err(anyhow::anyhow!(e));
};
run_qualification(&args, iv.clone(), current_path, neuron_id, &private_key_pem).await
}))
.await
}
};

let errs = outcomes.iter().filter(|o| o.is_err()).collect::<Vec<_>>();
if !errs.is_empty() {
anyhow::bail!("Overall qualification failed due to one or more sub-qualifications failing:\n{:?}", errs)
}

Ok(())
}

async fn run_qualification(args: &Args, initial_version: String, artifacts: &Path, neuron_id: u64, private_key_pem: &Path) -> anyhow::Result<()> {
if initial_version == args.version_to_qualify {
anyhow::bail!("Initial version and version to qualify are the same")
anyhow::bail!("Starting version and version being qualified are the same: {}", args.version_to_qualify)
}
info!("Initial version that will be used: {}", initial_version);

// Generate configuration for `ict` including the initial version
//
Expand Down Expand Up @@ -83,15 +133,13 @@ async fn main() -> anyhow::Result<()> {
"num_unassigned_nodes": 4,
"initial_version": "{}"
}}"#,
&initial_version
initial_version
);

// Validate that the string is valid json
serde_json::to_string_pretty(&serde_json::from_str::<Value>(&config)?)?
};
info!("Using configuration: \n{}", config);

args.ensure_git().await?;
info!("[{} -> {}]: Using configuration: \n{}", initial_version, args.version_to_qualify, config);

// Run ict and capture its output
//
Expand All @@ -103,33 +151,25 @@ async fn main() -> anyhow::Result<()> {
let token = CancellationToken::new();
let (sender, mut receiver) = mpsc::channel(2);

let artifacts = PathBuf::from_str("/tmp/qualifier-artifacts")?.join(&args.version_to_qualify);
info!("Will store artifacts in: {}", artifacts.display());
std::fs::create_dir_all(&artifacts)?;
if artifacts.exists() {
info!("Making sure artifact store is empty");
std::fs::remove_dir_all(&artifacts)?;
std::fs::create_dir(&artifacts)?;
}

let mut file = std::fs::File::create_new(artifacts.join("ic-config.json"))?;
writeln!(file, "{}", &config)?;
let current_network_name = format!("{}-{}", NETWORK_NAME, initial_version);

tokio::select! {
res = ict(args.ic_repo_path.clone(), config, token.clone(), sender) => res?,
res = ict(args.ic_repo_path.clone(), token.clone(), sender, artifacts.to_path_buf()) => res?,
res = qualify(
&mut receiver,
private_key_pem,
private_key_pem.to_path_buf(),
neuron_id,
NETWORK_NAME,
initial_version,
current_network_name.as_str(),
initial_version.to_owned(),
args.version_to_qualify.to_string(),
artifacts,
args.step_range
artifacts.to_path_buf(),
args.step_range.clone()
) => res?
};

info!("Finished qualifier run for: {}", args.version_to_qualify);
info!("Finished qualifier run for: {} -> {}", initial_version, args.version_to_qualify);

token.cancel();
Ok(())
Expand Down
Loading

0 comments on commit 6b7626b

Please sign in to comment.