Skip to content

Commit

Permalink
feat(recovery): [CON-1407] Add CLI option to use existing binaries fo…
Browse files Browse the repository at this point in the history
…r local recoveries (#3301)

This PR adds `ic-admin` to the guest OS & adds a CLI argument to make
the recovery tool use the local binaries from `/opt/ic/bin`.

With this, we can kick off a non-interactive, local subnet recovery via
ssh, by calling the recovery tool with the right arguments. With this
change, we should be able to create a system test for local recoveries.


To confirm that this works, I performed a non-interactive recovery on a
testnet with the following arguments:

```
/opt/ic/bin/ic-recovery \
	--nns-url http://[2602:fb2b:110:10:5025:8bff:fe14:daee]:8080 \
	--test \
	--skip-prompts \
	--use-local-binaries \
	app-subnet-recovery \
	--subnet-id c3es6-ogziq-a5elm-47c4o-vkins-7kiu4-ekgun-oaeo2-ojd2m-j26wh-eae \
	--pub-key "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFx8H9eVvkxl1n7crh/uUzqH03m8Qwlo2k5EE3MJa4Bs [email protected]" \
	--download-node 2602:fb2b:110:10:506a:22ff:fe1c:ef69 \
	--upload-method local \
	--keep-downloaded-state false
```

The subnet recovered successfully without user input.
  • Loading branch information
dist1ll authored Jan 8, 2025
1 parent 9377651 commit 41b1a2e
Show file tree
Hide file tree
Showing 11 changed files with 36 additions and 3 deletions.
1 change: 1 addition & 0 deletions ic-os/guestos/defs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def image_deps(mode, malicious = False):
"//publish/binaries:ic-boundary-tls": "/opt/ic/bin/ic-boundary:0755", # API boundary node binary, required by the IC protocol. The same GuestOS is used both for the replica and API boundary nodes.
"//publish/binaries:ic-consensus-pool-util": "/opt/ic/bin/ic-consensus-pool-util:0755", # May be used during recoveries to export/import consensus pool artifacts.
"//publish/binaries:ic-recovery": "/opt/ic/bin/ic-recovery:0755", # Required for performing subnet recoveries on the node directly.
"//publish/binaries:ic-admin": "/opt/ic/bin/ic-admin:0755", # Required for issuing recovery proposals directly from the node (primarily used for system tests).
"//publish/binaries:state-tool": "/opt/ic/bin/state-tool:0755", # May be used during recoveries for calculating the state hash and inspecting the state more generally.
"//publish/binaries:ic-regedit": "/opt/ic/bin/ic-regedit:0755", # May be used for inspecting and recovering the registry.
# Required by the GuestOS
Expand Down
3 changes: 3 additions & 0 deletions rs/recovery/src/args_merger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ mod tests {
key_file: Some(PathBuf::from("/dir1/key_file")),
test_mode: true,
skip_prompts: true,
use_local_binaries: false,
};
let args2 = RecoveryArgs {
dir: PathBuf::from("/dir2/"),
Expand All @@ -74,6 +75,7 @@ mod tests {
key_file: None,
test_mode: false,
skip_prompts: true,
use_local_binaries: false,
};

let expected = RecoveryArgs {
Expand All @@ -83,6 +85,7 @@ mod tests {
key_file: args1.key_file.clone(),
test_mode: args2.test_mode,
skip_prompts: true,
use_local_binaries: false,
};

assert_eq!(expected, merge(&logger, "test", &args1, &args2).unwrap());
Expand Down
6 changes: 6 additions & 0 deletions rs/recovery/src/cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ pub struct RecoveryToolArgs {
#[clap(long)]
pub skip_prompts: bool,

/// Flag to indicate we're running recovery directly on a node, and should use
/// the locally available binaries. If this option is not set, missing binaries
/// will be downloaded.
#[clap(long)]
pub use_local_binaries: bool,

#[clap(subcommand)]
pub subcmd: Option<SubCommand>,
}
16 changes: 13 additions & 3 deletions rs/recovery/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ pub struct RecoveryArgs {
pub key_file: Option<PathBuf>,
pub test_mode: bool,
pub skip_prompts: bool,
pub use_local_binaries: bool,
}

/// The recovery struct comprises working directories for the recovery of a
Expand Down Expand Up @@ -145,13 +146,22 @@ impl Recovery {
) -> RecoveryResult<Self> {
let ssh_confirmation = !args.test_mode;
let recovery_dir = args.dir.join(RECOVERY_DIRECTORY_NAME);
let binary_dir = recovery_dir.join("binaries");
let binary_dir = if args.use_local_binaries {
PathBuf::from_str("/opt/ic/bin/").expect("bad file path string")
} else {
recovery_dir.join("binaries")
};
let data_dir = recovery_dir.join("original_data");
let work_dir = recovery_dir.join("working_dir");
let local_store_path = work_dir.join("data").join(IC_REGISTRY_LOCAL_STORE);
let nns_pem = recovery_dir.join("nns.pem");

match Recovery::create_dirs(&[&binary_dir, &data_dir, &work_dir, &local_store_path]) {
let mut to_create: Vec<&Path> = vec![&data_dir, &work_dir, &local_store_path];
if !args.use_local_binaries {
to_create.push(&binary_dir);
}

match Recovery::create_dirs(&to_create) {
Err(RecoveryError::IoError(s, err)) => match err.kind() {
ErrorKind::PermissionDenied => Err(RecoveryError::IoError(
format!(
Expand Down Expand Up @@ -180,7 +190,7 @@ impl Recovery {
wait_for_confirmation(&logger);
}

if !binary_dir.join("ic-admin").exists() {
if !args.use_local_binaries && !binary_dir.join("ic-admin").exists() {
if let Some(version) = args.replica_version {
block_on(download_binary(
&logger,
Expand Down
1 change: 1 addition & 0 deletions rs/recovery/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ fn main() {
key_file: args.key_file,
test_mode: args.test,
skip_prompts: args.skip_prompts,
use_local_binaries: args.use_local_binaries,
};

let recovery_state = cli::read_and_maybe_update_state(&logger, recovery_args, args.subcmd);
Expand Down
1 change: 1 addition & 0 deletions rs/recovery/src/recovery_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ mod tests {
key_file: Some(PathBuf::from(dir)),
test_mode: true,
skip_prompts: true,
use_local_binaries: false,
},
subcommand_args: SubCommand::AppSubnetRecovery(AppSubnetRecoveryArgs {
subnet_id: fake_subnet_id(),
Expand Down
7 changes: 7 additions & 0 deletions rs/recovery/subnet_splitting/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ struct SplitArgs {
#[clap(long)]
pub skip_prompts: bool,

/// Flag to indicate we're running recovery directly on a node, and should use
/// the locally available binaries. If this option is not set, missing binaries
/// will be downloaded.
#[clap(long)]
pub use_local_binaries: bool,

#[clap(flatten)]
subnet_splitting_args: SubnetSplittingArgs,
}
Expand Down Expand Up @@ -145,6 +151,7 @@ fn do_split(args: SplitArgs, logger: Logger) -> RecoveryResult<()> {
key_file: args.key_file,
test_mode: args.test,
skip_prompts: args.skip_prompts,
use_local_binaries: args.use_local_binaries,
};

let subnet_splitting_state =
Expand Down
1 change: 1 addition & 0 deletions rs/tests/consensus/subnet_recovery/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ fn app_subnet_recovery_test(env: TestEnv, cfg: Config) {
key_file: Some(ssh_authorized_priv_keys_dir.join(SSH_USERNAME)),
test_mode: true,
skip_prompts: true,
use_local_binaries: false,
};

let mut unassigned_nodes = env.topology_snapshot().unassigned_nodes();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ pub fn test(env: TestEnv) {
key_file: Some(ssh_authorized_priv_keys_dir.join(SSH_USERNAME)),
test_mode: true,
skip_prompts: true,
use_local_binaries: false,
};
let subnet_args = NNSRecoveryFailoverNodesArgs {
subnet_id: topo_broken_ic.root_subnet_id(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ pub fn test(env: TestEnv) {
key_file: Some(ssh_authorized_priv_keys_dir.join(SSH_USERNAME)),
test_mode: true,
skip_prompts: true,
use_local_binaries: false,
};

// unlike during a production recovery using the CLI, here we already know all of parameters
Expand Down
1 change: 1 addition & 0 deletions rs/tests/consensus/subnet_splitting_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ fn subnet_splitting_test(env: TestEnv) {
),
test_mode: true,
skip_prompts: true,
use_local_binaries: false,
};

let subnet_splitting_args = SubnetSplittingArgs {
Expand Down

0 comments on commit 41b1a2e

Please sign in to comment.