From 48c5d32c727d2f79b6f5382d2d4688b095e23d14 Mon Sep 17 00:00:00 2001 From: BGluth Date: Wed, 17 Jan 2024 21:33:28 -0700 Subject: [PATCH 01/11] Clippy fixes and removed some warnings --- src/nibbles.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/nibbles.rs b/src/nibbles.rs index b463feb..c4b166c 100644 --- a/src/nibbles.rs +++ b/src/nibbles.rs @@ -607,6 +607,9 @@ impl Nibbles { let hex_string_raw = hex_encode_f(&byte_buf[(64 - count_bytes)..64]); let hex_char_iter_raw = hex_string_raw.chars(); + + // We need this skip to make both match arms have the same type. + #[allow(clippy::iter_skip_zero)] let mut hex_string = String::from("0x"); match is_even(self.count) { false => hex_string.extend(hex_char_iter_raw.skip(1)), From 9d5e1ed63c258fc3e9d71cf08b730676600087bb Mon Sep 17 00:00:00 2001 From: BGluth Date: Thu, 1 Feb 2024 16:56:56 -0700 Subject: [PATCH 02/11] Initial work on trie stats --- src/debug_tools/mod.rs | 1 + src/debug_tools/stats.rs | 112 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 src/debug_tools/stats.rs diff --git a/src/debug_tools/mod.rs b/src/debug_tools/mod.rs index c05eb26..8e25c6a 100644 --- a/src/debug_tools/mod.rs +++ b/src/debug_tools/mod.rs @@ -4,3 +4,4 @@ pub mod common; pub mod diff; pub mod query; +pub mod stats; diff --git a/src/debug_tools/stats.rs b/src/debug_tools/stats.rs new file mode 100644 index 0000000..073ece8 --- /dev/null +++ b/src/debug_tools/stats.rs @@ -0,0 +1,112 @@ +use crate::partial_trie::{Node, PartialTrie}; + +#[derive(Debug, Default)] +pub struct TrieStats { + pub counts: NodeCounts, + pub depth_stats: DepthStats, +} + +#[derive(Debug, Default)] +pub struct NodeCounts { + empty: usize, + hash: usize, + branch: usize, + extension: usize, + leaf: usize, +} + +impl NodeCounts { + pub fn total_nodes(&self) -> usize { + self.empty + self.total_node_non_empty() + } + + pub fn total_node_non_empty(&self) -> usize { + self.branch + self.extension + self.hash_and_leaf_node_count() + } + + pub fn hash_and_leaf_node_count(&self) -> usize { + self.hash + self.leaf + } + + pub fn compare(&self, _other: &Self) -> TrieComparison { + todo!() + } +} + +#[derive(Debug, Default)] +pub struct TrieComparison {} + +#[derive(Debug, Default)] +struct CurrTrackingState { + counts: NodeCounts, + leaf_and_hash_depth_sum: u64, + lowest_depth: usize, +} + +impl CurrTrackingState { + fn update_lowest_depth_if_larger(&mut self, curr_depth: usize) { + if self.lowest_depth > curr_depth { + self.lowest_depth = curr_depth; + } + } +} + +/// Depth in terms of node depth (not key length). +#[derive(Debug, Default)] +pub struct DepthStats { + pub lowest_depth: usize, + pub avg_leaf_depth: f32, +} + +pub fn get_trie_stats(trie: &T) -> TrieStats { + let mut state = CurrTrackingState::default(); + + get_trie_stats_rec(trie, &mut state, 0); + + let depth_stats = DepthStats { + lowest_depth: state.lowest_depth, + avg_leaf_depth: state.leaf_and_hash_depth_sum as f32 + / state.counts.hash_and_leaf_node_count() as f32, + }; + + TrieStats { + counts: state.counts, + depth_stats, + } +} + +fn get_trie_stats_rec( + node: &Node, + state: &mut CurrTrackingState, + curr_depth: usize, +) { + match node { + Node::Empty => { + state.counts.empty += 1; + } + Node::Hash(_) => { + state.counts.hash += 1; + state.leaf_and_hash_depth_sum += curr_depth as u64; + state.update_lowest_depth_if_larger(curr_depth); + } + Node::Branch { children, value: _ } => { + state.counts.branch += 1; + + for c in children { + get_trie_stats_rec(c, state, curr_depth + 1); + } + } + Node::Extension { nibbles: _, child } => { + state.counts.extension += 1; + get_trie_stats_rec(child, state, curr_depth + 1); + } + Node::Leaf { + nibbles: _, + value: _, + } => { + state.counts.leaf += 1; + state.leaf_and_hash_depth_sum += curr_depth as u64; + state.update_lowest_depth_if_larger(curr_depth); + } + } +} From 5806e0d43f47015c5d562cee9a03861746f31c8e Mon Sep 17 00:00:00 2001 From: BGluth Date: Mon, 5 Feb 2024 14:05:39 -0700 Subject: [PATCH 03/11] Impled `Display` for all of the stat types --- src/debug_tools/stats.rs | 181 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 171 insertions(+), 10 deletions(-) diff --git a/src/debug_tools/stats.rs b/src/debug_tools/stats.rs index 073ece8..2ee82b0 100644 --- a/src/debug_tools/stats.rs +++ b/src/debug_tools/stats.rs @@ -1,11 +1,37 @@ +use std::fmt::{self, Display}; + use crate::partial_trie::{Node, PartialTrie}; #[derive(Debug, Default)] pub struct TrieStats { + pub name: Option, pub counts: NodeCounts, pub depth_stats: DepthStats, } +impl Display for TrieStats { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Trie Stats:")?; + + match self.name.as_ref() { + Some(name) => writeln!(f, " ({})", name)?, + None => writeln!(f)?, + } + + writeln!(f, "Counts: {}", self.counts)?; + writeln!(f, "Depth stats: {}", self.depth_stats) + } +} + +impl TrieStats { + pub fn compare(&self, other: &Self) -> TrieComparison { + TrieComparison { + node_comp: self.counts.compare(&other.counts), + depth_comp: self.depth_stats.compare(&other.depth_stats), + } + } +} + #[derive(Debug, Default)] pub struct NodeCounts { empty: usize, @@ -15,6 +41,16 @@ pub struct NodeCounts { leaf: usize, } +impl Display for NodeCounts { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "Empty: {}", self.empty)?; + writeln!(f, "Hash: {}", self.hash)?; + writeln!(f, "Branch: {}", self.branch)?; + writeln!(f, "Extension: {}", self.extension)?; + writeln!(f, "Leaf: {}", self.leaf) + } +} + impl NodeCounts { pub fn total_nodes(&self) -> usize { self.empty + self.total_node_non_empty() @@ -28,18 +64,116 @@ impl NodeCounts { self.hash + self.leaf } - pub fn compare(&self, _other: &Self) -> TrieComparison { - todo!() + pub fn compare(&self, other: &Self) -> NodeComparison { + NodeComparison { + tot_node_rat: RatioStat::new(self.total_nodes(), other.total_nodes()), + non_empty_rat: RatioStat::new( + self.total_node_non_empty(), + other.total_node_non_empty(), + ), + empty_rat: RatioStat::new(self.empty, other.empty), + hash_rat: RatioStat::new(self.hash, other.hash), + branch_rat: RatioStat::new(self.branch, other.branch), + extension_rat: RatioStat::new(self.extension, other.extension), + leaf_rat: RatioStat::new(self.leaf, other.leaf), + } } } -#[derive(Debug, Default)] -pub struct TrieComparison {} +#[derive(Debug)] +pub struct TrieComparison { + node_comp: NodeComparison, + depth_comp: DepthComparison, +} + +impl Display for TrieComparison { + // Pretty debug is pretty good by default If we want something better, we can do + // our own. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "Node comparison: {}", self.node_comp)?; + writeln!(f, "Depth comparison: {}", self.depth_comp) + } +} + +// TODO: Consider computing these values lazily? +#[derive(Debug)] +pub struct NodeComparison { + pub tot_node_rat: RatioStat, + pub non_empty_rat: RatioStat, + + pub empty_rat: RatioStat, + pub hash_rat: RatioStat, + pub branch_rat: RatioStat, + pub extension_rat: RatioStat, + pub leaf_rat: RatioStat, +} + +impl Display for NodeComparison { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "Total nodes: {}", self.tot_node_rat)?; + writeln!(f, "Non-empty: {}", self.non_empty_rat)?; + + writeln!(f, "Total empty: {}", self.empty_rat)?; + writeln!(f, "Total hash: {}", self.hash_rat)?; + writeln!(f, "Total branch: {}", self.branch_rat)?; + writeln!(f, "Total extension: {}", self.extension_rat)?; + writeln!(f, "Total leaf: {}", self.leaf_rat) + } +} + +#[derive(Debug)] +struct DepthComparison { + a: DepthStats, + b: DepthStats, +} + +impl Display for DepthComparison { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + Self::write_depth_stats_and_header(f, &self.a, "a")?; + Self::write_depth_stats_and_header(f, &self.b, "b") + } +} + +impl DepthComparison { + fn write_depth_stats_and_header( + f: &mut fmt::Formatter<'_>, + stats: &DepthStats, + trie_str: &str, + ) -> fmt::Result { + writeln!(f, "Depth stats for {}:", trie_str)?; + stats.fmt(f) + } +} + +#[derive(Debug)] +pub struct RatioStat { + pub a: usize, + pub b: usize, +} + +impl Display for RatioStat { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} / {} ({}%)", self.a, self.b, self.get_a_over_b_perc()) + } +} + +impl RatioStat { + /// `new` doesn't do any logic, but this will reduce the line since since + /// this is called so many times. + fn new(a: usize, b: usize) -> Self { + Self { a, b } + } + + fn get_a_over_b_perc(&self) -> f32 { + (self.a as f32 / self.b as f32) * 100.0 + } +} #[derive(Debug, Default)] struct CurrTrackingState { counts: NodeCounts, - leaf_and_hash_depth_sum: u64, + leaf_depth_sum: u64, + hash_depth_sum: u64, lowest_depth: usize, } @@ -52,24 +186,51 @@ impl CurrTrackingState { } /// Depth in terms of node depth (not key length). -#[derive(Debug, Default)] +#[derive(Clone, Debug, Default)] pub struct DepthStats { pub lowest_depth: usize, pub avg_leaf_depth: f32, + pub avg_hash_depth: f32, +} + +impl Display for DepthStats { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "Lowest depth: {}", self.lowest_depth)?; + writeln!(f, "Average leaf depth: {}", self.avg_leaf_depth)?; + writeln!(f, "Average hash depth: {}", self.avg_hash_depth) + } +} + +impl DepthStats { + fn compare(&self, other: &Self) -> DepthComparison { + DepthComparison { + a: self.clone(), + b: other.clone(), + } + } } pub fn get_trie_stats(trie: &T) -> TrieStats { + get_trie_stats_common(trie, None) +} + +pub fn get_trie_stats_with_name(trie: &T, name: String) -> TrieStats { + get_trie_stats_common(trie, Some(name)) +} + +fn get_trie_stats_common(trie: &T, name: Option) -> TrieStats { let mut state = CurrTrackingState::default(); get_trie_stats_rec(trie, &mut state, 0); let depth_stats = DepthStats { lowest_depth: state.lowest_depth, - avg_leaf_depth: state.leaf_and_hash_depth_sum as f32 - / state.counts.hash_and_leaf_node_count() as f32, + avg_leaf_depth: state.leaf_depth_sum as f32 / state.counts.leaf as f32, + avg_hash_depth: state.hash_depth_sum as f32 / state.counts.hash as f32, }; TrieStats { + name, counts: state.counts, depth_stats, } @@ -86,7 +247,7 @@ fn get_trie_stats_rec( } Node::Hash(_) => { state.counts.hash += 1; - state.leaf_and_hash_depth_sum += curr_depth as u64; + state.hash_depth_sum += curr_depth as u64; state.update_lowest_depth_if_larger(curr_depth); } Node::Branch { children, value: _ } => { @@ -105,7 +266,7 @@ fn get_trie_stats_rec( value: _, } => { state.counts.leaf += 1; - state.leaf_and_hash_depth_sum += curr_depth as u64; + state.leaf_depth_sum += curr_depth as u64; state.update_lowest_depth_if_larger(curr_depth); } } From 1ba127021ba76309ff551d7d529da1fb700e387f Mon Sep 17 00:00:00 2001 From: BGluth Date: Tue, 6 Feb 2024 14:18:36 -0700 Subject: [PATCH 04/11] Impled trie stat tests - Also did some refactoring to reduce code duplication that cut across modules --- src/debug_tools/stats.rs | 78 ++++++++++++++++++++++++++++++++ src/lib.rs | 2 +- src/nibbles.rs | 5 +-- src/testing_utils.rs | 97 ++++++++++++++++++++++++++++++++++------ src/trie_hashing.rs | 35 ++++++++------- src/trie_ops.rs | 26 ++++++----- src/trie_subsets.rs | 37 +++------------ 7 files changed, 206 insertions(+), 74 deletions(-) diff --git a/src/debug_tools/stats.rs b/src/debug_tools/stats.rs index 2ee82b0..0453b2b 100644 --- a/src/debug_tools/stats.rs +++ b/src/debug_tools/stats.rs @@ -271,3 +271,81 @@ fn get_trie_stats_rec( } } } + +#[cfg(test)] +mod tests { + use super::get_trie_stats; + use crate::{ + partial_trie::{HashedPartialTrie, PartialTrie}, + testing_utils::{ + generate_n_random_fixed_trie_hash_entries, generate_n_random_fixed_trie_value_entries, + handmade_trie_1, + }, + }; + + const MASSIVE_TRIE_SIZE: usize = 100_000; + + #[test] + fn hand_made_trie_has_correct_node_stats() { + let (trie, _) = handmade_trie_1(); + let stats = get_trie_stats(&trie); + + assert_eq!(stats.counts.leaf, 4); + assert_eq!(stats.counts.hash, 0); + assert_eq!(stats.counts.branch, 4); + assert_eq!(stats.counts.extension, 2); + assert_eq!(stats.counts.empty, 57); // (n_branch * 4) - n_leaf - + // (n_branch - 1) + } + + // TODO: Low-priority. Finish later. + #[test] + #[ignore] + fn perfectly_balanced_trie_has_correct_node_stats() { + todo!() + } + + #[test] + fn massive_leaf_trie_has_correct_leaf_node_stats() { + let entries = generate_n_random_fixed_trie_value_entries(MASSIVE_TRIE_SIZE, 9522); + let trie = HashedPartialTrie::from_iter(entries); + + let stats = get_trie_stats(&trie); + + assert_eq!(stats.counts.leaf, MASSIVE_TRIE_SIZE); + assert_eq!(stats.counts.hash, 0); + } + + #[test] + fn massive_hash_trie_has_correct_hash_node_stats() { + let entries = generate_n_random_fixed_trie_hash_entries(MASSIVE_TRIE_SIZE, 9855); + let trie = HashedPartialTrie::from_iter(entries); + + let stats = get_trie_stats(&trie); + + assert_eq!(stats.counts.hash, MASSIVE_TRIE_SIZE); + assert_eq!(stats.counts.leaf, 0); + } + + #[test] + fn massive_mixed_trie_has_correct_hash_node_stats() { + let val_entries = generate_n_random_fixed_trie_value_entries(MASSIVE_TRIE_SIZE / 2, 1992); + let hash_entries = generate_n_random_fixed_trie_hash_entries(MASSIVE_TRIE_SIZE / 2, 404); + + let mut trie = HashedPartialTrie::default(); + trie.extend(val_entries); + trie.extend(hash_entries); + + let stats = get_trie_stats(&trie); + + assert_eq!(stats.counts.leaf, MASSIVE_TRIE_SIZE / 2); + assert_eq!(stats.counts.hash, MASSIVE_TRIE_SIZE / 2); + } + + // TODO: Low-priority. Finish later. + #[test] + #[ignore] + fn depth_stats_work() { + todo!() + } +} diff --git a/src/lib.rs b/src/lib.rs index 676e19b..aa2df51 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,4 +23,4 @@ mod utils; pub mod debug_tools; #[cfg(test)] -mod testing_utils; +pub(crate) mod testing_utils; diff --git a/src/nibbles.rs b/src/nibbles.rs index c4b166c..730d924 100644 --- a/src/nibbles.rs +++ b/src/nibbles.rs @@ -607,9 +607,8 @@ impl Nibbles { let hex_string_raw = hex_encode_f(&byte_buf[(64 - count_bytes)..64]); let hex_char_iter_raw = hex_string_raw.chars(); - - // We need this skip to make both match arms have the same type. - #[allow(clippy::iter_skip_zero)] + // We need this skip to make both match arms have the same type. + #[allow(clippy::iter_skip_zero)] let mut hex_string = String::from("0x"); match is_even(self.count) { false => hex_string.extend(hex_char_iter_raw.skip(1)), diff --git a/src/testing_utils.rs b/src/testing_utils.rs index ef98045..5cdcb0e 100644 --- a/src/testing_utils.rs +++ b/src/testing_utils.rs @@ -5,11 +5,11 @@ use std::{ use ethereum_types::{H256, U256, U512}; use log::info; -use rand::{rngs::StdRng, seq::IteratorRandom, Rng, SeedableRng}; +use rand::{rngs::StdRng, seq::IteratorRandom, Rng, RngCore, SeedableRng}; use crate::{ nibbles::Nibbles, - partial_trie::{Node, PartialTrie}, + partial_trie::{HashedPartialTrie, Node, PartialTrie}, trie_ops::ValOrHash, utils::is_even, }; @@ -20,8 +20,11 @@ use crate::{ /// chances of these collisions occurring. const MIN_BYTES_FOR_VAR_KEY: usize = 5; +pub(crate) type TrieType = HashedPartialTrie; + pub(crate) type TestInsertValEntry = (Nibbles, Vec); pub(crate) type TestInsertHashEntry = (Nibbles, H256); +type TestInsertEntry = (Nibbles, T); // Don't want this exposed publicly, but it is useful for testing. impl From for Nibbles { @@ -70,34 +73,51 @@ where (k.into(), vec![v]) } -pub(crate) fn generate_n_random_fixed_trie_entries( +pub(crate) fn generate_n_random_fixed_trie_value_entries( n: usize, seed: u64, ) -> impl Iterator { - gen_n_random_trie_entries_common(n, seed, gen_fixed_nibbles) + gen_n_random_trie_value_entries_common(n, seed, gen_fixed_nibbles, gen_rand_u256_bytes) } -pub(crate) fn generate_n_random_variable_keys( +pub(crate) fn generate_n_random_fixed_trie_hash_entries( n: usize, seed: u64, -) -> impl Iterator { - gen_n_random_trie_entries_common(n, seed, gen_variable_nibbles) +) -> impl Iterator { + gen_n_random_trie_value_entries_common(n, seed, gen_fixed_nibbles, |_| H256::random()) } -pub(crate) fn generate_n_random_fixed_even_nibble_padded_trie_entries( +pub(crate) fn generate_n_random_variable_trie_value_entries( n: usize, seed: u64, ) -> impl Iterator { - gen_n_random_trie_entries_common(n, seed, gen_variable_nibbles_even_padded_nibbles) + gen_n_random_trie_value_entries_common(n, seed, gen_variable_nibbles, gen_rand_u256_bytes) } -fn gen_n_random_trie_entries_common Nibbles>( +pub(crate) fn generate_n_random_fixed_even_nibble_padded_trie_value_entries( n: usize, seed: u64, - u256_gen_f: F, ) -> impl Iterator { + gen_n_random_trie_value_entries_common( + n, + seed, + gen_variable_nibbles_even_padded_nibbles, + gen_rand_u256_bytes, + ) +} + +fn gen_n_random_trie_value_entries_common< + T, + K: Fn(&mut StdRng) -> Nibbles, + V: Fn(&mut StdRng) -> T, +>( + n: usize, + seed: u64, + key_gen_f: K, + val_gen_f: V, +) -> impl Iterator> { let mut rng = StdRng::seed_from_u64(seed); - (0..n).map(move |i| (u256_gen_f(&mut rng), i.to_be_bytes().to_vec())) + (0..n).map(move |_| (key_gen_f(&mut rng), val_gen_f(&mut rng))) } pub(crate) fn generate_n_hash_nodes_entries_for_empty_slots_in_trie( @@ -117,6 +137,16 @@ pub(crate) fn generate_n_hash_nodes_entries_for_empty_slots_in_trie( + n: usize, + seed: u64, +) -> T { + let mut trie = T::default(); + trie.extend(generate_n_random_variable_trie_value_entries(n, seed)); + + trie +} + fn gen_fixed_nibbles(rng: &mut StdRng) -> Nibbles { let mut k_bytes = [0; 4]; k_bytes[0..3].copy_from_slice(rng.gen::<[u64; 3]>().as_slice()); @@ -144,7 +174,6 @@ fn gen_variable_nibbles(rng: &mut StdRng) -> Nibbles { U256::from_little_endian(&bytes).into() } - // TODO: Replace with `PartialTrie` `iter` methods once done... pub(crate) fn get_non_hash_values_in_trie( trie: &Node, @@ -161,3 +190,45 @@ pub(crate) fn unwrap_iter_item_to_val(item: ValOrHash) -> Vec { ValOrHash::Hash(_) => unreachable!(), } } + +fn gen_rand_u256_bytes(rng: &mut StdRng) -> Vec { + let num_bytes = 256 / 8; + + let mut buf = Vec::with_capacity(num_bytes); + buf.resize(num_bytes, 0); + rng.fill_bytes(&mut buf); + + buf +} + +/// Initializes a trie with keys large enough to force hashing (nodes less than +/// 32 bytes are not hashed). +pub(crate) fn create_trie_with_large_entry_nodes + Copy>(keys: &[T]) -> TrieType { + let mut trie = TrieType::default(); + for (k, v) in keys.iter().map(|k| (*k).into()).map(large_entry) { + trie.insert(k, v.clone()); + } + + trie +} + +pub(crate) fn handmade_trie_1() -> (TrieType, Vec) { + let ks = vec![0x1234, 0x1324, 0x132400005_u64, 0x2001, 0x2002]; + let ks_nibbles: Vec = ks.into_iter().map(|k| k.into()).collect(); + let trie = create_trie_with_large_entry_nodes(&ks_nibbles); + + // Branch (0x) --> 1, 2 + // Branch (0x1) --> 2, 3 + // Leaf (0x1234) --> (n: 0x34, v: [0]) + + // Extension (0x13) --> n: 0x24 + // Branch (0x1324, v: [1]) --> 0 + // Leaf (0x132400005) --> (0x0005, v: [2]) + + // Extension (0x2) --> n: 0x00 + // Branch (0x200) --> 1, 2 + // Leaf (0x2001) --> (n: 0x1, v: [3]) + // Leaf (0x2002) --> (n: 0x2, v: [4]) + + (trie, ks_nibbles) +} diff --git a/src/trie_hashing.rs b/src/trie_hashing.rs index 55acab1..b6dc864 100644 --- a/src/trie_hashing.rs +++ b/src/trie_hashing.rs @@ -108,9 +108,9 @@ mod tests { nibbles::{Nibble, Nibbles}, partial_trie::{HashedPartialTrie, Node, PartialTrie, WrappedNode}, testing_utils::{ - common_setup, entry, generate_n_random_fixed_even_nibble_padded_trie_entries, - generate_n_random_fixed_trie_entries, generate_n_random_variable_keys, large_entry, - TestInsertValEntry, + common_setup, entry, generate_n_random_fixed_even_nibble_padded_trie_value_entries, + generate_n_random_fixed_trie_value_entries, + generate_n_random_variable_trie_value_entries, large_entry, TestInsertValEntry, }, trie_hashing::hash_bytes, }; @@ -306,8 +306,11 @@ mod tests { fn massive_random_data_insert_fixed_keys_hashes_agree_with_eth_trie() { common_setup(); insert_entries_into_our_and_lib_tries_and_assert_equal_hashes( - &generate_n_random_fixed_trie_entries(NUM_INSERTS_FOR_ETH_TRIE_CRATE_MASSIVE_TEST, 0) - .collect::>(), + &generate_n_random_fixed_trie_value_entries( + NUM_INSERTS_FOR_ETH_TRIE_CRATE_MASSIVE_TEST, + 0, + ) + .collect::>(), ); } @@ -315,7 +318,7 @@ mod tests { fn massive_random_data_insert_variable_keys_hashes_agree_with_eth_trie() { common_setup(); insert_entries_into_our_and_lib_tries_and_assert_equal_hashes( - &generate_n_random_fixed_even_nibble_padded_trie_entries( + &generate_n_random_fixed_even_nibble_padded_trie_value_entries( NUM_INSERTS_FOR_ETH_TRIE_CRATE_MASSIVE_TEST, 0, ) @@ -357,7 +360,7 @@ mod tests { fn massive_trie_data_deletion_agrees_with_eth_trie() { common_setup(); - let entries: Vec<_> = generate_n_random_fixed_even_nibble_padded_trie_entries( + let entries: Vec<_> = generate_n_random_fixed_even_nibble_padded_trie_value_entries( NUM_INSERTS_FOR_ETH_TRIE_CRATE_MASSIVE_TEST, 8, ) @@ -412,15 +415,17 @@ mod tests { #[test] fn replacing_part_of_a_trie_with_a_hash_node_produces_same_hash() { let entries = (0..16).flat_map(|i| { - generate_n_random_variable_keys(NODES_PER_BRANCH_FOR_HASH_REPLACEMENT_TEST, i).map( - move |(mut k, v)| { - // Force all keys to be under a given branch at root. - k.truncate_n_nibbles_front_mut(1); - k.push_nibble_front(i as Nibble); - - (k, v) - }, + generate_n_random_variable_trie_value_entries( + NODES_PER_BRANCH_FOR_HASH_REPLACEMENT_TEST, + i, ) + .map(move |(mut k, v)| { + // Force all keys to be under a given branch at root. + k.truncate_n_nibbles_front_mut(1); + k.push_nibble_front(i as Nibble); + + (k, v) + }) }); let mut trie = HashedPartialTrie::from_iter(entries); diff --git a/src/trie_ops.rs b/src/trie_ops.rs index 24f78a0..7883e47 100644 --- a/src/trie_ops.rs +++ b/src/trie_ops.rs @@ -778,8 +778,9 @@ mod tests { testing_utils::{ common_setup, entry, entry_with_value, generate_n_hash_nodes_entries_for_empty_slots_in_trie, - generate_n_random_fixed_trie_entries, generate_n_random_variable_keys, - get_non_hash_values_in_trie, unwrap_iter_item_to_val, TestInsertValEntry, + generate_n_random_fixed_trie_value_entries, + generate_n_random_variable_trie_value_entries, get_non_hash_values_in_trie, + unwrap_iter_item_to_val, TestInsertValEntry, }, utils::create_mask_of_1s, }; @@ -877,7 +878,8 @@ mod tests { #[test] fn mass_inserts_fixed_sized_keys_all_entries_are_retrievable() { common_setup(); - let entries: Vec<_> = generate_n_random_fixed_trie_entries(MASSIVE_TRIE_SIZE, 0).collect(); + let entries: Vec<_> = + generate_n_random_fixed_trie_value_entries(MASSIVE_TRIE_SIZE, 0).collect(); insert_entries_and_assert_all_exist_in_trie_with_no_extra(&entries); } @@ -885,7 +887,8 @@ mod tests { #[test] fn mass_inserts_variable_sized_keys_all_entries_are_retrievable() { common_setup(); - let entries: Vec<_> = generate_n_random_variable_keys(MASSIVE_TRIE_SIZE, 0).collect(); + let entries: Vec<_> = + generate_n_random_variable_trie_value_entries(MASSIVE_TRIE_SIZE, 0).collect(); insert_entries_and_assert_all_exist_in_trie_with_no_extra(&entries); } @@ -894,7 +897,7 @@ mod tests { fn mass_inserts_variable_sized_keys_with_hash_nodes_all_entries_are_retrievable() { common_setup(); let non_hash_entries: Vec<_> = - generate_n_random_variable_keys(MASSIVE_TRIE_SIZE, 0).collect(); + generate_n_random_variable_trie_value_entries(MASSIVE_TRIE_SIZE, 0).collect(); let mut trie = StandardTrie::from_iter(non_hash_entries.iter().cloned()); let extra_hash_entries = generate_n_hash_nodes_entries_for_empty_slots_in_trie( @@ -925,11 +928,11 @@ mod tests { StandardTrie::new(Node::Empty) ); - let entries = generate_n_random_fixed_trie_entries(MASSIVE_TRIE_SIZE, 0); + let entries = generate_n_random_fixed_trie_value_entries(MASSIVE_TRIE_SIZE, 0); let big_trie_1 = StandardTrie::from_iter(entries); assert_eq!(big_trie_1, big_trie_1); - let entries = generate_n_random_fixed_trie_entries(MASSIVE_TRIE_SIZE, 1); + let entries = generate_n_random_fixed_trie_value_entries(MASSIVE_TRIE_SIZE, 1); let big_trie_2 = StandardTrie::from_iter(entries); assert_ne!(big_trie_1, big_trie_2) @@ -951,7 +954,7 @@ mod tests { common_setup(); let random_entries: Vec<_> = - generate_n_random_fixed_trie_entries(MASSIVE_TRIE_SIZE, 9001).collect(); + generate_n_random_fixed_trie_value_entries(MASSIVE_TRIE_SIZE, 9001).collect(); let trie = StandardTrie::from_iter(random_entries.iter().cloned()); for (k, v) in random_entries.into_iter() { @@ -966,7 +969,7 @@ mod tests { fn held_trie_cow_references_do_not_change_as_trie_changes() { common_setup(); - let entries = generate_n_random_variable_keys(COW_TEST_TRIE_SIZE, 9002); + let entries = generate_n_random_variable_trie_value_entries(COW_TEST_TRIE_SIZE, 9002); let mut all_nodes_in_trie_after_each_insert = Vec::new(); let mut root_node_after_each_insert = Vec::new(); @@ -993,7 +996,7 @@ mod tests { common_setup(); let entries: HashSet<_> = - generate_n_random_variable_keys(MASSIVE_TRIE_SIZE, 9003).collect(); + generate_n_random_variable_trie_value_entries(MASSIVE_TRIE_SIZE, 9003).collect(); let trie = StandardTrie::from_iter(entries.iter().cloned()); let trie_items: HashSet<_> = trie @@ -1027,7 +1030,8 @@ mod tests { fn deletion_massive_trie() { common_setup(); - let entries: Vec<_> = generate_n_random_variable_keys(MASSIVE_TRIE_SIZE, 7).collect(); + let entries: Vec<_> = + generate_n_random_variable_trie_value_entries(MASSIVE_TRIE_SIZE, 7).collect(); let mut trie = StandardTrie::from_iter(entries.iter().cloned()); // Delete half of the elements diff --git a/src/trie_subsets.rs b/src/trie_subsets.rs index be9aae4..aa5f805 100644 --- a/src/trie_subsets.rs +++ b/src/trie_subsets.rs @@ -351,13 +351,14 @@ mod tests { use crate::{ nibbles::Nibbles, partial_trie::{HashedPartialTrie, Node, PartialTrie}, - testing_utils::{generate_n_random_fixed_trie_entries, large_entry}, + testing_utils::{ + create_trie_with_large_entry_nodes, generate_n_random_fixed_trie_value_entries, + handmade_trie_1, TrieType, + }, trie_ops::ValOrHash, utils::TrieNodeType, }; - type TrieType = HashedPartialTrie; - const MASSIVE_TEST_NUM_SUB_TRIES: usize = 10; const MASSIVE_TEST_NUM_SUB_TRIE_SIZE: usize = 5000; @@ -472,35 +473,9 @@ mod tests { assert!(!leaf_keys.contains(&Nibbles::from(0x12345))); } - // Initializes a trie with keys large enough to force hashing (nodes less than - // 32 bytes are not hashed). - fn create_trie_with_large_entry_nodes + Copy>(keys: &[T]) -> TrieType { - let mut trie = TrieType::default(); - for (k, v) in keys.iter().map(|k| (*k).into()).map(large_entry) { - trie.insert(k, v.clone()); - } - - trie - } - #[test] fn intermediate_nodes_are_included_in_subset() { - let ks = vec![0x1234, 0x1324, 0x132400005_u64, 0x2001, 0x2002]; - let trie = create_trie_with_large_entry_nodes(&ks); - - // Branch (0x) --> 1, 2 - // Branch (0x1) --> 2, 3 - // Leaf (0x1234) --> (n: 0x34, v: [0]) - - // Branch (0x1324, v: [1]) --> 0 - // Leaf (0x132400005) --> (0x0005, v: [2]) - - // Extension (0x2) --> n: 0x00 - // Branch (0x200) --> 1, 2 - // Leaf (0x2001) --> (n: 0x1, v: [3]) - // Leaf (0x2002) --> (n: 0x2, v: [4]) - - let ks_nibbles: Vec = ks.into_iter().map(|k| k.into()).collect(); + let (trie, ks_nibbles) = handmade_trie_1(); let trie_subset_all = create_trie_subset(&trie, ks_nibbles.iter().cloned()).unwrap(); let subset_keys = get_all_nibbles_of_leaf_nodes_in_trie(&trie_subset_all); @@ -648,7 +623,7 @@ mod tests { let trie_size = MASSIVE_TEST_NUM_SUB_TRIES * MASSIVE_TEST_NUM_SUB_TRIE_SIZE; let random_entries: Vec<_> = - generate_n_random_fixed_trie_entries(trie_size, seed).collect(); + generate_n_random_fixed_trie_value_entries(trie_size, seed).collect(); let entry_keys: Vec<_> = random_entries.iter().map(|(k, _)| k).cloned().collect(); let trie = TrieType::from_iter(random_entries); From fd9a8cb6f8060cc8ab9d3c171f0f5162358985f7 Mon Sep 17 00:00:00 2001 From: BGluth Date: Tue, 6 Feb 2024 14:43:48 -0700 Subject: [PATCH 05/11] Cleanup and documentation --- src/debug_tools/stats.rs | 36 ++++++++++++++++++++++++------------ src/testing_utils.rs | 10 ---------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/src/debug_tools/stats.rs b/src/debug_tools/stats.rs index 0453b2b..73d108d 100644 --- a/src/debug_tools/stats.rs +++ b/src/debug_tools/stats.rs @@ -1,3 +1,8 @@ +//! Simple stat tooling to extract stats from tries. +//! +//! This is particularly useful when comparing a "base" trie against a sub-trie +//! created from it. + use std::fmt::{self, Display}; use crate::partial_trie::{Node, PartialTrie}; @@ -32,13 +37,14 @@ impl TrieStats { } } +/// Total node counts for a trie. #[derive(Debug, Default)] pub struct NodeCounts { - empty: usize, - hash: usize, - branch: usize, - extension: usize, - leaf: usize, + pub empty: usize, + pub hash: usize, + pub branch: usize, + pub extension: usize, + pub leaf: usize, } impl Display for NodeCounts { @@ -80,10 +86,11 @@ impl NodeCounts { } } +/// Information on the comparison between two tries. #[derive(Debug)] pub struct TrieComparison { - node_comp: NodeComparison, - depth_comp: DepthComparison, + pub node_comp: NodeComparison, + pub depth_comp: DepthComparison, } impl Display for TrieComparison { @@ -122,9 +129,9 @@ impl Display for NodeComparison { } #[derive(Debug)] -struct DepthComparison { - a: DepthStats, - b: DepthStats, +pub struct DepthComparison { + pub a: DepthStats, + pub b: DepthStats, } impl Display for DepthComparison { @@ -145,6 +152,7 @@ impl DepthComparison { } } +/// Type to hold (and compare) a given variable from two different tries.s #[derive(Debug)] pub struct RatioStat { pub a: usize, @@ -158,8 +166,8 @@ impl Display for RatioStat { } impl RatioStat { - /// `new` doesn't do any logic, but this will reduce the line since since - /// this is called so many times. + /// `new` doesn't do any logic, but this will reduce a lot of line lengths + /// since this is called so many times. fn new(a: usize, b: usize) -> Self { Self { a, b } } @@ -169,9 +177,13 @@ impl RatioStat { } } +/// "Raw" state that is mutated as we traverse down the trie. Is processed into +/// a more useful format later on. #[derive(Debug, Default)] struct CurrTrackingState { counts: NodeCounts, + + // The "*_sum" variables are just accumulators that we process later to get average depths. leaf_depth_sum: u64, hash_depth_sum: u64, lowest_depth: usize, diff --git a/src/testing_utils.rs b/src/testing_utils.rs index 5cdcb0e..85d227e 100644 --- a/src/testing_utils.rs +++ b/src/testing_utils.rs @@ -137,16 +137,6 @@ pub(crate) fn generate_n_hash_nodes_entries_for_empty_slots_in_trie( - n: usize, - seed: u64, -) -> T { - let mut trie = T::default(); - trie.extend(generate_n_random_variable_trie_value_entries(n, seed)); - - trie -} - fn gen_fixed_nibbles(rng: &mut StdRng) -> Nibbles { let mut k_bytes = [0; 4]; k_bytes[0..3].copy_from_slice(rng.gen::<[u64; 3]>().as_slice()); From fb3a77937423951be3793375facba8f265696e8d Mon Sep 17 00:00:00 2001 From: BGluth Date: Tue, 6 Feb 2024 16:09:09 -0700 Subject: [PATCH 06/11] Changed `DepthComparason` - Previously had a lot of redundant info that was also present in `TrieStats`. Now compares the depth values from both tries directly. --- Cargo.toml | 3 +- src/debug_tools/stats.rs | 100 ++++++++++++++++++++++----------------- 2 files changed, 59 insertions(+), 44 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d86f964..ece5d4f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ keccak-hash = "0.10.0" parking_lot = { version = "0.12.1", features = ["serde"] } thiserror = "1.0.40" log = "0.4.17" +num = { version = "0.4.1", optional = true } num-traits = "0.2.15" uint = "0.9.5" rlp = "0.5.2" @@ -39,7 +40,7 @@ serde_json = "1.0.96" [features] default = ["trie_debug"] -trie_debug = [] +trie_debug = ["num"] [lib] doc-scrape-examples = true diff --git a/src/debug_tools/stats.rs b/src/debug_tools/stats.rs index 73d108d..65dc2a1 100644 --- a/src/debug_tools/stats.rs +++ b/src/debug_tools/stats.rs @@ -5,6 +5,8 @@ use std::fmt::{self, Display}; +use num_traits::ToPrimitive; + use crate::partial_trie::{Node, PartialTrie}; #[derive(Debug, Default)] @@ -23,8 +25,8 @@ impl Display for TrieStats { None => writeln!(f)?, } - writeln!(f, "Counts: {}", self.counts)?; - writeln!(f, "Depth stats: {}", self.depth_stats) + writeln!(f, "Counts:\n{}", self.counts)?; + writeln!(f, "Depth stats:\n{}", self.depth_stats) } } @@ -49,11 +51,25 @@ pub struct NodeCounts { impl Display for NodeCounts { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!(f, "Empty: {}", self.empty)?; - writeln!(f, "Hash: {}", self.hash)?; - writeln!(f, "Branch: {}", self.branch)?; - writeln!(f, "Extension: {}", self.extension)?; - writeln!(f, "Leaf: {}", self.leaf) + let tot_nodes = self.total_nodes(); + + Self::write_node_count_stats(f, "Empty", self.empty, tot_nodes)?; + Self::write_node_count_stats(f, "Hash", self.hash, tot_nodes)?; + Self::write_node_count_stats(f, "Branch", self.branch, tot_nodes)?; + Self::write_node_count_stats(f, "Extension", self.extension, tot_nodes)?; + Self::write_node_count_stats(f, "Leaf", self.leaf, tot_nodes) + } +} + +impl NodeCounts { + fn write_node_count_stats( + f: &mut fmt::Formatter<'_>, + node_t_name: &str, + count: usize, + tot_count: usize, + ) -> fmt::Result { + let perc = (count as f32 / tot_count as f32) * 100.0; + writeln!(f, "{}: {} ({:.3}%)", node_t_name, count, perc) } } @@ -105,14 +121,14 @@ impl Display for TrieComparison { // TODO: Consider computing these values lazily? #[derive(Debug)] pub struct NodeComparison { - pub tot_node_rat: RatioStat, - pub non_empty_rat: RatioStat, - - pub empty_rat: RatioStat, - pub hash_rat: RatioStat, - pub branch_rat: RatioStat, - pub extension_rat: RatioStat, - pub leaf_rat: RatioStat, + pub tot_node_rat: RatioStat, + pub non_empty_rat: RatioStat, + + pub empty_rat: RatioStat, + pub hash_rat: RatioStat, + pub branch_rat: RatioStat, + pub extension_rat: RatioStat, + pub leaf_rat: RatioStat, } impl Display for NodeComparison { @@ -130,50 +146,47 @@ impl Display for NodeComparison { #[derive(Debug)] pub struct DepthComparison { - pub a: DepthStats, - pub b: DepthStats, + pub lowest_depth_rat: RatioStat, + pub avg_leaf_depth_rat: RatioStat, + pub avg_hash_depth_rat: RatioStat, } impl Display for DepthComparison { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - Self::write_depth_stats_and_header(f, &self.a, "a")?; - Self::write_depth_stats_and_header(f, &self.b, "b") - } -} - -impl DepthComparison { - fn write_depth_stats_and_header( - f: &mut fmt::Formatter<'_>, - stats: &DepthStats, - trie_str: &str, - ) -> fmt::Result { - writeln!(f, "Depth stats for {}:", trie_str)?; - stats.fmt(f) + writeln!(f, "Lowest depth: {}", self.lowest_depth_rat)?; + writeln!(f, "Avg leaf depth: {}", self.avg_leaf_depth_rat)?; + writeln!(f, "Avg hah depth: {}", self.avg_hash_depth_rat) } } /// Type to hold (and compare) a given variable from two different tries.s #[derive(Debug)] -pub struct RatioStat { - pub a: usize, - pub b: usize, +pub struct RatioStat { + pub a: T, + pub b: T, } -impl Display for RatioStat { +impl Display for RatioStat { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{} / {} ({}%)", self.a, self.b, self.get_a_over_b_perc()) + write!( + f, + "{:.3} / {:.3} ({:.3}%)", + self.a, + self.b, + self.get_a_over_b_perc() + ) } } -impl RatioStat { +impl RatioStat { /// `new` doesn't do any logic, but this will reduce a lot of line lengths /// since this is called so many times. - fn new(a: usize, b: usize) -> Self { + fn new(a: T, b: T) -> Self { Self { a, b } } fn get_a_over_b_perc(&self) -> f32 { - (self.a as f32 / self.b as f32) * 100.0 + (self.a.to_f32().unwrap() / self.b.to_f32().unwrap()) * 100.0 } } @@ -191,7 +204,7 @@ struct CurrTrackingState { impl CurrTrackingState { fn update_lowest_depth_if_larger(&mut self, curr_depth: usize) { - if self.lowest_depth > curr_depth { + if self.lowest_depth < curr_depth { self.lowest_depth = curr_depth; } } @@ -208,16 +221,17 @@ pub struct DepthStats { impl Display for DepthStats { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!(f, "Lowest depth: {}", self.lowest_depth)?; - writeln!(f, "Average leaf depth: {}", self.avg_leaf_depth)?; - writeln!(f, "Average hash depth: {}", self.avg_hash_depth) + writeln!(f, "Average leaf depth: {:.3}", self.avg_leaf_depth)?; + writeln!(f, "Average hash depth: {:.3}", self.avg_hash_depth) } } impl DepthStats { fn compare(&self, other: &Self) -> DepthComparison { DepthComparison { - a: self.clone(), - b: other.clone(), + lowest_depth_rat: RatioStat::new(self.lowest_depth, other.lowest_depth), + avg_leaf_depth_rat: RatioStat::new(self.avg_leaf_depth, other.avg_leaf_depth), + avg_hash_depth_rat: RatioStat::new(self.avg_hash_depth, other.avg_hash_depth), } } } From d0b1013cef224ed03c5c062ca4afb045a2bc1ccd Mon Sep 17 00:00:00 2001 From: BGluth Date: Tue, 6 Feb 2024 16:37:17 -0700 Subject: [PATCH 07/11] Doc cleanup --- src/debug_tools/stats.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/debug_tools/stats.rs b/src/debug_tools/stats.rs index 65dc2a1..fbeb336 100644 --- a/src/debug_tools/stats.rs +++ b/src/debug_tools/stats.rs @@ -1,6 +1,6 @@ -//! Simple stat tooling to extract stats from tries. +//! Simple tooling to extract stats from tries. //! -//! This is particularly useful when comparing a "base" trie against a sub-trie +//! This is particularly useful when comparing a "base" trie against a sub-trie (hashed out trie) //! created from it. use std::fmt::{self, Display}; @@ -110,8 +110,6 @@ pub struct TrieComparison { } impl Display for TrieComparison { - // Pretty debug is pretty good by default If we want something better, we can do - // our own. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!(f, "Node comparison: {}", self.node_comp)?; writeln!(f, "Depth comparison: {}", self.depth_comp) @@ -155,7 +153,7 @@ impl Display for DepthComparison { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!(f, "Lowest depth: {}", self.lowest_depth_rat)?; writeln!(f, "Avg leaf depth: {}", self.avg_leaf_depth_rat)?; - writeln!(f, "Avg hah depth: {}", self.avg_hash_depth_rat) + writeln!(f, "Avg hash depth: {}", self.avg_hash_depth_rat) } } @@ -179,7 +177,7 @@ impl Display for RatioStat { } impl RatioStat { - /// `new` doesn't do any logic, but this will reduce a lot of line lengths + /// `new` doesn't have any logic, but this will reduce a lot of line lengths /// since this is called so many times. fn new(a: T, b: T) -> Self { Self { a, b } From 2b72c7fab17eb7bfd820c13a4ec0e7720bb2fb5d Mon Sep 17 00:00:00 2001 From: BGluth Date: Tue, 6 Feb 2024 16:37:51 -0700 Subject: [PATCH 08/11] Reduced code duplication in the stat tests --- src/debug_tools/stats.rs | 45 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/src/debug_tools/stats.rs b/src/debug_tools/stats.rs index fbeb336..82996c1 100644 --- a/src/debug_tools/stats.rs +++ b/src/debug_tools/stats.rs @@ -1,7 +1,7 @@ //! Simple tooling to extract stats from tries. //! -//! This is particularly useful when comparing a "base" trie against a sub-trie (hashed out trie) -//! created from it. +//! This is particularly useful when comparing a "base" trie against a sub-trie +//! (hashed out trie) created from it. use std::fmt::{self, Display}; @@ -318,8 +318,9 @@ mod tests { assert_eq!(stats.counts.hash, 0); assert_eq!(stats.counts.branch, 4); assert_eq!(stats.counts.extension, 2); - assert_eq!(stats.counts.empty, 57); // (n_branch * 4) - n_leaf - - // (n_branch - 1) + + // empty = (n_branch * 4) - n_leaf - (n_branch - 1) + assert_eq!(stats.counts.empty, 57); } // TODO: Low-priority. Finish later. @@ -331,30 +332,30 @@ mod tests { #[test] fn massive_leaf_trie_has_correct_leaf_node_stats() { - let entries = generate_n_random_fixed_trie_value_entries(MASSIVE_TRIE_SIZE, 9522); - let trie = HashedPartialTrie::from_iter(entries); - - let stats = get_trie_stats(&trie); - - assert_eq!(stats.counts.leaf, MASSIVE_TRIE_SIZE); - assert_eq!(stats.counts.hash, 0); + create_trie_and_stats_from_entries_and_assert(MASSIVE_TRIE_SIZE, 0, 9522); } #[test] fn massive_hash_trie_has_correct_hash_node_stats() { - let entries = generate_n_random_fixed_trie_hash_entries(MASSIVE_TRIE_SIZE, 9855); - let trie = HashedPartialTrie::from_iter(entries); - - let stats = get_trie_stats(&trie); - - assert_eq!(stats.counts.hash, MASSIVE_TRIE_SIZE); - assert_eq!(stats.counts.leaf, 0); + create_trie_and_stats_from_entries_and_assert(0, MASSIVE_TRIE_SIZE, 9855); } #[test] fn massive_mixed_trie_has_correct_hash_node_stats() { - let val_entries = generate_n_random_fixed_trie_value_entries(MASSIVE_TRIE_SIZE / 2, 1992); - let hash_entries = generate_n_random_fixed_trie_hash_entries(MASSIVE_TRIE_SIZE / 2, 404); + create_trie_and_stats_from_entries_and_assert( + MASSIVE_TRIE_SIZE / 2, + MASSIVE_TRIE_SIZE / 2, + 1992, + ); + } + + fn create_trie_and_stats_from_entries_and_assert( + n_leaf_nodes: usize, + n_hash_nodes: usize, + seed: u64, + ) { + let val_entries = generate_n_random_fixed_trie_value_entries(n_leaf_nodes, seed); + let hash_entries = generate_n_random_fixed_trie_hash_entries(n_hash_nodes, seed + 1); let mut trie = HashedPartialTrie::default(); trie.extend(val_entries); @@ -362,8 +363,8 @@ mod tests { let stats = get_trie_stats(&trie); - assert_eq!(stats.counts.leaf, MASSIVE_TRIE_SIZE / 2); - assert_eq!(stats.counts.hash, MASSIVE_TRIE_SIZE / 2); + assert_eq!(stats.counts.leaf, n_leaf_nodes); + assert_eq!(stats.counts.hash, n_hash_nodes); } // TODO: Low-priority. Finish later. From 432f85038c76ebae6538a4a2d41c822a2bdd24be Mon Sep 17 00:00:00 2001 From: BGluth Date: Tue, 6 Feb 2024 17:13:44 -0700 Subject: [PATCH 09/11] Fixed clippy lint --- src/testing_utils.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/testing_utils.rs b/src/testing_utils.rs index 85d227e..51b40e5 100644 --- a/src/testing_utils.rs +++ b/src/testing_utils.rs @@ -184,8 +184,7 @@ pub(crate) fn unwrap_iter_item_to_val(item: ValOrHash) -> Vec { fn gen_rand_u256_bytes(rng: &mut StdRng) -> Vec { let num_bytes = 256 / 8; - let mut buf = Vec::with_capacity(num_bytes); - buf.resize(num_bytes, 0); + let mut buf = vec![0; num_bytes]; rng.fill_bytes(&mut buf); buf From 169109ceac5905ddf3938d1fe9fc5c7165cab070 Mon Sep 17 00:00:00 2001 From: BGluth Date: Wed, 7 Feb 2024 14:45:12 -0700 Subject: [PATCH 10/11] Apply suggestions from code review Co-authored-by: Robin Salen <30937548+Nashtare@users.noreply.github.com> --- src/debug_tools/stats.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/debug_tools/stats.rs b/src/debug_tools/stats.rs index 82996c1..c9ca93b 100644 --- a/src/debug_tools/stats.rs +++ b/src/debug_tools/stats.rs @@ -69,7 +69,7 @@ impl NodeCounts { tot_count: usize, ) -> fmt::Result { let perc = (count as f32 / tot_count as f32) * 100.0; - writeln!(f, "{}: {} ({:.3}%)", node_t_name, count, perc) + writeln!(f, "{}: {} ({:.2}%)", node_t_name, count, perc) } } From 2657f48354376662d851a185988f238d4f7b78c6 Mon Sep 17 00:00:00 2001 From: BGluth Date: Wed, 7 Feb 2024 14:51:43 -0700 Subject: [PATCH 11/11] Requested PR changes for #8 --- src/debug_tools/stats.rs | 82 +++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/src/debug_tools/stats.rs b/src/debug_tools/stats.rs index c9ca93b..38429e6 100644 --- a/src/debug_tools/stats.rs +++ b/src/debug_tools/stats.rs @@ -11,9 +11,9 @@ use crate::partial_trie::{Node, PartialTrie}; #[derive(Debug, Default)] pub struct TrieStats { - pub name: Option, - pub counts: NodeCounts, - pub depth_stats: DepthStats, + name: Option, + counts: NodeCounts, + depth_stats: DepthStats, } impl Display for TrieStats { @@ -41,12 +41,12 @@ impl TrieStats { /// Total node counts for a trie. #[derive(Debug, Default)] -pub struct NodeCounts { - pub empty: usize, - pub hash: usize, - pub branch: usize, - pub extension: usize, - pub leaf: usize, +struct NodeCounts { + empty: usize, + hash: usize, + branch: usize, + extension: usize, + leaf: usize, } impl Display for NodeCounts { @@ -74,19 +74,19 @@ impl NodeCounts { } impl NodeCounts { - pub fn total_nodes(&self) -> usize { + fn total_nodes(&self) -> usize { self.empty + self.total_node_non_empty() } - pub fn total_node_non_empty(&self) -> usize { + fn total_node_non_empty(&self) -> usize { self.branch + self.extension + self.hash_and_leaf_node_count() } - pub fn hash_and_leaf_node_count(&self) -> usize { + fn hash_and_leaf_node_count(&self) -> usize { self.hash + self.leaf } - pub fn compare(&self, other: &Self) -> NodeComparison { + fn compare(&self, other: &Self) -> NodeComparison { NodeComparison { tot_node_rat: RatioStat::new(self.total_nodes(), other.total_nodes()), non_empty_rat: RatioStat::new( @@ -105,8 +105,8 @@ impl NodeCounts { /// Information on the comparison between two tries. #[derive(Debug)] pub struct TrieComparison { - pub node_comp: NodeComparison, - pub depth_comp: DepthComparison, + node_comp: NodeComparison, + depth_comp: DepthComparison, } impl Display for TrieComparison { @@ -118,15 +118,15 @@ impl Display for TrieComparison { // TODO: Consider computing these values lazily? #[derive(Debug)] -pub struct NodeComparison { - pub tot_node_rat: RatioStat, - pub non_empty_rat: RatioStat, - - pub empty_rat: RatioStat, - pub hash_rat: RatioStat, - pub branch_rat: RatioStat, - pub extension_rat: RatioStat, - pub leaf_rat: RatioStat, +struct NodeComparison { + tot_node_rat: RatioStat, + non_empty_rat: RatioStat, + + empty_rat: RatioStat, + hash_rat: RatioStat, + branch_rat: RatioStat, + extension_rat: RatioStat, + leaf_rat: RatioStat, } impl Display for NodeComparison { @@ -143,10 +143,10 @@ impl Display for NodeComparison { } #[derive(Debug)] -pub struct DepthComparison { - pub lowest_depth_rat: RatioStat, - pub avg_leaf_depth_rat: RatioStat, - pub avg_hash_depth_rat: RatioStat, +struct DepthComparison { + lowest_depth_rat: RatioStat, + avg_leaf_depth_rat: RatioStat, + avg_hash_depth_rat: RatioStat, } impl Display for DepthComparison { @@ -159,9 +159,9 @@ impl Display for DepthComparison { /// Type to hold (and compare) a given variable from two different tries.s #[derive(Debug)] -pub struct RatioStat { - pub a: T, - pub b: T, +struct RatioStat { + a: T, + b: T, } impl Display for RatioStat { @@ -171,7 +171,9 @@ impl Display for RatioStat { "{:.3} / {:.3} ({:.3}%)", self.a, self.b, - self.get_a_over_b_perc() + // Note that the `Error` type for `fmt` does not hold any extra information and can + // only indicate that something went wrong. + self.get_a_over_b_perc().map_err(|_| fmt::Error)? ) } } @@ -183,8 +185,12 @@ impl RatioStat { Self { a, b } } - fn get_a_over_b_perc(&self) -> f32 { - (self.a.to_f32().unwrap() / self.b.to_f32().unwrap()) * 100.0 + fn get_a_over_b_perc(&self) -> Result { + Ok((Self::try_to_f32(&self.a)? / Self::try_to_f32(&self.b)?) * 100.0) + } + + fn try_to_f32(v: &T) -> Result { + v.to_f32().ok_or(()) } } @@ -210,10 +216,10 @@ impl CurrTrackingState { /// Depth in terms of node depth (not key length). #[derive(Clone, Debug, Default)] -pub struct DepthStats { - pub lowest_depth: usize, - pub avg_leaf_depth: f32, - pub avg_hash_depth: f32, +struct DepthStats { + lowest_depth: usize, + avg_leaf_depth: f32, + avg_hash_depth: f32, } impl Display for DepthStats {