From d419edbda0e4013415501aadd1197190c6904cbb Mon Sep 17 00:00:00 2001 From: Caleb Owens Date: Thu, 19 Dec 2024 15:15:23 +0000 Subject: [PATCH 1/4] Introduce commit subset check --- .../src/commit_ops.rs | 238 ++++++++++++++++++ crates/gitbutler-branch-actions/src/lib.rs | 1 + crates/gitbutler-oxidize/src/lib.rs | 33 +++ .../src/testing_repository.rs | 5 + 4 files changed, 277 insertions(+) create mode 100644 crates/gitbutler-branch-actions/src/commit_ops.rs diff --git a/crates/gitbutler-branch-actions/src/commit_ops.rs b/crates/gitbutler-branch-actions/src/commit_ops.rs new file mode 100644 index 0000000000..6851f9acc9 --- /dev/null +++ b/crates/gitbutler-branch-actions/src/commit_ops.rs @@ -0,0 +1,238 @@ +use anyhow::{bail, Result}; +use gitbutler_oxidize::GixRepositoryExt; + +/// Finds the first parent of a given commit +fn get_first_parent(commit: gix::Commit) -> Result { + let first_parent = commit.parent_ids().take(1).collect::>(); + let Some(first_parent) = first_parent.first() else { + bail!("Failed to find first parent of {}", commit.id()) + }; + let first_parent = first_parent.object()?.into_commit(); + Ok(first_parent) +} + +/// Gets the changes that one commit introduced compared to the base, +/// excluding anything between the commit and the base +fn get_exclusive_tree( + repository: &gix::Repository, + commit_id: gix::ObjectId, + base_id: gix::ObjectId, +) -> Result { + let commit = repository.find_commit(commit_id)?; + let commit_parent = get_first_parent(commit.clone())?; + let base = repository.find_commit(base_id)?; + + repository + .merge_trees( + commit_parent.tree_id()?, + commit.tree_id()?, + base.tree_id()?, + Default::default(), + repository.merge_options_force_ours()?, + )? + .tree + .write() + .map_err(Into::into) + .map(Into::into) +} + +/// Takes two commits and determines if one is a subset of or equal to the other +#[allow(dead_code)] +fn is_subset( + repository: &gix::Repository, + superset_id: gix::ObjectId, + subset_id: gix::ObjectId, + common_base_id: gix::ObjectId, +) -> Result { + let exclusive_superset = get_exclusive_tree(repository, superset_id, common_base_id)?; + let exclusive_subset = get_exclusive_tree(repository, subset_id, common_base_id)?; + + let common_base = repository.find_commit(common_base_id)?; + + let (options, _) = repository.merge_options_fail_fast()?; + let mut merged_exclusives = repository.merge_trees( + common_base.tree_id()?, + exclusive_superset, + exclusive_subset, + Default::default(), + options, + )?; + + if merged_exclusives.conflicts.is_empty() { + Ok(exclusive_superset == merged_exclusives.tree.write()?) + } else { + Ok(false) + } +} + +#[cfg(test)] +mod test { + use gitbutler_testsupport::testing_repository::TestingRepository; + mod get_exclusive_tree { + use gitbutler_oxidize::OidExt; + + use super::super::get_exclusive_tree; + use super::*; + + #[test] + fn when_already_exclusive_returns_self() { + let test_repository = TestingRepository::open(); + let base_commit: git2::Commit = + test_repository.commit_tree(None, &[("foo.txt", "foo"), ("bar.txt", "bar")]); + let second_commit: git2::Commit = test_repository.commit_tree( + Some(&base_commit), + &[("foo.txt", "bar"), ("bar.txt", "baz")], + ); + + let exclusive_tree = get_exclusive_tree( + &test_repository.gix_repository(), + second_commit.id().to_gix(), + base_commit.id().to_gix(), + ) + .unwrap(); + + assert_eq!( + second_commit.tree_id().to_gix(), + exclusive_tree, + "The tree returned should match the second commit" + ) + } + + #[test] + fn when_on_top_of_other_commit_its_changes_are_dropped() { + let test_repository = TestingRepository::open(); + let base_commit: git2::Commit = + test_repository.commit_tree(None, &[("foo.txt", "foo")]); + let second_commit: git2::Commit = test_repository.commit_tree( + Some(&base_commit), + &[("foo.txt", "bar"), ("bar.txt", "baz")], + ); + let third_commit: git2::Commit = test_repository.commit_tree( + Some(&second_commit), + &[("foo.txt", "bar"), ("bar.txt", "baz"), ("qux.txt", "bax")], + ); + + // The second commit changed foo.txt, and added bar.txt. We expect + // foo.txt to be reverted back to foo, and bar.txt should be dropped + let expected_commit: git2::Commit = + test_repository.commit_tree(None, &[("foo.txt", "foo"), ("qux.txt", "bax")]); + + let exclusive_tree = get_exclusive_tree( + &test_repository.gix_repository(), + third_commit.id().to_gix(), + base_commit.id().to_gix(), + ) + .unwrap(); + + assert_eq!(expected_commit.tree_id().to_gix(), exclusive_tree,) + } + } + + mod is_subset { + use gitbutler_oxidize::OidExt; + + use super::super::is_subset; + use super::*; + + #[test] + fn a_commit_is_a_subset_of_itself() { + let test_repository = TestingRepository::open(); + let base_commit: git2::Commit = + test_repository.commit_tree(None, &[("foo.txt", "foo")]); + let second_commit: git2::Commit = test_repository.commit_tree( + Some(&base_commit), + &[("foo.txt", "bar"), ("bar.txt", "baz")], + ); + + assert!(is_subset( + &test_repository.gix_repository(), + second_commit.id().to_gix(), + second_commit.id().to_gix(), + base_commit.id().to_gix() + ) + .unwrap()) + } + + #[test] + fn basic_subset() { + let test_repository = TestingRepository::open(); + let base_commit: git2::Commit = + test_repository.commit_tree(None, &[("foo.txt", "foo")]); + let superset: git2::Commit = test_repository.commit_tree( + Some(&base_commit), + &[("foo.txt", "bar"), ("bar.txt", "baz"), ("baz.txt", "asdf")], + ); + let subset: git2::Commit = test_repository.commit_tree( + Some(&base_commit), + &[("foo.txt", "bar"), ("bar.txt", "baz")], + ); + + assert!(is_subset( + &test_repository.gix_repository(), + superset.id().to_gix(), + subset.id().to_gix(), + base_commit.id().to_gix() + ) + .unwrap()); + + assert!(!is_subset( + &test_repository.gix_repository(), + subset.id().to_gix(), + superset.id().to_gix(), + base_commit.id().to_gix() + ) + .unwrap()); + } + + #[test] + fn complex_subset() { + let test_repository = TestingRepository::open(); + let base_commit: git2::Commit = + test_repository.commit_tree(None, &[("foo.txt", "foo")]); + let i1: git2::Commit = test_repository.commit_tree( + Some(&base_commit), + &[("foo.txt", "baz"), ("amp.txt", "asfd")], + ); + let superset: git2::Commit = test_repository.commit_tree( + Some(&i1), + &[ + ("foo.txt", "baz"), + ("amp.txt", "asfd"), + ("bar.txt", "baz"), + ("baz.txt", "asdf"), + ], + ); + let i2: git2::Commit = test_repository.commit_tree( + Some(&base_commit), + &[("foo.txt", "xxx"), ("fuzz.txt", "asdf")], + ); + let subset: git2::Commit = test_repository.commit_tree( + Some(&i2), + &[("foo.txt", "xxx"), ("fuzz.txt", "asdf"), ("bar.txt", "baz")], + ); + + // This creates two commits "superset" and "subset" which when + // compared directly don't have a superset-subset relationship, + // but because we take the changes that the subset/superset commits + // exclusivly added compared to a common base, we are able to + // identify that the changes each commit introduced are infact + // a superset/subset of each other + + assert!(is_subset( + &test_repository.gix_repository(), + superset.id().to_gix(), + subset.id().to_gix(), + base_commit.id().to_gix() + ) + .unwrap()); + + assert!(!is_subset( + &test_repository.gix_repository(), + subset.id().to_gix(), + superset.id().to_gix(), + base_commit.id().to_gix() + ) + .unwrap()); + } + } +} diff --git a/crates/gitbutler-branch-actions/src/lib.rs b/crates/gitbutler-branch-actions/src/lib.rs index 624b30df04..0fb3c1dbc3 100644 --- a/crates/gitbutler-branch-actions/src/lib.rs +++ b/crates/gitbutler-branch-actions/src/lib.rs @@ -79,4 +79,5 @@ pub use branch::{ pub use integration::GITBUTLER_WORKSPACE_COMMIT_TITLE; +mod commit_ops; pub mod stack; diff --git a/crates/gitbutler-oxidize/src/lib.rs b/crates/gitbutler-oxidize/src/lib.rs index 0aa7ad7b49..9c859fdc97 100644 --- a/crates/gitbutler-oxidize/src/lib.rs +++ b/crates/gitbutler-oxidize/src/lib.rs @@ -15,10 +15,30 @@ pub fn git2_to_gix_object_id(id: git2::Oid) -> gix::ObjectId { gix::ObjectId::try_from(id.as_bytes()).expect("git2 oid is always valid") } +pub trait OidExt { + fn to_gix(self) -> gix::ObjectId; +} + +impl OidExt for git2::Oid { + fn to_gix(self) -> gix::ObjectId { + git2_to_gix_object_id(self) + } +} + pub fn gix_to_git2_oid(id: impl Into) -> git2::Oid { git2::Oid::from_bytes(id.into().as_bytes()).expect("always valid") } +pub trait ObjectIdExt { + fn to_git2(self) -> git2::Oid; +} + +impl ObjectIdExt for gix::ObjectId { + fn to_git2(self) -> git2::Oid { + gix_to_git2_oid(self) + } +} + pub fn git2_signature_to_gix_signature<'a>( input: impl Borrow>, ) -> gix::actor::Signature { @@ -101,3 +121,16 @@ pub fn gix_to_git2_index(index: &gix::index::State) -> anyhow::Result) { + let mut recorder = gix::traverse::tree::Recorder::default(); + tree.traverse().breadthfirst(&mut recorder).unwrap(); + let repo = tree.repo; + for record in recorder.records { + println!( + "{}: {}", + record.filepath, + repo.find_blob(record.oid).unwrap().data.as_bstr() + ); + } +} diff --git a/crates/gitbutler-testsupport/src/testing_repository.rs b/crates/gitbutler-testsupport/src/testing_repository.rs index 86f8c9b040..5c9bdb24e8 100644 --- a/crates/gitbutler-testsupport/src/testing_repository.rs +++ b/crates/gitbutler-testsupport/src/testing_repository.rs @@ -52,6 +52,10 @@ impl TestingRepository { } } + pub fn gix_repository(&self) -> gix::Repository { + gix::open(self.repository.path()).unwrap() + } + pub fn open_with_initial_commit(files: &[(&str, &str)]) -> Self { let tempdir = tempdir().unwrap(); let repository = git2::Repository::init_opts(tempdir.path(), &init_opts()).unwrap(); @@ -90,6 +94,7 @@ impl TestingRepository { ) -> git2::Commit<'a> { self.commit_tree_inner(parent, &Uuid::new_v4().to_string(), files, Some(change_id)) } + pub fn commit_tree<'a>( &'a self, parent: Option<&git2::Commit<'_>>, From 653e8d57f8f70fd5943c98afce14fc2c13fcac0c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 2 Jan 2025 14:45:43 +0100 Subject: [PATCH 2/4] minimal refactor This is more of a suggestion than anything else. --- .../gitbutler-branch-actions/src/commit_ops.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/crates/gitbutler-branch-actions/src/commit_ops.rs b/crates/gitbutler-branch-actions/src/commit_ops.rs index 6851f9acc9..e99590cc71 100644 --- a/crates/gitbutler-branch-actions/src/commit_ops.rs +++ b/crates/gitbutler-branch-actions/src/commit_ops.rs @@ -1,10 +1,9 @@ use anyhow::{bail, Result}; use gitbutler_oxidize::GixRepositoryExt; -/// Finds the first parent of a given commit -fn get_first_parent(commit: gix::Commit) -> Result { - let first_parent = commit.parent_ids().take(1).collect::>(); - let Some(first_parent) = first_parent.first() else { +/// Finds the first parent of a given commit. +fn get_first_parent<'repo>(commit: &gix::Commit<'repo>) -> Result> { + let Some(first_parent) = commit.parent_ids().next() else { bail!("Failed to find first parent of {}", commit.id()) }; let first_parent = first_parent.object()?.into_commit(); @@ -12,14 +11,14 @@ fn get_first_parent(commit: gix::Commit) -> Result { } /// Gets the changes that one commit introduced compared to the base, -/// excluding anything between the commit and the base +/// excluding anything between the commit and the base. fn get_exclusive_tree( repository: &gix::Repository, commit_id: gix::ObjectId, base_id: gix::ObjectId, ) -> Result { let commit = repository.find_commit(commit_id)?; - let commit_parent = get_first_parent(commit.clone())?; + let commit_parent = get_first_parent(&commit)?; let base = repository.find_commit(base_id)?; repository @@ -36,7 +35,12 @@ fn get_exclusive_tree( .map(Into::into) } -/// Takes two commits and determines if one is a subset of or equal to the other +/// Takes two commits and determines if one is a subset of or equal to the other. +/// +/// ### Performance +/// +/// `repository` should have been configured [`with_object_memory()`](gix::Repository::with_object_memory()) +/// to prevent real objects to be written while probing for set inclusion. #[allow(dead_code)] fn is_subset( repository: &gix::Repository, From f4383b2d8466066747d8b569481a2f5d6ea26355 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 2 Jan 2025 14:47:09 +0100 Subject: [PATCH 3/4] When checking for conflicts, always be explicit about what's a conflict. --- crates/gitbutler-branch-actions/src/commit_ops.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/crates/gitbutler-branch-actions/src/commit_ops.rs b/crates/gitbutler-branch-actions/src/commit_ops.rs index e99590cc71..aa1be86a21 100644 --- a/crates/gitbutler-branch-actions/src/commit_ops.rs +++ b/crates/gitbutler-branch-actions/src/commit_ops.rs @@ -21,7 +21,7 @@ fn get_exclusive_tree( let commit_parent = get_first_parent(&commit)?; let base = repository.find_commit(base_id)?; - repository + let merged_tree = repository .merge_trees( commit_parent.tree_id()?, commit.tree_id()?, @@ -30,9 +30,8 @@ fn get_exclusive_tree( repository.merge_options_force_ours()?, )? .tree - .write() - .map_err(Into::into) - .map(Into::into) + .write()?; + Ok(merged_tree.into()) } /// Takes two commits and determines if one is a subset of or equal to the other. @@ -53,7 +52,7 @@ fn is_subset( let common_base = repository.find_commit(common_base_id)?; - let (options, _) = repository.merge_options_fail_fast()?; + let (options, unresolved) = repository.merge_options_fail_fast()?; let mut merged_exclusives = repository.merge_trees( common_base.tree_id()?, exclusive_superset, @@ -62,7 +61,7 @@ fn is_subset( options, )?; - if merged_exclusives.conflicts.is_empty() { + if !merged_exclusives.has_unresolved_conflicts(unresolved) { Ok(exclusive_superset == merged_exclusives.tree.write()?) } else { Ok(false) From 930c144adc57632df46b657ae1a344e4cb3b5a3a Mon Sep 17 00:00:00 2001 From: Caleb Owens Date: Sun, 5 Jan 2025 15:17:32 +0000 Subject: [PATCH 4/4] Use an enum result to better describe subset --- .../src/commit_ops.rs | 110 ++++++++++++------ 1 file changed, 72 insertions(+), 38 deletions(-) diff --git a/crates/gitbutler-branch-actions/src/commit_ops.rs b/crates/gitbutler-branch-actions/src/commit_ops.rs index aa1be86a21..03336be6a0 100644 --- a/crates/gitbutler-branch-actions/src/commit_ops.rs +++ b/crates/gitbutler-branch-actions/src/commit_ops.rs @@ -34,6 +34,17 @@ fn get_exclusive_tree( Ok(merged_tree.into()) } +#[derive(PartialEq, Debug)] +enum SubsetKind { + /// The subset_id is not equal to or a subset of superset_id. + /// superset_id MAY still be a strict subset of subset_id + NotSubset, + /// The subset_id is a strict subset of superset_id + Subset, + /// The subset_id and superset_id are equivalent commits + Equal, +} + /// Takes two commits and determines if one is a subset of or equal to the other. /// /// ### Performance @@ -46,10 +57,14 @@ fn is_subset( superset_id: gix::ObjectId, subset_id: gix::ObjectId, common_base_id: gix::ObjectId, -) -> Result { +) -> Result { let exclusive_superset = get_exclusive_tree(repository, superset_id, common_base_id)?; let exclusive_subset = get_exclusive_tree(repository, subset_id, common_base_id)?; + if exclusive_superset == exclusive_subset { + return Ok(SubsetKind::Equal); + } + let common_base = repository.find_commit(common_base_id)?; let (options, unresolved) = repository.merge_options_fail_fast()?; @@ -61,10 +76,12 @@ fn is_subset( options, )?; - if !merged_exclusives.has_unresolved_conflicts(unresolved) { - Ok(exclusive_superset == merged_exclusives.tree.write()?) + if merged_exclusives.has_unresolved_conflicts(unresolved) + || exclusive_superset != merged_exclusives.tree.write()? + { + Ok(SubsetKind::NotSubset) } else { - Ok(false) + Ok(SubsetKind::Subset) } } @@ -134,6 +151,8 @@ mod test { mod is_subset { use gitbutler_oxidize::OidExt; + use crate::commit_ops::SubsetKind; + use super::super::is_subset; use super::*; @@ -147,13 +166,16 @@ mod test { &[("foo.txt", "bar"), ("bar.txt", "baz")], ); - assert!(is_subset( - &test_repository.gix_repository(), - second_commit.id().to_gix(), - second_commit.id().to_gix(), - base_commit.id().to_gix() + assert_eq!( + is_subset( + &test_repository.gix_repository(), + second_commit.id().to_gix(), + second_commit.id().to_gix(), + base_commit.id().to_gix() + ) + .unwrap(), + SubsetKind::Equal ) - .unwrap()) } #[test] @@ -170,21 +192,27 @@ mod test { &[("foo.txt", "bar"), ("bar.txt", "baz")], ); - assert!(is_subset( - &test_repository.gix_repository(), - superset.id().to_gix(), - subset.id().to_gix(), - base_commit.id().to_gix() - ) - .unwrap()); + assert_eq!( + is_subset( + &test_repository.gix_repository(), + superset.id().to_gix(), + subset.id().to_gix(), + base_commit.id().to_gix() + ) + .unwrap(), + SubsetKind::Subset + ); - assert!(!is_subset( - &test_repository.gix_repository(), - subset.id().to_gix(), - superset.id().to_gix(), - base_commit.id().to_gix() - ) - .unwrap()); + assert_eq!( + is_subset( + &test_repository.gix_repository(), + subset.id().to_gix(), + superset.id().to_gix(), + base_commit.id().to_gix() + ) + .unwrap(), + SubsetKind::NotSubset + ); } #[test] @@ -221,21 +249,27 @@ mod test { // identify that the changes each commit introduced are infact // a superset/subset of each other - assert!(is_subset( - &test_repository.gix_repository(), - superset.id().to_gix(), - subset.id().to_gix(), - base_commit.id().to_gix() - ) - .unwrap()); + assert_eq!( + is_subset( + &test_repository.gix_repository(), + superset.id().to_gix(), + subset.id().to_gix(), + base_commit.id().to_gix() + ) + .unwrap(), + SubsetKind::Subset + ); - assert!(!is_subset( - &test_repository.gix_repository(), - subset.id().to_gix(), - superset.id().to_gix(), - base_commit.id().to_gix() - ) - .unwrap()); + assert_eq!( + is_subset( + &test_repository.gix_repository(), + subset.id().to_gix(), + superset.id().to_gix(), + base_commit.id().to_gix() + ) + .unwrap(), + SubsetKind::NotSubset + ); } } }