From da0bbbe637d781220efd8ba4e6b79c4800e06252 Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Sun, 3 Jan 2021 00:26:57 -0800 Subject: [PATCH] view: start tracking git refs Git refs are important at least for understanding where the remote branches are. This commit adds support for tracking them in the view and makes `git::import_refs()` update them. When merging views (either because of concurrent operations or when undoing an earlier operation), there can be conflicts between git ref changes. I ignored that for now and let the later operation win. That will probably be good enough for a while. It's not hard to detect the conflicts, but I haven't yet decided how to handle them. I'm leaning towards representing the conflicting refs in the view just like how we represent conflicting files in the tree. --- lib/protos/op_store.proto | 9 ++ lib/src/git.rs | 18 +++- lib/src/op_store.rs | 4 +- lib/src/simple_op_store.rs | 10 ++ lib/src/transaction.rs | 14 +++ lib/src/view.rs | 48 ++++++++- lib/tests/test_git.rs | 209 ++++++++++++++++++++++++++++++++++++- 7 files changed, 305 insertions(+), 7 deletions(-) diff --git a/lib/protos/op_store.proto b/lib/protos/op_store.proto index 175d073c3..f8a735072 100644 --- a/lib/protos/op_store.proto +++ b/lib/protos/op_store.proto @@ -14,9 +14,18 @@ syntax = "proto3"; +message GitRef { + string name = 1; + // Always a commit id. Refs pointing to a non-commit object are not + // included. + bytes commit_id = 2; +} + message View { repeated bytes head_ids = 1; bytes checkout = 2; + // Only a subset of the refs. For example, does not include refs/notes/. + repeated GitRef git_refs = 3; } message Operation { diff --git a/lib/src/git.rs b/lib/src/git.rs index bfa675a57..ef5a942a9 100644 --- a/lib/src/git.rs +++ b/lib/src/git.rs @@ -13,6 +13,7 @@ // limitations under the License. use crate::commit::Commit; +use crate::repo::Repo; use crate::store::CommitId; use crate::transaction::Transaction; use thiserror::Error; @@ -30,10 +31,22 @@ pub fn import_refs(tx: &mut Transaction) -> Result<(), GitImportError> { let store = tx.store().clone(); let git_repo = store.git_repo().ok_or(GitImportError::NotAGitRepo)?; let git_refs = git_repo.references()?; + let existing_git_refs: Vec<_> = tx.as_repo().view().git_refs().keys().cloned().collect(); + // TODO: Store the id of the previous import and read it back here, so we can + // merge the views instead of overwriting. + for existing_git_ref in existing_git_refs { + tx.remove_git_ref(&existing_git_ref); + // TODO: We should probably also remove heads pointing to the same + // commits and commits no longer reachable from other refs. + // If the underlying git repo has a branch that gets rewritten, we + // should probably not keep the commits it used to point to. + } for git_ref in git_refs { let git_ref = git_ref?; - if !(git_ref.is_tag() || git_ref.is_branch() || git_ref.is_remote()) { - // Skip other refs (such as notes) and symbolic refs. + if !(git_ref.is_tag() || git_ref.is_branch() || git_ref.is_remote()) + || git_ref.name().is_none() + { + // Skip other refs (such as notes) and symbolic refs, as well as non-utf8 refs. // TODO: Is it useful to import HEAD (especially if it's detached)? continue; } @@ -41,6 +54,7 @@ pub fn import_refs(tx: &mut Transaction) -> Result<(), GitImportError> { let id = CommitId(git_commit.id().as_bytes().to_vec()); let commit = store.get_commit(&id).unwrap(); tx.add_head(&commit); + tx.insert_git_ref(git_ref.name().unwrap().to_string(), id); } Ok(()) } diff --git a/lib/src/op_store.rs b/lib/src/op_store.rs index 0d14fced1..9f2c5defc 100644 --- a/lib/src/op_store.rs +++ b/lib/src/op_store.rs @@ -13,7 +13,7 @@ // limitations under the License. use crate::store::{CommitId, Timestamp}; -use std::collections::HashSet; +use std::collections::{BTreeMap, HashSet}; use std::fmt::{Debug, Error, Formatter}; #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] @@ -52,6 +52,7 @@ impl OperationId { pub struct View { /// All head commits pub head_ids: HashSet, + pub git_refs: BTreeMap, // The commit that *should be* checked out in the (default) working copy. Note that the // working copy (.jj/working_copy/) has the source of truth about which commit *is* checked out // (to be precise: the commit to which we most recently completed a checkout to). @@ -63,6 +64,7 @@ impl View { pub fn new(checkout: CommitId) -> Self { Self { head_ids: HashSet::new(), + git_refs: BTreeMap::new(), checkout, } } diff --git a/lib/src/simple_op_store.rs b/lib/src/simple_op_store.rs index 7117dfd5d..c285fb408 100644 --- a/lib/src/simple_op_store.rs +++ b/lib/src/simple_op_store.rs @@ -198,6 +198,12 @@ fn view_to_proto(view: &View) -> crate::protos::op_store::View { for head_id in &view.head_ids { proto.head_ids.push(head_id.0.clone()); } + for (git_ref_name, commit_id) in &view.git_refs { + let mut git_ref_proto = crate::protos::op_store::GitRef::new(); + git_ref_proto.set_name(git_ref_name.clone()); + git_ref_proto.set_commit_id(commit_id.0.clone()); + proto.git_refs.push(git_ref_proto); + } proto } @@ -206,5 +212,9 @@ fn view_from_proto(proto: &crate::protos::op_store::View) -> View { for head_id_bytes in proto.head_ids.iter() { view.head_ids.insert(CommitId(head_id_bytes.to_vec())); } + for git_ref in proto.git_refs.iter() { + view.git_refs + .insert(git_ref.name.clone(), CommitId(git_ref.commit_id.to_vec())); + } view } diff --git a/lib/src/transaction.rs b/lib/src/transaction.rs index 92ef75768..3b3570dde 100644 --- a/lib/src/transaction.rs +++ b/lib/src/transaction.rs @@ -171,6 +171,20 @@ impl<'r> Transaction<'r> { mut_repo.evolution.as_mut().unwrap().invalidate(); } + pub fn insert_git_ref(&mut self, name: String, commit_id: CommitId) { + let mut_repo = Arc::get_mut(self.repo.as_mut().unwrap()).unwrap(); + mut_repo + .view + .as_mut() + .unwrap() + .insert_git_ref(name, commit_id); + } + + pub fn remove_git_ref(&mut self, name: &str) { + let mut_repo = Arc::get_mut(self.repo.as_mut().unwrap()).unwrap(); + mut_repo.view.as_mut().unwrap().remove_git_ref(name); + } + pub fn set_view(&mut self, data: op_store::View) { let mut_repo = Arc::get_mut(self.repo.as_mut().unwrap()).unwrap(); mut_repo.view.as_mut().unwrap().set_view(data); diff --git a/lib/src/view.rs b/lib/src/view.rs index 687960239..3d8fd51ba 100644 --- a/lib/src/view.rs +++ b/lib/src/view.rs @@ -13,7 +13,7 @@ // limitations under the License. use std::cmp::min; -use std::collections::HashSet; +use std::collections::{BTreeMap, HashSet}; use std::path::PathBuf; use std::sync::Arc; @@ -32,6 +32,7 @@ use crate::store_wrapper::StoreWrapper; pub trait View { fn checkout(&self) -> &CommitId; fn heads<'a>(&'a self) -> Box + 'a>; + fn git_refs(&self) -> &BTreeMap; fn op_store(&self) -> Arc; fn base_op_head_id(&self) -> &OperationId; @@ -152,6 +153,35 @@ pub fn merge_views( // side while a child or successor is created on another side? Maybe a // warning? + // Merge git refs + let base_git_ref_names: HashSet<_> = base.git_refs.keys().clone().collect(); + let right_git_ref_names: HashSet<_> = right.git_refs.keys().clone().collect(); + for maybe_modified_git_ref_name in right_git_ref_names.intersection(&base_git_ref_names) { + let base_commit_id = base.git_refs.get(*maybe_modified_git_ref_name).unwrap(); + let right_commit_id = right.git_refs.get(*maybe_modified_git_ref_name).unwrap(); + if base_commit_id == right_commit_id { + continue; + } + // TODO: Handle modify/modify conflict (i.e. if left and base are different + // here) + result.git_refs.insert( + (*maybe_modified_git_ref_name).clone(), + right_commit_id.clone(), + ); + } + for added_git_ref_name in right_git_ref_names.difference(&base_git_ref_names) { + // TODO: Handle add/add conflict (i.e. if left also has the ref here) + result.git_refs.insert( + (*added_git_ref_name).clone(), + right.git_refs.get(*added_git_ref_name).unwrap().clone(), + ); + } + for removed_git_ref_name in base_git_ref_names.difference(&right_git_ref_names) { + // TODO: Handle modify/remove conflict (i.e. if left and base are different + // here) + result.git_refs.remove(*removed_git_ref_name); + } + result } @@ -278,6 +308,10 @@ impl View for ReadonlyView { Box::new(self.data.head_ids.iter()) } + fn git_refs(&self) -> &BTreeMap { + &self.data.git_refs + } + fn op_store(&self) -> Arc { self.op_store.clone() } @@ -365,6 +399,10 @@ impl View for MutableView { Box::new(self.data.head_ids.iter()) } + fn git_refs(&self) -> &BTreeMap { + &self.data.git_refs + } + fn op_store(&self) -> Arc { self.op_store.clone() } @@ -402,6 +440,14 @@ impl MutableView { self.remove_non_heads(); } + pub fn insert_git_ref(&mut self, name: String, commit_id: CommitId) { + self.data.git_refs.insert(name, commit_id); + } + + pub fn remove_git_ref(&mut self, name: &str) { + self.data.git_refs.remove(name); + } + pub fn set_view(&mut self, data: op_store::View) { self.data = data; self.remove_non_heads(); diff --git a/lib/tests/test_git.rs b/lib/tests/test_git.rs index 51625be31..23bf61b4d 100644 --- a/lib/tests/test_git.rs +++ b/lib/tests/test_git.rs @@ -63,21 +63,222 @@ fn test_import_refs() { let commit2 = empty_git_commit(&git_repo, "refs/heads/main", &[&commit1]); let commit3 = empty_git_commit(&git_repo, "refs/heads/feature1", &[&commit2]); let commit4 = empty_git_commit(&git_repo, "refs/heads/feature2", &[&commit2]); + let commit5 = empty_git_commit(&git_repo, "refs/remotes/origin/main", &[&commit2]); + // Should not be imported + empty_git_commit(&git_repo, "refs/notes/x", &[&commit2]); std::fs::create_dir(&jj_repo_dir).unwrap(); let repo = ReadonlyRepo::init_external_git(&settings, jj_repo_dir, git_repo_dir); let mut tx = repo.start_transaction("test"); let heads_before: HashSet<_> = repo.view().heads().cloned().collect(); jujube_lib::git::import_refs(&mut tx).unwrap_or_default(); - let heads_after: HashSet<_> = tx.as_repo().view().heads().cloned().collect(); + let view = tx.as_repo().view(); + let heads_after: HashSet<_> = view.heads().cloned().collect(); let expected_heads: HashSet<_> = heads_before - .union(&hashset!(commit_id(&commit3), commit_id(&commit4))) + .union(&hashset!( + commit_id(&commit3), + commit_id(&commit4), + commit_id(&commit5) + )) .cloned() .collect(); assert_eq!(heads_after, expected_heads); + assert_eq!(view.git_refs().len(), 4); + assert_eq!( + view.git_refs().get("refs/heads/main"), + Some(commit_id(&commit2)).as_ref() + ); + assert_eq!( + view.git_refs().get("refs/heads/feature1"), + Some(commit_id(&commit3)).as_ref() + ); + assert_eq!( + view.git_refs().get("refs/heads/feature2"), + Some(commit_id(&commit4)).as_ref() + ); + assert_eq!( + view.git_refs().get("refs/remotes/origin/main"), + Some(commit_id(&commit5)).as_ref() + ); tx.discard(); } +#[test] +fn test_import_refs_reimport() { + let settings = testutils::user_settings(); + let temp_dir = tempfile::tempdir().unwrap(); + let git_repo_dir = temp_dir.path().join("git"); + let jj_repo_dir = temp_dir.path().join("jj"); + + let git_repo = git2::Repository::init_bare(&git_repo_dir).unwrap(); + let commit1 = empty_git_commit(&git_repo, "refs/heads/main", &[]); + let commit2 = empty_git_commit(&git_repo, "refs/heads/main", &[&commit1]); + let commit3 = empty_git_commit(&git_repo, "refs/heads/feature1", &[&commit2]); + let commit4 = empty_git_commit(&git_repo, "refs/heads/feature2", &[&commit2]); + + std::fs::create_dir(&jj_repo_dir).unwrap(); + let mut repo = ReadonlyRepo::init_external_git(&settings, jj_repo_dir, git_repo_dir); + let heads_before: HashSet<_> = repo.view().heads().cloned().collect(); + let mut tx = repo.start_transaction("test"); + jujube_lib::git::import_refs(&mut tx).unwrap_or_default(); + tx.commit(); + + // Delete feature1 and rewrite feature2 + git_repo + .find_reference("refs/heads/feature1") + .unwrap() + .delete() + .unwrap(); + git_repo + .find_reference("refs/heads/feature2") + .unwrap() + .delete() + .unwrap(); + let commit5 = empty_git_commit(&git_repo, "refs/heads/feature2", &[&commit2]); + + Arc::get_mut(&mut repo).unwrap().reload(); + let mut tx = repo.start_transaction("test"); + jujube_lib::git::import_refs(&mut tx).unwrap_or_default(); + + let view = tx.as_repo().view(); + let heads_after: HashSet<_> = view.heads().cloned().collect(); + // TODO: commit3 and commit4 should probably be removed + let expected_heads: HashSet<_> = heads_before + .union(&hashset!( + commit_id(&commit3), + commit_id(&commit4), + commit_id(&commit5) + )) + .cloned() + .collect(); + assert_eq!(heads_after, expected_heads); + assert_eq!(view.git_refs().len(), 2); + assert_eq!( + view.git_refs().get("refs/heads/main"), + Some(commit_id(&commit2)).as_ref() + ); + assert_eq!( + view.git_refs().get("refs/heads/feature2"), + Some(commit_id(&commit5)).as_ref() + ); + tx.discard(); +} + +fn git_ref(git_repo: &git2::Repository, name: &str, target: Oid) { + git_repo.reference(name, target, true, "").unwrap(); +} + +fn delete_git_ref(git_repo: &git2::Repository, name: &str) { + git_repo.find_reference(name).unwrap().delete().unwrap(); +} + +#[test] +fn test_import_refs_merge() { + let settings = testutils::user_settings(); + let (_temp_dir, mut repo) = testutils::init_repo(&settings, true); + let git_repo = repo.store().git_repo().unwrap(); + + // Set up the following refs and update them as follows: + // sideways-unchanged: one operation rewrites the ref + // unchanged-sideways: the other operation rewrites the ref + // remove-unchanged: one operation removes the ref + // unchanged-remove: the other operation removes the ref + // forward-forward: two operations move forward by different amounts + // sideways-sideways: two operations rewrite the ref + // forward-remove: one operation moves forward, the other operation removes + // remove-forward: one operation removes, the other operation moves + // add-add: two operations add the ref with different target + // + // The above cases distinguish between refs moving forward and sideways (and + // there are no tests for refs moving backward) because we may want to treat + // the cases differently, although that's still unclear. + // + // TODO: Consider adding more systematic testing to cover + // all state transitions. For example, the above does not include a case + // where a ref is added on both sides and one is an ancestor of the other + // (we should probably resolve that in favor of the descendant). + let commit1 = empty_git_commit(&git_repo, "refs/heads/main", &[]); + let commit2 = empty_git_commit(&git_repo, "refs/heads/main", &[&commit1]); + let commit3 = empty_git_commit(&git_repo, "refs/heads/main", &[&commit2]); + let commit4 = empty_git_commit(&git_repo, "refs/heads/feature1", &[&commit2]); + let commit5 = empty_git_commit(&git_repo, "refs/heads/feature2", &[&commit2]); + git_ref(&git_repo, "refs/heads/sideways-unchanged", commit3.id()); + git_ref(&git_repo, "refs/heads/unchanged-sideways", commit3.id()); + git_ref(&git_repo, "refs/heads/remove-unchanged", commit3.id()); + git_ref(&git_repo, "refs/heads/unchanged-remove", commit3.id()); + git_ref(&git_repo, "refs/heads/sideways-sideways", commit3.id()); + git_ref(&git_repo, "refs/heads/forward-forward", commit1.id()); + git_ref(&git_repo, "refs/heads/forward-remove", commit1.id()); + git_ref(&git_repo, "refs/heads/remove-forward", commit1.id()); + let mut tx = repo.start_transaction("initial import"); + jujube_lib::git::import_refs(&mut tx).unwrap_or_default(); + tx.commit(); + Arc::get_mut(&mut repo).unwrap().reload(); + + // One of the concurrent operations: + git_ref(&git_repo, "refs/heads/sideways-unchanged", commit4.id()); + delete_git_ref(&git_repo, "refs/heads/remove-unchanged"); + git_ref(&git_repo, "refs/heads/sideways-sideways", commit4.id()); + git_ref(&git_repo, "refs/heads/forward-forward", commit2.id()); + git_ref(&git_repo, "refs/heads/forward-remove", commit2.id()); + delete_git_ref(&git_repo, "refs/heads/remove-forward"); + git_ref(&git_repo, "refs/heads/add-add", commit3.id()); + let mut tx1 = repo.start_transaction("concurrent import 1"); + jujube_lib::git::import_refs(&mut tx1).unwrap_or_default(); + tx1.commit(); + + // The other concurrent operation: + git_ref(&git_repo, "refs/heads/unchanged-sideways", commit4.id()); + delete_git_ref(&git_repo, "refs/heads/unchanged-remove"); + git_ref(&git_repo, "refs/heads/sideways-sideways", commit5.id()); + git_ref(&git_repo, "refs/heads/forward-forward", commit3.id()); + delete_git_ref(&git_repo, "refs/heads/forward-remove"); + git_ref(&git_repo, "refs/heads/remove-forward", commit2.id()); + git_ref(&git_repo, "refs/heads/add-add", commit4.id()); + let mut tx2 = repo.start_transaction("concurrent import 2"); + jujube_lib::git::import_refs(&mut tx2).unwrap_or_default(); + tx2.commit(); + + // Reload the repo, causing the operations to be merged. + Arc::get_mut(&mut repo).unwrap().reload(); + + let view = repo.view(); + let git_refs = view.git_refs(); + assert_eq!(git_refs.len(), 9); + assert_eq!( + git_refs.get("refs/heads/sideways-unchanged"), + Some(commit_id(&commit4)).as_ref() + ); + assert_eq!( + git_refs.get("refs/heads/unchanged-sideways"), + Some(commit_id(&commit4)).as_ref() + ); + assert_eq!(git_refs.get("refs/heads/remove-unchanged"), None); + assert_eq!(git_refs.get("refs/heads/unchanged-remove"), None); + // TODO: Perhaps we should automatically resolve this to the descendant-most + // commit? (We currently do get the descendant-most, but that's only because we + // let the later operation overwrite.) + assert_eq!( + git_refs.get("refs/heads/forward-forward"), + Some(commit_id(&commit3)).as_ref() + ); + // TODO: The rest of these should be conflicts (however we decide to represent + // that). + assert_eq!( + git_refs.get("refs/heads/sideways-sideways"), + Some(commit_id(&commit5)).as_ref() + ); + assert_eq!(git_refs.get("refs/heads/forward-remove"), None); + assert_eq!( + git_refs.get("refs/heads/remove-forward"), + Some(commit_id(&commit2)).as_ref() + ); + assert_eq!( + git_refs.get("refs/heads/add-add"), + Some(commit_id(&commit4)).as_ref() + ); +} + #[test] fn test_import_refs_empty_git_repo() { let settings = testutils::user_settings(); @@ -92,8 +293,10 @@ fn test_import_refs_empty_git_repo() { let heads_before: HashSet<_> = repo.view().heads().cloned().collect(); let mut tx = repo.start_transaction("test"); jujube_lib::git::import_refs(&mut tx).unwrap_or_default(); - let heads_after: HashSet<_> = tx.as_repo().view().heads().cloned().collect(); + let view = tx.as_repo().view(); + let heads_after: HashSet<_> = view.heads().cloned().collect(); assert_eq!(heads_before, heads_after); + assert_eq!(view.git_refs().len(), 0); tx.discard(); }