forked from mirrors/jj
git_backend: on gc(), remove unreachable no-gc refs and compact them
With my jj repo, the number of jj/keep refs went down from 87887 to 27733. The .git directory size is halved, but we'll still need to clean up the extras and index files to save more disk space. "git gc --prune=now && jj debug reindex" passed afterwards, so the repo doesn't appear to be corrupted. #12
This commit is contained in:
parent
351487b9f5
commit
3d0b3d57d8
3 changed files with 286 additions and 4 deletions
|
@ -15,10 +15,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
### New features
|
||||
|
||||
* New `jj op abandon` command is added to clean up the operation history. If GC
|
||||
is implemented, Git refs and commit objects can be compacted.
|
||||
* New `jj op abandon` command is added to clean up the operation history. Git
|
||||
refs and commit objects can be further compacted by `jj util gc`.
|
||||
|
||||
* `jj util gc` now removes unreachable operation and view objects.
|
||||
* `jj util gc` now removes unreachable operation, view, and Git objects.
|
||||
|
||||
* `jj branch rename` will now warn if the renamed branch has a remote branch, since
|
||||
those will have to be manually renamed outside of `jj`.
|
||||
|
|
|
@ -607,6 +607,88 @@ fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit {
|
|||
}
|
||||
}
|
||||
|
||||
/// Builds the ref-transaction edit that deletes `git_ref`.
///
/// The edit uses `PreviousValue::ExistingMustMatch` with the target that
/// `git_ref` currently holds, requests `RefLog::AndReference`, and does not
/// dereference the ref (`deref: false`).
fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit {
    use gix::refs::transaction::{Change, PreviousValue, RefEdit, RefLog};

    RefEdit {
        change: Change::Delete {
            expected: PreviousValue::ExistingMustMatch(git_ref.target),
            log: RefLog::AndReference,
        },
        name: git_ref.name,
        deref: false,
    }
}
|
||||
|
||||
/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other
|
||||
/// unreachable and non-head refs.
|
||||
fn recreate_no_gc_refs(
|
||||
git_repo: &gix::Repository,
|
||||
new_heads: impl IntoIterator<Item = CommitId>,
|
||||
keep_newer: SystemTime,
|
||||
) -> Result<(), BackendError> {
|
||||
// Calculate diff between existing no-gc refs and new heads.
|
||||
let new_heads: HashSet<CommitId> = new_heads.into_iter().collect();
|
||||
let mut no_gc_refs_to_keep_count: usize = 0;
|
||||
let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new();
|
||||
let git_references = git_repo
|
||||
.references()
|
||||
.map_err(|err| BackendError::Other(err.into()))?;
|
||||
let no_gc_refs_iter = git_references
|
||||
.prefixed(NO_GC_REF_NAMESPACE)
|
||||
.map_err(|err| BackendError::Other(err.into()))?;
|
||||
for git_ref in no_gc_refs_iter {
|
||||
let git_ref = git_ref.map_err(BackendError::Other)?.detach();
|
||||
let oid = git_ref.target.try_id().ok_or_else(|| {
|
||||
let name = git_ref.name.as_bstr();
|
||||
BackendError::Other(format!("Symbolic no-gc ref found: {name}").into())
|
||||
})?;
|
||||
let id = CommitId::from_bytes(oid.as_bytes());
|
||||
let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex();
|
||||
if new_heads.contains(&id) && name_good {
|
||||
no_gc_refs_to_keep_count += 1;
|
||||
continue;
|
||||
}
|
||||
// Check timestamp of loose ref, but this is still racy on re-import
|
||||
// because:
|
||||
// - existing packed ref won't be demoted to loose ref
|
||||
// - existing loose ref won't be touched
|
||||
//
|
||||
// TODO: might be better to switch to a dummy merge, where new no-gc ref
|
||||
// will always have a unique name. Doing that with the current
|
||||
// ref-per-head strategy would increase the number of the no-gc refs.
|
||||
// https://github.com/martinvonz/jj/pull/2659#issuecomment-1837057782
|
||||
let loose_ref_path = git_repo.path().join(git_ref.name.to_path());
|
||||
if let Ok(metadata) = loose_ref_path.metadata() {
|
||||
let mtime = metadata.modified().expect("unsupported platform?");
|
||||
if mtime > keep_newer {
|
||||
tracing::trace!(?git_ref, "not deleting new");
|
||||
no_gc_refs_to_keep_count += 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Also deletes no-gc ref of random name created by old jj.
|
||||
tracing::trace!(?git_ref, ?name_good, "will delete");
|
||||
no_gc_refs_to_delete.push(git_ref);
|
||||
}
|
||||
tracing::info!(
|
||||
new_heads_count = new_heads.len(),
|
||||
no_gc_refs_to_keep_count,
|
||||
no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(),
|
||||
"collected reachable refs"
|
||||
);
|
||||
|
||||
// It's slow to delete packed refs one by one, so update refs all at once.
|
||||
let ref_edits = itertools::chain(
|
||||
no_gc_refs_to_delete.into_iter().map(to_ref_deletion),
|
||||
new_heads.iter().map(to_no_gc_ref_update),
|
||||
);
|
||||
git_repo
|
||||
.edit_references(ref_edits)
|
||||
.map_err(|err| BackendError::Other(err.into()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_git_gc(git_dir: &Path) -> Result<(), GitGcError> {
|
||||
let mut git = Command::new("git");
|
||||
git.arg("--git-dir=."); // turn off discovery
|
||||
|
@ -1083,7 +1165,18 @@ impl Backend for GitBackend {
|
|||
Ok((id, contents))
|
||||
}
|
||||
|
||||
fn gc(&self, _index: &dyn Index, _keep_newer: SystemTime) -> BackendResult<()> {
|
||||
#[tracing::instrument(skip(self, index))]
|
||||
fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
|
||||
let git_repo = self.lock_git_repo();
|
||||
let new_heads = index
|
||||
.all_heads_for_gc()
|
||||
.map_err(|err| BackendError::Other(err.into()))?
|
||||
.filter(|id| *id != self.root_commit_id);
|
||||
recreate_no_gc_refs(&git_repo, new_heads, keep_newer)?;
|
||||
// TODO: remove unreachable entries from extras table if segment file
|
||||
// mtime <= keep_newer? (it won't be consistent with no-gc refs
|
||||
// preserved by the keep_newer timestamp though)
|
||||
// TODO: remove unreachable extras table segments
|
||||
// TODO: pass in keep_newer to "git gc" command
|
||||
run_git_gc(self.git_repo_path()).map_err(|err| BackendError::Other(err.into()))
|
||||
}
|
||||
|
|
189
lib/tests/test_git_backend.rs
Normal file
189
lib/tests/test_git_backend.rs
Normal file
|
@ -0,0 +1,189 @@
|
|||
// Copyright 2024 The Jujutsu Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::process::Command;
|
||||
use std::sync::Arc;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use jj_lib::backend::CommitId;
|
||||
use jj_lib::git_backend::GitBackend;
|
||||
use jj_lib::repo::{ReadonlyRepo, Repo};
|
||||
use maplit::hashset;
|
||||
use testutils::{create_random_commit, CommitGraphBuilder, TestRepo, TestRepoBackend};
|
||||
|
||||
/// Returns the `GitBackend` behind `repo`'s store.
///
/// Panics if the store's backend is not a `GitBackend`.
fn get_git_backend(repo: &Arc<ReadonlyRepo>) -> &GitBackend {
    let backend = repo.store().backend_impl();
    backend.downcast_ref::<GitBackend>().unwrap()
}
|
||||
|
||||
fn get_git_repo(repo: &Arc<ReadonlyRepo>) -> gix::Repository {
|
||||
get_git_backend(repo).git_repo()
|
||||
}
|
||||
|
||||
fn collect_no_gc_refs(git_repo: &gix::Repository) -> HashSet<CommitId> {
|
||||
let git_refs = git_repo.references().unwrap();
|
||||
let no_gc_refs_iter = git_refs.prefixed("refs/jj/keep/").unwrap();
|
||||
no_gc_refs_iter
|
||||
.map(|git_ref| CommitId::from_bytes(git_ref.unwrap().id().as_bytes()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Exercises `gc()` on a Git-backed test repo with progressively smaller
/// indexes and checks which `refs/jj/keep/` refs remain after each run.
#[test]
fn test_gc() {
    // TODO: Better way to disable the test if git command couldn't be executed
    let git_probe = Command::new("git").arg("--version").status();
    if git_probe.is_err() {
        eprintln!("Skipping because git command might fail to run");
        return;
    }

    let settings = testutils::user_settings();
    let test_repo = TestRepo::init_with_backend(TestRepoBackend::Git);
    let repo = test_repo.repo;
    let git_repo = get_git_repo(&repo);
    let base_index = repo.readonly_index();

    // Set up commits:
    //
    //     H (predecessor: D)
    //   G |
    //   |\|
    //   | F
    //   E |
    // D | |
    // C |/
    // |/
    // B
    // A
    //
    // i.e. A <- B, B <- {C, E, F}, C <- D, {E, F} <- G, F <- H.
    let mut tx = repo.start_transaction(&settings);
    let mut graph_builder = CommitGraphBuilder::new(&settings, tx.mut_repo());
    let commit_a = graph_builder.initial_commit();
    let commit_b = graph_builder.commit_with_parents(&[&commit_a]);
    let commit_c = graph_builder.commit_with_parents(&[&commit_b]);
    let commit_d = graph_builder.commit_with_parents(&[&commit_c]);
    let commit_e = graph_builder.commit_with_parents(&[&commit_b]);
    let commit_f = graph_builder.commit_with_parents(&[&commit_b]);
    let commit_g = graph_builder.commit_with_parents(&[&commit_e, &commit_f]);
    let commit_h = create_random_commit(tx.mut_repo(), &settings)
        .set_parents(vec![commit_f.id().clone()])
        .set_predecessors(vec![commit_d.id().clone()])
        .write()
        .unwrap();
    let repo = tx.commit("test");

    // Ids are compared many times below, so name them once.
    let id_a = commit_a.id().clone();
    let id_b = commit_b.id().clone();
    let id_c = commit_c.id().clone();
    let id_d = commit_d.id().clone();
    let id_e = commit_e.id().clone();
    let id_f = commit_f.id().clone();
    let id_g = commit_g.id().clone();
    let id_h = commit_h.id().clone();
    let all_ids = hashset! {
        id_a.clone(),
        id_b.clone(),
        id_c.clone(),
        id_d.clone(),
        id_e.clone(),
        id_f.clone(),
        id_g.clone(),
        id_h.clone(),
    };
    let no_gc_refs = || collect_no_gc_refs(&git_repo);

    assert_eq!(
        *repo.view().heads(),
        hashset! { id_d.clone(), id_g.clone(), id_h.clone() },
    );

    // At first, all commits have no-gc refs
    assert_eq!(no_gc_refs(), all_ids);

    // Empty index, but all kept by file modification time
    // (Beware that this invokes "git gc" and refs will be packed.)
    repo.store()
        .gc(base_index.as_index(), SystemTime::UNIX_EPOCH)
        .unwrap();
    assert_eq!(no_gc_refs(), all_ids);

    // All reachable: redundant no-gc refs will be removed
    let now = SystemTime::now();
    repo.store().gc(repo.index(), now).unwrap();
    assert_eq!(
        no_gc_refs(),
        hashset! { id_d.clone(), id_g.clone(), id_h.clone() },
    );

    // G is no longer reachable
    let mut mut_index = base_index.start_modification();
    mut_index.add_commit(&commit_a);
    mut_index.add_commit(&commit_b);
    mut_index.add_commit(&commit_c);
    mut_index.add_commit(&commit_d);
    mut_index.add_commit(&commit_e);
    mut_index.add_commit(&commit_f);
    mut_index.add_commit(&commit_h);
    repo.store().gc(mut_index.as_index(), now).unwrap();
    assert_eq!(
        no_gc_refs(),
        hashset! { id_d.clone(), id_e.clone(), id_h.clone() },
    );

    // D|E|H are no longer reachable
    let mut mut_index = base_index.start_modification();
    mut_index.add_commit(&commit_a);
    mut_index.add_commit(&commit_b);
    mut_index.add_commit(&commit_c);
    mut_index.add_commit(&commit_f);
    repo.store().gc(mut_index.as_index(), now).unwrap();
    assert_eq!(no_gc_refs(), hashset! { id_c.clone(), id_f.clone() });

    // B|C|F are no longer reachable
    let mut mut_index = base_index.start_modification();
    mut_index.add_commit(&commit_a);
    repo.store().gc(mut_index.as_index(), now).unwrap();
    assert_eq!(no_gc_refs(), hashset! { id_a.clone() });

    // All unreachable
    repo.store().gc(base_index.as_index(), now).unwrap();
    assert_eq!(no_gc_refs(), hashset! {});
}
|
Loading…
Reference in a new issue