diff --git a/cli/examples/custom-backend/main.rs b/cli/examples/custom-backend/main.rs
index beacdf853..d16d18c73 100644
--- a/cli/examples/custom-backend/main.rs
+++ b/cli/examples/custom-backend/main.rs
@@ -18,17 +18,18 @@ use std::path::Path;
 use std::time::SystemTime;
 
 use async_trait::async_trait;
+use futures::stream::BoxStream;
 use jj_cli::cli_util::{CliRunner, CommandHelper};
 use jj_cli::command_error::CommandError;
 use jj_cli::ui::Ui;
 use jj_lib::backend::{
     Backend, BackendInitError, BackendLoadError, BackendResult, ChangeId, Commit, CommitId,
-    Conflict, ConflictId, FileId, SigningFn, SymlinkId, Tree, TreeId,
+    Conflict, ConflictId, CopyRecord, FileId, SigningFn, SymlinkId, Tree, TreeId,
 };
 use jj_lib::git_backend::GitBackend;
 use jj_lib::index::Index;
 use jj_lib::repo::StoreFactories;
-use jj_lib::repo_path::RepoPath;
+use jj_lib::repo_path::{RepoPath, RepoPathBuf};
 use jj_lib::settings::UserSettings;
 use jj_lib::signing::Signer;
 use jj_lib::workspace::{Workspace, WorkspaceInitError};
@@ -174,6 +175,15 @@ impl Backend for JitBackend {
         self.inner.write_commit(contents, sign_with)
     }
 
+    fn get_copy_records(
+        &self,
+        paths: &[RepoPathBuf],
+        roots: &[CommitId],
+        heads: &[CommitId],
+    ) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
+        self.inner.get_copy_records(paths, roots, heads)
+    }
+
     fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
         self.inner.gc(index, keep_newer)
     }
diff --git a/lib/src/backend.rs b/lib/src/backend.rs
index 29e94a672..156b76fc1 100644
--- a/lib/src/backend.rs
+++ b/lib/src/backend.rs
@@ -15,12 +15,13 @@
 #![allow(missing_docs)]
 
 use std::any::Any;
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashSet};
 use std::fmt::Debug;
 use std::io::Read;
 use std::time::SystemTime;
 
 use async_trait::async_trait;
+use futures::stream::BoxStream;
 use thiserror::Error;
 
 use crate::content_hash::ContentHash;
@@ -152,6 +153,47 @@ pub struct Conflict {
     pub adds: Vec<ConflictTerm>,
 }
 
+/// An individual copy source.
+#[derive(Debug, PartialEq, Eq, Hash, Clone)]
+pub struct CopySource {
+    /// The source path a target was copied from.
+    ///
+    /// It is not required that the source path is different than the target
+    /// path. A custom backend may choose to represent 'rollbacks' as copies
+    /// from a file unto itself, from a specific prior commit.
+    pub path: RepoPathBuf,
+    pub file: FileId,
+    /// The source commit the target was copied from. If not specified, then the
+    /// parent of the target commit is the source commit. Backends may use this
+    /// field to implement 'integration' logic, where a source may be
+    /// periodically merged into a target, similar to a branch, but the
+    /// branching occurs at the file level rather than the repository level. It
+    /// also follows naturally that any copy source targeted to a specific
+    /// commit should avoid copy propagation on rebasing, which is desirable
+    /// for 'fork' style copies.
+    ///
+    /// If specified, it is required that the commit id is an ancestor of the
+    /// commit with which this copy source is associated.
+    pub commit: Option<CommitId>,
+}
+
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub enum CopySources {
+    Resolved(CopySource),
+    Conflict(HashSet<CopySource>),
+}
+
+/// An individual copy event, from file A -> B.
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub struct CopyRecord {
+    /// The destination of the copy, B.
+    pub target: RepoPathBuf,
+    /// The CommitId where the copy took place.
+    pub id: CommitId,
+    /// The source of the copy, A.
+    pub sources: CopySources,
+}
+
 /// Error that may occur during backend initialization.
 #[derive(Debug, Error)]
 #[error(transparent)]
@@ -416,6 +458,24 @@ pub trait Backend: Send + Sync + Debug {
         sign_with: Option<&mut SigningFn>,
     ) -> BackendResult<(CommitId, Commit)>;
 
+    /// Get all copy records for `paths` in the dag range `roots..heads`.
+    ///
+    /// The exact order these are returned is unspecified, but it is guaranteed
+    /// to be reverse-topological. That is, for any two copy records with
+    /// different commit ids A and B, if A is an ancestor of B, A is streamed
+    /// after B.
+    ///
+    /// Streaming by design to better support large backends which may have very
+    /// large single-file histories. This also allows more iterative algorithms
+    /// like blame/annotate to short-circuit after a point without wasting
+    /// unnecessary resources.
+    fn get_copy_records(
+        &self,
+        paths: &[RepoPathBuf],
+        roots: &[CommitId],
+        heads: &[CommitId],
+    ) -> BackendResult<BoxStream<BackendResult<CopyRecord>>>;
+
     /// Perform garbage collection.
     ///
     /// All commits found in the `index` won't be removed. In addition to that,
diff --git a/lib/src/git_backend.rs b/lib/src/git_backend.rs
index 3428897f8..15265f1ae 100644
--- a/lib/src/git_backend.rs
+++ b/lib/src/git_backend.rs
@@ -25,6 +25,7 @@ use std::time::SystemTime;
 use std::{fs, io, str};
 
 use async_trait::async_trait;
+use futures::stream::BoxStream;
 use gix::bstr::BString;
 use gix::objs::{CommitRef, CommitRefIter, WriteTo};
 use itertools::Itertools;
@@ -34,16 +35,16 @@ use thiserror::Error;
 
 use crate::backend::{
     make_root_commit, Backend, BackendError, BackendInitError, BackendLoadError, BackendResult,
-    ChangeId, Commit, CommitId, Conflict, ConflictId, ConflictTerm, FileId, MergedTreeId,
-    MillisSinceEpoch, SecureSig, Signature, SigningFn, SymlinkId, Timestamp, Tree, TreeId,
-    TreeValue,
+    ChangeId, Commit, CommitId, Conflict, ConflictId, ConflictTerm, CopyRecord, FileId,
+    MergedTreeId, MillisSinceEpoch, SecureSig, Signature, SigningFn, SymlinkId, Timestamp, Tree,
+    TreeId, TreeValue,
 };
 use crate::file_util::{IoResultExt as _, PathError};
 use crate::index::Index;
 use crate::lock::FileLock;
 use crate::merge::{Merge, MergeBuilder};
 use crate::object_id::ObjectId;
-use crate::repo_path::{RepoPath, RepoPathComponentBuf};
+use crate::repo_path::{RepoPath, RepoPathBuf, RepoPathComponentBuf};
 use crate::settings::UserSettings;
 use crate::stacked_table::{
     MutableTable, ReadonlyTable, TableSegment, TableStore, TableStoreError,
@@ -1209,6 +1210,15 @@ impl Backend for GitBackend {
         Ok((id, contents))
     }
 
+    fn get_copy_records(
+        &self,
+        _paths: &[RepoPathBuf],
+        _roots: &[CommitId],
+        _heads: &[CommitId],
+    ) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
+        Err(BackendError::Unsupported("get_copy_records".into()))
+    }
+
     #[tracing::instrument(skip(self, index))]
     fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
         let git_repo = self.lock_git_repo();
diff --git a/lib/src/local_backend.rs b/lib/src/local_backend.rs
index 4d03c12e2..97afb6a7a 100644
--- a/lib/src/local_backend.rs
+++ b/lib/src/local_backend.rs
@@ -24,20 +24,21 @@ use std::time::SystemTime;
 
 use async_trait::async_trait;
 use blake2::{Blake2b512, Digest};
+use futures::stream::BoxStream;
 use prost::Message;
 use tempfile::NamedTempFile;
 
 use crate::backend::{
     make_root_commit, Backend, BackendError, BackendResult, ChangeId, Commit, CommitId, Conflict,
-    ConflictId, ConflictTerm, FileId, MergedTreeId, MillisSinceEpoch, SecureSig, Signature,
-    SigningFn, SymlinkId, Timestamp, Tree, TreeId, TreeValue,
+    ConflictId, ConflictTerm, CopyRecord, FileId, MergedTreeId, MillisSinceEpoch, SecureSig,
+    Signature, SigningFn, SymlinkId, Timestamp, Tree, TreeId, TreeValue,
 };
 use crate::content_hash::blake2b_hash;
 use crate::file_util::persist_content_addressed_temp_file;
 use crate::index::Index;
 use crate::merge::MergeBuilder;
 use crate::object_id::ObjectId;
-use crate::repo_path::{RepoPath, RepoPathComponentBuf};
+use crate::repo_path::{RepoPath, RepoPathBuf, RepoPathComponentBuf};
 
 const COMMIT_ID_LENGTH: usize = 64;
 const CHANGE_ID_LENGTH: usize = 16;
@@ -301,6 +302,15 @@ impl Backend for LocalBackend {
         Ok((id, commit))
     }
 
+    fn get_copy_records(
+        &self,
+        _paths: &[RepoPathBuf],
+        _roots: &[CommitId],
+        _heads: &[CommitId],
+    ) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
+        Err(BackendError::Unsupported("get_copy_records".into()))
+    }
+
     fn gc(&self, _index: &dyn Index, _keep_newer: SystemTime) -> BackendResult<()> {
         Ok(())
     }
diff --git a/lib/src/secret_backend.rs b/lib/src/secret_backend.rs
index 44cb11dbe..4a4bec81e 100644
--- a/lib/src/secret_backend.rs
+++ b/lib/src/secret_backend.rs
@@ -20,15 +20,16 @@ use std::path::Path;
 use std::time::SystemTime;
 
 use async_trait::async_trait;
+use futures::stream::BoxStream;
 
 use crate::backend::{
     Backend, BackendError, BackendLoadError, BackendResult, ChangeId, Commit, CommitId, Conflict,
-    ConflictId, FileId, SigningFn, SymlinkId, Tree, TreeId,
+    ConflictId, CopyRecord, FileId, SigningFn, SymlinkId, Tree, TreeId,
 };
 use crate::git_backend::GitBackend;
 use crate::index::Index;
 use crate::object_id::ObjectId;
-use crate::repo_path::RepoPath;
+use crate::repo_path::{RepoPath, RepoPathBuf};
 use crate::settings::UserSettings;
 
 const SECRET_CONTENTS_HEX: [&str; 2] = [
@@ -167,6 +168,15 @@ impl Backend for SecretBackend {
         self.inner.write_commit(contents, sign_with)
     }
 
+    fn get_copy_records(
+        &self,
+        paths: &[RepoPathBuf],
+        roots: &[CommitId],
+        heads: &[CommitId],
+    ) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
+        self.inner.get_copy_records(paths, roots, heads)
+    }
+
     fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
         self.inner.gc(index, keep_newer)
     }
diff --git a/lib/testutils/src/test_backend.rs b/lib/testutils/src/test_backend.rs
index 26c426ef1..5a64e989a 100644
--- a/lib/testutils/src/test_backend.rs
+++ b/lib/testutils/src/test_backend.rs
@@ -21,9 +21,10 @@ use std::sync::{Arc, Mutex, MutexGuard, OnceLock};
 use std::time::SystemTime;
 
 use async_trait::async_trait;
+use futures::stream::BoxStream;
 use jj_lib::backend::{
     make_root_commit, Backend, BackendError, BackendResult, ChangeId, Commit, CommitId, Conflict,
-    ConflictId, FileId, SecureSig, SigningFn, SymlinkId, Tree, TreeId,
+    ConflictId, CopyRecord, FileId, SecureSig, SigningFn, SymlinkId, Tree, TreeId,
 };
 use jj_lib::index::Index;
 use jj_lib::object_id::ObjectId;
@@ -300,6 +301,15 @@ impl Backend for TestBackend {
         Ok((id, contents))
     }
 
+    fn get_copy_records(
+        &self,
+        _paths: &[RepoPathBuf],
+        _roots: &[CommitId],
+        _heads: &[CommitId],
+    ) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
+        Err(BackendError::Unsupported("get_copy_records".into()))
+    }
+
     fn gc(&self, _index: &dyn Index, _keep_newer: SystemTime) -> BackendResult<()> {
         Ok(())
     }