2022-11-26 23:57:50 +00:00
|
|
|
// Copyright 2022 The Jujutsu Authors
|
2022-09-24 01:49:56 +00:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2023-05-12 13:05:32 +00:00
|
|
|
use std::any::Any;
|
2022-09-24 01:49:56 +00:00
|
|
|
use std::io::Read;
|
|
|
|
use std::path::Path;
|
2024-01-09 08:14:30 +00:00
|
|
|
use std::time::SystemTime;
|
2022-09-24 01:49:56 +00:00
|
|
|
|
backend: make read functions async
The commit backend at Google is cloud-based (and so are the other
backends); it reads and writes commits from/to a server, which stores
them in a database. That makes latency much higher than for disk-based
backends. To reduce the latency, we have a local daemon process that
caches and prefetches objects. There are still many cases where
latency is high, such as when diffing two uncached commits. We can
improve that by changing some of our (jj's) algorithms to read many
objects concurrently from the backend. In the case of tree-diffing, we
can fetch one level (depth) of the tree at a time. There are several
ways of doing that:
* Make the backend methods `async`
* Use many threads for reading from the backend
* Add backend methods for batch reading
I don't think we typically need CPU parallelism, so it's wasteful to
have hundreds of threads running in order to fetch hundreds of objects
in parallel (especially when using a synchronous backend like the Git
backend). Batching would work well for the tree-diffing case, but it's
not as composable as `async`. For example, if we wanted to fetch some
commits at the same time as we were doing a diff, it's hard to see how
to do that with batching. Using async seems like our best bet.
I didn't make the backend interface's write functions async because
writes are already async with the daemon we have at Google. That
daemon will hash the object and immediately return, and then send the
object to the server in the background. I think any cloud-based
solution will need a similar daemon process. However, we may need to
reconsider this if/when jj gets used on a server with a custom backend
that writes directly to a database (i.e. no async daemon in between).
I've tried to measure the performance impact. The largest
difference I've been able to measure was on `jj diff
--ignore-working-copy -s --from v5.0 --to v6.0` in the Linux repo,
which increases from 749 ms to 773 ms (3.3%). In most cases I've
tested, there's no measurable difference. I've tried diffing from the
root commit, as well as `jj --ignore-working-copy log --no-graph -r
'::v3.0 & author(torvalds)' -T 'commit_id ++ "\n"'` (to test a
commit-heavy load).
2023-09-06 19:59:17 +00:00
|
|
|
use async_trait::async_trait;
|
2024-03-02 04:19:26 +00:00
|
|
|
use jj_cli::cli_util::{CliRunner, CommandHelper};
|
|
|
|
use jj_cli::command_error::CommandError;
|
2023-06-28 14:12:40 +00:00
|
|
|
use jj_cli::ui::Ui;
|
|
|
|
use jj_lib::backend::{
|
2023-07-06 04:20:24 +00:00
|
|
|
Backend, BackendInitError, BackendLoadError, BackendResult, ChangeId, Commit, CommitId,
|
2023-11-12 01:40:23 +00:00
|
|
|
Conflict, ConflictId, FileId, SigningFn, SymlinkId, Tree, TreeId,
|
2022-09-24 01:49:56 +00:00
|
|
|
};
|
2023-06-28 14:12:40 +00:00
|
|
|
use jj_lib::git_backend::GitBackend;
|
2024-01-09 08:14:30 +00:00
|
|
|
use jj_lib::index::Index;
|
2023-06-28 14:12:40 +00:00
|
|
|
use jj_lib::repo::StoreFactories;
|
|
|
|
use jj_lib::repo_path::RepoPath;
|
2023-11-11 00:16:40 +00:00
|
|
|
use jj_lib::settings::UserSettings;
|
2023-11-25 23:36:21 +00:00
|
|
|
use jj_lib::signing::Signer;
|
|
|
|
use jj_lib::workspace::{Workspace, WorkspaceInitError};
|
2022-09-24 01:49:56 +00:00
|
|
|
|
|
|
|
#[derive(clap::Parser, Clone, Debug)]
|
2023-12-02 00:32:41 +00:00
|
|
|
enum CustomCommand {
|
2022-09-24 01:49:56 +00:00
|
|
|
/// Initialize a workspace using the Jit backend
|
|
|
|
InitJit,
|
|
|
|
}
|
|
|
|
|
2023-01-03 09:38:17 +00:00
|
|
|
fn create_store_factories() -> StoreFactories {
|
2024-04-11 18:58:38 +00:00
|
|
|
let mut store_factories = StoreFactories::empty();
|
2022-09-24 01:49:56 +00:00
|
|
|
// Register the backend so it can be loaded when the repo is loaded. The name
|
|
|
|
// must match `Backend::name()`.
|
2022-12-14 18:08:31 +00:00
|
|
|
store_factories.add_backend(
|
2022-09-24 01:49:56 +00:00
|
|
|
"jit",
|
2023-11-11 00:16:40 +00:00
|
|
|
Box::new(|settings, store_path| Ok(Box::new(JitBackend::load(settings, store_path)?))),
|
2022-09-24 01:49:56 +00:00
|
|
|
);
|
2023-01-03 09:38:17 +00:00
|
|
|
store_factories
|
|
|
|
}
|
|
|
|
|
2023-01-03 12:53:30 +00:00
|
|
|
fn run_custom_command(
|
2023-01-04 08:57:36 +00:00
|
|
|
_ui: &mut Ui,
|
2023-01-04 08:18:45 +00:00
|
|
|
command_helper: &CommandHelper,
|
2023-12-02 00:32:41 +00:00
|
|
|
command: CustomCommand,
|
2023-01-03 09:38:17 +00:00
|
|
|
) -> Result<(), CommandError> {
|
2023-01-03 12:53:30 +00:00
|
|
|
match command {
|
2023-12-02 00:32:41 +00:00
|
|
|
CustomCommand::InitJit => {
|
2023-01-04 08:18:45 +00:00
|
|
|
let wc_path = command_helper.cwd();
|
2022-09-24 01:49:56 +00:00
|
|
|
// Initialize a workspace with the custom backend
|
2023-09-07 08:07:09 +00:00
|
|
|
Workspace::init_with_backend(
|
|
|
|
command_helper.settings(),
|
|
|
|
wc_path,
|
2023-11-11 00:16:40 +00:00
|
|
|
&|settings, store_path| Ok(Box::new(JitBackend::init(settings, store_path)?)),
|
2023-11-25 23:36:21 +00:00
|
|
|
Signer::from_settings(command_helper.settings())
|
|
|
|
.map_err(WorkspaceInitError::SignInit)?,
|
2023-09-07 08:07:09 +00:00
|
|
|
)?;
|
2022-09-24 01:49:56 +00:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-19 15:10:18 +00:00
|
|
|
fn main() -> std::process::ExitCode {
|
2023-01-03 08:33:53 +00:00
|
|
|
CliRunner::init()
|
2024-04-11 18:58:38 +00:00
|
|
|
.add_store_factories(create_store_factories())
|
2023-01-03 12:53:30 +00:00
|
|
|
.add_subcommand(run_custom_command)
|
2023-01-19 15:10:18 +00:00
|
|
|
.run()
|
2022-09-24 01:49:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// A commit backend that's extremely similar to the Git backend
|
|
|
|
#[derive(Debug)]
|
|
|
|
struct JitBackend {
|
|
|
|
inner: GitBackend,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl JitBackend {
|
2023-11-11 00:16:40 +00:00
|
|
|
fn init(settings: &UserSettings, store_path: &Path) -> Result<Self, BackendInitError> {
|
|
|
|
let inner = GitBackend::init_internal(settings, store_path)?;
|
2023-07-05 13:37:29 +00:00
|
|
|
Ok(JitBackend { inner })
|
2022-09-24 01:49:56 +00:00
|
|
|
}
|
|
|
|
|
2023-11-11 00:16:40 +00:00
|
|
|
fn load(settings: &UserSettings, store_path: &Path) -> Result<Self, BackendLoadError> {
|
|
|
|
let inner = GitBackend::load(settings, store_path)?;
|
2023-07-05 13:42:08 +00:00
|
|
|
Ok(JitBackend { inner })
|
2022-09-24 01:49:56 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
backend: make read functions async
The commit backend at Google is cloud-based (and so are the other
backends); it reads and writes commits from/to a server, which stores
them in a database. That makes latency much higher than for disk-based
backends. To reduce the latency, we have a local daemon process that
caches and prefetches objects. There are still many cases where
latency is high, such as when diffing two uncached commits. We can
improve that by changing some of our (jj's) algorithms to read many
objects concurrently from the backend. In the case of tree-diffing, we
can fetch one level (depth) of the tree at a time. There are several
ways of doing that:
* Make the backend methods `async`
* Use many threads for reading from the backend
* Add backend methods for batch reading
I don't think we typically need CPU parallelism, so it's wasteful to
have hundreds of threads running in order to fetch hundreds of objects
in parallel (especially when using a synchronous backend like the Git
backend). Batching would work well for the tree-diffing case, but it's
not as composable as `async`. For example, if we wanted to fetch some
commits at the same time as we were doing a diff, it's hard to see how
to do that with batching. Using async seems like our best bet.
I didn't make the backend interface's write functions async because
writes are already async with the daemon we have at Google. That
daemon will hash the object and immediately return, and then send the
object to the server in the background. I think any cloud-based
solution will need a similar daemon process. However, we may need to
reconsider this if/when jj gets used on a server with a custom backend
that writes directly to a database (i.e. no async daemon in between).
I've tried to measure the performance impact. The largest
difference I've been able to measure was on `jj diff
--ignore-working-copy -s --from v5.0 --to v6.0` in the Linux repo,
which increases from 749 ms to 773 ms (3.3%). In most cases I've
tested, there's no measurable difference. I've tried diffing from the
root commit, as well as `jj --ignore-working-copy log --no-graph -r
'::v3.0 & author(torvalds)' -T 'commit_id ++ "\n"'` (to test a
commit-heavy load).
2023-09-06 19:59:17 +00:00
|
|
|
#[async_trait]
|
2022-09-24 01:49:56 +00:00
|
|
|
impl Backend for JitBackend {
|
2023-05-12 13:05:32 +00:00
|
|
|
fn as_any(&self) -> &dyn Any {
|
|
|
|
self
|
|
|
|
}
|
|
|
|
|
2022-09-24 01:49:56 +00:00
|
|
|
fn name(&self) -> &str {
|
|
|
|
"jit"
|
|
|
|
}
|
|
|
|
|
2023-02-06 18:05:09 +00:00
|
|
|
fn commit_id_length(&self) -> usize {
|
|
|
|
self.inner.commit_id_length()
|
2022-09-24 01:49:56 +00:00
|
|
|
}
|
|
|
|
|
2023-02-06 18:15:01 +00:00
|
|
|
fn change_id_length(&self) -> usize {
|
|
|
|
self.inner.change_id_length()
|
|
|
|
}
|
|
|
|
|
2023-09-19 10:55:51 +00:00
|
|
|
fn root_commit_id(&self) -> &CommitId {
|
|
|
|
self.inner.root_commit_id()
|
|
|
|
}
|
|
|
|
|
|
|
|
fn root_change_id(&self) -> &ChangeId {
|
|
|
|
self.inner.root_change_id()
|
|
|
|
}
|
|
|
|
|
|
|
|
fn empty_tree_id(&self) -> &TreeId {
|
|
|
|
self.inner.empty_tree_id()
|
|
|
|
}
|
|
|
|
|
2023-10-19 18:27:55 +00:00
|
|
|
fn concurrency(&self) -> usize {
|
|
|
|
1
|
|
|
|
}
|
|
|
|
|
backend: make read functions async
The commit backend at Google is cloud-based (and so are the other
backends); it reads and writes commits from/to a server, which stores
them in a database. That makes latency much higher than for disk-based
backends. To reduce the latency, we have a local daemon process that
caches and prefetches objects. There are still many cases where
latency is high, such as when diffing two uncached commits. We can
improve that by changing some of our (jj's) algorithms to read many
objects concurrently from the backend. In the case of tree-diffing, we
can fetch one level (depth) of the tree at a time. There are several
ways of doing that:
* Make the backend methods `async`
* Use many threads for reading from the backend
* Add backend methods for batch reading
I don't think we typically need CPU parallelism, so it's wasteful to
have hundreds of threads running in order to fetch hundreds of objects
in parallel (especially when using a synchronous backend like the Git
backend). Batching would work well for the tree-diffing case, but it's
not as composable as `async`. For example, if we wanted to fetch some
commits at the same time as we were doing a diff, it's hard to see how
to do that with batching. Using async seems like our best bet.
I didn't make the backend interface's write functions async because
writes are already async with the daemon we have at Google. That
daemon will hash the object and immediately return, and then send the
object to the server in the background. I think any cloud-based
solution will need a similar daemon process. However, we may need to
reconsider this if/when jj gets used on a server with a custom backend
that writes directly to a database (i.e. no async daemon in between).
I've tried to measure the performance impact. That's the largest
difference I've been able to measure was on `jj diff
--ignore-working-copy -s --from v5.0 --to v6.0` in the Linux repo,
which increases from 749 ms to 773 ms (3.3%). In most cases I've
tested, there's no measurable difference. I've tried diffing from the
root commit, as well as `jj --ignore-working-copy log --no-graph -r
'::v3.0 & author(torvalds)' -T 'commit_id ++ "\n"'` (to test a
commit-heavy load).
2023-09-06 19:59:17 +00:00
|
|
|
async fn read_file(&self, path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>> {
|
|
|
|
self.inner.read_file(path, id).await
|
2022-09-24 01:49:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn write_file(&self, path: &RepoPath, contents: &mut dyn Read) -> BackendResult<FileId> {
|
|
|
|
self.inner.write_file(path, contents)
|
|
|
|
}
|
|
|
|
|
backend: make read functions async
The commit backend at Google is cloud-based (and so are the other
backends); it reads and writes commits from/to a server, which stores
them in a database. That makes latency much higher than for disk-based
backends. To reduce the latency, we have a local daemon process that
caches and prefetches objects. There are still many cases where
latency is high, such as when diffing two uncached commits. We can
improve that by changing some of our (jj's) algorithms to read many
objects concurrently from the backend. In the case of tree-diffing, we
can fetch one level (depth) of the tree at a time. There are several
ways of doing that:
* Make the backend methods `async`
* Use many threads for reading from the backend
* Add backend methods for batch reading
I don't think we typically need CPU parallelism, so it's wasteful to
have hundreds of threads running in order to fetch hundreds of objects
in parallel (especially when using a synchronous backend like the Git
backend). Batching would work well for the tree-diffing case, but it's
not as composable as `async`. For example, if we wanted to fetch some
commits at the same time as we were doing a diff, it's hard to see how
to do that with batching. Using async seems like our best bet.
I didn't make the backend interface's write functions async because
writes are already async with the daemon we have at Google. That
daemon will hash the object and immediately return, and then send the
object to the server in the background. I think any cloud-based
solution will need a similar daemon process. However, we may need to
reconsider this if/when jj gets used on a server with a custom backend
that writes directly to a database (i.e. no async daemon in between).
I've tried to measure the performance impact. That's the largest
difference I've been able to measure was on `jj diff
--ignore-working-copy -s --from v5.0 --to v6.0` in the Linux repo,
which increases from 749 ms to 773 ms (3.3%). In most cases I've
tested, there's no measurable difference. I've tried diffing from the
root commit, as well as `jj --ignore-working-copy log --no-graph -r
'::v3.0 & author(torvalds)' -T 'commit_id ++ "\n"'` (to test a
commit-heavy load).
2023-09-06 19:59:17 +00:00
|
|
|
async fn read_symlink(&self, path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
|
|
|
|
self.inner.read_symlink(path, id).await
|
2022-09-24 01:49:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn write_symlink(&self, path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
|
|
|
|
self.inner.write_symlink(path, target)
|
|
|
|
}
|
|
|
|
|
backend: make read functions async
The commit backend at Google is cloud-based (and so are the other
backends); it reads and writes commits from/to a server, which stores
them in a database. That makes latency much higher than for disk-based
backends. To reduce the latency, we have a local daemon process that
caches and prefetches objects. There are still many cases where
latency is high, such as when diffing two uncached commits. We can
improve that by changing some of our (jj's) algorithms to read many
objects concurrently from the backend. In the case of tree-diffing, we
can fetch one level (depth) of the tree at a time. There are several
ways of doing that:
* Make the backend methods `async`
* Use many threads for reading from the backend
* Add backend methods for batch reading
I don't think we typically need CPU parallelism, so it's wasteful to
have hundreds of threads running in order to fetch hundreds of objects
in parallel (especially when using a synchronous backend like the Git
backend). Batching would work well for the tree-diffing case, but it's
not as composable as `async`. For example, if we wanted to fetch some
commits at the same time as we were doing a diff, it's hard to see how
to do that with batching. Using async seems like our best bet.
I didn't make the backend interface's write functions async because
writes are already async with the daemon we have at Google. That
daemon will hash the object and immediately return, and then send the
object to the server in the background. I think any cloud-based
solution will need a similar daemon process. However, we may need to
reconsider this if/when jj gets used on a server with a custom backend
that writes directly to a database (i.e. no async daemon in between).
I've tried to measure the performance impact. That's the largest
difference I've been able to measure was on `jj diff
--ignore-working-copy -s --from v5.0 --to v6.0` in the Linux repo,
which increases from 749 ms to 773 ms (3.3%). In most cases I've
tested, there's no measurable difference. I've tried diffing from the
root commit, as well as `jj --ignore-working-copy log --no-graph -r
'::v3.0 & author(torvalds)' -T 'commit_id ++ "\n"'` (to test a
commit-heavy load).
2023-09-06 19:59:17 +00:00
|
|
|
async fn read_tree(&self, path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
|
|
|
|
self.inner.read_tree(path, id).await
|
2022-09-24 01:49:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn write_tree(&self, path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
|
|
|
|
self.inner.write_tree(path, contents)
|
|
|
|
}
|
|
|
|
|
2023-10-27 05:54:09 +00:00
|
|
|
fn read_conflict(&self, path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict> {
|
|
|
|
self.inner.read_conflict(path, id)
|
2022-09-24 01:49:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn write_conflict(&self, path: &RepoPath, contents: &Conflict) -> BackendResult<ConflictId> {
|
|
|
|
self.inner.write_conflict(path, contents)
|
|
|
|
}
|
|
|
|
|
backend: make read functions async
The commit backend at Google is cloud-based (and so are the other
backends); it reads and writes commits from/to a server, which stores
them in a database. That makes latency much higher than for disk-based
backends. To reduce the latency, we have a local daemon process that
caches and prefetches objects. There are still many cases where
latency is high, such as when diffing two uncached commits. We can
improve that by changing some of our (jj's) algorithms to read many
objects concurrently from the backend. In the case of tree-diffing, we
can fetch one level (depth) of the tree at a time. There are several
ways of doing that:
* Make the backend methods `async`
* Use many threads for reading from the backend
* Add backend methods for batch reading
I don't think we typically need CPU parallelism, so it's wasteful to
have hundreds of threads running in order to fetch hundreds of objects
in parallel (especially when using a synchronous backend like the Git
backend). Batching would work well for the tree-diffing case, but it's
not as composable as `async`. For example, if we wanted to fetch some
commits at the same time as we were doing a diff, it's hard to see how
to do that with batching. Using async seems like our best bet.
I didn't make the backend interface's write functions async because
writes are already async with the daemon we have at Google. That
daemon will hash the object and immediately return, and then send the
object to the server in the background. I think any cloud-based
solution will need a similar daemon process. However, we may need to
reconsider this if/when jj gets used on a server with a custom backend
that writes directly to a database (i.e. no async daemon in between).
I've tried to measure the performance impact. That's the largest
difference I've been able to measure was on `jj diff
--ignore-working-copy -s --from v5.0 --to v6.0` in the Linux repo,
which increases from 749 ms to 773 ms (3.3%). In most cases I've
tested, there's no measurable difference. I've tried diffing from the
root commit, as well as `jj --ignore-working-copy log --no-graph -r
'::v3.0 & author(torvalds)' -T 'commit_id ++ "\n"'` (to test a
commit-heavy load).
2023-09-06 19:59:17 +00:00
|
|
|
async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
|
|
|
|
self.inner.read_commit(id).await
|
2022-09-24 01:49:56 +00:00
|
|
|
}
|
|
|
|
|
2023-11-12 01:40:23 +00:00
|
|
|
fn write_commit(
|
|
|
|
&self,
|
|
|
|
contents: Commit,
|
2023-11-28 06:34:02 +00:00
|
|
|
sign_with: Option<&mut SigningFn>,
|
2023-11-12 01:40:23 +00:00
|
|
|
) -> BackendResult<(CommitId, Commit)> {
|
|
|
|
self.inner.write_commit(contents, sign_with)
|
2022-09-24 01:49:56 +00:00
|
|
|
}
|
2023-12-01 22:00:22 +00:00
|
|
|
|
2024-01-09 08:14:30 +00:00
|
|
|
fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
|
|
|
|
self.inner.gc(index, keep_newer)
|
2023-12-01 22:00:22 +00:00
|
|
|
}
|
2022-09-24 01:49:56 +00:00
|
|
|
}
|