// Copyright 2020 The Jujutsu Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #![allow(missing_docs)] use std::collections::{BTreeMap, HashSet}; use std::error::Error; use std::ffi::OsString; use std::fs; use std::fs::{File, Metadata, OpenOptions}; use std::io::{Read, Write}; use std::ops::Bound; #[cfg(unix)] use std::os::unix::fs::symlink; #[cfg(unix)] use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::sync::mpsc::{channel, Sender}; use std::sync::Arc; use std::time::UNIX_EPOCH; use itertools::Itertools; use once_cell::unsync::OnceCell; use prost::Message; use rayon::iter::IntoParallelIterator; use rayon::prelude::ParallelIterator; use tempfile::NamedTempFile; use thiserror::Error; use tracing::{instrument, trace_span}; use crate::backend::{ BackendError, ConflictId, FileId, MillisSinceEpoch, ObjectId, SymlinkId, TreeId, TreeValue, }; use crate::conflicts; #[cfg(feature = "watchman")] use crate::fsmonitor::watchman; use crate::fsmonitor::FsmonitorKind; use crate::gitignore::GitIgnoreFile; use crate::lock::FileLock; use crate::matchers::{ DifferenceMatcher, EverythingMatcher, IntersectionMatcher, Matcher, PrefixMatcher, }; use crate::merge::Merge; use crate::merged_tree::MergedTree; use crate::op_store::{OperationId, WorkspaceId}; use crate::repo_path::{FsPathParseError, RepoPath, RepoPathComponent, RepoPathJoin}; use crate::store::Store; use crate::tree::{Diff, Tree}; #[cfg(unix)] type FileExecutableFlag = bool; #[cfg(windows)] 
type FileExecutableFlag = (); #[derive(Debug, PartialEq, Eq, Clone)] pub enum FileType { Normal { executable: FileExecutableFlag }, Symlink, GitSubmodule, } #[derive(Debug, PartialEq, Eq, Clone)] pub struct FileState { pub file_type: FileType, pub mtime: MillisSinceEpoch, pub size: u64, /* TODO: What else do we need here? Git stores a lot of fields. * TODO: Could possibly handle case-insensitive file systems keeping an * Option with the actual path here. */ } impl FileState { fn for_file(executable: bool, size: u64, metadata: &Metadata) -> Self { #[cfg(windows)] let executable = { // Windows doesn't support executable bit. let _ = executable; () }; FileState { file_type: FileType::Normal { executable }, mtime: mtime_from_metadata(metadata), size, } } fn for_symlink(metadata: &Metadata) -> Self { // When using fscrypt, the reported size is not the content size. So if // we were to record the content size here (like we do for regular files), we // would end up thinking the file has changed every time we snapshot. FileState { file_type: FileType::Symlink, mtime: mtime_from_metadata(metadata), size: metadata.len(), } } fn for_gitsubmodule() -> Self { FileState { file_type: FileType::GitSubmodule, mtime: MillisSinceEpoch(0), size: 0, } } #[cfg(unix)] fn mark_executable(&mut self, executable: bool) { if let FileType::Normal { .. } = &self.file_type { self.file_type = FileType::Normal { executable } } } } pub struct TreeState { store: Arc, working_copy_path: PathBuf, state_path: PathBuf, tree_id: TreeId, file_states: BTreeMap, // Currently only path prefixes sparse_patterns: Vec, own_mtime: MillisSinceEpoch, /// The most recent clock value returned by Watchman. Will only be set if /// the repo is configured to use the Watchman filesystem monitor and /// Watchman has been queried at least once. 
watchman_clock: Option, } fn file_state_from_proto(proto: crate::protos::working_copy::FileState) -> FileState { let file_type = match proto.file_type() { crate::protos::working_copy::FileType::Normal => FileType::Normal { executable: FileExecutableFlag::default(), }, #[cfg(unix)] crate::protos::working_copy::FileType::Executable => FileType::Normal { executable: true }, // can exist in files written by older versions of jj #[cfg(windows)] crate::protos::working_copy::FileType::Executable => FileType::Normal { executable: () }, crate::protos::working_copy::FileType::Symlink => FileType::Symlink, crate::protos::working_copy::FileType::Conflict => FileType::Normal { executable: FileExecutableFlag::default(), }, crate::protos::working_copy::FileType::GitSubmodule => FileType::GitSubmodule, }; FileState { file_type, mtime: MillisSinceEpoch(proto.mtime_millis_since_epoch), size: proto.size, } } fn file_state_to_proto(file_state: &FileState) -> crate::protos::working_copy::FileState { let mut proto = crate::protos::working_copy::FileState::default(); let file_type = match &file_state.file_type { #[cfg(unix)] FileType::Normal { executable: false } => crate::protos::working_copy::FileType::Normal, #[cfg(unix)] FileType::Normal { executable: true } => crate::protos::working_copy::FileType::Executable, #[cfg(windows)] FileType::Normal { executable: () } => crate::protos::working_copy::FileType::Normal, FileType::Symlink => crate::protos::working_copy::FileType::Symlink, FileType::GitSubmodule => crate::protos::working_copy::FileType::GitSubmodule, }; proto.file_type = file_type as i32; proto.mtime_millis_since_epoch = file_state.mtime.0; proto.size = file_state.size; proto } fn file_states_from_proto( proto: &crate::protos::working_copy::TreeState, ) -> BTreeMap { let mut file_states = BTreeMap::new(); for (path_str, proto_file_state) in &proto.file_states { let path = RepoPath::from_internal_string(path_str.as_str()); file_states.insert(path, 
file_state_from_proto(proto_file_state.clone())); } file_states } fn sparse_patterns_from_proto(proto: &crate::protos::working_copy::TreeState) -> Vec { let mut sparse_patterns = vec![]; if let Some(proto_sparse_patterns) = proto.sparse_patterns.as_ref() { for prefix in &proto_sparse_patterns.prefixes { sparse_patterns.push(RepoPath::from_internal_string(prefix.as_str())); } } else { // For compatibility with old working copies. // TODO: Delete this is late 2022 or so. sparse_patterns.push(RepoPath::root()); } sparse_patterns } /// Creates intermediate directories from the `working_copy_path` to the /// `repo_path` parent. /// /// If an intermediate directory exists and if it is a symlink, this function /// will return an error. The `working_copy_path` directory may be a symlink. /// /// Note that this does not prevent TOCTOU bugs caused by concurrent checkouts. /// Another process may remove the directory created by this function and put a /// symlink there. fn create_parent_dirs(working_copy_path: &Path, repo_path: &RepoPath) -> Result<(), CheckoutError> { let (_, dir_components) = repo_path .components() .split_last() .expect("repo path shouldn't be root"); let mut dir_path = working_copy_path.to_owned(); for c in dir_components { dir_path.push(c.as_str()); match fs::create_dir(&dir_path) { Ok(()) => {} Err(_) if dir_path .symlink_metadata() .map(|m| m.is_dir()) .unwrap_or(false) => {} Err(err) => { return Err(CheckoutError::IoError { message: format!( "Failed to create parent directories for {}", repo_path.to_fs_path(working_copy_path).display(), ), err, }); } } } Ok(()) } fn mtime_from_metadata(metadata: &Metadata) -> MillisSinceEpoch { let time = metadata .modified() .expect("File mtime not supported on this platform?"); let since_epoch = time .duration_since(UNIX_EPOCH) .expect("mtime before unix epoch"); MillisSinceEpoch( i64::try_from(since_epoch.as_millis()) .expect("mtime billions of years into the future or past"), ) } fn file_state(metadata: &Metadata) 
-> Option { let metadata_file_type = metadata.file_type(); let file_type = if metadata_file_type.is_dir() { None } else if metadata_file_type.is_symlink() { Some(FileType::Symlink) } else if metadata_file_type.is_file() { #[cfg(unix)] if metadata.permissions().mode() & 0o111 != 0 { Some(FileType::Normal { executable: true }) } else { Some(FileType::Normal { executable: false }) } #[cfg(windows)] Some(FileType::Normal { executable: () }) } else { None }; file_type.map(|file_type| { let mtime = mtime_from_metadata(metadata); let size = metadata.len(); FileState { file_type, mtime, size, } }) } #[derive(Debug, PartialEq, Eq, Clone)] pub struct CheckoutStats { pub updated_files: u32, pub added_files: u32, pub removed_files: u32, } #[derive(Debug, Error)] pub enum SnapshotError { #[error("Failed to query the filesystem monitor: {0}")] FsmonitorError(String), #[error("{message}: {err}")] IoError { message: String, #[source] err: std::io::Error, }, #[error("Working copy path {} is not valid UTF-8", path.to_string_lossy())] InvalidUtf8Path { path: OsString }, #[error("Symlink {path} target is not valid UTF-8")] InvalidUtf8SymlinkTarget { path: PathBuf, target: PathBuf }, #[error("Internal backend error: {0}")] InternalBackendError(#[from] BackendError), #[error(transparent)] TreeStateError(#[from] TreeStateError), } #[derive(Debug, Error)] pub enum CheckoutError { // The current working-copy commit was deleted, maybe by an overly aggressive GC that happened // while the current process was running. #[error("Current working-copy commit not found: {source}")] SourceNotFound { source: Box, }, // Another process checked out a commit while the current process was running (after the // working copy was read by the current process). 
#[error("Concurrent checkout")] ConcurrentCheckout, #[error("{message}: {err:?}")] IoError { message: String, #[source] err: std::io::Error, }, #[error("Internal error: {0}")] InternalBackendError(#[from] BackendError), #[error(transparent)] TreeStateError(#[from] TreeStateError), } impl CheckoutError { fn for_stat_error(err: std::io::Error, path: &Path) -> Self { CheckoutError::IoError { message: format!("Failed to stat file {}", path.display()), err, } } } fn suppress_file_exists_error(orig_err: CheckoutError) -> Result<(), CheckoutError> { match orig_err { CheckoutError::IoError { err, .. } if err.kind() == std::io::ErrorKind::AlreadyExists => { Ok(()) } _ => Err(orig_err), } } pub struct SnapshotOptions<'a> { pub base_ignores: Arc, pub fsmonitor_kind: Option, pub progress: Option<&'a SnapshotProgress<'a>>, } impl SnapshotOptions<'_> { pub fn empty_for_test() -> Self { SnapshotOptions { base_ignores: GitIgnoreFile::empty(), fsmonitor_kind: None, progress: None, } } } struct FsmonitorMatcher { matcher: Option>, watchman_clock: Option, } #[derive(Debug, Error)] pub enum ResetError { // The current working-copy commit was deleted, maybe by an overly aggressive GC that happened // while the current process was running. 
#[error("Current working-copy commit not found: {source}")] SourceNotFound { source: Box, }, #[error("Internal error: {0}")] InternalBackendError(#[from] BackendError), #[error(transparent)] TreeStateError(#[from] TreeStateError), } struct DirectoryToVisit { dir: RepoPath, disk_dir: PathBuf, git_ignore: Arc, } #[derive(Debug, Error)] pub enum TreeStateError { #[error("Reading tree state from {path}: {source}")] ReadTreeState { path: PathBuf, source: std::io::Error, }, #[error("Decoding tree state from {path}: {source}")] DecodeTreeState { path: PathBuf, source: prost::DecodeError, }, #[error("Writing tree state to temporary file {path}: {source}")] WriteTreeState { path: PathBuf, source: std::io::Error, }, #[error("Persisting tree state to file {path}: {source}")] PersistTreeState { path: PathBuf, source: tempfile::PersistError, }, #[error("Filesystem monitor error: {0}")] Fsmonitor(Box), } impl TreeState { pub fn working_copy_path(&self) -> &Path { &self.working_copy_path } pub fn current_tree_id(&self) -> &TreeId { &self.tree_id } pub fn file_states(&self) -> &BTreeMap { &self.file_states } pub fn sparse_patterns(&self) -> &Vec { &self.sparse_patterns } fn sparse_matcher(&self) -> Box { Box::new(PrefixMatcher::new(&self.sparse_patterns)) } pub fn init( store: Arc, working_copy_path: PathBuf, state_path: PathBuf, ) -> Result { let mut wc = TreeState::empty(store, working_copy_path, state_path); wc.save()?; Ok(wc) } fn empty(store: Arc, working_copy_path: PathBuf, state_path: PathBuf) -> TreeState { let tree_id = store.empty_tree_id().clone(); // Canonicalize the working copy path because "repo/." 
makes libgit2 think that // everything should be ignored TreeState { store, working_copy_path: working_copy_path.canonicalize().unwrap(), state_path, tree_id, file_states: BTreeMap::new(), sparse_patterns: vec![RepoPath::root()], own_mtime: MillisSinceEpoch(0), watchman_clock: None, } } pub fn load( store: Arc, working_copy_path: PathBuf, state_path: PathBuf, ) -> Result { let tree_state_path = state_path.join("tree_state"); let file = match File::open(&tree_state_path) { Err(ref err) if err.kind() == std::io::ErrorKind::NotFound => { return TreeState::init(store, working_copy_path, state_path); } Err(err) => { return Err(TreeStateError::ReadTreeState { path: tree_state_path, source: err, }) } Ok(file) => file, }; let mut wc = TreeState::empty(store, working_copy_path, state_path); wc.read(&tree_state_path, file)?; Ok(wc) } fn update_own_mtime(&mut self) { if let Ok(metadata) = self.state_path.join("tree_state").symlink_metadata() { self.own_mtime = mtime_from_metadata(&metadata); } else { self.own_mtime = MillisSinceEpoch(0); } } fn read(&mut self, tree_state_path: &Path, mut file: File) -> Result<(), TreeStateError> { self.update_own_mtime(); let mut buf = Vec::new(); file.read_to_end(&mut buf) .map_err(|err| TreeStateError::ReadTreeState { path: tree_state_path.to_owned(), source: err, })?; let proto = crate::protos::working_copy::TreeState::decode(&*buf).map_err(|err| { TreeStateError::DecodeTreeState { path: tree_state_path.to_owned(), source: err, } })?; self.tree_id = TreeId::new(proto.tree_id.clone()); self.file_states = file_states_from_proto(&proto); self.sparse_patterns = sparse_patterns_from_proto(&proto); self.watchman_clock = proto.watchman_clock; Ok(()) } fn save(&mut self) -> Result<(), TreeStateError> { let mut proto = crate::protos::working_copy::TreeState { tree_id: self.tree_id.to_bytes(), ..Default::default() }; for (file, file_state) in &self.file_states { proto.file_states.insert( file.to_internal_file_string(), 
file_state_to_proto(file_state), ); } let mut sparse_patterns = crate::protos::working_copy::SparsePatterns::default(); for path in &self.sparse_patterns { sparse_patterns .prefixes .push(path.to_internal_file_string()); } proto.sparse_patterns = Some(sparse_patterns); proto.watchman_clock = self.watchman_clock.clone(); let mut temp_file = NamedTempFile::new_in(&self.state_path).unwrap(); temp_file .as_file_mut() .write_all(&proto.encode_to_vec()) .map_err(|err| TreeStateError::WriteTreeState { path: self.state_path.clone(), source: err, })?; // update own write time while we before we rename it, so we know // there is no unknown data in it self.update_own_mtime(); // TODO: Retry if persisting fails (it will on Windows if the file happened to // be open for read). let target_path = self.state_path.join("tree_state"); temp_file .persist(&target_path) .map_err(|err| TreeStateError::PersistTreeState { path: target_path.clone(), source: err, })?; Ok(()) } fn current_tree(&self) -> Result { self.store.get_tree(&RepoPath::root(), &self.tree_id) } fn write_file_to_store( &self, path: &RepoPath, disk_path: &Path, ) -> Result { let mut file = File::open(disk_path).map_err(|err| SnapshotError::IoError { message: format!("Failed to open file {}", disk_path.display()), err, })?; Ok(self.store.write_file(path, &mut file)?) } fn write_symlink_to_store( &self, path: &RepoPath, disk_path: &Path, ) -> Result { let target = disk_path .read_link() .map_err(|err| SnapshotError::IoError { message: format!("Failed to read symlink {}", disk_path.display()), err, })?; let str_target = target .to_str() .ok_or_else(|| SnapshotError::InvalidUtf8SymlinkTarget { path: disk_path.to_path_buf(), target: target.clone(), })?; Ok(self.store.write_symlink(path, str_target)?) 
} fn reset_watchman(&mut self) { self.watchman_clock.take(); } #[cfg(feature = "watchman")] #[tokio::main] #[instrument(skip(self))] pub async fn query_watchman( &self, ) -> Result<(watchman::Clock, Option>), TreeStateError> { let fsmonitor = watchman::Fsmonitor::init(&self.working_copy_path) .await .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))?; let previous_clock = self.watchman_clock.clone().map(watchman::Clock::from); let changed_files = fsmonitor .query_changed_files(previous_clock) .await .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))?; Ok(changed_files) } /// Look for changes to the working copy. If there are any changes, create /// a new tree from it and return it, and also update the dirstate on disk. #[instrument(skip_all)] pub fn snapshot(&mut self, options: SnapshotOptions) -> Result { let SnapshotOptions { base_ignores, fsmonitor_kind, progress, } = options; let sparse_matcher = self.sparse_matcher(); let fsmonitor_clock_needs_save = fsmonitor_kind.is_some(); let FsmonitorMatcher { matcher: fsmonitor_matcher, watchman_clock, } = self.make_fsmonitor_matcher(fsmonitor_kind)?; let fsmonitor_matcher = match fsmonitor_matcher.as_ref() { None => &EverythingMatcher, Some(fsmonitor_matcher) => fsmonitor_matcher.as_ref(), }; let (tree_entries_tx, tree_entries_rx) = channel(); let (file_states_tx, file_states_rx) = channel(); let (present_files_tx, present_files_rx) = channel(); trace_span!("traverse filesystem").in_scope(|| -> Result<(), SnapshotError> { let matcher = IntersectionMatcher::new(sparse_matcher.as_ref(), fsmonitor_matcher); let current_tree = self.current_tree()?; let current_tree = MergedTree::legacy(current_tree); let directory_to_visit = DirectoryToVisit { dir: RepoPath::root(), disk_dir: self.working_copy_path.clone(), git_ignore: base_ignores, }; self.visit_directory( &matcher, ¤t_tree, tree_entries_tx, file_states_tx, present_files_tx, directory_to_visit, progress, ) })?; let mut tree_builder = 
self.store.tree_builder(self.tree_id.clone()); let mut deleted_files: HashSet<_> = trace_span!("collecting existing files").in_scope(|| { self.file_states .iter() .filter(|&(path, state)| { fsmonitor_matcher.matches(path) && state.file_type != FileType::GitSubmodule }) .map(|(path, _state)| path.clone()) .collect() }); trace_span!("process tree entries").in_scope(|| -> Result<(), SnapshotError> { while let Ok((path, tree_values)) = tree_entries_rx.recv() { match tree_values.into_resolved() { Ok(tree_value) => { tree_builder.set(path, tree_value.unwrap()); } Err(conflict) => { let conflict_id = self.store.write_conflict(&path, &conflict)?; tree_builder.set(path, TreeValue::Conflict(conflict_id)); } } } Ok(()) })?; trace_span!("process file states").in_scope(|| { while let Ok((path, file_state)) = file_states_rx.recv() { self.file_states.insert(path, file_state); } }); trace_span!("process present files").in_scope(|| { while let Ok(path) = present_files_rx.recv() { deleted_files.remove(&path); } }); trace_span!("process deleted files").in_scope(|| { for file in &deleted_files { self.file_states.remove(file); tree_builder.remove(file.clone()); } }); let has_changes = tree_builder.has_overrides(); trace_span!("write tree").in_scope(|| { self.tree_id = tree_builder.write_tree(); }); if cfg!(debug_assertions) { let tree = self.current_tree().unwrap(); let tree_paths: HashSet<_> = tree .entries_matching(sparse_matcher.as_ref()) .map(|(path, _)| path) .collect(); let state_paths: HashSet<_> = self.file_states.keys().cloned().collect(); assert_eq!(state_paths, tree_paths); } self.watchman_clock = watchman_clock; Ok(has_changes || fsmonitor_clock_needs_save) } #[allow(clippy::too_many_arguments)] fn visit_directory( &self, matcher: &dyn Matcher, current_tree: &MergedTree, tree_entries_tx: Sender<(RepoPath, Merge>)>, file_states_tx: Sender<(RepoPath, FileState)>, present_files_tx: Sender, directory_to_visit: DirectoryToVisit, progress: Option<&SnapshotProgress>, ) -> 
Result<(), SnapshotError> { let DirectoryToVisit { dir, disk_dir, git_ignore, } = directory_to_visit; if matcher.visit(&dir).is_nothing() { return Ok(()); } let git_ignore = git_ignore.chain_with_file(&dir.to_internal_dir_string(), disk_dir.join(".gitignore")); let dir_entries = disk_dir .read_dir() .unwrap() .map(|maybe_entry| maybe_entry.unwrap()) .collect_vec(); dir_entries.into_par_iter().try_for_each_with( ( tree_entries_tx.clone(), file_states_tx.clone(), present_files_tx.clone(), ), |(tree_entries_tx, file_states_tx, present_files_tx), entry| -> Result<(), SnapshotError> { let file_type = entry.file_type().unwrap(); let file_name = entry.file_name(); let name = file_name .to_str() .ok_or_else(|| SnapshotError::InvalidUtf8Path { path: file_name.clone(), })?; if name == ".jj" || name == ".git" { return Ok(()); } let path = dir.join(&RepoPathComponent::from(name)); if let Some(file_state) = self.file_states.get(&path) { if file_state.file_type == FileType::GitSubmodule { return Ok(()); } } if file_type.is_dir() { if git_ignore.matches_all_files_in(&path.to_internal_dir_string()) { // If the whole directory is ignored, visit only paths we're already // tracking. 
let tracked_paths = self .file_states .range((Bound::Excluded(&path), Bound::Unbounded)) .take_while(|(sub_path, _)| path.contains(sub_path)) .map(|(sub_path, file_state)| (sub_path.clone(), file_state.clone())) .collect_vec(); for (tracked_path, current_file_state) in tracked_paths { if !matcher.matches(&tracked_path) { continue; } let disk_path = tracked_path.to_fs_path(&self.working_copy_path); let metadata = match disk_path.metadata() { Ok(metadata) => metadata, Err(err) if err.kind() == std::io::ErrorKind::NotFound => { continue; } Err(err) => { return Err(SnapshotError::IoError { message: format!( "Failed to stat file {}", disk_path.display() ), err, }); } }; if let Some(new_file_state) = file_state(&metadata) { present_files_tx.send(tracked_path.clone()).ok(); let update = self.get_updated_tree_value( &tracked_path, disk_path, Some(¤t_file_state), current_tree, &new_file_state, )?; if let Some(tree_value) = update { tree_entries_tx .send((tracked_path.clone(), tree_value)) .ok(); } file_states_tx.send((tracked_path, new_file_state)).ok(); } } } else { let directory_to_visit = DirectoryToVisit { dir: path, disk_dir: entry.path(), git_ignore: git_ignore.clone(), }; self.visit_directory( matcher, current_tree, tree_entries_tx.clone(), file_states_tx.clone(), present_files_tx.clone(), directory_to_visit, progress, )?; } } else if matcher.matches(&path) { if let Some(progress) = progress { progress(&path); } let maybe_current_file_state = self.file_states.get(&path); if maybe_current_file_state.is_none() && git_ignore.matches_file(&path.to_internal_file_string()) { // If it wasn't already tracked and it matches // the ignored paths, then // ignore it. 
} else { let metadata = entry.metadata().map_err(|err| SnapshotError::IoError { message: format!("Failed to stat file {}", entry.path().display()), err, })?; if let Some(new_file_state) = file_state(&metadata) { present_files_tx.send(path.clone()).ok(); let update = self.get_updated_tree_value( &path, entry.path(), maybe_current_file_state, current_tree, &new_file_state, )?; if let Some(tree_value) = update { tree_entries_tx.send((path.clone(), tree_value)).ok(); } file_states_tx.send((path, new_file_state)).ok(); } } } Ok(()) }, )?; Ok(()) } #[instrument(skip_all)] fn make_fsmonitor_matcher( &mut self, fsmonitor_kind: Option, ) -> Result { let (watchman_clock, changed_files) = match fsmonitor_kind { None => (None, None), Some(FsmonitorKind::Test { changed_files }) => (None, Some(changed_files)), #[cfg(feature = "watchman")] Some(FsmonitorKind::Watchman) => match self.query_watchman() { Ok((watchman_clock, changed_files)) => (Some(watchman_clock.into()), changed_files), Err(err) => { tracing::warn!(?err, "Failed to query filesystem monitor"); (None, None) } }, #[cfg(not(feature = "watchman"))] Some(FsmonitorKind::Watchman) => { return Err(SnapshotError::FsmonitorError( "Cannot query Watchman because jj was not compiled with the `watchman` \ feature (consider disabling `core.fsmonitor`)" .to_string(), )); } }; let matcher: Option> = match changed_files { None => None, Some(changed_files) => { let repo_paths = trace_span!("processing fsmonitor paths").in_scope(|| { changed_files .into_iter() .filter_map(|path| { match RepoPath::parse_fs_path( &self.working_copy_path, &self.working_copy_path, path, ) { Ok(repo_path) => Some(repo_path), Err(FsPathParseError::InputNotInRepo(_)) => None, } }) .collect_vec() }); Some(Box::new(PrefixMatcher::new(&repo_paths))) } }; Ok(FsmonitorMatcher { matcher, watchman_clock, }) } fn get_updated_tree_value( &self, repo_path: &RepoPath, disk_path: PathBuf, maybe_current_file_state: Option<&FileState>, current_tree: &MergedTree, 
new_file_state: &FileState, ) -> Result>>, SnapshotError> { let clean = match maybe_current_file_state { None => { // untracked false } Some(current_file_state) => { // If the file's mtime was set at the same time as this state file's own mtime, // then we don't know if the file was modified before or after this state file. current_file_state == new_file_state && current_file_state.mtime < self.own_mtime } }; if clean { Ok(None) } else { let new_file_type = new_file_state.file_type.clone(); let current_tree_values = current_tree.path_value(repo_path); let new_tree_values = self.write_path_to_store( repo_path, &disk_path, ¤t_tree_values, new_file_type, )?; if new_tree_values != current_tree_values { Ok(Some(new_tree_values)) } else { Ok(None) } } } fn write_path_to_store( &self, repo_path: &RepoPath, disk_path: &Path, current_tree_values: &Merge>, file_type: FileType, ) -> Result>, SnapshotError> { let executable = match file_type { FileType::Normal { executable } => executable, FileType::Symlink => { let id = self.write_symlink_to_store(repo_path, disk_path)?; return Ok(Merge::normal(TreeValue::Symlink(id))); } FileType::GitSubmodule => panic!("git submodule cannot be written to store"), }; // If the file contained a conflict before and is now a normal file on disk, we // try to parse any conflict markers in the file into a conflict. if let Some(current_tree_value) = current_tree_values.as_resolved() { #[cfg(unix)] let _ = current_tree_value; // use the variable let id = self.write_file_to_store(repo_path, disk_path)?; // On Windows, we preserve the executable bit from the current tree. 
#[cfg(windows)] let executable = { let () = executable; // use the variable if let Some(TreeValue::File { id: _, executable }) = current_tree_value { *executable } else { false } }; Ok(Merge::normal(TreeValue::File { id, executable })) } else if let Some(old_file_ids) = current_tree_values.to_file_merge() { let content = fs::read(disk_path).map_err(|err| SnapshotError::IoError { message: format!("Failed to open file {}", disk_path.display()), err, })?; let new_file_ids = conflicts::update_from_content( &old_file_ids, self.store.as_ref(), repo_path, &content, )?; match new_file_ids.into_resolved() { Ok(file_id) => { #[cfg(windows)] let executable = { let () = executable; // use the variable false }; Ok(Merge::normal(TreeValue::File { id: file_id.unwrap(), executable, })) } Err(new_file_ids) => { if new_file_ids != old_file_ids { Ok(current_tree_values.with_new_file_ids(&new_file_ids)) } else { Ok(current_tree_values.clone()) } } } } else { Ok(current_tree_values.clone()) } } fn write_file( &self, disk_path: &Path, path: &RepoPath, id: &FileId, executable: bool, ) -> Result { create_parent_dirs(&self.working_copy_path, path)?; let mut file = OpenOptions::new() .write(true) .create_new(true) // Don't overwrite un-ignored file. Don't follow symlink. .open(disk_path) .map_err(|err| CheckoutError::IoError { message: format!("Failed to open file {} for writing", disk_path.display()), err, })?; let mut contents = self.store.read_file(path, id)?; let size = std::io::copy(&mut contents, &mut file).map_err(|err| CheckoutError::IoError { message: format!("Failed to write file {}", disk_path.display()), err, })?; self.set_executable(disk_path, executable)?; // Read the file state from the file descriptor. That way, know that the file // exists and is of the expected type, and the stat information is most likely // accurate, except for other processes modifying the file concurrently (The // mtime is set at write time and won't change when we close the file.) 
let metadata = file .metadata() .map_err(|err| CheckoutError::for_stat_error(err, disk_path))?; Ok(FileState::for_file(executable, size, &metadata)) } #[cfg_attr(windows, allow(unused_variables))] fn write_symlink( &self, disk_path: &Path, path: &RepoPath, id: &SymlinkId, ) -> Result { create_parent_dirs(&self.working_copy_path, path)?; let target = self.store.read_symlink(path, id)?; #[cfg(windows)] { println!("ignoring symlink at {:?}", path); } #[cfg(unix)] { let target = PathBuf::from(&target); symlink(&target, disk_path).map_err(|err| CheckoutError::IoError { message: format!( "Failed to create symlink from {} to {}", disk_path.display(), target.display() ), err, })?; } let metadata = disk_path .symlink_metadata() .map_err(|err| CheckoutError::for_stat_error(err, disk_path))?; Ok(FileState::for_symlink(&metadata)) } fn write_conflict( &self, disk_path: &Path, path: &RepoPath, id: &ConflictId, ) -> Result { create_parent_dirs(&self.working_copy_path, path)?; let conflict = self.store.read_conflict(path, id)?; let mut file = OpenOptions::new() .write(true) .create_new(true) // Don't overwrite un-ignored file. Don't follow symlink. .open(disk_path) .map_err(|err| CheckoutError::IoError { message: format!("Failed to open file {} for writing", disk_path.display()), err, })?; let mut conflict_data = vec![]; conflicts::materialize(&conflict, self.store.as_ref(), path, &mut conflict_data) .expect("Failed to materialize conflict to in-memory buffer"); file.write_all(&conflict_data) .map_err(|err| CheckoutError::IoError { message: format!("Failed to write conflict to file {}", disk_path.display()), err, })?; let size = conflict_data.len() as u64; // TODO: Set the executable bit correctly (when possible) and preserve that on // Windows like we do with the executable bit for regular files. 
        // Record the file state from the open fd (see write_file for why);
        // conflicts are written as non-executable for now (see TODO above).
        let metadata = file
            .metadata()
            .map_err(|err| CheckoutError::for_stat_error(err, disk_path))?;
        Ok(FileState::for_file(false, size, &metadata))
    }

    /// Sets or clears the executable bit on `disk_path` (mode 0o755 vs 0o644).
    /// No-op on non-Unix platforms — Windows has no executable bit.
    #[cfg_attr(windows, allow(unused_variables))]
    fn set_executable(&self, disk_path: &Path, executable: bool) -> Result<(), CheckoutError> {
        #[cfg(unix)]
        {
            let mode = if executable { 0o755 } else { 0o644 };
            fs::set_permissions(disk_path, fs::Permissions::from_mode(mode))
                .map_err(|err| CheckoutError::for_stat_error(err, disk_path))?;
        }
        Ok(())
    }

    /// Checks out `new_tree` on disk by diffing against the currently recorded
    /// tree and applying the changes, then records `new_tree` as current.
    ///
    /// Errors from applying individual diffs are propagated as-is (the `Err`
    /// handler). A missing current tree object is reported as
    /// `CheckoutError::SourceNotFound`.
    pub fn check_out(&mut self, new_tree: &Tree) -> Result {
        let old_tree = self.current_tree().map_err(|err| match err {
            err @ BackendError::ObjectNotFound { .. } => CheckoutError::SourceNotFound {
                source: Box::new(err),
            },
            other => CheckoutError::InternalBackendError(other),
        })?;
        let stats = self.update(&old_tree, new_tree, self.sparse_matcher().as_ref(), Err)?;
        self.tree_id = new_tree.id().clone();
        Ok(stats)
    }

    /// Switches the working copy to a new set of sparse patterns (path
    /// prefixes): materializes files newly covered by the patterns and removes
    /// files no longer covered, leaving everything else untouched.
    ///
    /// NOTE(review): the element type of `sparse_patterns: Vec` and the return
    /// type were stripped by extraction — presumably `Vec<RepoPath>` /
    /// `Result<CheckoutStats, CheckoutError>`; confirm against the original.
    pub fn set_sparse_patterns(
        &mut self,
        sparse_patterns: Vec,
    ) -> Result {
        let tree = self.current_tree().map_err(|err| match err {
            err @ BackendError::ObjectNotFound { .. } => CheckoutError::SourceNotFound {
                source: Box::new(err),
            },
            other => CheckoutError::InternalBackendError(other),
        })?;
        // added = newly covered paths, removed = paths only the old patterns
        // covered; paths covered by both matchers need no work.
        let old_matcher = PrefixMatcher::new(&self.sparse_patterns);
        let new_matcher = PrefixMatcher::new(&sparse_patterns);
        let added_matcher = DifferenceMatcher::new(&new_matcher, &old_matcher);
        let removed_matcher = DifferenceMatcher::new(&old_matcher, &new_matcher);
        let empty_tree = Tree::null(self.store.clone(), RepoPath::root());
        // Adding coverage = checking out from the empty tree; a file already on
        // disk is kept and treated as modified rather than being an error.
        let added_stats = self.update(
            &empty_tree,
            &tree,
            &added_matcher,
            suppress_file_exists_error, // Keep un-ignored file and mark it as modified
        )?;
        // Removing coverage = checking out the empty tree over those paths.
        let removed_stats = self.update(&tree, &empty_tree, &removed_matcher, Err)?;
        self.sparse_patterns = sparse_patterns;
        // By construction, the "added" pass can only add and the "removed"
        // pass can only remove; anything else is a bug.
        assert_eq!(added_stats.updated_files, 0);
        assert_eq!(added_stats.removed_files, 0);
        assert_eq!(removed_stats.updated_files, 0);
        assert_eq!(removed_stats.added_files, 0);
        Ok(CheckoutStats {
            updated_files: 0,
            added_files: added_stats.added_files,
            removed_files: removed_stats.removed_files,
        })
    }

    /// Applies the diff between `old_tree` and `new_tree` (restricted by
    /// `matcher`) to the working copy on disk and to `self.file_states`.
    ///
    /// `handle_error` decides per-path whether a [`CheckoutError`] aborts the
    /// whole update (`Err`) or is suppressed (e.g.
    /// `suppress_file_exists_error` during sparse-pattern widening).
    /// Returns counts of updated/added/removed files.
    fn update(
        &mut self,
        old_tree: &Tree,
        new_tree: &Tree,
        matcher: &dyn Matcher,
        mut handle_error: impl FnMut(CheckoutError) -> Result<(), CheckoutError>,
    ) -> Result {
        let mut stats = CheckoutStats {
            updated_files: 0,
            added_files: 0,
            removed_files: 0,
        };
        let mut apply_diff = |path: RepoPath, diff: Diff| -> Result<(), CheckoutError> {
            let disk_path = path.to_fs_path(&self.working_copy_path);

            // TODO: Check that the file has not changed before overwriting/removing it.
            match diff.into_options() {
                // Path removed: delete the file, then prune now-empty parent
                // directories bottom-up until one fails to delete (non-empty
                // or otherwise undeletable).
                (Some(_before), None) => {
                    fs::remove_file(&disk_path).ok();
                    let mut parent_dir = disk_path.parent().unwrap();
                    loop {
                        if fs::remove_dir(parent_dir).is_err() {
                            break;
                        }
                        parent_dir = parent_dir.parent().unwrap();
                    }
                    self.file_states.remove(&path);
                    stats.removed_files += 1;
                }
                // Path added: materialize the new tree value on disk.
                (None, Some(after)) => {
                    let file_state = match after {
                        TreeValue::File { id, executable } => {
                            self.write_file(&disk_path, &path, &id, executable)?
                        }
                        TreeValue::Symlink(id) => self.write_symlink(&disk_path, &path, &id)?,
                        TreeValue::Conflict(id) => self.write_conflict(&disk_path, &path, &id)?,
                        TreeValue::GitSubmodule(_id) => {
                            println!("ignoring git submodule at {path:?}");
                            FileState::for_gitsubmodule()
                        }
                        TreeValue::Tree(_id) => {
                            panic!("unexpected tree entry in diff at {path:?}");
                        }
                    };
                    self.file_states.insert(path, file_state);
                    stats.added_files += 1;
                }
                // Same file id on both sides, only the executable bit differs:
                // flip the bit in place instead of rewriting the content.
                (
                    Some(TreeValue::File {
                        id: old_id,
                        executable: old_executable,
                    }),
                    Some(TreeValue::File { id, executable }),
                ) if id == old_id => {
                    // Optimization for when only the executable bit changed
                    assert_ne!(executable, old_executable);
                    #[cfg(unix)]
                    {
                        self.set_executable(&disk_path, executable)?;
                        let file_state = self.file_states.get_mut(&path).unwrap();
                        file_state.mark_executable(executable);
                    }
                    stats.updated_files += 1;
                }
                // Path modified: remove the old file, then write the new value
                // (same materialization logic as the "added" arm).
                (Some(_before), Some(after)) => {
                    fs::remove_file(&disk_path).ok();
                    let file_state = match after {
                        TreeValue::File { id, executable } => {
                            self.write_file(&disk_path, &path, &id, executable)?
                        }
                        TreeValue::Symlink(id) => self.write_symlink(&disk_path, &path, &id)?,
                        TreeValue::Conflict(id) => self.write_conflict(&disk_path, &path, &id)?,
                        TreeValue::GitSubmodule(_id) => {
                            println!("ignoring git submodule at {path:?}");
                            FileState::for_gitsubmodule()
                        }
                        TreeValue::Tree(_id) => {
                            panic!("unexpected tree entry in diff at {path:?}");
                        }
                    };
                    self.file_states.insert(path, file_state);
                    stats.updated_files += 1;
                }
                // A diff entry always has at least one side.
                (None, None) => {
                    unreachable!()
                }
            }
            Ok(())
        };
        for (path, diff) in old_tree.diff(new_tree, matcher) {
            // Per-path errors are routed through the caller-supplied handler,
            // which either suppresses them or aborts the update.
            apply_diff(path, diff).or_else(&mut handle_error)?;
        }
        Ok(stats)
    }

    /// Resets the recorded tree to `new_tree` WITHOUT touching files on disk.
    ///
    /// File states for changed paths are rewritten with a zero mtime/size so
    /// the next snapshot re-examines them (i.e. every changed path is
    /// considered dirty until re-stat'ed).
    pub fn reset(&mut self, new_tree: &Tree) -> Result<(), ResetError> {
        let old_tree = self.current_tree().map_err(|err| match err {
            err @ BackendError::ObjectNotFound { .. } => ResetError::SourceNotFound {
                source: Box::new(err),
            },
            other => ResetError::InternalBackendError(other),
        })?;
        for (path, diff) in old_tree.diff(new_tree, self.sparse_matcher().as_ref()) {
            match diff {
                Diff::Removed(_before) => {
                    self.file_states.remove(&path);
                }
                Diff::Added(after) | Diff::Modified(_, after) => {
                    let file_type = match after {
                        #[cfg(unix)]
                        TreeValue::File { id: _, executable } => FileType::Normal { executable },
                        #[cfg(windows)]
                        TreeValue::File { .. } => FileType::Normal { executable: () },
                        TreeValue::Symlink(_id) => FileType::Symlink,
                        TreeValue::Conflict(_id) => {
                            // TODO: Try to set the executable bit based on the conflict
                            FileType::Normal {
                                executable: FileExecutableFlag::default(),
                            }
                        }
                        TreeValue::GitSubmodule(_id) => {
                            println!("ignoring git submodule at {path:?}");
                            FileType::GitSubmodule
                        }
                        TreeValue::Tree(_id) => {
                            panic!("unexpected tree entry in diff at {path:?}");
                        }
                    };
                    // Zeroed mtime/size force the next snapshot to re-read
                    // this path instead of trusting the cached state.
                    let file_state = FileState {
                        file_type,
                        mtime: MillisSinceEpoch(0),
                        size: 0,
                    };
                    self.file_states.insert(path.clone(), file_state);
                }
            }
        }
        self.tree_id = new_tree.id().clone();
        Ok(())
    }
}

/// Working copy state stored in "checkout" file.
#[derive(Clone, Debug)]
struct CheckoutState {
    // The operation this working copy was last updated to.
    operation_id: OperationId,
    // Which workspace of the repo this working copy belongs to.
    workspace_id: WorkspaceId,
}

/// The top-level working-copy handle: paths plus lazily loaded checkout and
/// tree state.
///
/// NOTE(review): generic arguments on `Arc`/`OnceCell` were stripped by
/// extraction (presumably `Arc<Store>`, `OnceCell<CheckoutState>`,
/// `OnceCell<TreeState>`); confirm against the original.
pub struct WorkingCopy {
    store: Arc,
    working_copy_path: PathBuf,
    state_path: PathBuf,
    // Both caches are lazily populated from files under `state_path`.
    checkout_state: OnceCell,
    tree_state: OnceCell,
}

impl WorkingCopy {
    /// Initializes a new working copy at `working_copy_path`. The working
    /// copy's state will be stored in the `state_path` directory. The working
    /// copy will have the empty tree checked out.
    pub fn init(
        store: Arc,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        operation_id: OperationId,
        workspace_id: WorkspaceId,
    ) -> Result {
        // Persist the initial checkout (operation + workspace) proto. Using
        // `create_new` guarantees we never clobber an existing working copy.
        let proto = crate::protos::working_copy::Checkout {
            operation_id: operation_id.to_bytes(),
            workspace_id: workspace_id.as_str().to_string(),
            ..Default::default()
        };
        let mut file = OpenOptions::new()
            .create_new(true)
            .write(true)
            .open(state_path.join("checkout"))
            .unwrap();
        file.write_all(&proto.encode_to_vec()).unwrap();
        let tree_state =
            TreeState::init(store.clone(), working_copy_path.clone(), state_path.clone())?;
        Ok(WorkingCopy {
            store,
            working_copy_path,
            state_path,
            checkout_state: OnceCell::new(),
            // The tree state was just created, so seed the cache directly.
            tree_state: OnceCell::with_value(tree_state),
        })
    }

    /// Loads an existing working copy. Both checkout state and tree state are
    /// read lazily on first access, so this never touches disk.
    pub fn load(store: Arc, working_copy_path: PathBuf, state_path: PathBuf) -> WorkingCopy {
        WorkingCopy {
            store,
            working_copy_path,
            state_path,
            checkout_state: OnceCell::new(),
            tree_state: OnceCell::new(),
        }
    }

    /// The root directory of the working copy on disk.
    pub fn working_copy_path(&self) -> &Path {
        &self.working_copy_path
    }

    /// The directory where this working copy's state is stored.
    pub fn state_path(&self) -> &Path {
        &self.state_path
    }

    /// Atomically rewrites the "checkout" state file via a temp file + rename.
    fn write_proto(&self, proto: crate::protos::working_copy::Checkout) {
        let mut temp_file = NamedTempFile::new_in(&self.state_path).unwrap();
        temp_file
            .as_file_mut()
            .write_all(&proto.encode_to_vec())
            .unwrap();
        // TODO: Retry if persisting fails (it will on Windows if the file happened to
        // be open for read).
        temp_file.persist(self.state_path.join("checkout")).unwrap();
    }

    /// Returns the checkout state, reading and decoding the "checkout" file on
    /// first call and caching the result.
    fn checkout_state(&self) -> &CheckoutState {
        self.checkout_state.get_or_init(|| {
            let buf = fs::read(self.state_path.join("checkout")).unwrap();
            let proto = crate::protos::working_copy::Checkout::decode(&*buf).unwrap();
            CheckoutState {
                operation_id: OperationId::new(proto.operation_id),
                workspace_id: if proto.workspace_id.is_empty() {
                    // For compatibility with old working copies.
                    // TODO: Delete in mid 2022 or so
                    WorkspaceId::default()
                } else {
                    WorkspaceId::new(proto.workspace_id)
                },
            }
        })
    }

    /// Mutable access to the (loaded) checkout state.
    fn checkout_state_mut(&mut self) -> &mut CheckoutState {
        self.checkout_state(); // ensure loaded
        self.checkout_state.get_mut().unwrap()
    }

    /// The operation id the working copy was last updated to.
    pub fn operation_id(&self) -> &OperationId {
        &self.checkout_state().operation_id
    }

    /// The workspace this working copy belongs to.
    pub fn workspace_id(&self) -> &WorkspaceId {
        &self.checkout_state().workspace_id
    }

    /// Returns the tree state, loading it from `state_path` on first call.
    #[instrument(skip_all)]
    fn tree_state(&self) -> Result<&TreeState, TreeStateError> {
        self.tree_state.get_or_try_init(|| {
            TreeState::load(
                self.store.clone(),
                self.working_copy_path.clone(),
                self.state_path.clone(),
            )
        })
    }

    /// Mutable access to the (loaded) tree state.
    fn tree_state_mut(&mut self) -> Result<&mut TreeState, TreeStateError> {
        self.tree_state()?; // ensure loaded
        Ok(self.tree_state.get_mut().unwrap())
    }

    /// The id of the tree currently checked out.
    pub fn current_tree_id(&self) -> Result<&TreeId, TreeStateError> {
        Ok(self.tree_state()?.current_tree_id())
    }

    /// The per-path file states recorded by the tree state.
    /// NOTE(review): `&BTreeMap` lost its key/value parameters to extraction —
    /// presumably `&BTreeMap<RepoPath, FileState>`; confirm.
    pub fn file_states(&self) -> Result<&BTreeMap, TreeStateError> {
        Ok(self.tree_state()?.file_states())
    }

    /// The current sparse patterns (path prefixes) of this working copy.
    pub fn sparse_patterns(&self) -> Result<&[RepoPath], TreeStateError> {
        Ok(self.tree_state()?.sparse_patterns())
    }

    /// Persists the checkout state (operation + workspace id) to disk.
    #[instrument(skip_all)]
    fn save(&mut self) {
        self.write_proto(crate::protos::working_copy::Checkout {
            operation_id: self.operation_id().to_bytes(),
            workspace_id: self.workspace_id().as_str().to_string(),
            ..Default::default()
        });
    }

    /// Locks the working copy on disk and returns a mutation guard.
    ///
    /// The cached checkout and tree state are dropped after taking the lock so
    /// they are re-read from disk, guaranteeing the guard sees the state as of
    /// lock acquisition.
    pub fn start_mutation(&mut self) -> Result {
        let lock_path = self.state_path.join("working_copy.lock");
        let lock = FileLock::lock(lock_path);

        // Re-read from disk after taking the lock
        self.checkout_state.take();
        // TODO: It's expensive to reload the whole tree. We should first check if it
        // has changed.
        self.tree_state.take();
        let old_operation_id = self.operation_id().clone();
        let old_tree_id = self.current_tree_id()?.clone();
        Ok(LockedWorkingCopy {
            wc: self,
            lock,
            old_operation_id,
            old_tree_id,
            tree_state_dirty: false,
            closed: false,
        })
    }

    /// Convenience wrapper: locks the working copy, checks out `new_tree`, and
    /// records `operation_id` — refusing with `ConcurrentCheckout` if the
    /// on-disk tree no longer matches the caller's expected `old_tree_id`.
    pub fn check_out(
        &mut self,
        operation_id: OperationId,
        old_tree_id: Option<&TreeId>,
        new_tree: &Tree,
    ) -> Result {
        let mut locked_wc = self.start_mutation()?;
        // Check if the current working-copy commit has changed on disk compared to what
        // the caller expected. It's safe to check out another commit
        // regardless, but it's probably not what the caller wanted, so we let
        // them know.
        if let Some(old_tree_id) = old_tree_id {
            if *old_tree_id != locked_wc.old_tree_id {
                locked_wc.discard();
                return Err(CheckoutError::ConcurrentCheckout);
            }
        }
        let stats = locked_wc.check_out(new_tree)?;
        locked_wc.finish(operation_id)?;
        Ok(stats)
    }

    /// Queries the Watchman filesystem monitor via the tree state.
    /// NOTE(review): the `Option>` in the return type lost its parameter to
    /// extraction (presumably a list of changed paths); confirm.
    #[cfg(feature = "watchman")]
    pub fn query_watchman(
        &self,
    ) -> Result<(watchman::Clock, Option>), TreeStateError> {
        self.tree_state()?.query_watchman()
    }
}

/// A working copy that's locked on disk. The lock is held until you call
/// `finish()` or `discard()`.
pub struct LockedWorkingCopy<'a> {
    wc: &'a mut WorkingCopy,
    // Held only for its lifetime; released on drop.
    #[allow(dead_code)]
    lock: FileLock,
    // Snapshot of the state at lock time, used to detect concurrent changes.
    old_operation_id: OperationId,
    old_tree_id: TreeId,
    // True once any mutation has touched the tree state (must be saved).
    tree_state_dirty: bool,
    // Set by finish()/discard(); Drop warns if neither was called.
    closed: bool,
}

impl LockedWorkingCopy<'_> {
    /// The operation at the time the lock was taken
    pub fn old_operation_id(&self) -> &OperationId {
        &self.old_operation_id
    }

    /// The tree at the time the lock was taken
    pub fn old_tree_id(&self) -> &TreeId {
        &self.old_tree_id
    }

    /// Clears the stored Watchman clock so the next snapshot rescans fully.
    pub fn reset_watchman(&mut self) -> Result<(), SnapshotError> {
        self.wc.tree_state_mut()?.reset_watchman();
        self.tree_state_dirty = true;
        Ok(())
    }

    // The base_ignores are passed in here rather than being set on the TreeState
    // because the TreeState may be long-lived if the library is used in a
    // long-lived process.
    /// Snapshots the working copy and returns the resulting tree id; marks the
    /// tree state dirty only if the snapshot actually changed anything.
    pub fn snapshot(&mut self, options: SnapshotOptions) -> Result {
        let tree_state = self.wc.tree_state_mut()?;
        self.tree_state_dirty |= tree_state.snapshot(options)?;
        Ok(tree_state.current_tree_id().clone())
    }

    /// Checks out `new_tree` on disk under the held lock.
    pub fn check_out(&mut self, new_tree: &Tree) -> Result {
        // TODO: Write a "pending_checkout" file with the new TreeId so we can
        // continue an interrupted update if we find such a file.
        let stats = self.wc.tree_state_mut()?.check_out(new_tree)?;
        self.tree_state_dirty = true;
        Ok(stats)
    }

    /// Resets the recorded tree to `new_tree` without touching disk files.
    pub fn reset(&mut self, new_tree: &Tree) -> Result<(), ResetError> {
        self.wc.tree_state_mut()?.reset(new_tree)?;
        self.tree_state_dirty = true;
        Ok(())
    }

    /// The current sparse patterns of the locked working copy.
    pub fn sparse_patterns(&self) -> Result<&[RepoPath], TreeStateError> {
        self.wc.sparse_patterns()
    }

    /// Replaces the sparse patterns, updating files on disk accordingly.
    pub fn set_sparse_patterns(
        &mut self,
        new_sparse_patterns: Vec,
    ) -> Result {
        // TODO: Write a "pending_checkout" file with new sparse patterns so we can
        // continue an interrupted update if we find such a file.
        let stats = self
            .wc
            .tree_state_mut()?
            .set_sparse_patterns(new_sparse_patterns)?;
        self.tree_state_dirty = true;
        Ok(stats)
    }

    /// Commits this mutation: saves the tree state if dirty, records
    /// `operation_id` if it changed, and releases the lock.
    #[instrument(skip_all)]
    pub fn finish(mut self, operation_id: OperationId) -> Result<(), TreeStateError> {
        // If nothing was marked dirty, the tree must be unchanged — otherwise
        // we'd silently lose a mutation.
        assert!(self.tree_state_dirty || &self.old_tree_id == self.wc.current_tree_id()?);
        if self.tree_state_dirty {
            self.wc.tree_state_mut()?.save()?;
        }
        if self.old_operation_id != operation_id {
            self.wc.checkout_state_mut().operation_id = operation_id;
            self.wc.save();
        }
        // TODO: Clear the "pending_checkout" file here.
        self.tree_state_dirty = false;
        self.closed = true;
        Ok(())
    }

    /// Abandons this mutation: drops the in-memory tree state (so it will be
    /// re-read from disk) and releases the lock without saving anything.
    pub fn discard(mut self) {
        // Undo the changes in memory
        self.wc.tree_state.take();
        self.tree_state_dirty = false;
        self.closed = true;
    }
}

impl Drop for LockedWorkingCopy<'_> {
    fn drop(&mut self) {
        // Dropping the guard without finish()/discard() is a programming
        // error; warn unless we're already unwinding from a panic.
        if !self.closed && !std::thread::panicking() {
            eprintln!("BUG: Working copy lock was dropped without being closed.");
        }
    }
}

/// Callback invoked for each path visited while snapshotting.
pub type SnapshotProgress<'a> = dyn Fn(&RepoPath) + 'a + Sync;