just playing with tangled
at docs-prerelease 2228 lines 86 kB view raw
1// Copyright 2020 The Jujutsu Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15#![allow(missing_docs)] 16 17use std::any::Any; 18use std::collections::HashSet; 19use std::fmt::Debug; 20use std::fmt::Error; 21use std::fmt::Formatter; 22use std::fs; 23use std::io; 24use std::io::Cursor; 25use std::io::Read; 26use std::path::Path; 27use std::path::PathBuf; 28use std::process::Command; 29use std::process::ExitStatus; 30use std::str; 31use std::sync::Arc; 32use std::sync::Mutex; 33use std::sync::MutexGuard; 34use std::time::SystemTime; 35 36use async_trait::async_trait; 37use futures::stream::BoxStream; 38use gix::bstr::BString; 39use gix::objs::CommitRef; 40use gix::objs::CommitRefIter; 41use gix::objs::WriteTo; 42use itertools::Itertools; 43use pollster::FutureExt; 44use prost::Message; 45use smallvec::SmallVec; 46use thiserror::Error; 47 48use crate::backend::make_root_commit; 49use crate::backend::Backend; 50use crate::backend::BackendError; 51use crate::backend::BackendInitError; 52use crate::backend::BackendLoadError; 53use crate::backend::BackendResult; 54use crate::backend::ChangeId; 55use crate::backend::Commit; 56use crate::backend::CommitId; 57use crate::backend::Conflict; 58use crate::backend::ConflictId; 59use crate::backend::ConflictTerm; 60use crate::backend::CopyRecord; 61use crate::backend::FileId; 62use crate::backend::MergedTreeId; 63use crate::backend::MillisSinceEpoch; 64use crate::backend::SecureSig; 65use 
crate::backend::Signature;
use crate::backend::SigningFn;
use crate::backend::SymlinkId;
use crate::backend::Timestamp;
use crate::backend::Tree;
use crate::backend::TreeId;
use crate::backend::TreeValue;
use crate::file_util::IoResultExt as _;
use crate::file_util::PathError;
use crate::index::Index;
use crate::lock::FileLock;
use crate::merge::Merge;
use crate::merge::MergeBuilder;
use crate::object_id::ObjectId;
use crate::repo_path::RepoPath;
use crate::repo_path::RepoPathBuf;
use crate::repo_path::RepoPathComponentBuf;
use crate::settings::UserSettings;
use crate::stacked_table::MutableTable;
use crate::stacked_table::ReadonlyTable;
use crate::stacked_table::TableSegment;
use crate::stacked_table::TableStore;
use crate::stacked_table::TableStoreError;

/// Length in bytes of the object ids (commit, tree, file, ...) accepted by
/// this backend.
const HASH_LENGTH: usize = 20;
/// Length in bytes of the change ids produced by this backend.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";
/// File-name suffix of blob entries that are read back as conflicts.
const CONFLICT_SUFFIX: &str = ".jjconflict";

/// Name of the extra commit header that lists the tree ids of a merged root
/// tree.
const JJ_TREES_COMMIT_HEADER: &[u8] = b"jj:trees";

/// Error that may occur while initializing a `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendInitError {
    #[error("Failed to initialize git repository")]
    InitRepository(#[source] gix::init::Error),
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    #[error(transparent)]
    Path(PathError),
}

impl From<Box<GitBackendInitError>> for BackendInitError {
    fn from(err: Box<GitBackendInitError>) -> Self {
        BackendInitError(err)
    }
}

/// Error that may occur while loading an existing `GitBackend`.
#[derive(Debug, Error)]
pub enum GitBackendLoadError {
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    #[error(transparent)]
    Path(PathError),
}

impl From<Box<GitBackendLoadError>> for BackendLoadError {
    fn from(err: Box<GitBackendLoadError>) -> Self {
        BackendLoadError(err)
    }
}

/// `GitBackend`-specific error that may occur after the backend is
loaded. 128#[derive(Debug, Error)] 129pub enum GitBackendError { 130 #[error("Failed to read non-git metadata")] 131 ReadMetadata(#[source] TableStoreError), 132 #[error("Failed to write non-git metadata")] 133 WriteMetadata(#[source] TableStoreError), 134} 135 136impl From<GitBackendError> for BackendError { 137 fn from(err: GitBackendError) -> Self { 138 BackendError::Other(err.into()) 139 } 140} 141 142#[derive(Debug, Error)] 143pub enum GitGcError { 144 #[error("Failed to run git gc command")] 145 GcCommand(#[source] std::io::Error), 146 #[error("git gc command exited with an error: {0}")] 147 GcCommandErrorStatus(ExitStatus), 148} 149 150pub struct GitBackend { 151 // While gix::Repository can be created from gix::ThreadSafeRepository, it's 152 // cheaper to cache the thread-local instance behind a mutex than creating 153 // one for each backend method call. Our GitBackend is most likely to be 154 // used in a single-threaded context. 155 base_repo: gix::ThreadSafeRepository, 156 repo: Mutex<gix::Repository>, 157 root_commit_id: CommitId, 158 root_change_id: ChangeId, 159 empty_tree_id: TreeId, 160 extra_metadata_store: TableStore, 161 cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>, 162} 163 164impl GitBackend { 165 pub fn name() -> &'static str { 166 "git" 167 } 168 169 fn new(base_repo: gix::ThreadSafeRepository, extra_metadata_store: TableStore) -> Self { 170 let repo = Mutex::new(base_repo.to_thread_local()); 171 let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]); 172 let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]); 173 let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904"); 174 GitBackend { 175 base_repo, 176 repo, 177 root_commit_id, 178 root_change_id, 179 empty_tree_id, 180 extra_metadata_store, 181 cached_extra_metadata: Mutex::new(None), 182 } 183 } 184 185 pub fn init_internal( 186 settings: &UserSettings, 187 store_path: &Path, 188 ) -> Result<Self, Box<GitBackendInitError>> { 189 let 
git_repo_path = Path::new("git"); 190 let git_repo = gix::ThreadSafeRepository::init_opts( 191 store_path.join(git_repo_path), 192 gix::create::Kind::Bare, 193 gix::create::Options::default(), 194 gix_open_opts_from_settings(settings), 195 ) 196 .map_err(GitBackendInitError::InitRepository)?; 197 Self::init_with_repo(store_path, git_repo_path, git_repo) 198 } 199 200 /// Initializes backend by creating a new Git repo at the specified 201 /// workspace path. The workspace directory must exist. 202 pub fn init_colocated( 203 settings: &UserSettings, 204 store_path: &Path, 205 workspace_root: &Path, 206 ) -> Result<Self, Box<GitBackendInitError>> { 207 let canonical_workspace_root = { 208 let path = store_path.join(workspace_root); 209 dunce::canonicalize(&path) 210 .context(&path) 211 .map_err(GitBackendInitError::Path)? 212 }; 213 let git_repo = gix::ThreadSafeRepository::init_opts( 214 canonical_workspace_root, 215 gix::create::Kind::WithWorktree, 216 gix::create::Options::default(), 217 gix_open_opts_from_settings(settings), 218 ) 219 .map_err(GitBackendInitError::InitRepository)?; 220 let git_repo_path = workspace_root.join(".git"); 221 Self::init_with_repo(store_path, &git_repo_path, git_repo) 222 } 223 224 /// Initializes backend with an existing Git repo at the specified path. 225 pub fn init_external( 226 settings: &UserSettings, 227 store_path: &Path, 228 git_repo_path: &Path, 229 ) -> Result<Self, Box<GitBackendInitError>> { 230 let canonical_git_repo_path = { 231 let path = store_path.join(git_repo_path); 232 canonicalize_git_repo_path(&path) 233 .context(&path) 234 .map_err(GitBackendInitError::Path)? 
235 }; 236 let git_repo = gix::ThreadSafeRepository::open_opts( 237 canonical_git_repo_path, 238 gix_open_opts_from_settings(settings), 239 ) 240 .map_err(GitBackendInitError::OpenRepository)?; 241 Self::init_with_repo(store_path, git_repo_path, git_repo) 242 } 243 244 fn init_with_repo( 245 store_path: &Path, 246 git_repo_path: &Path, 247 git_repo: gix::ThreadSafeRepository, 248 ) -> Result<Self, Box<GitBackendInitError>> { 249 let extra_path = store_path.join("extra"); 250 fs::create_dir(&extra_path) 251 .context(&extra_path) 252 .map_err(GitBackendInitError::Path)?; 253 let target_path = store_path.join("git_target"); 254 if cfg!(windows) && git_repo_path.is_relative() { 255 // When a repository is created in Windows, format the path with *forward 256 // slashes* and not backwards slashes. This makes it possible to use the same 257 // repository under Windows Subsystem for Linux. 258 // 259 // This only works for relative paths. If the path is absolute, there's not much 260 // we can do, and it simply won't work inside and outside WSL at the same time. 
261 let git_repo_path_string = git_repo_path 262 .components() 263 .map(|component| component.as_os_str().to_str().unwrap().to_owned()) 264 .join("/"); 265 fs::write(&target_path, git_repo_path_string.as_bytes()) 266 .context(&target_path) 267 .map_err(GitBackendInitError::Path)?; 268 } else { 269 fs::write(&target_path, git_repo_path.to_str().unwrap().as_bytes()) 270 .context(&target_path) 271 .map_err(GitBackendInitError::Path)?; 272 }; 273 let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH); 274 Ok(GitBackend::new(git_repo, extra_metadata_store)) 275 } 276 277 pub fn load( 278 settings: &UserSettings, 279 store_path: &Path, 280 ) -> Result<Self, Box<GitBackendLoadError>> { 281 let git_repo_path = { 282 let target_path = store_path.join("git_target"); 283 let git_repo_path_str = fs::read_to_string(&target_path) 284 .context(&target_path) 285 .map_err(GitBackendLoadError::Path)?; 286 let git_repo_path = store_path.join(git_repo_path_str); 287 canonicalize_git_repo_path(&git_repo_path) 288 .context(&git_repo_path) 289 .map_err(GitBackendLoadError::Path)? 290 }; 291 let repo = gix::ThreadSafeRepository::open_opts( 292 git_repo_path, 293 gix_open_opts_from_settings(settings), 294 ) 295 .map_err(GitBackendLoadError::OpenRepository)?; 296 let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH); 297 Ok(GitBackend::new(repo, extra_metadata_store)) 298 } 299 300 fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> { 301 self.repo.lock().unwrap() 302 } 303 304 /// Returns new thread-local instance to access to the underlying Git repo. 305 pub fn git_repo(&self) -> gix::Repository { 306 self.base_repo.to_thread_local() 307 } 308 309 /// Creates new owned git repository instance. 310 pub fn open_git_repo(&self) -> Result<git2::Repository, git2::Error> { 311 git2::Repository::open(self.git_repo_path()) 312 } 313 314 /// Path to the `.git` directory or the repository itself if it's bare. 
315 pub fn git_repo_path(&self) -> &Path { 316 self.base_repo.path() 317 } 318 319 /// Path to the working directory if the repository isn't bare. 320 pub fn git_workdir(&self) -> Option<&Path> { 321 self.base_repo.work_dir() 322 } 323 324 fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> { 325 let mut locked_head = self.cached_extra_metadata.lock().unwrap(); 326 match locked_head.as_ref() { 327 Some(head) => Ok(head.clone()), 328 None => { 329 let table = self 330 .extra_metadata_store 331 .get_head() 332 .map_err(GitBackendError::ReadMetadata)?; 333 *locked_head = Some(table.clone()); 334 Ok(table) 335 } 336 } 337 } 338 339 fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> { 340 let table = self 341 .extra_metadata_store 342 .get_head_locked() 343 .map_err(GitBackendError::ReadMetadata)?; 344 Ok(table) 345 } 346 347 fn save_extra_metadata_table( 348 &self, 349 mut_table: MutableTable, 350 _table_lock: &FileLock, 351 ) -> BackendResult<()> { 352 let table = self 353 .extra_metadata_store 354 .save_table(mut_table) 355 .map_err(GitBackendError::WriteMetadata)?; 356 // Since the parent table was the head, saved table are likely to be new head. 357 // If it's not, cache will be reloaded when entry can't be found. 358 *self.cached_extra_metadata.lock().unwrap() = Some(table); 359 Ok(()) 360 } 361 362 /// Imports the given commits and ancestors from the backing Git repo. 363 /// 364 /// The `head_ids` may contain commits that have already been imported, but 365 /// the caller should filter them out to eliminate redundant I/O processing. 
366 #[tracing::instrument(skip(self, head_ids))] 367 pub fn import_head_commits<'a>( 368 &self, 369 head_ids: impl IntoIterator<Item = &'a CommitId>, 370 ) -> BackendResult<()> { 371 self.import_head_commits_with_tree_conflicts(head_ids, true) 372 } 373 374 fn import_head_commits_with_tree_conflicts<'a>( 375 &self, 376 head_ids: impl IntoIterator<Item = &'a CommitId>, 377 uses_tree_conflict_format: bool, 378 ) -> BackendResult<()> { 379 let head_ids: HashSet<&CommitId> = head_ids 380 .into_iter() 381 .filter(|&id| *id != self.root_commit_id) 382 .collect(); 383 if head_ids.is_empty() { 384 return Ok(()); 385 } 386 387 // Create no-gc ref even if known to the extras table. Concurrent GC 388 // process might have deleted the no-gc ref. 389 let locked_repo = self.lock_git_repo(); 390 locked_repo 391 .edit_references(head_ids.iter().copied().map(to_no_gc_ref_update)) 392 .map_err(|err| BackendError::Other(Box::new(err)))?; 393 394 // These commits are imported from Git. Make our change ids persist (otherwise 395 // future write_commit() could reassign new change id.) 396 tracing::debug!( 397 heads_count = head_ids.len(), 398 "import extra metadata entries" 399 ); 400 let (table, table_lock) = self.read_extra_metadata_table_locked()?; 401 let mut mut_table = table.start_mutation(); 402 import_extra_metadata_entries_from_heads( 403 &locked_repo, 404 &mut mut_table, 405 &table_lock, 406 &head_ids, 407 uses_tree_conflict_format, 408 )?; 409 self.save_extra_metadata_table(mut_table, &table_lock) 410 } 411 412 fn read_file_sync(&self, id: &FileId) -> BackendResult<Box<dyn Read>> { 413 let git_blob_id = validate_git_object_id(id)?; 414 let locked_repo = self.lock_git_repo(); 415 let mut blob = locked_repo 416 .find_object(git_blob_id) 417 .map_err(|err| map_not_found_err(err, id))? 
418 .try_into_blob() 419 .map_err(|err| to_read_object_err(err, id))?; 420 Ok(Box::new(Cursor::new(blob.take_data()))) 421 } 422 423 fn new_diff_platform(&self) -> BackendResult<gix::diff::blob::Platform> { 424 let attributes = gix::worktree::Stack::new( 425 Path::new(""), 426 gix::worktree::stack::State::AttributesStack(Default::default()), 427 gix::worktree::glob::pattern::Case::Sensitive, 428 Vec::new(), 429 Vec::new(), 430 ); 431 let filter = gix::diff::blob::Pipeline::new( 432 Default::default(), 433 gix::filter::plumbing::Pipeline::new( 434 self.git_repo() 435 .command_context() 436 .map_err(|err| BackendError::Other(Box::new(err)))?, 437 Default::default(), 438 ), 439 Vec::new(), 440 Default::default(), 441 ); 442 Ok(gix::diff::blob::Platform::new( 443 Default::default(), 444 filter, 445 gix::diff::blob::pipeline::Mode::ToGit, 446 attributes, 447 )) 448 } 449 450 fn read_tree_for_commit<'repo>( 451 &self, 452 repo: &'repo gix::Repository, 453 id: &CommitId, 454 ) -> BackendResult<gix::Tree<'repo>> { 455 let tree = self.read_commit(id).block_on()?.root_tree.to_merge(); 456 // TODO(kfm): probably want to do something here if it is a merge 457 let tree_id = tree.first().clone(); 458 let gix_id = validate_git_object_id(&tree_id)?; 459 repo.find_object(gix_id) 460 .map_err(|err| map_not_found_err(err, &tree_id))? 461 .try_into_tree() 462 .map_err(|err| to_read_object_err(err, &tree_id)) 463 } 464} 465 466/// Canonicalizes the given `path` except for the last `".git"` component. 467/// 468/// The last path component matters when opening a Git repo without `core.bare` 469/// config. This config is usually set, but the "repo" tool will set up such 470/// repositories and symlinks. Opening such repo with fully-canonicalized path 471/// would turn a colocated Git repo into a bare repo. 
472pub fn canonicalize_git_repo_path(path: &Path) -> io::Result<PathBuf> { 473 if path.ends_with(".git") { 474 let workdir = path.parent().unwrap(); 475 dunce::canonicalize(workdir).map(|dir| dir.join(".git")) 476 } else { 477 dunce::canonicalize(path) 478 } 479} 480 481fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options { 482 let user_name = settings.user_name(); 483 let user_email = settings.user_email(); 484 gix::open::Options::default() 485 .config_overrides([ 486 // Committer has to be configured to record reflog. Author isn't 487 // needed, but let's copy the same values. 488 format!("author.name={user_name}"), 489 format!("author.email={user_email}"), 490 format!("committer.name={user_name}"), 491 format!("committer.email={user_email}"), 492 ]) 493 // The git_target path should point the repository, not the working directory. 494 .open_path_as_is(true) 495} 496 497/// Reads the `jj:trees` header from the commit. 498fn root_tree_from_header(git_commit: &CommitRef) -> Result<Option<MergedTreeId>, ()> { 499 for (key, value) in &git_commit.extra_headers { 500 if *key == JJ_TREES_COMMIT_HEADER { 501 let mut tree_ids = SmallVec::new(); 502 for hex in str::from_utf8(value.as_ref()).or(Err(()))?.split(' ') { 503 let tree_id = TreeId::try_from_hex(hex).or(Err(()))?; 504 if tree_id.as_bytes().len() != HASH_LENGTH { 505 return Err(()); 506 } 507 tree_ids.push(tree_id); 508 } 509 if tree_ids.len() % 2 == 0 { 510 return Err(()); 511 } 512 return Ok(Some(MergedTreeId::Merge(Merge::from_vec(tree_ids)))); 513 } 514 } 515 Ok(None) 516} 517 518fn commit_from_git_without_root_parent( 519 id: &CommitId, 520 git_object: &gix::Object, 521 uses_tree_conflict_format: bool, 522 is_shallow: bool, 523) -> BackendResult<Commit> { 524 let commit = git_object 525 .try_to_commit_ref() 526 .map_err(|err| to_read_object_err(err, id))?; 527 528 // We reverse the bits of the commit id to create the change id. 
We don't want 529 // to use the first bytes unmodified because then it would be ambiguous 530 // if a given hash prefix refers to the commit id or the change id. It 531 // would have been enough to pick the last 16 bytes instead of the 532 // leading 16 bytes to address that. We also reverse the bits to make it less 533 // likely that users depend on any relationship between the two ids. 534 let change_id = ChangeId::new( 535 id.as_bytes()[4..HASH_LENGTH] 536 .iter() 537 .rev() 538 .map(|b| b.reverse_bits()) 539 .collect(), 540 ); 541 // shallow commits don't have parents their parents actually fetched, so we 542 // discard them here 543 // TODO: This causes issues when a shallow repository is deepened/unshallowed 544 let parents = if is_shallow { 545 vec![] 546 } else { 547 commit 548 .parents() 549 .map(|oid| CommitId::from_bytes(oid.as_bytes())) 550 .collect_vec() 551 }; 552 let tree_id = TreeId::from_bytes(commit.tree().as_bytes()); 553 // If this commit is a conflict, we'll update the root tree later, when we read 554 // the extra metadata. 555 let root_tree = root_tree_from_header(&commit) 556 .map_err(|()| to_read_object_err("Invalid jj:trees header", id))?; 557 let root_tree = root_tree.unwrap_or_else(|| { 558 if uses_tree_conflict_format { 559 MergedTreeId::resolved(tree_id) 560 } else { 561 MergedTreeId::Legacy(tree_id) 562 } 563 }); 564 // Use lossy conversion as commit message with "mojibake" is still better than 565 // nothing. 566 // TODO: what should we do with commit.encoding? 567 let description = String::from_utf8_lossy(commit.message).into_owned(); 568 let author = signature_from_git(commit.author()); 569 let committer = signature_from_git(commit.committer()); 570 571 // If the commit is signed, extract both the signature and the signed data 572 // (which is the commit buffer with the gpgsig header omitted). 573 // We have to re-parse the raw commit data because gix CommitRef does not give 574 // us the sogned data, only the signature. 
575 // Ideally, we could use try_to_commit_ref_iter at the beginning of this 576 // function and extract everything from that. For now, this works 577 let secure_sig = commit 578 .extra_headers 579 .iter() 580 // gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too 581 .any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256") 582 .then(|| CommitRefIter::signature(&git_object.data)) 583 .transpose() 584 .map_err(|err| to_read_object_err(err, id))? 585 .flatten() 586 .map(|(sig, data)| SecureSig { 587 data: data.to_bstring().into(), 588 sig: sig.into_owned().into(), 589 }); 590 591 Ok(Commit { 592 parents, 593 predecessors: vec![], 594 // If this commit has associated extra metadata, we may reset this later. 595 root_tree, 596 change_id, 597 description, 598 author, 599 committer, 600 secure_sig, 601 }) 602} 603 604const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING"; 605 606fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature { 607 let name = signature.name; 608 let name = if name != EMPTY_STRING_PLACEHOLDER { 609 String::from_utf8_lossy(name).into_owned() 610 } else { 611 "".to_string() 612 }; 613 let email = signature.email; 614 let email = if email != EMPTY_STRING_PLACEHOLDER { 615 String::from_utf8_lossy(email).into_owned() 616 } else { 617 "".to_string() 618 }; 619 let timestamp = MillisSinceEpoch(signature.time.seconds * 1000); 620 let tz_offset = signature.time.offset.div_euclid(60); // in minutes 621 Signature { 622 name, 623 email, 624 timestamp: Timestamp { 625 timestamp, 626 tz_offset, 627 }, 628 } 629} 630 631fn signature_to_git(signature: &Signature) -> gix::actor::SignatureRef<'_> { 632 // git does not support empty names or emails 633 let name = if !signature.name.is_empty() { 634 &signature.name 635 } else { 636 EMPTY_STRING_PLACEHOLDER 637 }; 638 let email = if !signature.email.is_empty() { 639 &signature.email 640 } else { 641 EMPTY_STRING_PLACEHOLDER 642 }; 643 let time = 
gix::date::Time::new( 644 signature.timestamp.timestamp.0.div_euclid(1000), 645 signature.timestamp.tz_offset * 60, // in seconds 646 ); 647 gix::actor::SignatureRef { 648 name: name.into(), 649 email: email.into(), 650 time, 651 } 652} 653 654fn serialize_extras(commit: &Commit) -> Vec<u8> { 655 let mut proto = crate::protos::git_store::Commit { 656 change_id: commit.change_id.to_bytes(), 657 ..Default::default() 658 }; 659 if let MergedTreeId::Merge(tree_ids) = &commit.root_tree { 660 proto.uses_tree_conflict_format = true; 661 if !tree_ids.is_resolved() { 662 proto.root_tree = tree_ids.iter().map(|r| r.to_bytes()).collect(); 663 } 664 } 665 for predecessor in &commit.predecessors { 666 proto.predecessors.push(predecessor.to_bytes()); 667 } 668 proto.encode_to_vec() 669} 670 671fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) { 672 let proto = crate::protos::git_store::Commit::decode(bytes).unwrap(); 673 commit.change_id = ChangeId::new(proto.change_id); 674 if proto.uses_tree_conflict_format { 675 if !proto.root_tree.is_empty() { 676 let merge_builder: MergeBuilder<_> = proto 677 .root_tree 678 .iter() 679 .map(|id_bytes| TreeId::from_bytes(id_bytes)) 680 .collect(); 681 let merge = merge_builder.build(); 682 // Check that the trees from the extras match the one we found in the jj:trees 683 // header 684 if let MergedTreeId::Merge(existing_merge) = &commit.root_tree { 685 assert!(existing_merge.is_resolved() || *existing_merge == merge); 686 } 687 commit.root_tree = MergedTreeId::Merge(merge); 688 } else { 689 // uses_tree_conflict_format was set but there was no root_tree override in the 690 // proto, which means we should just promote the tree id from the 691 // git commit to be a known-conflict-free tree 692 let MergedTreeId::Legacy(legacy_tree_id) = &commit.root_tree else { 693 panic!("root tree should have been initialized to a legacy id"); 694 }; 695 commit.root_tree = MergedTreeId::resolved(legacy_tree_id.clone()); 696 } 697 } 698 for predecessor 
in &proto.predecessors { 699 commit.predecessors.push(CommitId::from_bytes(predecessor)); 700 } 701} 702 703/// Returns `RefEdit` that will create a ref in `refs/jj/keep` if not exist. 704/// Used for preventing GC of commits we create. 705fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit { 706 let name = format!("{NO_GC_REF_NAMESPACE}{id}"); 707 let new = gix::refs::Target::Object(validate_git_object_id(id).unwrap()); 708 let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(new.clone()); 709 gix::refs::transaction::RefEdit { 710 change: gix::refs::transaction::Change::Update { 711 log: gix::refs::transaction::LogChange { 712 message: "used by jj".into(), 713 ..Default::default() 714 }, 715 expected, 716 new, 717 }, 718 name: name.try_into().unwrap(), 719 deref: false, 720 } 721} 722 723fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit { 724 let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(git_ref.target); 725 gix::refs::transaction::RefEdit { 726 change: gix::refs::transaction::Change::Delete { 727 expected, 728 log: gix::refs::transaction::RefLog::AndReference, 729 }, 730 name: git_ref.name, 731 deref: false, 732 } 733} 734 735/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other 736/// unreachable and non-head refs. 737fn recreate_no_gc_refs( 738 git_repo: &gix::Repository, 739 new_heads: impl IntoIterator<Item = CommitId>, 740 keep_newer: SystemTime, 741) -> BackendResult<()> { 742 // Calculate diff between existing no-gc refs and new heads. 
743 let new_heads: HashSet<CommitId> = new_heads.into_iter().collect(); 744 let mut no_gc_refs_to_keep_count: usize = 0; 745 let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new(); 746 let git_references = git_repo 747 .references() 748 .map_err(|err| BackendError::Other(err.into()))?; 749 let no_gc_refs_iter = git_references 750 .prefixed(NO_GC_REF_NAMESPACE) 751 .map_err(|err| BackendError::Other(err.into()))?; 752 for git_ref in no_gc_refs_iter { 753 let git_ref = git_ref.map_err(BackendError::Other)?.detach(); 754 let oid = git_ref.target.try_id().ok_or_else(|| { 755 let name = git_ref.name.as_bstr(); 756 BackendError::Other(format!("Symbolic no-gc ref found: {name}").into()) 757 })?; 758 let id = CommitId::from_bytes(oid.as_bytes()); 759 let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex(); 760 if new_heads.contains(&id) && name_good { 761 no_gc_refs_to_keep_count += 1; 762 continue; 763 } 764 // Check timestamp of loose ref, but this is still racy on re-import 765 // because: 766 // - existing packed ref won't be demoted to loose ref 767 // - existing loose ref won't be touched 768 // 769 // TODO: might be better to switch to a dummy merge, where new no-gc ref 770 // will always have a unique name. Doing that with the current 771 // ref-per-head strategy would increase the number of the no-gc refs. 772 // https://github.com/jj-vcs/jj/pull/2659#issuecomment-1837057782 773 let loose_ref_path = git_repo.path().join(git_ref.name.to_path()); 774 if let Ok(metadata) = loose_ref_path.metadata() { 775 let mtime = metadata.modified().expect("unsupported platform?"); 776 if mtime > keep_newer { 777 tracing::trace!(?git_ref, "not deleting new"); 778 no_gc_refs_to_keep_count += 1; 779 continue; 780 } 781 } 782 // Also deletes no-gc ref of random name created by old jj. 
783 tracing::trace!(?git_ref, ?name_good, "will delete"); 784 no_gc_refs_to_delete.push(git_ref); 785 } 786 tracing::info!( 787 new_heads_count = new_heads.len(), 788 no_gc_refs_to_keep_count, 789 no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(), 790 "collected reachable refs" 791 ); 792 793 // It's slow to delete packed refs one by one, so update refs all at once. 794 let ref_edits = itertools::chain( 795 no_gc_refs_to_delete.into_iter().map(to_ref_deletion), 796 new_heads.iter().map(to_no_gc_ref_update), 797 ); 798 git_repo 799 .edit_references(ref_edits) 800 .map_err(|err| BackendError::Other(err.into()))?; 801 802 Ok(()) 803} 804 805fn run_git_gc(git_dir: &Path) -> Result<(), GitGcError> { 806 let mut git = Command::new("git"); 807 git.arg("--git-dir=."); // turn off discovery 808 git.arg("gc"); 809 // Don't specify it by GIT_DIR/--git-dir. On Windows, the path could be 810 // canonicalized as UNC path, which wouldn't be supported by git. 811 git.current_dir(git_dir); 812 // TODO: pass output to UI layer instead of printing directly here 813 let status = git.status().map_err(GitGcError::GcCommand)?; 814 if !status.success() { 815 return Err(GitGcError::GcCommandErrorStatus(status)); 816 } 817 Ok(()) 818} 819 820fn validate_git_object_id(id: &impl ObjectId) -> BackendResult<gix::ObjectId> { 821 if id.as_bytes().len() != HASH_LENGTH { 822 return Err(BackendError::InvalidHashLength { 823 expected: HASH_LENGTH, 824 actual: id.as_bytes().len(), 825 object_type: id.object_type(), 826 hash: id.hex(), 827 }); 828 } 829 Ok(id.as_bytes().try_into().unwrap()) 830} 831 832fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError { 833 if matches!(err, gix::object::find::existing::Error::NotFound { .. 
}) { 834 BackendError::ObjectNotFound { 835 object_type: id.object_type(), 836 hash: id.hex(), 837 source: Box::new(err), 838 } 839 } else { 840 to_read_object_err(err, id) 841 } 842} 843 844fn to_read_object_err( 845 err: impl Into<Box<dyn std::error::Error + Send + Sync>>, 846 id: &impl ObjectId, 847) -> BackendError { 848 BackendError::ReadObject { 849 object_type: id.object_type(), 850 hash: id.hex(), 851 source: err.into(), 852 } 853} 854 855fn to_invalid_utf8_err(source: str::Utf8Error, id: &impl ObjectId) -> BackendError { 856 BackendError::InvalidUtf8 { 857 object_type: id.object_type(), 858 hash: id.hex(), 859 source, 860 } 861} 862 863fn import_extra_metadata_entries_from_heads( 864 git_repo: &gix::Repository, 865 mut_table: &mut MutableTable, 866 _table_lock: &FileLock, 867 head_ids: &HashSet<&CommitId>, 868 uses_tree_conflict_format: bool, 869) -> BackendResult<()> { 870 let shallow_commits = git_repo 871 .shallow_commits() 872 .map_err(|e| BackendError::Other(Box::new(e)))?; 873 874 let mut work_ids = head_ids 875 .iter() 876 .filter(|&id| mut_table.get_value(id.as_bytes()).is_none()) 877 .map(|&id| id.clone()) 878 .collect_vec(); 879 while let Some(id) = work_ids.pop() { 880 let git_object = git_repo 881 .find_object(validate_git_object_id(&id)?) 882 .map_err(|err| map_not_found_err(err, &id))?; 883 let is_shallow = shallow_commits 884 .as_ref() 885 .is_some_and(|shallow| shallow.contains(&git_object.id)); 886 // TODO(#1624): Should we read the root tree here and check if it has a 887 // `.jjconflict-...` entries? That could happen if the user used `git` to e.g. 888 // change the description of a commit with tree-level conflicts. 
889 let commit = commit_from_git_without_root_parent( 890 &id, 891 &git_object, 892 uses_tree_conflict_format, 893 is_shallow, 894 )?; 895 mut_table.add_entry(id.to_bytes(), serialize_extras(&commit)); 896 work_ids.extend( 897 commit 898 .parents 899 .into_iter() 900 .filter(|id| mut_table.get_value(id.as_bytes()).is_none()), 901 ); 902 } 903 Ok(()) 904} 905 906impl Debug for GitBackend { 907 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { 908 f.debug_struct("GitBackend") 909 .field("path", &self.git_repo_path()) 910 .finish() 911 } 912} 913 914#[async_trait] 915impl Backend for GitBackend { 916 fn as_any(&self) -> &dyn Any { 917 self 918 } 919 920 fn name(&self) -> &str { 921 Self::name() 922 } 923 924 fn commit_id_length(&self) -> usize { 925 HASH_LENGTH 926 } 927 928 fn change_id_length(&self) -> usize { 929 CHANGE_ID_LENGTH 930 } 931 932 fn root_commit_id(&self) -> &CommitId { 933 &self.root_commit_id 934 } 935 936 fn root_change_id(&self) -> &ChangeId { 937 &self.root_change_id 938 } 939 940 fn empty_tree_id(&self) -> &TreeId { 941 &self.empty_tree_id 942 } 943 944 fn concurrency(&self) -> usize { 945 1 946 } 947 948 async fn read_file(&self, _path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>> { 949 self.read_file_sync(id) 950 } 951 952 async fn write_file( 953 &self, 954 _path: &RepoPath, 955 contents: &mut (dyn Read + Send), 956 ) -> BackendResult<FileId> { 957 let mut bytes = Vec::new(); 958 contents.read_to_end(&mut bytes).unwrap(); 959 let locked_repo = self.lock_git_repo(); 960 let oid = locked_repo 961 .write_blob(bytes) 962 .map_err(|err| BackendError::WriteObject { 963 object_type: "file", 964 source: Box::new(err), 965 })?; 966 Ok(FileId::new(oid.as_bytes().to_vec())) 967 } 968 969 async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> { 970 let git_blob_id = validate_git_object_id(id)?; 971 let locked_repo = self.lock_git_repo(); 972 let mut blob = locked_repo 973 .find_object(git_blob_id) 974 
.map_err(|err| map_not_found_err(err, id))? 975 .try_into_blob() 976 .map_err(|err| to_read_object_err(err, id))?; 977 let target = String::from_utf8(blob.take_data()) 978 .map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?; 979 Ok(target) 980 } 981 982 async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> { 983 let locked_repo = self.lock_git_repo(); 984 let oid = 985 locked_repo 986 .write_blob(target.as_bytes()) 987 .map_err(|err| BackendError::WriteObject { 988 object_type: "symlink", 989 source: Box::new(err), 990 })?; 991 Ok(SymlinkId::new(oid.as_bytes().to_vec())) 992 } 993 994 async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> { 995 if id == &self.empty_tree_id { 996 return Ok(Tree::default()); 997 } 998 let git_tree_id = validate_git_object_id(id)?; 999 1000 let locked_repo = self.lock_git_repo(); 1001 let git_tree = locked_repo 1002 .find_object(git_tree_id) 1003 .map_err(|err| map_not_found_err(err, id))? 
1004 .try_into_tree() 1005 .map_err(|err| to_read_object_err(err, id))?; 1006 let mut tree = Tree::default(); 1007 for entry in git_tree.iter() { 1008 let entry = entry.map_err(|err| to_read_object_err(err, id))?; 1009 let name = 1010 str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?; 1011 let (name, value) = match entry.mode().kind() { 1012 gix::object::tree::EntryKind::Tree => { 1013 let id = TreeId::from_bytes(entry.oid().as_bytes()); 1014 (name, TreeValue::Tree(id)) 1015 } 1016 gix::object::tree::EntryKind::Blob => { 1017 let id = FileId::from_bytes(entry.oid().as_bytes()); 1018 if let Some(basename) = name.strip_suffix(CONFLICT_SUFFIX) { 1019 ( 1020 basename, 1021 TreeValue::Conflict(ConflictId::from_bytes(entry.oid().as_bytes())), 1022 ) 1023 } else { 1024 ( 1025 name, 1026 TreeValue::File { 1027 id, 1028 executable: false, 1029 }, 1030 ) 1031 } 1032 } 1033 gix::object::tree::EntryKind::BlobExecutable => { 1034 let id = FileId::from_bytes(entry.oid().as_bytes()); 1035 ( 1036 name, 1037 TreeValue::File { 1038 id, 1039 executable: true, 1040 }, 1041 ) 1042 } 1043 gix::object::tree::EntryKind::Link => { 1044 let id = SymlinkId::from_bytes(entry.oid().as_bytes()); 1045 (name, TreeValue::Symlink(id)) 1046 } 1047 gix::object::tree::EntryKind::Commit => { 1048 let id = CommitId::from_bytes(entry.oid().as_bytes()); 1049 (name, TreeValue::GitSubmodule(id)) 1050 } 1051 }; 1052 tree.set(RepoPathComponentBuf::from(name), value); 1053 } 1054 Ok(tree) 1055 } 1056 1057 async fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> { 1058 // Tree entries to be written must be sorted by Entry::filename(), which 1059 // is slightly different from the order of our backend::Tree. 
1060 let entries = contents 1061 .entries() 1062 .map(|entry| { 1063 let name = entry.name().as_internal_str(); 1064 match entry.value() { 1065 TreeValue::File { 1066 id, 1067 executable: false, 1068 } => gix::objs::tree::Entry { 1069 mode: gix::object::tree::EntryKind::Blob.into(), 1070 filename: name.into(), 1071 oid: id.as_bytes().try_into().unwrap(), 1072 }, 1073 TreeValue::File { 1074 id, 1075 executable: true, 1076 } => gix::objs::tree::Entry { 1077 mode: gix::object::tree::EntryKind::BlobExecutable.into(), 1078 filename: name.into(), 1079 oid: id.as_bytes().try_into().unwrap(), 1080 }, 1081 TreeValue::Symlink(id) => gix::objs::tree::Entry { 1082 mode: gix::object::tree::EntryKind::Link.into(), 1083 filename: name.into(), 1084 oid: id.as_bytes().try_into().unwrap(), 1085 }, 1086 TreeValue::Tree(id) => gix::objs::tree::Entry { 1087 mode: gix::object::tree::EntryKind::Tree.into(), 1088 filename: name.into(), 1089 oid: id.as_bytes().try_into().unwrap(), 1090 }, 1091 TreeValue::GitSubmodule(id) => gix::objs::tree::Entry { 1092 mode: gix::object::tree::EntryKind::Commit.into(), 1093 filename: name.into(), 1094 oid: id.as_bytes().try_into().unwrap(), 1095 }, 1096 TreeValue::Conflict(id) => gix::objs::tree::Entry { 1097 mode: gix::object::tree::EntryKind::Blob.into(), 1098 filename: (name.to_owned() + CONFLICT_SUFFIX).into(), 1099 oid: id.as_bytes().try_into().unwrap(), 1100 }, 1101 } 1102 }) 1103 .sorted_unstable() 1104 .collect(); 1105 let locked_repo = self.lock_git_repo(); 1106 let oid = locked_repo 1107 .write_object(gix::objs::Tree { entries }) 1108 .map_err(|err| BackendError::WriteObject { 1109 object_type: "tree", 1110 source: Box::new(err), 1111 })?; 1112 Ok(TreeId::from_bytes(oid.as_bytes())) 1113 } 1114 1115 fn read_conflict(&self, _path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict> { 1116 let mut file = self.read_file_sync(&FileId::new(id.to_bytes()))?; 1117 let mut data = String::new(); 1118 file.read_to_string(&mut data) 1119 .map_err(|err| 
BackendError::ReadObject { 1120 object_type: "conflict".to_owned(), 1121 hash: id.hex(), 1122 source: err.into(), 1123 })?; 1124 let json: serde_json::Value = serde_json::from_str(&data).unwrap(); 1125 Ok(Conflict { 1126 removes: conflict_term_list_from_json(json.get("removes").unwrap()), 1127 adds: conflict_term_list_from_json(json.get("adds").unwrap()), 1128 }) 1129 } 1130 1131 fn write_conflict(&self, _path: &RepoPath, conflict: &Conflict) -> BackendResult<ConflictId> { 1132 let json = serde_json::json!({ 1133 "removes": conflict_term_list_to_json(&conflict.removes), 1134 "adds": conflict_term_list_to_json(&conflict.adds), 1135 }); 1136 let json_string = json.to_string(); 1137 let bytes = json_string.as_bytes(); 1138 let locked_repo = self.lock_git_repo(); 1139 let oid = locked_repo 1140 .write_blob(bytes) 1141 .map_err(|err| BackendError::WriteObject { 1142 object_type: "conflict", 1143 source: Box::new(err), 1144 })?; 1145 Ok(ConflictId::from_bytes(oid.as_bytes())) 1146 } 1147 1148 #[tracing::instrument(skip(self))] 1149 async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> { 1150 if *id == self.root_commit_id { 1151 return Ok(make_root_commit( 1152 self.root_change_id().clone(), 1153 self.empty_tree_id.clone(), 1154 )); 1155 } 1156 let git_commit_id = validate_git_object_id(id)?; 1157 1158 let mut commit = { 1159 let locked_repo = self.lock_git_repo(); 1160 let git_object = locked_repo 1161 .find_object(git_commit_id) 1162 .map_err(|err| map_not_found_err(err, id))?; 1163 let is_shallow = locked_repo 1164 .shallow_commits() 1165 .ok() 1166 .flatten() 1167 .is_some_and(|shallow| shallow.contains(&git_object.id)); 1168 commit_from_git_without_root_parent(id, &git_object, false, is_shallow)? 
/// Writes `contents` as a Git commit object and stores its serialized extras
/// in the extra metadata table under the resulting commit id.
///
/// If `sign_with` is given, it is called with the serialized unsigned commit
/// and the returned signature is added as a `gpgsig` header.
///
/// Returns the id of the written commit together with `contents`, whose
/// `secure_sig` (when signing) and committer timestamp have been updated to
/// match what was written.
///
/// # Errors
///
/// Returns `BackendError::Other` if `contents` has no parents and
/// `BackendError::Unsupported` if the root commit is listed together with
/// other parents. Failures from signing or from writing the commit object are
/// returned as `BackendError::WriteObject`; failures from id validation, the
/// no-GC ref update and the extras table are propagated as well.
///
/// # Panics
///
/// Panics if `contents.secure_sig` is already set.
async fn write_commit(
    &self,
    mut contents: Commit,
    mut sign_with: Option<&mut SigningFn>,
) -> BackendResult<(CommitId, Commit)> {
    assert!(contents.secure_sig.is_none(), "commit.secure_sig was set");

    let locked_repo = self.lock_git_repo();
    // A resolved root tree is referenced directly; an unresolved merge of
    // trees is first written out as a dedicated conflict tree.
    let git_tree_id = match &contents.root_tree {
        MergedTreeId::Legacy(tree_id) => validate_git_object_id(tree_id)?,
        MergedTreeId::Merge(tree_ids) => match tree_ids.as_resolved() {
            Some(tree_id) => validate_git_object_id(tree_id)?,
            None => write_tree_conflict(&locked_repo, tree_ids)?,
        },
    };
    let author = signature_to_git(&contents.author);
    // Mutable because the loop below may move the committer time backwards.
    let mut committer = signature_to_git(&contents.committer);
    let message = &contents.description;
    if contents.parents.is_empty() {
        return Err(BackendError::Other(
            "Cannot write a commit with no parents".into(),
        ));
    }
    let mut parents = SmallVec::new();
    for parent_id in &contents.parents {
        if *parent_id == self.root_commit_id {
            // Git doesn't have a root commit, so if the parent is the root commit, we don't
            // add it to the list of parents to write in the Git commit. We also check that
            // there are no other parents since Git cannot represent a merge between a root
            // commit and another commit.
            if contents.parents.len() > 1 {
                return Err(BackendError::Unsupported(
                    "The Git backend does not support creating merge commits with the root \
                     commit as one of the parents."
                        .to_owned(),
                ));
            }
        } else {
            parents.push(validate_git_object_id(parent_id)?);
        }
    }
    // For an unresolved merge, the hex ids of all its trees are recorded,
    // space-separated, in an extra commit header.
    let mut extra_headers = vec![];
    if let MergedTreeId::Merge(tree_ids) = &contents.root_tree {
        if !tree_ids.is_resolved() {
            let value = tree_ids.iter().map(|id| id.hex()).join(" ").into_bytes();
            extra_headers.push((
                BString::new(JJ_TREES_COMMIT_HEADER.to_vec()),
                BString::new(value),
            ));
        }
    }
    let extras = serialize_extras(&contents);

    // If two writers write commits of the same id with different metadata, they
    // will both succeed and the metadata entries will be "merged" later. Since
    // metadata entry is keyed by the commit id, one of the entries would be lost.
    // To prevent such race condition locally, we extend the scope covered by the
    // table lock. This is still racy if multiple machines are involved and the
    // repository is rsync-ed.
    let (table, table_lock) = self.read_extra_metadata_table_locked()?;
    // Each iteration builds and writes a commit object; the loop repeats with
    // an earlier committer time while the resulting id is already associated
    // with different extras.
    let id = loop {
        let mut commit = gix::objs::Commit {
            message: message.to_owned().into(),
            tree: git_tree_id,
            author: author.into(),
            committer: committer.into(),
            encoding: None,
            parents: parents.clone(),
            extra_headers: extra_headers.clone(),
        };

        if let Some(sign) = &mut sign_with {
            // we don't use gix pool, but at least use their heuristic
            let mut data = Vec::with_capacity(512);
            // The signed payload is the commit serialized before the `gpgsig`
            // header is added.
            commit.write_to(&mut data).unwrap();

            let sig = sign(&data).map_err(|err| BackendError::WriteObject {
                object_type: "commit",
                source: Box::new(err),
            })?;
            commit
                .extra_headers
                .push(("gpgsig".into(), sig.clone().into()));
            contents.secure_sig = Some(SecureSig { data, sig });
        }

        let git_id =
            locked_repo
                .write_object(&commit)
                .map_err(|err| BackendError::WriteObject {
                    object_type: "commit",
                    source: Box::new(err),
                })?;

        match table.get_value(git_id.as_bytes()) {
            Some(existing_extras) if existing_extras != extras => {
                // It's possible a commit already exists with the same commit id but different
                // change id. Adjust the timestamp until this is no longer the case.
                committer.time.seconds -= 1;
            }
            _ => break CommitId::from_bytes(git_id.as_bytes()),
        }
    };

    // Everything up to this point had no permanent effect on the repo except
    // GC-able objects
    locked_repo
        .edit_reference(to_no_gc_ref_update(&id))
        .map_err(|err| BackendError::Other(Box::new(err)))?;

    // Update the signature to match the one that was actually written to the object
    // store
    // The timestamp is rebuilt from the Git committer time in whole seconds.
    contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000);
    let mut mut_table = table.start_mutation();
    mut_table.add_entry(id.to_bytes(), extras);
    self.save_extra_metadata_table(mut_table, &table_lock)?;
    Ok((id, contents))
}
1327 } = change 1328 else { 1329 return Ok(None); 1330 }; 1331 1332 let source = str::from_utf8(source_location) 1333 .map_err(|err| to_invalid_utf8_err(err, root_id))?; 1334 let dest = str::from_utf8(dest_location) 1335 .map_err(|err| to_invalid_utf8_err(err, head_id))?; 1336 1337 let target = RepoPathBuf::from_internal_string(dest); 1338 if !paths.map_or(true, |paths| paths.contains(&target)) { 1339 return Ok(None); 1340 } 1341 1342 Ok(Some(CopyRecord { 1343 target, 1344 target_commit: head_id.clone(), 1345 source: RepoPathBuf::from_internal_string(source), 1346 source_file: FileId::from_bytes(source_id.as_bytes()), 1347 source_commit: root_id.clone(), 1348 })) 1349 }; 1350 1351 let mut records: Vec<BackendResult<CopyRecord>> = Vec::new(); 1352 root_tree 1353 .changes() 1354 .map_err(|err| BackendError::Other(err.into()))? 1355 .options(|opts| { 1356 opts.track_path().track_rewrites(Some(gix::diff::Rewrites { 1357 copies: Some(gix::diff::rewrites::Copies { 1358 source: gix::diff::rewrites::CopySource::FromSetOfModifiedFiles, 1359 percentage: Some(0.5), 1360 }), 1361 percentage: Some(0.5), 1362 limit: 1000, 1363 track_empty: false, 1364 })); 1365 }) 1366 .for_each_to_obtain_tree_with_cache( 1367 &head_tree, 1368 &mut self.new_diff_platform()?, 1369 |change| -> BackendResult<_> { 1370 match change_to_copy_record(change) { 1371 Ok(None) => {} 1372 Ok(Some(change)) => records.push(Ok(change)), 1373 Err(err) => records.push(Err(err)), 1374 } 1375 Ok(gix::object::tree::diff::Action::Continue) 1376 }, 1377 ) 1378 .map_err(|err| BackendError::Other(err.into()))?; 1379 Ok(Box::pin(futures::stream::iter(records))) 1380 } 1381 1382 #[tracing::instrument(skip(self, index))] 1383 fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> { 1384 let git_repo = self.lock_git_repo(); 1385 let new_heads = index 1386 .all_heads_for_gc() 1387 .map_err(|err| BackendError::Other(err.into()))? 
/// Write a tree conflict as a special tree with `.jjconflict-base-N` and
/// `.jjconflict-side-N` subtrees. This ensures that the parts are not GC'd.
///
/// The removed terms of `conflict` become the `.jjconflict-base-N` entries and
/// the added terms become the `.jjconflict-side-N` entries, where `N` is the
/// index of the term within its list. A `README` blob explaining the layout
/// is stored next to them.
///
/// Returns the id of the written tree object.
///
/// # Errors
///
/// Returns `BackendError::Other` if the `README` blob cannot be written and
/// `BackendError::WriteObject` if the tree itself cannot be written.
fn write_tree_conflict(
    repo: &gix::Repository,
    conflict: &Merge<TreeId>,
) -> BackendResult<gix::ObjectId> {
    // Tree entries to be written must be sorted by Entry::filename().
    let mut entries = itertools::chain(
        conflict
            .removes()
            .enumerate()
            .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)),
        conflict
            .adds()
            .enumerate()
            .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)),
    )
    .map(|(name, tree_id)| gix::objs::tree::Entry {
        mode: gix::object::tree::EntryKind::Tree.into(),
        filename: name.into(),
        oid: tree_id.as_bytes().try_into().unwrap(),
    })
    .collect_vec();
    let readme_id = repo
        .write_blob(
            r#"This commit was made by jj, https://github.com/jj-vcs/jj.
The commit contains file conflicts, and therefore looks wrong when used with plain
Git or other tools that are unfamiliar with jj.

The .jjconflict-* directories represent the different inputs to the conflict.
For details, see
https://jj-vcs.github.io/jj/prerelease/git-compatibility/#format-mapping-details

If you see this file in your working copy, it probably means that you used a
regular `git` command to check out a conflicted commit. Use `jj abandon` to
recover.
"#,
        )
        .map_err(|err| {
            BackendError::Other(format!("Failed to write README for conflict tree: {err}").into())
        })?
        .detach();
    entries.push(gix::objs::tree::Entry {
        mode: gix::object::tree::EntryKind::Blob.into(),
        filename: "README".into(),
        oid: readme_id,
    });
    // Sort only after the README entry has been appended so that the whole
    // list is in the required order.
    entries.sort_unstable();
    let id = repo
        .write_object(gix::objs::Tree { entries })
        .map_err(|err| BackendError::WriteObject {
            object_type: "tree",
            source: Box::new(err),
        })?;
    Ok(id.detach())
}
TreeValue::GitSubmodule(id) => serde_json::json!({ 1500 "submodule_id": id.hex(), 1501 }), 1502 TreeValue::Conflict(id) => serde_json::json!({ 1503 "conflict_id": id.hex(), 1504 }), 1505 } 1506} 1507 1508fn tree_value_from_json(json: &serde_json::Value) -> TreeValue { 1509 if let Some(json_file) = json.get("file") { 1510 TreeValue::File { 1511 id: FileId::new(bytes_vec_from_json(json_file.get("id").unwrap())), 1512 executable: json_file.get("executable").unwrap().as_bool().unwrap(), 1513 } 1514 } else if let Some(json_id) = json.get("symlink_id") { 1515 TreeValue::Symlink(SymlinkId::new(bytes_vec_from_json(json_id))) 1516 } else if let Some(json_id) = json.get("tree_id") { 1517 TreeValue::Tree(TreeId::new(bytes_vec_from_json(json_id))) 1518 } else if let Some(json_id) = json.get("submodule_id") { 1519 TreeValue::GitSubmodule(CommitId::new(bytes_vec_from_json(json_id))) 1520 } else if let Some(json_id) = json.get("conflict_id") { 1521 TreeValue::Conflict(ConflictId::new(bytes_vec_from_json(json_id))) 1522 } else { 1523 panic!("unexpected json value in conflict: {json:#?}"); 1524 } 1525} 1526 1527fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> { 1528 hex::decode(value.as_str().unwrap()).unwrap() 1529} 1530 1531#[cfg(test)] 1532mod tests { 1533 use assert_matches::assert_matches; 1534 use git2::Oid; 1535 use hex::ToHex; 1536 use pollster::FutureExt; 1537 use test_case::test_case; 1538 1539 use super::*; 1540 use crate::config::StackedConfig; 1541 use crate::content_hash::blake2b_hash; 1542 1543 #[test_case(false; "legacy tree format")] 1544 #[test_case(true; "tree-level conflict format")] 1545 fn read_plain_git_commit(uses_tree_conflict_format: bool) { 1546 let settings = user_settings(); 1547 let temp_dir = testutils::new_temp_dir(); 1548 let store_path = temp_dir.path(); 1549 let git_repo_path = temp_dir.path().join("git"); 1550 let git_repo = git2::Repository::init(git_repo_path).unwrap(); 1551 1552 // Add a commit with some files in 1553 let blob1 = 
git_repo.blob(b"content1").unwrap(); 1554 let blob2 = git_repo.blob(b"normal").unwrap(); 1555 let mut dir_tree_builder = git_repo.treebuilder(None).unwrap(); 1556 dir_tree_builder.insert("normal", blob1, 0o100644).unwrap(); 1557 dir_tree_builder.insert("symlink", blob2, 0o120000).unwrap(); 1558 let dir_tree_id = dir_tree_builder.write().unwrap(); 1559 let mut root_tree_builder = git_repo.treebuilder(None).unwrap(); 1560 root_tree_builder 1561 .insert("dir", dir_tree_id, 0o040000) 1562 .unwrap(); 1563 let root_tree_id = root_tree_builder.write().unwrap(); 1564 let git_author = git2::Signature::new( 1565 "git author", 1566 "git.author@example.com", 1567 &git2::Time::new(1000, 60), 1568 ) 1569 .unwrap(); 1570 let git_committer = git2::Signature::new( 1571 "git committer", 1572 "git.committer@example.com", 1573 &git2::Time::new(2000, -480), 1574 ) 1575 .unwrap(); 1576 let git_tree = git_repo.find_tree(root_tree_id).unwrap(); 1577 let git_commit_id = git_repo 1578 .commit( 1579 None, 1580 &git_author, 1581 &git_committer, 1582 "git commit message", 1583 &git_tree, 1584 &[], 1585 ) 1586 .unwrap(); 1587 let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263"); 1588 // The change id is the leading reverse bits of the commit id 1589 let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25"); 1590 // Check that the git commit above got the hash we expect 1591 assert_eq!(git_commit_id.as_bytes(), commit_id.as_bytes()); 1592 1593 // Add an empty commit on top 1594 let git_commit_id2 = git_repo 1595 .commit( 1596 None, 1597 &git_author, 1598 &git_committer, 1599 "git commit message 2", 1600 &git_tree, 1601 &[&git_repo.find_commit(git_commit_id).unwrap()], 1602 ) 1603 .unwrap(); 1604 let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes()); 1605 1606 let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap(); 1607 1608 // Import the head commit and its ancestors 1609 backend 1610 
.import_head_commits_with_tree_conflicts([&commit_id2], uses_tree_conflict_format) 1611 .unwrap(); 1612 // Ref should be created only for the head commit 1613 let git_refs = backend 1614 .open_git_repo() 1615 .unwrap() 1616 .references_glob("refs/jj/keep/*") 1617 .unwrap() 1618 .map(|git_ref| git_ref.unwrap().target().unwrap()) 1619 .collect_vec(); 1620 assert_eq!(git_refs, vec![git_commit_id2]); 1621 1622 let commit = backend.read_commit(&commit_id).block_on().unwrap(); 1623 assert_eq!(&commit.change_id, &change_id); 1624 assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]); 1625 assert_eq!(commit.predecessors, vec![]); 1626 assert_eq!( 1627 commit.root_tree.to_merge(), 1628 Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes())) 1629 ); 1630 if uses_tree_conflict_format { 1631 assert_matches!(commit.root_tree, MergedTreeId::Merge(_)); 1632 } else { 1633 assert_matches!(commit.root_tree, MergedTreeId::Legacy(_)); 1634 } 1635 assert_eq!(commit.description, "git commit message"); 1636 assert_eq!(commit.author.name, "git author"); 1637 assert_eq!(commit.author.email, "git.author@example.com"); 1638 assert_eq!( 1639 commit.author.timestamp.timestamp, 1640 MillisSinceEpoch(1000 * 1000) 1641 ); 1642 assert_eq!(commit.author.timestamp.tz_offset, 60); 1643 assert_eq!(commit.committer.name, "git committer"); 1644 assert_eq!(commit.committer.email, "git.committer@example.com"); 1645 assert_eq!( 1646 commit.committer.timestamp.timestamp, 1647 MillisSinceEpoch(2000 * 1000) 1648 ); 1649 assert_eq!(commit.committer.timestamp.tz_offset, -480); 1650 1651 let root_tree = backend 1652 .read_tree( 1653 RepoPath::root(), 1654 &TreeId::from_bytes(root_tree_id.as_bytes()), 1655 ) 1656 .block_on() 1657 .unwrap(); 1658 let mut root_entries = root_tree.entries(); 1659 let dir = root_entries.next().unwrap(); 1660 assert_eq!(root_entries.next(), None); 1661 assert_eq!(dir.name().as_internal_str(), "dir"); 1662 assert_eq!( 1663 dir.value(), 1664 
&TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes())) 1665 ); 1666 1667 let dir_tree = backend 1668 .read_tree( 1669 RepoPath::from_internal_string("dir"), 1670 &TreeId::from_bytes(dir_tree_id.as_bytes()), 1671 ) 1672 .block_on() 1673 .unwrap(); 1674 let mut entries = dir_tree.entries(); 1675 let file = entries.next().unwrap(); 1676 let symlink = entries.next().unwrap(); 1677 assert_eq!(entries.next(), None); 1678 assert_eq!(file.name().as_internal_str(), "normal"); 1679 assert_eq!( 1680 file.value(), 1681 &TreeValue::File { 1682 id: FileId::from_bytes(blob1.as_bytes()), 1683 executable: false 1684 } 1685 ); 1686 assert_eq!(symlink.name().as_internal_str(), "symlink"); 1687 assert_eq!( 1688 symlink.value(), 1689 &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes())) 1690 ); 1691 1692 let commit2 = backend.read_commit(&commit_id2).block_on().unwrap(); 1693 assert_eq!(commit2.parents, vec![commit_id.clone()]); 1694 assert_eq!(commit.predecessors, vec![]); 1695 assert_eq!( 1696 commit.root_tree.to_merge(), 1697 Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes())) 1698 ); 1699 if uses_tree_conflict_format { 1700 assert_matches!(commit.root_tree, MergedTreeId::Merge(_)); 1701 } else { 1702 assert_matches!(commit.root_tree, MergedTreeId::Legacy(_)); 1703 } 1704 } 1705 1706 #[test] 1707 fn read_git_commit_without_importing() { 1708 let settings = user_settings(); 1709 let temp_dir = testutils::new_temp_dir(); 1710 let store_path = temp_dir.path(); 1711 let git_repo_path = temp_dir.path().join("git"); 1712 let git_repo = git2::Repository::init(git_repo_path).unwrap(); 1713 1714 let signature = git2::Signature::now("Someone", "someone@example.com").unwrap(); 1715 let empty_tree_id = Oid::from_str("4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap(); 1716 let empty_tree = git_repo.find_tree(empty_tree_id).unwrap(); 1717 let git_commit_id = git_repo 1718 .commit( 1719 Some("refs/heads/main"), 1720 &signature, 1721 &signature, 1722 "git commit message", 
/// A commit signed through git2 must be read back with both the signature
/// and the signed payload intact.
#[test]
fn read_signed_git_commit() {
    let settings = user_settings();
    let temp_dir = testutils::new_temp_dir();
    let store_path = temp_dir.path();
    let repo = git2::Repository::init(temp_dir.path().join("git")).unwrap();

    let someone = git2::Signature::now("Someone", "someone@example.com").unwrap();
    let empty_tree = repo
        .find_tree(Oid::from_str("4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap())
        .unwrap();

    let raw_commit = repo
        .commit_create_buffer(&someone, &someone, "git commit message", &empty_tree, &[])
        .unwrap();
    // libgit2-rs works with &strs here for some reason
    let commit_text = std::str::from_utf8(&raw_commit).unwrap();
    let expected_sig =
        "here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP\n";

    // git2 appears to append newline unconditionally, so pass the signature
    // without its trailing newline
    let signed_id = repo
        .commit_signed(commit_text, expected_sig.trim_end_matches('\n'), None)
        .unwrap();

    let backend = GitBackend::init_external(&settings, store_path, repo.path()).unwrap();
    let stored = backend
        .read_commit(&CommitId::from_bytes(signed_id.as_bytes()))
        .block_on()
        .unwrap()
        .secure_sig
        .expect("failed to read the signature");

    // converting to string for nicer assert diff
    assert_eq!(std::str::from_utf8(&stored.sig).unwrap(), expected_sig);
    assert_eq!(std::str::from_utf8(&stored.data).unwrap(), commit_text);
}
temp_dir = testutils::new_temp_dir(); 1843 let store_path = temp_dir.path(); 1844 let git_repo_path = temp_dir.path().join("git"); 1845 let git_repo = git2::Repository::init(git_repo_path).unwrap(); 1846 1847 let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap(); 1848 let mut commit = Commit { 1849 parents: vec![], 1850 predecessors: vec![], 1851 root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()), 1852 change_id: ChangeId::from_hex("abc123"), 1853 description: "".to_string(), 1854 author: create_signature(), 1855 committer: create_signature(), 1856 secure_sig: None, 1857 }; 1858 1859 let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> { 1860 backend.write_commit(commit, None).block_on() 1861 }; 1862 1863 // No parents 1864 commit.parents = vec![]; 1865 assert_matches!( 1866 write_commit(commit.clone()), 1867 Err(BackendError::Other(err)) if err.to_string().contains("no parents") 1868 ); 1869 1870 // Only root commit as parent 1871 commit.parents = vec![backend.root_commit_id().clone()]; 1872 let first_id = write_commit(commit.clone()).unwrap().0; 1873 let first_commit = backend.read_commit(&first_id).block_on().unwrap(); 1874 assert_eq!(first_commit, commit); 1875 let first_git_commit = git_repo.find_commit(git_id(&first_id)).unwrap(); 1876 assert_eq!(first_git_commit.parent_ids().collect_vec(), vec![]); 1877 1878 // Only non-root commit as parent 1879 commit.parents = vec![first_id.clone()]; 1880 let second_id = write_commit(commit.clone()).unwrap().0; 1881 let second_commit = backend.read_commit(&second_id).block_on().unwrap(); 1882 assert_eq!(second_commit, commit); 1883 let second_git_commit = git_repo.find_commit(git_id(&second_id)).unwrap(); 1884 assert_eq!( 1885 second_git_commit.parent_ids().collect_vec(), 1886 vec![git_id(&first_id)] 1887 ); 1888 1889 // Merge commit 1890 commit.parents = vec![first_id.clone(), second_id.clone()]; 1891 let merge_id = 
write_commit(commit.clone()).unwrap().0; 1892 let merge_commit = backend.read_commit(&merge_id).block_on().unwrap(); 1893 assert_eq!(merge_commit, commit); 1894 let merge_git_commit = git_repo.find_commit(git_id(&merge_id)).unwrap(); 1895 assert_eq!( 1896 merge_git_commit.parent_ids().collect_vec(), 1897 vec![git_id(&first_id), git_id(&second_id)] 1898 ); 1899 1900 // Merge commit with root as one parent 1901 commit.parents = vec![first_id, backend.root_commit_id().clone()]; 1902 assert_matches!( 1903 write_commit(commit), 1904 Err(BackendError::Unsupported(message)) if message.contains("root commit") 1905 ); 1906 } 1907 1908 #[test] 1909 fn write_tree_conflicts() { 1910 let settings = user_settings(); 1911 let temp_dir = testutils::new_temp_dir(); 1912 let store_path = temp_dir.path(); 1913 let git_repo_path = temp_dir.path().join("git"); 1914 let git_repo = git2::Repository::init(git_repo_path).unwrap(); 1915 1916 let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap(); 1917 let create_tree = |i| { 1918 let blob_id = git_repo.blob(b"content {i}").unwrap(); 1919 let mut tree_builder = git_repo.treebuilder(None).unwrap(); 1920 tree_builder 1921 .insert(format!("file{i}"), blob_id, 0o100644) 1922 .unwrap(); 1923 TreeId::from_bytes(tree_builder.write().unwrap().as_bytes()) 1924 }; 1925 1926 let root_tree = Merge::from_removes_adds( 1927 vec![create_tree(0), create_tree(1)], 1928 vec![create_tree(2), create_tree(3), create_tree(4)], 1929 ); 1930 let mut commit = Commit { 1931 parents: vec![backend.root_commit_id().clone()], 1932 predecessors: vec![], 1933 root_tree: MergedTreeId::Merge(root_tree.clone()), 1934 change_id: ChangeId::from_hex("abc123"), 1935 description: "".to_string(), 1936 author: create_signature(), 1937 committer: create_signature(), 1938 secure_sig: None, 1939 }; 1940 1941 let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> { 1942 backend.write_commit(commit, None).block_on() 1943 }; 1944 1945 // 
When writing a tree-level conflict, the root tree on the git side has the 1946 // individual trees as subtrees. 1947 let read_commit_id = write_commit(commit.clone()).unwrap().0; 1948 let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap(); 1949 assert_eq!(read_commit, commit); 1950 let git_commit = git_repo 1951 .find_commit(Oid::from_bytes(read_commit_id.as_bytes()).unwrap()) 1952 .unwrap(); 1953 let git_tree = git_repo.find_tree(git_commit.tree_id()).unwrap(); 1954 assert!(git_tree 1955 .iter() 1956 .filter(|entry| entry.name() != Some("README")) 1957 .all(|entry| entry.filemode() == 0o040000)); 1958 let mut iter = git_tree.iter(); 1959 let entry = iter.next().unwrap(); 1960 assert_eq!(entry.name(), Some(".jjconflict-base-0")); 1961 assert_eq!( 1962 entry.id().as_bytes(), 1963 root_tree.get_remove(0).unwrap().as_bytes() 1964 ); 1965 let entry = iter.next().unwrap(); 1966 assert_eq!(entry.name(), Some(".jjconflict-base-1")); 1967 assert_eq!( 1968 entry.id().as_bytes(), 1969 root_tree.get_remove(1).unwrap().as_bytes() 1970 ); 1971 let entry = iter.next().unwrap(); 1972 assert_eq!(entry.name(), Some(".jjconflict-side-0")); 1973 assert_eq!( 1974 entry.id().as_bytes(), 1975 root_tree.get_add(0).unwrap().as_bytes() 1976 ); 1977 let entry = iter.next().unwrap(); 1978 assert_eq!(entry.name(), Some(".jjconflict-side-1")); 1979 assert_eq!( 1980 entry.id().as_bytes(), 1981 root_tree.get_add(1).unwrap().as_bytes() 1982 ); 1983 let entry = iter.next().unwrap(); 1984 assert_eq!(entry.name(), Some(".jjconflict-side-2")); 1985 assert_eq!( 1986 entry.id().as_bytes(), 1987 root_tree.get_add(2).unwrap().as_bytes() 1988 ); 1989 let entry = iter.next().unwrap(); 1990 assert_eq!(entry.name(), Some("README")); 1991 assert_eq!(entry.filemode(), 0o100644); 1992 assert!(iter.next().is_none()); 1993 1994 // When writing a single tree using the new format, it's represented by a 1995 // regular git tree. 
1996 commit.root_tree = MergedTreeId::resolved(create_tree(5)); 1997 let read_commit_id = write_commit(commit.clone()).unwrap().0; 1998 let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap(); 1999 assert_eq!(read_commit, commit); 2000 let git_commit = git_repo 2001 .find_commit(Oid::from_bytes(read_commit_id.as_bytes()).unwrap()) 2002 .unwrap(); 2003 assert_eq!( 2004 MergedTreeId::resolved(TreeId::from_bytes(git_commit.tree_id().as_bytes())), 2005 commit.root_tree 2006 ); 2007 } 2008 2009 #[test] 2010 fn commit_has_ref() { 2011 let settings = user_settings(); 2012 let temp_dir = testutils::new_temp_dir(); 2013 let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap(); 2014 let git_repo = backend.open_git_repo().unwrap(); 2015 let signature = Signature { 2016 name: "Someone".to_string(), 2017 email: "someone@example.com".to_string(), 2018 timestamp: Timestamp { 2019 timestamp: MillisSinceEpoch(0), 2020 tz_offset: 0, 2021 }, 2022 }; 2023 let commit = Commit { 2024 parents: vec![backend.root_commit_id().clone()], 2025 predecessors: vec![], 2026 root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()), 2027 change_id: ChangeId::new(vec![]), 2028 description: "initial".to_string(), 2029 author: signature.clone(), 2030 committer: signature, 2031 secure_sig: None, 2032 }; 2033 let commit_id = backend.write_commit(commit, None).block_on().unwrap().0; 2034 let git_refs: Vec<_> = git_repo 2035 .references_glob("refs/jj/keep/*") 2036 .unwrap() 2037 .try_collect() 2038 .unwrap(); 2039 assert!(git_refs 2040 .iter() 2041 .any(|git_ref| git_ref.target().unwrap() == git_id(&commit_id))); 2042 2043 // Concurrently-running GC deletes the ref, leaving the extra metadata. 2044 for mut git_ref in git_refs { 2045 git_ref.delete().unwrap(); 2046 } 2047 // Re-imported commit should have new ref. 
2048 backend.import_head_commits([&commit_id]).unwrap(); 2049 let git_refs: Vec<_> = git_repo 2050 .references_glob("refs/jj/keep/*") 2051 .unwrap() 2052 .try_collect() 2053 .unwrap(); 2054 assert!(git_refs 2055 .iter() 2056 .any(|git_ref| git_ref.target().unwrap() == git_id(&commit_id))); 2057 } 2058 2059 #[test] 2060 fn import_head_commits_duplicates() { 2061 let settings = user_settings(); 2062 let temp_dir = testutils::new_temp_dir(); 2063 let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap(); 2064 let git_repo = backend.open_git_repo().unwrap(); 2065 2066 let signature = git2::Signature::now("Someone", "someone@example.com").unwrap(); 2067 let empty_tree_id = Oid::from_str("4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap(); 2068 let empty_tree = git_repo.find_tree(empty_tree_id).unwrap(); 2069 let git_commit_id = git_repo 2070 .commit( 2071 Some("refs/heads/main"), 2072 &signature, 2073 &signature, 2074 "git commit message", 2075 &empty_tree, 2076 &[], 2077 ) 2078 .unwrap(); 2079 let commit_id = CommitId::from_bytes(git_commit_id.as_bytes()); 2080 2081 // Ref creation shouldn't fail because of duplicated head ids. 
2082 backend 2083 .import_head_commits([&commit_id, &commit_id]) 2084 .unwrap(); 2085 let git_refs: Vec<_> = git_repo 2086 .references_glob("refs/jj/keep/*") 2087 .unwrap() 2088 .try_collect() 2089 .unwrap(); 2090 assert!(git_refs 2091 .iter() 2092 .any(|git_ref| git_ref.target().unwrap() == git_commit_id)); 2093 } 2094 2095 #[test] 2096 fn overlapping_git_commit_id() { 2097 let settings = user_settings(); 2098 let temp_dir = testutils::new_temp_dir(); 2099 let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap(); 2100 let mut commit1 = Commit { 2101 parents: vec![backend.root_commit_id().clone()], 2102 predecessors: vec![], 2103 root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()), 2104 change_id: ChangeId::new(vec![]), 2105 description: "initial".to_string(), 2106 author: create_signature(), 2107 committer: create_signature(), 2108 secure_sig: None, 2109 }; 2110 2111 let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> { 2112 backend.write_commit(commit, None).block_on() 2113 }; 2114 2115 // libgit2 doesn't seem to preserve negative timestamps, so set it to at least 1 2116 // second after the epoch, so the timestamp adjustment can remove 1 2117 // second and it will still be nonnegative 2118 commit1.committer.timestamp.timestamp = MillisSinceEpoch(1000); 2119 let (commit_id1, mut commit2) = write_commit(commit1).unwrap(); 2120 commit2.predecessors.push(commit_id1.clone()); 2121 // `write_commit` should prevent the ids from being the same by changing the 2122 // committer timestamp of the commit it actually writes. 
2123 let (commit_id2, mut actual_commit2) = write_commit(commit2.clone()).unwrap(); 2124 // The returned matches the ID 2125 assert_eq!( 2126 backend.read_commit(&commit_id2).block_on().unwrap(), 2127 actual_commit2 2128 ); 2129 assert_ne!(commit_id2, commit_id1); 2130 // The committer timestamp should differ 2131 assert_ne!( 2132 actual_commit2.committer.timestamp.timestamp, 2133 commit2.committer.timestamp.timestamp 2134 ); 2135 // The rest of the commit should be the same 2136 actual_commit2.committer.timestamp.timestamp = commit2.committer.timestamp.timestamp; 2137 assert_eq!(actual_commit2, commit2); 2138 } 2139 2140 #[test] 2141 fn write_signed_commit() { 2142 let settings = user_settings(); 2143 let temp_dir = testutils::new_temp_dir(); 2144 let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap(); 2145 2146 let commit = Commit { 2147 parents: vec![backend.root_commit_id().clone()], 2148 predecessors: vec![], 2149 root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()), 2150 change_id: ChangeId::new(vec![]), 2151 description: "initial".to_string(), 2152 author: create_signature(), 2153 committer: create_signature(), 2154 secure_sig: None, 2155 }; 2156 2157 let mut signer = |data: &_| { 2158 let hash: String = blake2b_hash(data).encode_hex(); 2159 Ok(format!("test sig\n\n\nhash={hash}\n").into_bytes()) 2160 }; 2161 2162 let (id, commit) = backend 2163 .write_commit(commit, Some(&mut signer as &mut SigningFn)) 2164 .block_on() 2165 .unwrap(); 2166 2167 let git_repo = backend.git_repo(); 2168 let obj = git_repo 2169 .find_object(gix::ObjectId::try_from(id.as_bytes()).unwrap()) 2170 .unwrap(); 2171 insta::assert_snapshot!(std::str::from_utf8(&obj.data).unwrap(), @r###" 2172 tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904 2173 author Someone <someone@example.com> 0 +0000 2174 committer Someone <someone@example.com> 0 +0000 2175 gpgsig test sig 2176 2177 2178 
hash=9ad9526c3b2103c41a229f2f3c82d107a0ecd902f476a855f0e1dd5f7bef1430663de12749b73e293a877113895a8a2a0f29da4bbc5a5f9a19c3523fb0e53518 2179 2180 initial 2181 "###); 2182 2183 let returned_sig = commit.secure_sig.expect("failed to return the signature"); 2184 2185 let commit = backend.read_commit(&id).block_on().unwrap(); 2186 2187 let sig = commit.secure_sig.expect("failed to read the signature"); 2188 assert_eq!(&sig, &returned_sig); 2189 2190 insta::assert_snapshot!(std::str::from_utf8(&sig.sig).unwrap(), @r###" 2191 test sig 2192 2193 2194 hash=9ad9526c3b2103c41a229f2f3c82d107a0ecd902f476a855f0e1dd5f7bef1430663de12749b73e293a877113895a8a2a0f29da4bbc5a5f9a19c3523fb0e53518 2195 "###); 2196 insta::assert_snapshot!(std::str::from_utf8(&sig.data).unwrap(), @r###" 2197 tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904 2198 author Someone <someone@example.com> 0 +0000 2199 committer Someone <someone@example.com> 0 +0000 2200 2201 initial 2202 "###); 2203 } 2204 2205 fn git_id(commit_id: &CommitId) -> Oid { 2206 Oid::from_bytes(commit_id.as_bytes()).unwrap() 2207 } 2208 2209 fn create_signature() -> Signature { 2210 Signature { 2211 name: "Someone".to_string(), 2212 email: "someone@example.com".to_string(), 2213 timestamp: Timestamp { 2214 timestamp: MillisSinceEpoch(0), 2215 tz_offset: 0, 2216 }, 2217 } 2218 } 2219 2220 // Not using testutils::user_settings() because there is a dependency cycle 2221 // 'jj_lib (1) -> testutils -> jj_lib (2)' which creates another distinct 2222 // UserSettings type. testutils returns jj_lib (2)'s UserSettings, whereas 2223 // our UserSettings type comes from jj_lib (1). 2224 fn user_settings() -> UserSettings { 2225 let config = StackedConfig::with_defaults(); 2226 UserSettings::from_config(config).unwrap() 2227 } 2228}