just playing with tangled
1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::any::Any;
18use std::collections::HashSet;
19use std::fmt::Debug;
20use std::fmt::Error;
21use std::fmt::Formatter;
22use std::fs;
23use std::io;
24use std::io::Cursor;
25use std::io::Read;
26use std::path::Path;
27use std::path::PathBuf;
28use std::process::Command;
29use std::process::ExitStatus;
30use std::str;
31use std::sync::Arc;
32use std::sync::Mutex;
33use std::sync::MutexGuard;
34use std::time::SystemTime;
35
36use async_trait::async_trait;
37use futures::stream::BoxStream;
38use gix::bstr::BString;
39use gix::objs::CommitRef;
40use gix::objs::CommitRefIter;
41use gix::objs::WriteTo;
42use itertools::Itertools;
43use pollster::FutureExt;
44use prost::Message;
45use smallvec::SmallVec;
46use thiserror::Error;
47
48use crate::backend::make_root_commit;
49use crate::backend::Backend;
50use crate::backend::BackendError;
51use crate::backend::BackendInitError;
52use crate::backend::BackendLoadError;
53use crate::backend::BackendResult;
54use crate::backend::ChangeId;
55use crate::backend::Commit;
56use crate::backend::CommitId;
57use crate::backend::Conflict;
58use crate::backend::ConflictId;
59use crate::backend::ConflictTerm;
60use crate::backend::CopyRecord;
61use crate::backend::FileId;
62use crate::backend::MergedTreeId;
63use crate::backend::MillisSinceEpoch;
64use crate::backend::SecureSig;
65use crate::backend::Signature;
66use crate::backend::SigningFn;
67use crate::backend::SymlinkId;
68use crate::backend::Timestamp;
69use crate::backend::Tree;
70use crate::backend::TreeId;
71use crate::backend::TreeValue;
72use crate::file_util::IoResultExt as _;
73use crate::file_util::PathError;
74use crate::index::Index;
75use crate::lock::FileLock;
76use crate::merge::Merge;
77use crate::merge::MergeBuilder;
78use crate::object_id::ObjectId;
79use crate::repo_path::RepoPath;
80use crate::repo_path::RepoPathBuf;
81use crate::repo_path::RepoPathComponentBuf;
82use crate::settings::UserSettings;
83use crate::stacked_table::MutableTable;
84use crate::stacked_table::ReadonlyTable;
85use crate::stacked_table::TableSegment;
86use crate::stacked_table::TableStore;
87use crate::stacked_table::TableStoreError;
88
/// Length in bytes of the object ids handled by this backend. Used for the
/// all-zero root commit id and checked by `validate_git_object_id()`.
const HASH_LENGTH: usize = 20;
/// Length in bytes of change ids; the root change id is this many zero bytes.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";
// NOTE(review): not referenced in the visible part of this file; presumably a
// name suffix marking conflict entries in trees -- confirm against its users.
const CONFLICT_SUFFIX: &str = ".jjconflict";

/// Key of the extra commit header that `root_tree_from_header()` parses into
/// the tree ids of a merged root tree.
const JJ_TREES_COMMIT_HEADER: &[u8] = b"jj:trees";
96
/// Errors returned by the `GitBackend::init_*()` constructors.
#[derive(Debug, Error)]
pub enum GitBackendInitError {
    /// Creating a new Git repository (`init_opts`) failed.
    #[error("Failed to initialize git repository")]
    InitRepository(#[source] gix::init::Error),
    /// Opening an existing Git repository (`open_opts`) failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// A filesystem operation on a store or repository path failed; the
    /// message is taken from the wrapped `PathError`.
    #[error(transparent)]
    Path(PathError),
}
106
107impl From<Box<GitBackendInitError>> for BackendInitError {
108 fn from(err: Box<GitBackendInitError>) -> Self {
109 BackendInitError(err)
110 }
111}
112
/// Errors returned by `GitBackend::load()`.
#[derive(Debug, Error)]
pub enum GitBackendLoadError {
    /// Opening the Git repository referenced by the store failed.
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    /// Reading the `git_target` file or canonicalizing the repository path
    /// failed; the message is taken from the wrapped `PathError`.
    #[error(transparent)]
    Path(PathError),
}
120
121impl From<Box<GitBackendLoadError>> for BackendLoadError {
122 fn from(err: Box<GitBackendLoadError>) -> Self {
123 BackendLoadError(err)
124 }
125}
126
/// `GitBackend`-specific error that may occur after the backend is loaded.
///
/// Both variants wrap failures of the extra-metadata table store.
#[derive(Debug, Error)]
pub enum GitBackendError {
    /// Loading the head of the extra-metadata table failed.
    #[error("Failed to read non-git metadata")]
    ReadMetadata(#[source] TableStoreError),
    /// Saving a mutated extra-metadata table failed.
    #[error("Failed to write non-git metadata")]
    WriteMetadata(#[source] TableStoreError),
}
135
136impl From<GitBackendError> for BackendError {
137 fn from(err: GitBackendError) -> Self {
138 BackendError::Other(err.into())
139 }
140}
141
/// Errors produced when running the external `git gc` command.
#[derive(Debug, Error)]
pub enum GitGcError {
    /// The `git` process could not be spawned or waited for.
    #[error("Failed to run git gc command")]
    GcCommand(#[source] std::io::Error),
    /// The `git gc` process ran but reported a non-success exit status.
    #[error("git gc command exited with an error: {0}")]
    GcCommandErrorStatus(ExitStatus),
}
149
/// Backend that stores objects in a Git repository and keeps additional
/// per-commit metadata in a separate table store.
pub struct GitBackend {
    // While gix::Repository can be created from gix::ThreadSafeRepository, it's
    // cheaper to cache the thread-local instance behind a mutex than creating
    // one for each backend method call. Our GitBackend is most likely to be
    // used in a single-threaded context.
    /// Thread-safe handle; source of new thread-local repository instances.
    base_repo: gix::ThreadSafeRepository,
    /// Cached thread-local repository, accessed through `lock_git_repo()`.
    repo: Mutex<gix::Repository>,
    /// All-zero commit id of `HASH_LENGTH` bytes.
    root_commit_id: CommitId,
    /// All-zero change id of `CHANGE_ID_LENGTH` bytes.
    root_change_id: ChangeId,
    /// Id `4b825dc642cb6eb9a060e54bf8d69288fbee4904`; `read_tree()` answers
    /// it with a default `Tree` without consulting Git.
    empty_tree_id: TreeId,
    /// Table store located in the `extra` directory of the store path.
    extra_metadata_store: TableStore,
    /// Most recently loaded or saved head of `extra_metadata_store`, if any.
    cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
}
163
164impl GitBackend {
/// Returns the identifier of this backend type, `"git"`.
pub fn name() -> &'static str {
    const BACKEND_NAME: &str = "git";
    BACKEND_NAME
}
168
169 fn new(base_repo: gix::ThreadSafeRepository, extra_metadata_store: TableStore) -> Self {
170 let repo = Mutex::new(base_repo.to_thread_local());
171 let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
172 let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
173 let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
174 GitBackend {
175 base_repo,
176 repo,
177 root_commit_id,
178 root_change_id,
179 empty_tree_id,
180 extra_metadata_store,
181 cached_extra_metadata: Mutex::new(None),
182 }
183 }
184
185 pub fn init_internal(
186 settings: &UserSettings,
187 store_path: &Path,
188 ) -> Result<Self, Box<GitBackendInitError>> {
189 let git_repo_path = Path::new("git");
190 let git_repo = gix::ThreadSafeRepository::init_opts(
191 store_path.join(git_repo_path),
192 gix::create::Kind::Bare,
193 gix::create::Options::default(),
194 gix_open_opts_from_settings(settings),
195 )
196 .map_err(GitBackendInitError::InitRepository)?;
197 Self::init_with_repo(store_path, git_repo_path, git_repo)
198 }
199
200 /// Initializes backend by creating a new Git repo at the specified
201 /// workspace path. The workspace directory must exist.
202 pub fn init_colocated(
203 settings: &UserSettings,
204 store_path: &Path,
205 workspace_root: &Path,
206 ) -> Result<Self, Box<GitBackendInitError>> {
207 let canonical_workspace_root = {
208 let path = store_path.join(workspace_root);
209 dunce::canonicalize(&path)
210 .context(&path)
211 .map_err(GitBackendInitError::Path)?
212 };
213 let git_repo = gix::ThreadSafeRepository::init_opts(
214 canonical_workspace_root,
215 gix::create::Kind::WithWorktree,
216 gix::create::Options::default(),
217 gix_open_opts_from_settings(settings),
218 )
219 .map_err(GitBackendInitError::InitRepository)?;
220 let git_repo_path = workspace_root.join(".git");
221 Self::init_with_repo(store_path, &git_repo_path, git_repo)
222 }
223
224 /// Initializes backend with an existing Git repo at the specified path.
225 pub fn init_external(
226 settings: &UserSettings,
227 store_path: &Path,
228 git_repo_path: &Path,
229 ) -> Result<Self, Box<GitBackendInitError>> {
230 let canonical_git_repo_path = {
231 let path = store_path.join(git_repo_path);
232 canonicalize_git_repo_path(&path)
233 .context(&path)
234 .map_err(GitBackendInitError::Path)?
235 };
236 let git_repo = gix::ThreadSafeRepository::open_opts(
237 canonical_git_repo_path,
238 gix_open_opts_from_settings(settings),
239 )
240 .map_err(GitBackendInitError::OpenRepository)?;
241 Self::init_with_repo(store_path, git_repo_path, git_repo)
242 }
243
244 fn init_with_repo(
245 store_path: &Path,
246 git_repo_path: &Path,
247 git_repo: gix::ThreadSafeRepository,
248 ) -> Result<Self, Box<GitBackendInitError>> {
249 let extra_path = store_path.join("extra");
250 fs::create_dir(&extra_path)
251 .context(&extra_path)
252 .map_err(GitBackendInitError::Path)?;
253 let target_path = store_path.join("git_target");
254 if cfg!(windows) && git_repo_path.is_relative() {
255 // When a repository is created in Windows, format the path with *forward
256 // slashes* and not backwards slashes. This makes it possible to use the same
257 // repository under Windows Subsystem for Linux.
258 //
259 // This only works for relative paths. If the path is absolute, there's not much
260 // we can do, and it simply won't work inside and outside WSL at the same time.
261 let git_repo_path_string = git_repo_path
262 .components()
263 .map(|component| component.as_os_str().to_str().unwrap().to_owned())
264 .join("/");
265 fs::write(&target_path, git_repo_path_string.as_bytes())
266 .context(&target_path)
267 .map_err(GitBackendInitError::Path)?;
268 } else {
269 fs::write(&target_path, git_repo_path.to_str().unwrap().as_bytes())
270 .context(&target_path)
271 .map_err(GitBackendInitError::Path)?;
272 };
273 let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
274 Ok(GitBackend::new(git_repo, extra_metadata_store))
275 }
276
277 pub fn load(
278 settings: &UserSettings,
279 store_path: &Path,
280 ) -> Result<Self, Box<GitBackendLoadError>> {
281 let git_repo_path = {
282 let target_path = store_path.join("git_target");
283 let git_repo_path_str = fs::read_to_string(&target_path)
284 .context(&target_path)
285 .map_err(GitBackendLoadError::Path)?;
286 let git_repo_path = store_path.join(git_repo_path_str);
287 canonicalize_git_repo_path(&git_repo_path)
288 .context(&git_repo_path)
289 .map_err(GitBackendLoadError::Path)?
290 };
291 let repo = gix::ThreadSafeRepository::open_opts(
292 git_repo_path,
293 gix_open_opts_from_settings(settings),
294 )
295 .map_err(GitBackendLoadError::OpenRepository)?;
296 let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
297 Ok(GitBackend::new(repo, extra_metadata_store))
298 }
299
/// Locks and returns the cached thread-local repository.
///
/// Panics if the mutex is poisoned.
fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
    self.repo.lock().unwrap()
}
303
/// Returns a new thread-local instance for accessing the underlying Git
/// repo. Unlike `lock_git_repo()`, this does not use the cached instance.
pub fn git_repo(&self) -> gix::Repository {
    self.base_repo.to_thread_local()
}
308
/// Creates a new owned `git2` repository instance by opening the path
/// returned by `git_repo_path()`.
pub fn open_git_repo(&self) -> Result<git2::Repository, git2::Error> {
    git2::Repository::open(self.git_repo_path())
}
313
/// Path to the `.git` directory or the repository itself if it's bare, as
/// reported by the underlying `gix` repository.
pub fn git_repo_path(&self) -> &Path {
    self.base_repo.path()
}
318
/// Path to the working directory if the repository isn't bare, otherwise
/// `None`.
pub fn git_workdir(&self) -> Option<&Path> {
    self.base_repo.work_dir()
}
323
324 fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
325 let mut locked_head = self.cached_extra_metadata.lock().unwrap();
326 match locked_head.as_ref() {
327 Some(head) => Ok(head.clone()),
328 None => {
329 let table = self
330 .extra_metadata_store
331 .get_head()
332 .map_err(GitBackendError::ReadMetadata)?;
333 *locked_head = Some(table.clone());
334 Ok(table)
335 }
336 }
337 }
338
339 fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> {
340 let table = self
341 .extra_metadata_store
342 .get_head_locked()
343 .map_err(GitBackendError::ReadMetadata)?;
344 Ok(table)
345 }
346
347 fn save_extra_metadata_table(
348 &self,
349 mut_table: MutableTable,
350 _table_lock: &FileLock,
351 ) -> BackendResult<()> {
352 let table = self
353 .extra_metadata_store
354 .save_table(mut_table)
355 .map_err(GitBackendError::WriteMetadata)?;
356 // Since the parent table was the head, saved table are likely to be new head.
357 // If it's not, cache will be reloaded when entry can't be found.
358 *self.cached_extra_metadata.lock().unwrap() = Some(table);
359 Ok(())
360 }
361
/// Imports the given commits and ancestors from the backing Git repo.
///
/// The `head_ids` may contain commits that have already been imported, but
/// the caller should filter them out to eliminate redundant I/O processing.
///
/// Delegates to `import_head_commits_with_tree_conflicts()` with
/// `uses_tree_conflict_format` set to `true`.
#[tracing::instrument(skip(self, head_ids))]
pub fn import_head_commits<'a>(
    &self,
    head_ids: impl IntoIterator<Item = &'a CommitId>,
) -> BackendResult<()> {
    self.import_head_commits_with_tree_conflicts(head_ids, true)
}
373
374 fn import_head_commits_with_tree_conflicts<'a>(
375 &self,
376 head_ids: impl IntoIterator<Item = &'a CommitId>,
377 uses_tree_conflict_format: bool,
378 ) -> BackendResult<()> {
379 let head_ids: HashSet<&CommitId> = head_ids
380 .into_iter()
381 .filter(|&id| *id != self.root_commit_id)
382 .collect();
383 if head_ids.is_empty() {
384 return Ok(());
385 }
386
387 // Create no-gc ref even if known to the extras table. Concurrent GC
388 // process might have deleted the no-gc ref.
389 let locked_repo = self.lock_git_repo();
390 locked_repo
391 .edit_references(head_ids.iter().copied().map(to_no_gc_ref_update))
392 .map_err(|err| BackendError::Other(Box::new(err)))?;
393
394 // These commits are imported from Git. Make our change ids persist (otherwise
395 // future write_commit() could reassign new change id.)
396 tracing::debug!(
397 heads_count = head_ids.len(),
398 "import extra metadata entries"
399 );
400 let (table, table_lock) = self.read_extra_metadata_table_locked()?;
401 let mut mut_table = table.start_mutation();
402 import_extra_metadata_entries_from_heads(
403 &locked_repo,
404 &mut mut_table,
405 &table_lock,
406 &head_ids,
407 uses_tree_conflict_format,
408 )?;
409 self.save_extra_metadata_table(mut_table, &table_lock)
410 }
411
412 fn read_file_sync(&self, id: &FileId) -> BackendResult<Box<dyn Read>> {
413 let git_blob_id = validate_git_object_id(id)?;
414 let locked_repo = self.lock_git_repo();
415 let mut blob = locked_repo
416 .find_object(git_blob_id)
417 .map_err(|err| map_not_found_err(err, id))?
418 .try_into_blob()
419 .map_err(|err| to_read_object_err(err, id))?;
420 Ok(Box::new(Cursor::new(blob.take_data())))
421 }
422
423 fn new_diff_platform(&self) -> BackendResult<gix::diff::blob::Platform> {
424 let attributes = gix::worktree::Stack::new(
425 Path::new(""),
426 gix::worktree::stack::State::AttributesStack(Default::default()),
427 gix::worktree::glob::pattern::Case::Sensitive,
428 Vec::new(),
429 Vec::new(),
430 );
431 let filter = gix::diff::blob::Pipeline::new(
432 Default::default(),
433 gix::filter::plumbing::Pipeline::new(
434 self.git_repo()
435 .command_context()
436 .map_err(|err| BackendError::Other(Box::new(err)))?,
437 Default::default(),
438 ),
439 Vec::new(),
440 Default::default(),
441 );
442 Ok(gix::diff::blob::Platform::new(
443 Default::default(),
444 filter,
445 gix::diff::blob::pipeline::Mode::ToGit,
446 attributes,
447 ))
448 }
449
450 fn read_tree_for_commit<'repo>(
451 &self,
452 repo: &'repo gix::Repository,
453 id: &CommitId,
454 ) -> BackendResult<gix::Tree<'repo>> {
455 let tree = self.read_commit(id).block_on()?.root_tree.to_merge();
456 // TODO(kfm): probably want to do something here if it is a merge
457 let tree_id = tree.first().clone();
458 let gix_id = validate_git_object_id(&tree_id)?;
459 repo.find_object(gix_id)
460 .map_err(|err| map_not_found_err(err, &tree_id))?
461 .try_into_tree()
462 .map_err(|err| to_read_object_err(err, &tree_id))
463 }
464}
465
466/// Canonicalizes the given `path` except for the last `".git"` component.
467///
468/// The last path component matters when opening a Git repo without `core.bare`
469/// config. This config is usually set, but the "repo" tool will set up such
470/// repositories and symlinks. Opening such repo with fully-canonicalized path
471/// would turn a colocated Git repo into a bare repo.
472pub fn canonicalize_git_repo_path(path: &Path) -> io::Result<PathBuf> {
473 if path.ends_with(".git") {
474 let workdir = path.parent().unwrap();
475 dunce::canonicalize(workdir).map(|dir| dir.join(".git"))
476 } else {
477 dunce::canonicalize(path)
478 }
479}
480
481fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options {
482 let user_name = settings.user_name();
483 let user_email = settings.user_email();
484 gix::open::Options::default()
485 .config_overrides([
486 // Committer has to be configured to record reflog. Author isn't
487 // needed, but let's copy the same values.
488 format!("author.name={user_name}"),
489 format!("author.email={user_email}"),
490 format!("committer.name={user_name}"),
491 format!("committer.email={user_email}"),
492 ])
493 // The git_target path should point the repository, not the working directory.
494 .open_path_as_is(true)
495}
496
497/// Reads the `jj:trees` header from the commit.
498fn root_tree_from_header(git_commit: &CommitRef) -> Result<Option<MergedTreeId>, ()> {
499 for (key, value) in &git_commit.extra_headers {
500 if *key == JJ_TREES_COMMIT_HEADER {
501 let mut tree_ids = SmallVec::new();
502 for hex in str::from_utf8(value.as_ref()).or(Err(()))?.split(' ') {
503 let tree_id = TreeId::try_from_hex(hex).or(Err(()))?;
504 if tree_id.as_bytes().len() != HASH_LENGTH {
505 return Err(());
506 }
507 tree_ids.push(tree_id);
508 }
509 if tree_ids.len() % 2 == 0 {
510 return Err(());
511 }
512 return Ok(Some(MergedTreeId::Merge(Merge::from_vec(tree_ids))));
513 }
514 }
515 Ok(None)
516}
517
518fn commit_from_git_without_root_parent(
519 id: &CommitId,
520 git_object: &gix::Object,
521 uses_tree_conflict_format: bool,
522 is_shallow: bool,
523) -> BackendResult<Commit> {
524 let commit = git_object
525 .try_to_commit_ref()
526 .map_err(|err| to_read_object_err(err, id))?;
527
528 // We reverse the bits of the commit id to create the change id. We don't want
529 // to use the first bytes unmodified because then it would be ambiguous
530 // if a given hash prefix refers to the commit id or the change id. It
531 // would have been enough to pick the last 16 bytes instead of the
532 // leading 16 bytes to address that. We also reverse the bits to make it less
533 // likely that users depend on any relationship between the two ids.
534 let change_id = ChangeId::new(
535 id.as_bytes()[4..HASH_LENGTH]
536 .iter()
537 .rev()
538 .map(|b| b.reverse_bits())
539 .collect(),
540 );
541 // shallow commits don't have parents their parents actually fetched, so we
542 // discard them here
543 // TODO: This causes issues when a shallow repository is deepened/unshallowed
544 let parents = if is_shallow {
545 vec![]
546 } else {
547 commit
548 .parents()
549 .map(|oid| CommitId::from_bytes(oid.as_bytes()))
550 .collect_vec()
551 };
552 let tree_id = TreeId::from_bytes(commit.tree().as_bytes());
553 // If this commit is a conflict, we'll update the root tree later, when we read
554 // the extra metadata.
555 let root_tree = root_tree_from_header(&commit)
556 .map_err(|()| to_read_object_err("Invalid jj:trees header", id))?;
557 let root_tree = root_tree.unwrap_or_else(|| {
558 if uses_tree_conflict_format {
559 MergedTreeId::resolved(tree_id)
560 } else {
561 MergedTreeId::Legacy(tree_id)
562 }
563 });
564 // Use lossy conversion as commit message with "mojibake" is still better than
565 // nothing.
566 // TODO: what should we do with commit.encoding?
567 let description = String::from_utf8_lossy(commit.message).into_owned();
568 let author = signature_from_git(commit.author());
569 let committer = signature_from_git(commit.committer());
570
571 // If the commit is signed, extract both the signature and the signed data
572 // (which is the commit buffer with the gpgsig header omitted).
573 // We have to re-parse the raw commit data because gix CommitRef does not give
574 // us the sogned data, only the signature.
575 // Ideally, we could use try_to_commit_ref_iter at the beginning of this
576 // function and extract everything from that. For now, this works
577 let secure_sig = commit
578 .extra_headers
579 .iter()
580 // gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too
581 .any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256")
582 .then(|| CommitRefIter::signature(&git_object.data))
583 .transpose()
584 .map_err(|err| to_read_object_err(err, id))?
585 .flatten()
586 .map(|(sig, data)| SecureSig {
587 data: data.to_bstring().into(),
588 sig: sig.into_owned().into(),
589 });
590
591 Ok(Commit {
592 parents,
593 predecessors: vec![],
594 // If this commit has associated extra metadata, we may reset this later.
595 root_tree,
596 change_id,
597 description,
598 author,
599 committer,
600 secure_sig,
601 })
602}
603
/// Stand-in written to Git for an empty signature name or email, and mapped
/// back to the empty string when a signature is read.
const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING";
605
606fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature {
607 let name = signature.name;
608 let name = if name != EMPTY_STRING_PLACEHOLDER {
609 String::from_utf8_lossy(name).into_owned()
610 } else {
611 "".to_string()
612 };
613 let email = signature.email;
614 let email = if email != EMPTY_STRING_PLACEHOLDER {
615 String::from_utf8_lossy(email).into_owned()
616 } else {
617 "".to_string()
618 };
619 let timestamp = MillisSinceEpoch(signature.time.seconds * 1000);
620 let tz_offset = signature.time.offset.div_euclid(60); // in minutes
621 Signature {
622 name,
623 email,
624 timestamp: Timestamp {
625 timestamp,
626 tz_offset,
627 },
628 }
629}
630
631fn signature_to_git(signature: &Signature) -> gix::actor::SignatureRef<'_> {
632 // git does not support empty names or emails
633 let name = if !signature.name.is_empty() {
634 &signature.name
635 } else {
636 EMPTY_STRING_PLACEHOLDER
637 };
638 let email = if !signature.email.is_empty() {
639 &signature.email
640 } else {
641 EMPTY_STRING_PLACEHOLDER
642 };
643 let time = gix::date::Time::new(
644 signature.timestamp.timestamp.0.div_euclid(1000),
645 signature.timestamp.tz_offset * 60, // in seconds
646 );
647 gix::actor::SignatureRef {
648 name: name.into(),
649 email: email.into(),
650 time,
651 }
652}
653
654fn serialize_extras(commit: &Commit) -> Vec<u8> {
655 let mut proto = crate::protos::git_store::Commit {
656 change_id: commit.change_id.to_bytes(),
657 ..Default::default()
658 };
659 if let MergedTreeId::Merge(tree_ids) = &commit.root_tree {
660 proto.uses_tree_conflict_format = true;
661 if !tree_ids.is_resolved() {
662 proto.root_tree = tree_ids.iter().map(|r| r.to_bytes()).collect();
663 }
664 }
665 for predecessor in &commit.predecessors {
666 proto.predecessors.push(predecessor.to_bytes());
667 }
668 proto.encode_to_vec()
669}
670
671fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
672 let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
673 commit.change_id = ChangeId::new(proto.change_id);
674 if proto.uses_tree_conflict_format {
675 if !proto.root_tree.is_empty() {
676 let merge_builder: MergeBuilder<_> = proto
677 .root_tree
678 .iter()
679 .map(|id_bytes| TreeId::from_bytes(id_bytes))
680 .collect();
681 let merge = merge_builder.build();
682 // Check that the trees from the extras match the one we found in the jj:trees
683 // header
684 if let MergedTreeId::Merge(existing_merge) = &commit.root_tree {
685 assert!(existing_merge.is_resolved() || *existing_merge == merge);
686 }
687 commit.root_tree = MergedTreeId::Merge(merge);
688 } else {
689 // uses_tree_conflict_format was set but there was no root_tree override in the
690 // proto, which means we should just promote the tree id from the
691 // git commit to be a known-conflict-free tree
692 let MergedTreeId::Legacy(legacy_tree_id) = &commit.root_tree else {
693 panic!("root tree should have been initialized to a legacy id");
694 };
695 commit.root_tree = MergedTreeId::resolved(legacy_tree_id.clone());
696 }
697 }
698 for predecessor in &proto.predecessors {
699 commit.predecessors.push(CommitId::from_bytes(predecessor));
700 }
701}
702
703/// Returns `RefEdit` that will create a ref in `refs/jj/keep` if not exist.
704/// Used for preventing GC of commits we create.
705fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit {
706 let name = format!("{NO_GC_REF_NAMESPACE}{id}");
707 let new = gix::refs::Target::Object(validate_git_object_id(id).unwrap());
708 let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(new.clone());
709 gix::refs::transaction::RefEdit {
710 change: gix::refs::transaction::Change::Update {
711 log: gix::refs::transaction::LogChange {
712 message: "used by jj".into(),
713 ..Default::default()
714 },
715 expected,
716 new,
717 },
718 name: name.try_into().unwrap(),
719 deref: false,
720 }
721}
722
723fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit {
724 let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(git_ref.target);
725 gix::refs::transaction::RefEdit {
726 change: gix::refs::transaction::Change::Delete {
727 expected,
728 log: gix::refs::transaction::RefLog::AndReference,
729 },
730 name: git_ref.name,
731 deref: false,
732 }
733}
734
735/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other
736/// unreachable and non-head refs.
737fn recreate_no_gc_refs(
738 git_repo: &gix::Repository,
739 new_heads: impl IntoIterator<Item = CommitId>,
740 keep_newer: SystemTime,
741) -> BackendResult<()> {
742 // Calculate diff between existing no-gc refs and new heads.
743 let new_heads: HashSet<CommitId> = new_heads.into_iter().collect();
744 let mut no_gc_refs_to_keep_count: usize = 0;
745 let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new();
746 let git_references = git_repo
747 .references()
748 .map_err(|err| BackendError::Other(err.into()))?;
749 let no_gc_refs_iter = git_references
750 .prefixed(NO_GC_REF_NAMESPACE)
751 .map_err(|err| BackendError::Other(err.into()))?;
752 for git_ref in no_gc_refs_iter {
753 let git_ref = git_ref.map_err(BackendError::Other)?.detach();
754 let oid = git_ref.target.try_id().ok_or_else(|| {
755 let name = git_ref.name.as_bstr();
756 BackendError::Other(format!("Symbolic no-gc ref found: {name}").into())
757 })?;
758 let id = CommitId::from_bytes(oid.as_bytes());
759 let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex();
760 if new_heads.contains(&id) && name_good {
761 no_gc_refs_to_keep_count += 1;
762 continue;
763 }
764 // Check timestamp of loose ref, but this is still racy on re-import
765 // because:
766 // - existing packed ref won't be demoted to loose ref
767 // - existing loose ref won't be touched
768 //
769 // TODO: might be better to switch to a dummy merge, where new no-gc ref
770 // will always have a unique name. Doing that with the current
771 // ref-per-head strategy would increase the number of the no-gc refs.
772 // https://github.com/jj-vcs/jj/pull/2659#issuecomment-1837057782
773 let loose_ref_path = git_repo.path().join(git_ref.name.to_path());
774 if let Ok(metadata) = loose_ref_path.metadata() {
775 let mtime = metadata.modified().expect("unsupported platform?");
776 if mtime > keep_newer {
777 tracing::trace!(?git_ref, "not deleting new");
778 no_gc_refs_to_keep_count += 1;
779 continue;
780 }
781 }
782 // Also deletes no-gc ref of random name created by old jj.
783 tracing::trace!(?git_ref, ?name_good, "will delete");
784 no_gc_refs_to_delete.push(git_ref);
785 }
786 tracing::info!(
787 new_heads_count = new_heads.len(),
788 no_gc_refs_to_keep_count,
789 no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(),
790 "collected reachable refs"
791 );
792
793 // It's slow to delete packed refs one by one, so update refs all at once.
794 let ref_edits = itertools::chain(
795 no_gc_refs_to_delete.into_iter().map(to_ref_deletion),
796 new_heads.iter().map(to_no_gc_ref_update),
797 );
798 git_repo
799 .edit_references(ref_edits)
800 .map_err(|err| BackendError::Other(err.into()))?;
801
802 Ok(())
803}
804
805fn run_git_gc(git_dir: &Path) -> Result<(), GitGcError> {
806 let mut git = Command::new("git");
807 git.arg("--git-dir=."); // turn off discovery
808 git.arg("gc");
809 // Don't specify it by GIT_DIR/--git-dir. On Windows, the path could be
810 // canonicalized as UNC path, which wouldn't be supported by git.
811 git.current_dir(git_dir);
812 // TODO: pass output to UI layer instead of printing directly here
813 let status = git.status().map_err(GitGcError::GcCommand)?;
814 if !status.success() {
815 return Err(GitGcError::GcCommandErrorStatus(status));
816 }
817 Ok(())
818}
819
820fn validate_git_object_id(id: &impl ObjectId) -> BackendResult<gix::ObjectId> {
821 if id.as_bytes().len() != HASH_LENGTH {
822 return Err(BackendError::InvalidHashLength {
823 expected: HASH_LENGTH,
824 actual: id.as_bytes().len(),
825 object_type: id.object_type(),
826 hash: id.hex(),
827 });
828 }
829 Ok(id.as_bytes().try_into().unwrap())
830}
831
832fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError {
833 if matches!(err, gix::object::find::existing::Error::NotFound { .. }) {
834 BackendError::ObjectNotFound {
835 object_type: id.object_type(),
836 hash: id.hex(),
837 source: Box::new(err),
838 }
839 } else {
840 to_read_object_err(err, id)
841 }
842}
843
844fn to_read_object_err(
845 err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
846 id: &impl ObjectId,
847) -> BackendError {
848 BackendError::ReadObject {
849 object_type: id.object_type(),
850 hash: id.hex(),
851 source: err.into(),
852 }
853}
854
855fn to_invalid_utf8_err(source: str::Utf8Error, id: &impl ObjectId) -> BackendError {
856 BackendError::InvalidUtf8 {
857 object_type: id.object_type(),
858 hash: id.hex(),
859 source,
860 }
861}
862
863fn import_extra_metadata_entries_from_heads(
864 git_repo: &gix::Repository,
865 mut_table: &mut MutableTable,
866 _table_lock: &FileLock,
867 head_ids: &HashSet<&CommitId>,
868 uses_tree_conflict_format: bool,
869) -> BackendResult<()> {
870 let shallow_commits = git_repo
871 .shallow_commits()
872 .map_err(|e| BackendError::Other(Box::new(e)))?;
873
874 let mut work_ids = head_ids
875 .iter()
876 .filter(|&id| mut_table.get_value(id.as_bytes()).is_none())
877 .map(|&id| id.clone())
878 .collect_vec();
879 while let Some(id) = work_ids.pop() {
880 let git_object = git_repo
881 .find_object(validate_git_object_id(&id)?)
882 .map_err(|err| map_not_found_err(err, &id))?;
883 let is_shallow = shallow_commits
884 .as_ref()
885 .is_some_and(|shallow| shallow.contains(&git_object.id));
886 // TODO(#1624): Should we read the root tree here and check if it has a
887 // `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
888 // change the description of a commit with tree-level conflicts.
889 let commit = commit_from_git_without_root_parent(
890 &id,
891 &git_object,
892 uses_tree_conflict_format,
893 is_shallow,
894 )?;
895 mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
896 work_ids.extend(
897 commit
898 .parents
899 .into_iter()
900 .filter(|id| mut_table.get_value(id.as_bytes()).is_none()),
901 );
902 }
903 Ok(())
904}
905
906impl Debug for GitBackend {
907 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
908 f.debug_struct("GitBackend")
909 .field("path", &self.git_repo_path())
910 .finish()
911 }
912}
913
914#[async_trait]
915impl Backend for GitBackend {
/// Exposes this backend as `&dyn Any`.
fn as_any(&self) -> &dyn Any {
    self
}
919
/// Returns the backend name; forwards to the associated `Self::name()`.
fn name(&self) -> &str {
    Self::name()
}
923
/// Length of commit ids in bytes (`HASH_LENGTH`).
fn commit_id_length(&self) -> usize {
    HASH_LENGTH
}
927
/// Length of change ids in bytes (`CHANGE_ID_LENGTH`).
fn change_id_length(&self) -> usize {
    CHANGE_ID_LENGTH
}
931
/// Returns the root commit id stored in this backend.
fn root_commit_id(&self) -> &CommitId {
    &self.root_commit_id
}
935
/// Returns the root change id stored in this backend.
fn root_change_id(&self) -> &ChangeId {
    &self.root_change_id
}
939
/// Returns the id of the empty tree stored in this backend.
fn empty_tree_id(&self) -> &TreeId {
    &self.empty_tree_id
}
943
/// Reports a concurrency level of 1 for this backend.
fn concurrency(&self) -> usize {
    1
}
947
/// Reads the file with the given `id`; `_path` is ignored. The work is done
/// synchronously by `read_file_sync()`.
async fn read_file(&self, _path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>> {
    self.read_file_sync(id)
}
951
952 async fn write_file(
953 &self,
954 _path: &RepoPath,
955 contents: &mut (dyn Read + Send),
956 ) -> BackendResult<FileId> {
957 let mut bytes = Vec::new();
958 contents.read_to_end(&mut bytes).unwrap();
959 let locked_repo = self.lock_git_repo();
960 let oid = locked_repo
961 .write_blob(bytes)
962 .map_err(|err| BackendError::WriteObject {
963 object_type: "file",
964 source: Box::new(err),
965 })?;
966 Ok(FileId::new(oid.as_bytes().to_vec()))
967 }
968
969 async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> {
970 let git_blob_id = validate_git_object_id(id)?;
971 let locked_repo = self.lock_git_repo();
972 let mut blob = locked_repo
973 .find_object(git_blob_id)
974 .map_err(|err| map_not_found_err(err, id))?
975 .try_into_blob()
976 .map_err(|err| to_read_object_err(err, id))?;
977 let target = String::from_utf8(blob.take_data())
978 .map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?;
979 Ok(target)
980 }
981
982 async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> {
983 let locked_repo = self.lock_git_repo();
984 let oid =
985 locked_repo
986 .write_blob(target.as_bytes())
987 .map_err(|err| BackendError::WriteObject {
988 object_type: "symlink",
989 source: Box::new(err),
990 })?;
991 Ok(SymlinkId::new(oid.as_bytes().to_vec()))
992 }
993
994 async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
995 if id == &self.empty_tree_id {
996 return Ok(Tree::default());
997 }
998 let git_tree_id = validate_git_object_id(id)?;
999
1000 let locked_repo = self.lock_git_repo();
1001 let git_tree = locked_repo
1002 .find_object(git_tree_id)
1003 .map_err(|err| map_not_found_err(err, id))?
1004 .try_into_tree()
1005 .map_err(|err| to_read_object_err(err, id))?;
1006 let mut tree = Tree::default();
1007 for entry in git_tree.iter() {
1008 let entry = entry.map_err(|err| to_read_object_err(err, id))?;
1009 let name =
1010 str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?;
1011 let (name, value) = match entry.mode().kind() {
1012 gix::object::tree::EntryKind::Tree => {
1013 let id = TreeId::from_bytes(entry.oid().as_bytes());
1014 (name, TreeValue::Tree(id))
1015 }
1016 gix::object::tree::EntryKind::Blob => {
1017 let id = FileId::from_bytes(entry.oid().as_bytes());
1018 if let Some(basename) = name.strip_suffix(CONFLICT_SUFFIX) {
1019 (
1020 basename,
1021 TreeValue::Conflict(ConflictId::from_bytes(entry.oid().as_bytes())),
1022 )
1023 } else {
1024 (
1025 name,
1026 TreeValue::File {
1027 id,
1028 executable: false,
1029 },
1030 )
1031 }
1032 }
1033 gix::object::tree::EntryKind::BlobExecutable => {
1034 let id = FileId::from_bytes(entry.oid().as_bytes());
1035 (
1036 name,
1037 TreeValue::File {
1038 id,
1039 executable: true,
1040 },
1041 )
1042 }
1043 gix::object::tree::EntryKind::Link => {
1044 let id = SymlinkId::from_bytes(entry.oid().as_bytes());
1045 (name, TreeValue::Symlink(id))
1046 }
1047 gix::object::tree::EntryKind::Commit => {
1048 let id = CommitId::from_bytes(entry.oid().as_bytes());
1049 (name, TreeValue::GitSubmodule(id))
1050 }
1051 };
1052 tree.set(RepoPathComponentBuf::from(name), value);
1053 }
1054 Ok(tree)
1055 }
1056
1057 async fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
1058 // Tree entries to be written must be sorted by Entry::filename(), which
1059 // is slightly different from the order of our backend::Tree.
1060 let entries = contents
1061 .entries()
1062 .map(|entry| {
1063 let name = entry.name().as_internal_str();
1064 match entry.value() {
1065 TreeValue::File {
1066 id,
1067 executable: false,
1068 } => gix::objs::tree::Entry {
1069 mode: gix::object::tree::EntryKind::Blob.into(),
1070 filename: name.into(),
1071 oid: id.as_bytes().try_into().unwrap(),
1072 },
1073 TreeValue::File {
1074 id,
1075 executable: true,
1076 } => gix::objs::tree::Entry {
1077 mode: gix::object::tree::EntryKind::BlobExecutable.into(),
1078 filename: name.into(),
1079 oid: id.as_bytes().try_into().unwrap(),
1080 },
1081 TreeValue::Symlink(id) => gix::objs::tree::Entry {
1082 mode: gix::object::tree::EntryKind::Link.into(),
1083 filename: name.into(),
1084 oid: id.as_bytes().try_into().unwrap(),
1085 },
1086 TreeValue::Tree(id) => gix::objs::tree::Entry {
1087 mode: gix::object::tree::EntryKind::Tree.into(),
1088 filename: name.into(),
1089 oid: id.as_bytes().try_into().unwrap(),
1090 },
1091 TreeValue::GitSubmodule(id) => gix::objs::tree::Entry {
1092 mode: gix::object::tree::EntryKind::Commit.into(),
1093 filename: name.into(),
1094 oid: id.as_bytes().try_into().unwrap(),
1095 },
1096 TreeValue::Conflict(id) => gix::objs::tree::Entry {
1097 mode: gix::object::tree::EntryKind::Blob.into(),
1098 filename: (name.to_owned() + CONFLICT_SUFFIX).into(),
1099 oid: id.as_bytes().try_into().unwrap(),
1100 },
1101 }
1102 })
1103 .sorted_unstable()
1104 .collect();
1105 let locked_repo = self.lock_git_repo();
1106 let oid = locked_repo
1107 .write_object(gix::objs::Tree { entries })
1108 .map_err(|err| BackendError::WriteObject {
1109 object_type: "tree",
1110 source: Box::new(err),
1111 })?;
1112 Ok(TreeId::from_bytes(oid.as_bytes()))
1113 }
1114
1115 fn read_conflict(&self, _path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict> {
1116 let mut file = self.read_file_sync(&FileId::new(id.to_bytes()))?;
1117 let mut data = String::new();
1118 file.read_to_string(&mut data)
1119 .map_err(|err| BackendError::ReadObject {
1120 object_type: "conflict".to_owned(),
1121 hash: id.hex(),
1122 source: err.into(),
1123 })?;
1124 let json: serde_json::Value = serde_json::from_str(&data).unwrap();
1125 Ok(Conflict {
1126 removes: conflict_term_list_from_json(json.get("removes").unwrap()),
1127 adds: conflict_term_list_from_json(json.get("adds").unwrap()),
1128 })
1129 }
1130
1131 fn write_conflict(&self, _path: &RepoPath, conflict: &Conflict) -> BackendResult<ConflictId> {
1132 let json = serde_json::json!({
1133 "removes": conflict_term_list_to_json(&conflict.removes),
1134 "adds": conflict_term_list_to_json(&conflict.adds),
1135 });
1136 let json_string = json.to_string();
1137 let bytes = json_string.as_bytes();
1138 let locked_repo = self.lock_git_repo();
1139 let oid = locked_repo
1140 .write_blob(bytes)
1141 .map_err(|err| BackendError::WriteObject {
1142 object_type: "conflict",
1143 source: Box::new(err),
1144 })?;
1145 Ok(ConflictId::from_bytes(oid.as_bytes()))
1146 }
1147
1148 #[tracing::instrument(skip(self))]
1149 async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
1150 if *id == self.root_commit_id {
1151 return Ok(make_root_commit(
1152 self.root_change_id().clone(),
1153 self.empty_tree_id.clone(),
1154 ));
1155 }
1156 let git_commit_id = validate_git_object_id(id)?;
1157
1158 let mut commit = {
1159 let locked_repo = self.lock_git_repo();
1160 let git_object = locked_repo
1161 .find_object(git_commit_id)
1162 .map_err(|err| map_not_found_err(err, id))?;
1163 let is_shallow = locked_repo
1164 .shallow_commits()
1165 .ok()
1166 .flatten()
1167 .is_some_and(|shallow| shallow.contains(&git_object.id));
1168 commit_from_git_without_root_parent(id, &git_object, false, is_shallow)?
1169 };
1170 if commit.parents.is_empty() {
1171 commit.parents.push(self.root_commit_id.clone());
1172 };
1173
1174 let table = self.cached_extra_metadata_table()?;
1175 if let Some(extras) = table.get_value(id.as_bytes()) {
1176 deserialize_extras(&mut commit, extras);
1177 } else {
1178 // TODO: Remove this hack and map to ObjectNotFound error if we're sure that
1179 // there are no reachable ancestor commits without extras metadata. Git commits
1180 // imported by jj < 0.8.0 might not have extras (#924).
1181 // https://github.com/jj-vcs/jj/issues/2343
1182 tracing::info!("unimported Git commit found");
1183 self.import_head_commits([id])?;
1184 let table = self.cached_extra_metadata_table()?;
1185 let extras = table.get_value(id.as_bytes()).unwrap();
1186 deserialize_extras(&mut commit, extras);
1187 }
1188 Ok(commit)
1189 }
1190
1191 async fn write_commit(
1192 &self,
1193 mut contents: Commit,
1194 mut sign_with: Option<&mut SigningFn>,
1195 ) -> BackendResult<(CommitId, Commit)> {
1196 assert!(contents.secure_sig.is_none(), "commit.secure_sig was set");
1197
1198 let locked_repo = self.lock_git_repo();
1199 let git_tree_id = match &contents.root_tree {
1200 MergedTreeId::Legacy(tree_id) => validate_git_object_id(tree_id)?,
1201 MergedTreeId::Merge(tree_ids) => match tree_ids.as_resolved() {
1202 Some(tree_id) => validate_git_object_id(tree_id)?,
1203 None => write_tree_conflict(&locked_repo, tree_ids)?,
1204 },
1205 };
1206 let author = signature_to_git(&contents.author);
1207 let mut committer = signature_to_git(&contents.committer);
1208 let message = &contents.description;
1209 if contents.parents.is_empty() {
1210 return Err(BackendError::Other(
1211 "Cannot write a commit with no parents".into(),
1212 ));
1213 }
1214 let mut parents = SmallVec::new();
1215 for parent_id in &contents.parents {
1216 if *parent_id == self.root_commit_id {
1217 // Git doesn't have a root commit, so if the parent is the root commit, we don't
1218 // add it to the list of parents to write in the Git commit. We also check that
1219 // there are no other parents since Git cannot represent a merge between a root
1220 // commit and another commit.
1221 if contents.parents.len() > 1 {
1222 return Err(BackendError::Unsupported(
1223 "The Git backend does not support creating merge commits with the root \
1224 commit as one of the parents."
1225 .to_owned(),
1226 ));
1227 }
1228 } else {
1229 parents.push(validate_git_object_id(parent_id)?);
1230 }
1231 }
1232 let mut extra_headers = vec![];
1233 if let MergedTreeId::Merge(tree_ids) = &contents.root_tree {
1234 if !tree_ids.is_resolved() {
1235 let value = tree_ids.iter().map(|id| id.hex()).join(" ").into_bytes();
1236 extra_headers.push((
1237 BString::new(JJ_TREES_COMMIT_HEADER.to_vec()),
1238 BString::new(value),
1239 ));
1240 }
1241 }
1242 let extras = serialize_extras(&contents);
1243
1244 // If two writers write commits of the same id with different metadata, they
1245 // will both succeed and the metadata entries will be "merged" later. Since
1246 // metadata entry is keyed by the commit id, one of the entries would be lost.
1247 // To prevent such race condition locally, we extend the scope covered by the
1248 // table lock. This is still racy if multiple machines are involved and the
1249 // repository is rsync-ed.
1250 let (table, table_lock) = self.read_extra_metadata_table_locked()?;
1251 let id = loop {
1252 let mut commit = gix::objs::Commit {
1253 message: message.to_owned().into(),
1254 tree: git_tree_id,
1255 author: author.into(),
1256 committer: committer.into(),
1257 encoding: None,
1258 parents: parents.clone(),
1259 extra_headers: extra_headers.clone(),
1260 };
1261
1262 if let Some(sign) = &mut sign_with {
1263 // we don't use gix pool, but at least use their heuristic
1264 let mut data = Vec::with_capacity(512);
1265 commit.write_to(&mut data).unwrap();
1266
1267 let sig = sign(&data).map_err(|err| BackendError::WriteObject {
1268 object_type: "commit",
1269 source: Box::new(err),
1270 })?;
1271 commit
1272 .extra_headers
1273 .push(("gpgsig".into(), sig.clone().into()));
1274 contents.secure_sig = Some(SecureSig { data, sig });
1275 }
1276
1277 let git_id =
1278 locked_repo
1279 .write_object(&commit)
1280 .map_err(|err| BackendError::WriteObject {
1281 object_type: "commit",
1282 source: Box::new(err),
1283 })?;
1284
1285 match table.get_value(git_id.as_bytes()) {
1286 Some(existing_extras) if existing_extras != extras => {
1287 // It's possible a commit already exists with the same commit id but different
1288 // change id. Adjust the timestamp until this is no longer the case.
1289 committer.time.seconds -= 1;
1290 }
1291 _ => break CommitId::from_bytes(git_id.as_bytes()),
1292 }
1293 };
1294
1295 // Everything up to this point had no permanent effect on the repo except
1296 // GC-able objects
1297 locked_repo
1298 .edit_reference(to_no_gc_ref_update(&id))
1299 .map_err(|err| BackendError::Other(Box::new(err)))?;
1300
1301 // Update the signature to match the one that was actually written to the object
1302 // store
1303 contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000);
1304 let mut mut_table = table.start_mutation();
1305 mut_table.add_entry(id.to_bytes(), extras);
1306 self.save_extra_metadata_table(mut_table, &table_lock)?;
1307 Ok((id, contents))
1308 }
1309
1310 fn get_copy_records(
1311 &self,
1312 paths: Option<&[RepoPathBuf]>,
1313 root_id: &CommitId,
1314 head_id: &CommitId,
1315 ) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
1316 let repo = self.git_repo();
1317 let root_tree = self.read_tree_for_commit(&repo, root_id)?;
1318 let head_tree = self.read_tree_for_commit(&repo, head_id)?;
1319
1320 let change_to_copy_record =
1321 |change: gix::object::tree::diff::Change| -> BackendResult<Option<CopyRecord>> {
1322 let gix::object::tree::diff::Change::Rewrite {
1323 source_location,
1324 source_id,
1325 location: dest_location,
1326 ..
1327 } = change
1328 else {
1329 return Ok(None);
1330 };
1331
1332 let source = str::from_utf8(source_location)
1333 .map_err(|err| to_invalid_utf8_err(err, root_id))?;
1334 let dest = str::from_utf8(dest_location)
1335 .map_err(|err| to_invalid_utf8_err(err, head_id))?;
1336
1337 let target = RepoPathBuf::from_internal_string(dest);
1338 if !paths.map_or(true, |paths| paths.contains(&target)) {
1339 return Ok(None);
1340 }
1341
1342 Ok(Some(CopyRecord {
1343 target,
1344 target_commit: head_id.clone(),
1345 source: RepoPathBuf::from_internal_string(source),
1346 source_file: FileId::from_bytes(source_id.as_bytes()),
1347 source_commit: root_id.clone(),
1348 }))
1349 };
1350
1351 let mut records: Vec<BackendResult<CopyRecord>> = Vec::new();
1352 root_tree
1353 .changes()
1354 .map_err(|err| BackendError::Other(err.into()))?
1355 .options(|opts| {
1356 opts.track_path().track_rewrites(Some(gix::diff::Rewrites {
1357 copies: Some(gix::diff::rewrites::Copies {
1358 source: gix::diff::rewrites::CopySource::FromSetOfModifiedFiles,
1359 percentage: Some(0.5),
1360 }),
1361 percentage: Some(0.5),
1362 limit: 1000,
1363 track_empty: false,
1364 }));
1365 })
1366 .for_each_to_obtain_tree_with_cache(
1367 &head_tree,
1368 &mut self.new_diff_platform()?,
1369 |change| -> BackendResult<_> {
1370 match change_to_copy_record(change) {
1371 Ok(None) => {}
1372 Ok(Some(change)) => records.push(Ok(change)),
1373 Err(err) => records.push(Err(err)),
1374 }
1375 Ok(gix::object::tree::diff::Action::Continue)
1376 },
1377 )
1378 .map_err(|err| BackendError::Other(err.into()))?;
1379 Ok(Box::pin(futures::stream::iter(records)))
1380 }
1381
1382 #[tracing::instrument(skip(self, index))]
1383 fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {
1384 let git_repo = self.lock_git_repo();
1385 let new_heads = index
1386 .all_heads_for_gc()
1387 .map_err(|err| BackendError::Other(err.into()))?
1388 .filter(|id| *id != self.root_commit_id);
1389 recreate_no_gc_refs(&git_repo, new_heads, keep_newer)?;
1390 // TODO: remove unreachable entries from extras table if segment file
1391 // mtime <= keep_newer? (it won't be consistent with no-gc refs
1392 // preserved by the keep_newer timestamp though)
1393 // TODO: remove unreachable extras table segments
1394 // TODO: pass in keep_newer to "git gc" command
1395 run_git_gc(self.git_repo_path()).map_err(|err| BackendError::Other(err.into()))?;
1396 // Since "git gc" will move loose refs into packed refs, in-memory
1397 // packed-refs cache should be invalidated without relying on mtime.
1398 git_repo.refs.force_refresh_packed_buffer().ok();
1399 Ok(())
1400 }
1401}
1402
1403/// Write a tree conflict as a special tree with `.jjconflict-base-N` and
1404/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd.
1405fn write_tree_conflict(
1406 repo: &gix::Repository,
1407 conflict: &Merge<TreeId>,
1408) -> BackendResult<gix::ObjectId> {
1409 // Tree entries to be written must be sorted by Entry::filename().
1410 let mut entries = itertools::chain(
1411 conflict
1412 .removes()
1413 .enumerate()
1414 .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)),
1415 conflict
1416 .adds()
1417 .enumerate()
1418 .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)),
1419 )
1420 .map(|(name, tree_id)| gix::objs::tree::Entry {
1421 mode: gix::object::tree::EntryKind::Tree.into(),
1422 filename: name.into(),
1423 oid: tree_id.as_bytes().try_into().unwrap(),
1424 })
1425 .collect_vec();
1426 let readme_id = repo
1427 .write_blob(
1428 r#"This commit was made by jj, https://github.com/jj-vcs/jj.
1429The commit contains file conflicts, and therefore looks wrong when used with plain
1430Git or other tools that are unfamiliar with jj.
1431
1432The .jjconflict-* directories represent the different inputs to the conflict.
1433For details, see
1434https://jj-vcs.github.io/jj/prerelease/git-compatibility/#format-mapping-details
1435
1436If you see this file in your working copy, it probably means that you used a
1437regular `git` command to check out a conflicted commit. Use `jj abandon` to
1438recover.
1439"#,
1440 )
1441 .map_err(|err| {
1442 BackendError::Other(format!("Failed to write README for conflict tree: {err}").into())
1443 })?
1444 .detach();
1445 entries.push(gix::objs::tree::Entry {
1446 mode: gix::object::tree::EntryKind::Blob.into(),
1447 filename: "README".into(),
1448 oid: readme_id,
1449 });
1450 entries.sort_unstable();
1451 let id = repo
1452 .write_object(gix::objs::Tree { entries })
1453 .map_err(|err| BackendError::WriteObject {
1454 object_type: "tree",
1455 source: Box::new(err),
1456 })?;
1457 Ok(id.detach())
1458}
1459
1460fn conflict_term_list_to_json(parts: &[ConflictTerm]) -> serde_json::Value {
1461 serde_json::Value::Array(parts.iter().map(conflict_term_to_json).collect())
1462}
1463
1464fn conflict_term_list_from_json(json: &serde_json::Value) -> Vec<ConflictTerm> {
1465 json.as_array()
1466 .unwrap()
1467 .iter()
1468 .map(conflict_term_from_json)
1469 .collect()
1470}
1471
1472fn conflict_term_to_json(part: &ConflictTerm) -> serde_json::Value {
1473 serde_json::json!({
1474 "value": tree_value_to_json(&part.value),
1475 })
1476}
1477
1478fn conflict_term_from_json(json: &serde_json::Value) -> ConflictTerm {
1479 let json_value = json.get("value").unwrap();
1480 ConflictTerm {
1481 value: tree_value_from_json(json_value),
1482 }
1483}
1484
1485fn tree_value_to_json(value: &TreeValue) -> serde_json::Value {
1486 match value {
1487 TreeValue::File { id, executable } => serde_json::json!({
1488 "file": {
1489 "id": id.hex(),
1490 "executable": executable,
1491 },
1492 }),
1493 TreeValue::Symlink(id) => serde_json::json!({
1494 "symlink_id": id.hex(),
1495 }),
1496 TreeValue::Tree(id) => serde_json::json!({
1497 "tree_id": id.hex(),
1498 }),
1499 TreeValue::GitSubmodule(id) => serde_json::json!({
1500 "submodule_id": id.hex(),
1501 }),
1502 TreeValue::Conflict(id) => serde_json::json!({
1503 "conflict_id": id.hex(),
1504 }),
1505 }
1506}
1507
1508fn tree_value_from_json(json: &serde_json::Value) -> TreeValue {
1509 if let Some(json_file) = json.get("file") {
1510 TreeValue::File {
1511 id: FileId::new(bytes_vec_from_json(json_file.get("id").unwrap())),
1512 executable: json_file.get("executable").unwrap().as_bool().unwrap(),
1513 }
1514 } else if let Some(json_id) = json.get("symlink_id") {
1515 TreeValue::Symlink(SymlinkId::new(bytes_vec_from_json(json_id)))
1516 } else if let Some(json_id) = json.get("tree_id") {
1517 TreeValue::Tree(TreeId::new(bytes_vec_from_json(json_id)))
1518 } else if let Some(json_id) = json.get("submodule_id") {
1519 TreeValue::GitSubmodule(CommitId::new(bytes_vec_from_json(json_id)))
1520 } else if let Some(json_id) = json.get("conflict_id") {
1521 TreeValue::Conflict(ConflictId::new(bytes_vec_from_json(json_id)))
1522 } else {
1523 panic!("unexpected json value in conflict: {json:#?}");
1524 }
1525}
1526
1527fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> {
1528 hex::decode(value.as_str().unwrap()).unwrap()
1529}
1530
1531#[cfg(test)]
1532mod tests {
1533 use assert_matches::assert_matches;
1534 use git2::Oid;
1535 use hex::ToHex;
1536 use pollster::FutureExt;
1537 use test_case::test_case;
1538
1539 use super::*;
1540 use crate::config::StackedConfig;
1541 use crate::content_hash::blake2b_hash;
1542
1543 #[test_case(false; "legacy tree format")]
1544 #[test_case(true; "tree-level conflict format")]
1545 fn read_plain_git_commit(uses_tree_conflict_format: bool) {
1546 let settings = user_settings();
1547 let temp_dir = testutils::new_temp_dir();
1548 let store_path = temp_dir.path();
1549 let git_repo_path = temp_dir.path().join("git");
1550 let git_repo = git2::Repository::init(git_repo_path).unwrap();
1551
1552 // Add a commit with some files in
1553 let blob1 = git_repo.blob(b"content1").unwrap();
1554 let blob2 = git_repo.blob(b"normal").unwrap();
1555 let mut dir_tree_builder = git_repo.treebuilder(None).unwrap();
1556 dir_tree_builder.insert("normal", blob1, 0o100644).unwrap();
1557 dir_tree_builder.insert("symlink", blob2, 0o120000).unwrap();
1558 let dir_tree_id = dir_tree_builder.write().unwrap();
1559 let mut root_tree_builder = git_repo.treebuilder(None).unwrap();
1560 root_tree_builder
1561 .insert("dir", dir_tree_id, 0o040000)
1562 .unwrap();
1563 let root_tree_id = root_tree_builder.write().unwrap();
1564 let git_author = git2::Signature::new(
1565 "git author",
1566 "git.author@example.com",
1567 &git2::Time::new(1000, 60),
1568 )
1569 .unwrap();
1570 let git_committer = git2::Signature::new(
1571 "git committer",
1572 "git.committer@example.com",
1573 &git2::Time::new(2000, -480),
1574 )
1575 .unwrap();
1576 let git_tree = git_repo.find_tree(root_tree_id).unwrap();
1577 let git_commit_id = git_repo
1578 .commit(
1579 None,
1580 &git_author,
1581 &git_committer,
1582 "git commit message",
1583 &git_tree,
1584 &[],
1585 )
1586 .unwrap();
1587 let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263");
1588 // The change id is the leading reverse bits of the commit id
1589 let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25");
1590 // Check that the git commit above got the hash we expect
1591 assert_eq!(git_commit_id.as_bytes(), commit_id.as_bytes());
1592
1593 // Add an empty commit on top
1594 let git_commit_id2 = git_repo
1595 .commit(
1596 None,
1597 &git_author,
1598 &git_committer,
1599 "git commit message 2",
1600 &git_tree,
1601 &[&git_repo.find_commit(git_commit_id).unwrap()],
1602 )
1603 .unwrap();
1604 let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes());
1605
1606 let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
1607
1608 // Import the head commit and its ancestors
1609 backend
1610 .import_head_commits_with_tree_conflicts([&commit_id2], uses_tree_conflict_format)
1611 .unwrap();
1612 // Ref should be created only for the head commit
1613 let git_refs = backend
1614 .open_git_repo()
1615 .unwrap()
1616 .references_glob("refs/jj/keep/*")
1617 .unwrap()
1618 .map(|git_ref| git_ref.unwrap().target().unwrap())
1619 .collect_vec();
1620 assert_eq!(git_refs, vec![git_commit_id2]);
1621
1622 let commit = backend.read_commit(&commit_id).block_on().unwrap();
1623 assert_eq!(&commit.change_id, &change_id);
1624 assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
1625 assert_eq!(commit.predecessors, vec![]);
1626 assert_eq!(
1627 commit.root_tree.to_merge(),
1628 Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1629 );
1630 if uses_tree_conflict_format {
1631 assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1632 } else {
1633 assert_matches!(commit.root_tree, MergedTreeId::Legacy(_));
1634 }
1635 assert_eq!(commit.description, "git commit message");
1636 assert_eq!(commit.author.name, "git author");
1637 assert_eq!(commit.author.email, "git.author@example.com");
1638 assert_eq!(
1639 commit.author.timestamp.timestamp,
1640 MillisSinceEpoch(1000 * 1000)
1641 );
1642 assert_eq!(commit.author.timestamp.tz_offset, 60);
1643 assert_eq!(commit.committer.name, "git committer");
1644 assert_eq!(commit.committer.email, "git.committer@example.com");
1645 assert_eq!(
1646 commit.committer.timestamp.timestamp,
1647 MillisSinceEpoch(2000 * 1000)
1648 );
1649 assert_eq!(commit.committer.timestamp.tz_offset, -480);
1650
1651 let root_tree = backend
1652 .read_tree(
1653 RepoPath::root(),
1654 &TreeId::from_bytes(root_tree_id.as_bytes()),
1655 )
1656 .block_on()
1657 .unwrap();
1658 let mut root_entries = root_tree.entries();
1659 let dir = root_entries.next().unwrap();
1660 assert_eq!(root_entries.next(), None);
1661 assert_eq!(dir.name().as_internal_str(), "dir");
1662 assert_eq!(
1663 dir.value(),
1664 &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes()))
1665 );
1666
1667 let dir_tree = backend
1668 .read_tree(
1669 RepoPath::from_internal_string("dir"),
1670 &TreeId::from_bytes(dir_tree_id.as_bytes()),
1671 )
1672 .block_on()
1673 .unwrap();
1674 let mut entries = dir_tree.entries();
1675 let file = entries.next().unwrap();
1676 let symlink = entries.next().unwrap();
1677 assert_eq!(entries.next(), None);
1678 assert_eq!(file.name().as_internal_str(), "normal");
1679 assert_eq!(
1680 file.value(),
1681 &TreeValue::File {
1682 id: FileId::from_bytes(blob1.as_bytes()),
1683 executable: false
1684 }
1685 );
1686 assert_eq!(symlink.name().as_internal_str(), "symlink");
1687 assert_eq!(
1688 symlink.value(),
1689 &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
1690 );
1691
1692 let commit2 = backend.read_commit(&commit_id2).block_on().unwrap();
1693 assert_eq!(commit2.parents, vec![commit_id.clone()]);
1694 assert_eq!(commit.predecessors, vec![]);
1695 assert_eq!(
1696 commit.root_tree.to_merge(),
1697 Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
1698 );
1699 if uses_tree_conflict_format {
1700 assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
1701 } else {
1702 assert_matches!(commit.root_tree, MergedTreeId::Legacy(_));
1703 }
1704 }
1705
/// Reads a Git commit that was never explicitly imported and checks that the
/// read succeeds and leaves an entry in the extra metadata table.
#[test]
fn read_git_commit_without_importing() {
    let settings = user_settings();
    let temp_dir = testutils::new_temp_dir();
    let store_path = temp_dir.path();
    let git_repo = git2::Repository::init(temp_dir.path().join("git")).unwrap();

    let signature = git2::Signature::now("Someone", "someone@example.com").unwrap();
    let empty_tree_id = Oid::from_str("4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
    let empty_tree = git_repo.find_tree(empty_tree_id).unwrap();
    let git_commit_id = git_repo
        .commit(
            Some("refs/heads/main"),
            &signature,
            &signature,
            "git commit message",
            &empty_tree,
            &[],
        )
        .unwrap();

    let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();

    // read_commit() without import_head_commits() works as of now. This might be
    // changed later.
    let commit_id = CommitId::from_bytes(git_commit_id.as_bytes());
    let read_result = backend.read_commit(&commit_id).block_on();
    assert!(read_result.is_ok());

    let table = backend.cached_extra_metadata_table().unwrap();
    assert!(
        table.get_value(git_commit_id.as_bytes()).is_some(),
        "extra metadata should have been be created"
    );
}
1745
/// Creates a signed Git commit with git2 and checks that reading it through
/// the backend yields both the signature and the signed commit data.
#[test]
fn read_signed_git_commit() {
    let settings = user_settings();
    let temp_dir = testutils::new_temp_dir();
    let store_path = temp_dir.path();
    let git_repo = git2::Repository::init(temp_dir.path().join("git")).unwrap();

    let signature = git2::Signature::now("Someone", "someone@example.com").unwrap();
    let empty_tree_id = Oid::from_str("4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
    let empty_tree = git_repo.find_tree(empty_tree_id).unwrap();

    let raw_commit = git_repo
        .commit_create_buffer(
            &signature,
            &signature,
            "git commit message",
            &empty_tree,
            &[],
        )
        .unwrap();

    // libgit2-rs works with &strs here for some reason
    let commit_text = std::str::from_utf8(&raw_commit).unwrap();
    let expected_sig =
        "here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP\n";

    // git2 appears to append newline unconditionally
    let git_commit_id = git_repo
        .commit_signed(commit_text, expected_sig.trim_end_matches('\n'), None)
        .unwrap();

    let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();

    let commit_id = CommitId::from_bytes(git_commit_id.as_bytes());
    let commit = backend.read_commit(&commit_id).block_on().unwrap();

    let sig = commit.secure_sig.expect("failed to read the signature");

    // converting to string for nicer assert diff
    assert_eq!(std::str::from_utf8(&sig.sig).unwrap(), expected_sig);
    assert_eq!(std::str::from_utf8(&sig.data).unwrap(), commit_text);
}
1791
/// Checks that `signature_from_git()` turns `EMPTY_STRING_PLACEHOLDER` in the
/// name or email of a Git signature into an empty string.
#[test]
fn read_empty_string_placeholder() {
    // Placeholder in the name field.
    let placeholder_name = gix::actor::SignatureRef {
        name: EMPTY_STRING_PLACEHOLDER.into(),
        email: "git.author@example.com".into(),
        time: gix::date::Time::new(1000, 60 * 60),
    };
    let author = signature_from_git(placeholder_name);
    assert!(author.name.is_empty());
    assert_eq!(author.email, "git.author@example.com");

    // Placeholder in the email field.
    let placeholder_email = gix::actor::SignatureRef {
        name: "git committer".into(),
        email: EMPTY_STRING_PLACEHOLDER.into(),
        time: gix::date::Time::new(2000, -480 * 60),
    };
    let committer = signature_from_git(placeholder_email);
    assert_eq!(committer.name, "git committer");
    assert!(committer.email.is_empty());
}
1811
/// Checks that `signature_to_git()` writes `EMPTY_STRING_PLACEHOLDER` in place
/// of an empty name or email.
#[test]
fn write_empty_string_placeholder() {
    let epoch = || Timestamp {
        timestamp: MillisSinceEpoch(0),
        tz_offset: 0,
    };

    // Empty name.
    let nameless = Signature {
        name: String::new(),
        email: "someone@example.com".to_string(),
        timestamp: epoch(),
    };
    let git_nameless = signature_to_git(&nameless);
    assert_eq!(git_nameless.name, EMPTY_STRING_PLACEHOLDER);
    assert_eq!(git_nameless.email, "someone@example.com");

    // Empty email.
    let emailless = Signature {
        name: "Someone".to_string(),
        email: String::new(),
        timestamp: epoch(),
    };
    let git_emailless = signature_to_git(&emailless);
    assert_eq!(git_emailless.name, "Someone");
    assert_eq!(git_emailless.email, EMPTY_STRING_PLACEHOLDER);
}
1837
/// Test that parents get written correctly: no parents is rejected, the root
/// commit as sole parent yields a parentless Git commit, regular and merge
/// parents are written as-is, and a merge with the root commit is rejected.
#[test]
fn git_commit_parents() {
    let settings = user_settings();
    let temp_dir = testutils::new_temp_dir();
    let store_path = temp_dir.path();
    let git_repo = git2::Repository::init(temp_dir.path().join("git")).unwrap();

    let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
    let mut commit = Commit {
        parents: vec![],
        predecessors: vec![],
        root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
        change_id: ChangeId::from_hex("abc123"),
        description: "".to_string(),
        author: create_signature(),
        committer: create_signature(),
        secure_sig: None,
    };

    let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
        backend.write_commit(commit, None).block_on()
    };

    // No parents
    commit.parents = vec![];
    assert_matches!(
        write_commit(commit.clone()),
        Err(BackendError::Other(err)) if err.to_string().contains("no parents")
    );

    // Only root commit as parent
    commit.parents = vec![backend.root_commit_id().clone()];
    let (first_id, _) = write_commit(commit.clone()).unwrap();
    let first_commit = backend.read_commit(&first_id).block_on().unwrap();
    assert_eq!(first_commit, commit);
    let first_git_commit = git_repo.find_commit(git_id(&first_id)).unwrap();
    assert_eq!(first_git_commit.parent_ids().collect_vec(), vec![]);

    // Only non-root commit as parent
    commit.parents = vec![first_id.clone()];
    let (second_id, _) = write_commit(commit.clone()).unwrap();
    let second_commit = backend.read_commit(&second_id).block_on().unwrap();
    assert_eq!(second_commit, commit);
    let second_git_commit = git_repo.find_commit(git_id(&second_id)).unwrap();
    assert_eq!(
        second_git_commit.parent_ids().collect_vec(),
        vec![git_id(&first_id)]
    );

    // Merge commit
    commit.parents = vec![first_id.clone(), second_id.clone()];
    let (merge_id, _) = write_commit(commit.clone()).unwrap();
    let merge_commit = backend.read_commit(&merge_id).block_on().unwrap();
    assert_eq!(merge_commit, commit);
    let merge_git_commit = git_repo.find_commit(git_id(&merge_id)).unwrap();
    assert_eq!(
        merge_git_commit.parent_ids().collect_vec(),
        vec![git_id(&first_id), git_id(&second_id)]
    );

    // Merge commit with root as one parent
    commit.parents = vec![first_id, backend.root_commit_id().clone()];
    assert_matches!(
        write_commit(commit),
        Err(BackendError::Unsupported(message)) if message.contains("root commit")
    );
}
1907
/// Checks how the root tree of a commit is laid out on the Git side.
///
/// For a tree-level conflict (two removes, three adds) the Git root tree is
/// expected to hold one subtree per conflict term, named
/// `.jjconflict-base-N` / `.jjconflict-side-N`, followed by a regular
/// `README` file. For a resolved tree the Git commit is expected to point
/// directly at that tree.
#[test]
fn write_tree_conflicts() {
    let settings = user_settings();
    let temp_dir = testutils::new_temp_dir();
    let store_path = temp_dir.path();
    let git_repo_path = temp_dir.path().join("git");
    let git_repo = git2::Repository::init(git_repo_path).unwrap();

    let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
    // Builds a tree with a single file whose name *and* content depend on `i`.
    // The content has to go through `format!`: a `b"..."` byte-string literal
    // is never interpolated, so it would give every tree the same blob.
    let create_tree = |i: u32| {
        let blob_id = git_repo.blob(format!("content {i}").as_bytes()).unwrap();
        let mut tree_builder = git_repo.treebuilder(None).unwrap();
        tree_builder
            .insert(format!("file{i}"), blob_id, 0o100644)
            .unwrap();
        TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
    };

    let root_tree = Merge::from_removes_adds(
        vec![create_tree(0), create_tree(1)],
        vec![create_tree(2), create_tree(3), create_tree(4)],
    );
    let mut commit = Commit {
        parents: vec![backend.root_commit_id().clone()],
        predecessors: vec![],
        root_tree: MergedTreeId::Merge(root_tree.clone()),
        change_id: ChangeId::from_hex("abc123"),
        description: "".to_string(),
        author: create_signature(),
        committer: create_signature(),
        secure_sig: None,
    };

    let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
        backend.write_commit(commit, None).block_on()
    };

    // When writing a tree-level conflict, the root tree on the git side has the
    // individual trees as subtrees.
    let read_commit_id = write_commit(commit.clone()).unwrap().0;
    let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
    assert_eq!(read_commit, commit);
    let git_commit = git_repo
        .find_commit(Oid::from_bytes(read_commit_id.as_bytes()).unwrap())
        .unwrap();
    let git_tree = git_repo.find_tree(git_commit.tree_id()).unwrap();
    // Everything except the README must be a directory entry.
    assert!(git_tree
        .iter()
        .filter(|entry| entry.name() != Some("README"))
        .all(|entry| entry.filemode() == 0o040000));

    // The subtrees appear in this exact order and point at the conflict terms.
    let expected_subtrees = [
        (".jjconflict-base-0", root_tree.get_remove(0).unwrap()),
        (".jjconflict-base-1", root_tree.get_remove(1).unwrap()),
        (".jjconflict-side-0", root_tree.get_add(0).unwrap()),
        (".jjconflict-side-1", root_tree.get_add(1).unwrap()),
        (".jjconflict-side-2", root_tree.get_add(2).unwrap()),
    ];
    let mut iter = git_tree.iter();
    for (expected_name, expected_tree_id) in expected_subtrees {
        let entry = iter.next().unwrap();
        assert_eq!(entry.name(), Some(expected_name));
        assert_eq!(entry.id().as_bytes(), expected_tree_id.as_bytes());
    }
    // The last entry is the README, stored as a regular file.
    let entry = iter.next().unwrap();
    assert_eq!(entry.name(), Some("README"));
    assert_eq!(entry.filemode(), 0o100644);
    assert!(iter.next().is_none());

    // When writing a single tree using the new format, it's represented by a
    // regular git tree.
    commit.root_tree = MergedTreeId::resolved(create_tree(5));
    let read_commit_id = write_commit(commit.clone()).unwrap().0;
    let read_commit = backend.read_commit(&read_commit_id).block_on().unwrap();
    assert_eq!(read_commit, commit);
    let git_commit = git_repo
        .find_commit(Oid::from_bytes(read_commit_id.as_bytes()).unwrap())
        .unwrap();
    assert_eq!(
        MergedTreeId::resolved(TreeId::from_bytes(git_commit.tree_id().as_bytes())),
        commit.root_tree
    );
}
2008
/// Checks that a written commit is pointed to by a ref under
/// `refs/jj/keep/`, and that re-importing the commit as a head recreates
/// such a ref after all of them have been deleted.
#[test]
fn commit_has_ref() {
    let settings = user_settings();
    let temp_dir = testutils::new_temp_dir();
    let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
    let git_repo = backend.open_git_repo().unwrap();
    // Use the module's shared signature helper, like the other tests here,
    // instead of spelling out an identical `Signature` literal.
    let commit = Commit {
        parents: vec![backend.root_commit_id().clone()],
        predecessors: vec![],
        root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
        change_id: ChangeId::new(vec![]),
        description: "initial".to_string(),
        author: create_signature(),
        committer: create_signature(),
        secure_sig: None,
    };
    let commit_id = backend.write_commit(commit, None).block_on().unwrap().0;
    let git_refs: Vec<_> = git_repo
        .references_glob("refs/jj/keep/*")
        .unwrap()
        .try_collect()
        .unwrap();
    assert!(git_refs
        .iter()
        .any(|git_ref| git_ref.target().unwrap() == git_id(&commit_id)));

    // Concurrently-running GC deletes the ref, leaving the extra metadata.
    for mut git_ref in git_refs {
        git_ref.delete().unwrap();
    }
    // Re-imported commit should have new ref.
    backend.import_head_commits([&commit_id]).unwrap();
    let git_refs: Vec<_> = git_repo
        .references_glob("refs/jj/keep/*")
        .unwrap()
        .try_collect()
        .unwrap();
    assert!(git_refs
        .iter()
        .any(|git_ref| git_ref.target().unwrap() == git_id(&commit_id)));
}
2058
/// Checks that `import_head_commits` tolerates the same head id being passed
/// more than once and still leaves a `refs/jj/keep/*` ref on the commit.
#[test]
fn import_head_commits_duplicates() {
    let settings = user_settings();
    let temp_dir = testutils::new_temp_dir();
    let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
    let git_repo = backend.open_git_repo().unwrap();

    // Create a parentless commit on `main` directly through git2, using the
    // empty tree.
    let who = git2::Signature::now("Someone", "someone@example.com").unwrap();
    let empty_tree = git_repo
        .find_tree(Oid::from_str("4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap())
        .unwrap();
    let git_oid = git_repo
        .commit(
            Some("refs/heads/main"),
            &who,
            &who,
            "git commit message",
            &empty_tree,
            &[],
        )
        .unwrap();
    let head_id = CommitId::from_bytes(git_oid.as_bytes());

    // Ref creation shouldn't fail because of duplicated head ids.
    backend.import_head_commits([&head_id, &head_id]).unwrap();

    let keep_refs: Vec<_> = git_repo
        .references_glob("refs/jj/keep/*")
        .unwrap()
        .try_collect()
        .unwrap();
    let is_kept = keep_refs
        .iter()
        .any(|keep_ref| keep_ref.target().unwrap() == git_oid);
    assert!(is_kept);
}
2094
/// Checks that writing a commit whose content would collide with an existing
/// commit's id produces a different id, and that the only field that differs
/// from what was requested is the committer timestamp.
#[test]
fn overlapping_git_commit_id() {
    let settings = user_settings();
    let temp_dir = testutils::new_temp_dir();
    let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();

    // libgit2 doesn't seem to preserve negative timestamps, so start at least
    // 1 second after the epoch: the timestamp adjustment can then remove 1
    // second and the result is still nonnegative.
    let mut committer = create_signature();
    committer.timestamp.timestamp = MillisSinceEpoch(1000);
    let original = Commit {
        parents: vec![backend.root_commit_id().clone()],
        predecessors: vec![],
        root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
        change_id: ChangeId::new(vec![]),
        description: "initial".to_string(),
        author: create_signature(),
        committer,
        secure_sig: None,
    };

    let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
        backend.write_commit(commit, None).block_on()
    };

    let (original_id, mut requested) = write_commit(original).unwrap();
    requested.predecessors.push(original_id.clone());
    // `write_commit` should prevent the ids from being the same by changing the
    // committer timestamp of the commit it actually writes.
    let (rewritten_id, mut written) = write_commit(requested.clone()).unwrap();

    // The returned commit is what is stored under the returned id.
    let stored = backend.read_commit(&rewritten_id).block_on().unwrap();
    assert_eq!(stored, written);
    assert_ne!(rewritten_id, original_id);

    // The committer timestamp should differ from the requested one...
    let requested_time = requested.committer.timestamp.timestamp;
    assert_ne!(written.committer.timestamp.timestamp, requested_time);
    // ...and once that is put back, the rest of the commit should be the same.
    written.committer.timestamp.timestamp = requested_time;
    assert_eq!(written, requested);
}
2139
/// Checks that a signature produced by the signing callback is embedded in
/// the Git commit object as a `gpgsig` header, returned from `write_commit`,
/// and read back unchanged together with the signed payload.
#[test]
fn write_signed_commit() {
    let settings = user_settings();
    let temp_dir = testutils::new_temp_dir();
    let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();

    let unsigned = Commit {
        parents: vec![backend.root_commit_id().clone()],
        predecessors: vec![],
        root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
        change_id: ChangeId::new(vec![]),
        description: "initial".to_string(),
        author: create_signature(),
        committer: create_signature(),
        secure_sig: None,
    };

    // Stand-in signer: the "signature" is a multi-line text that embeds the
    // hex-encoded blake2b hash of the data it was asked to sign.
    let mut signer = |data: &_| {
        let hash = blake2b_hash(data).encode_hex::<String>();
        Ok(format!("test sig\n\n\nhash={hash}\n").into_bytes())
    };

    let (id, written) = backend
        .write_commit(unsigned, Some(&mut signer as &mut SigningFn))
        .block_on()
        .unwrap();

    // Inspect the raw commit object as stored in the Git repository.
    let git_repo = backend.git_repo();
    let object_id = gix::ObjectId::try_from(id.as_bytes()).unwrap();
    let obj = git_repo.find_object(object_id).unwrap();
    insta::assert_snapshot!(std::str::from_utf8(&obj.data).unwrap(), @r###"
    tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
    author Someone <someone@example.com> 0 +0000
    committer Someone <someone@example.com> 0 +0000
    gpgsig test sig
     
     
     hash=9ad9526c3b2103c41a229f2f3c82d107a0ecd902f476a855f0e1dd5f7bef1430663de12749b73e293a877113895a8a2a0f29da4bbc5a5f9a19c3523fb0e53518

    initial
    "###);

    let returned_sig = written.secure_sig.expect("failed to return the signature");

    // Reading the commit back must yield the same signature as was returned.
    let reread = backend.read_commit(&id).block_on().unwrap();
    let sig = reread.secure_sig.expect("failed to read the signature");
    assert_eq!(&sig, &returned_sig);

    insta::assert_snapshot!(std::str::from_utf8(&sig.sig).unwrap(), @r###"
    test sig


    hash=9ad9526c3b2103c41a229f2f3c82d107a0ecd902f476a855f0e1dd5f7bef1430663de12749b73e293a877113895a8a2a0f29da4bbc5a5f9a19c3523fb0e53518
    "###);
    // The signed payload is the commit object without the `gpgsig` header.
    insta::assert_snapshot!(std::str::from_utf8(&sig.data).unwrap(), @r###"
    tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
    author Someone <someone@example.com> 0 +0000
    committer Someone <someone@example.com> 0 +0000

    initial
    "###);
}
2204
2205 fn git_id(commit_id: &CommitId) -> Oid {
2206 Oid::from_bytes(commit_id.as_bytes()).unwrap()
2207 }
2208
2209 fn create_signature() -> Signature {
2210 Signature {
2211 name: "Someone".to_string(),
2212 email: "someone@example.com".to_string(),
2213 timestamp: Timestamp {
2214 timestamp: MillisSinceEpoch(0),
2215 tz_offset: 0,
2216 },
2217 }
2218 }
2219
2220 // Not using testutils::user_settings() because there is a dependency cycle
2221 // 'jj_lib (1) -> testutils -> jj_lib (2)' which creates another distinct
2222 // UserSettings type. testutils returns jj_lib (2)'s UserSettings, whereas
2223 // our UserSettings type comes from jj_lib (1).
2224 fn user_settings() -> UserSettings {
2225 let config = StackedConfig::with_defaults();
2226 UserSettings::from_config(config).unwrap()
2227 }
2228}