just playing with tangled
at globpattern 599 lines 21 kB view raw
1// Copyright 2023 The Jujutsu Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15#![allow(missing_docs)] 16 17use std::any::Any; 18use std::cmp::max; 19use std::collections::BTreeMap; 20use std::collections::HashMap; 21use std::io; 22use std::io::Write as _; 23use std::ops::Bound; 24use std::path::Path; 25use std::sync::Arc; 26 27use blake2::Blake2b512; 28use digest::Digest as _; 29use itertools::Itertools as _; 30use smallvec::smallvec; 31use smallvec::SmallVec; 32use tempfile::NamedTempFile; 33 34use super::composite::AsCompositeIndex; 35use super::composite::ChangeIdIndexImpl; 36use super::composite::CompositeIndex; 37use super::composite::DynIndexSegment; 38use super::composite::IndexSegment; 39use super::entry::IndexPosition; 40use super::entry::LocalPosition; 41use super::entry::SmallIndexPositionsVec; 42use super::entry::SmallLocalPositionsVec; 43use super::readonly::DefaultReadonlyIndex; 44use super::readonly::ReadonlyIndexSegment; 45use super::readonly::INDEX_SEGMENT_FILE_FORMAT_VERSION; 46use super::readonly::OVERFLOW_FLAG; 47use crate::backend::ChangeId; 48use crate::backend::CommitId; 49use crate::commit::Commit; 50use crate::file_util::persist_content_addressed_temp_file; 51use crate::index::AllHeadsForGcUnsupported; 52use crate::index::ChangeIdIndex; 53use crate::index::Index; 54use crate::index::IndexError; 55use crate::index::MutableIndex; 56use crate::index::ReadonlyIndex; 57use crate::object_id::HexPrefix; 58use crate::object_id::ObjectId; 59use crate::object_id::PrefixResolution; 60use crate::revset::ResolvedExpression; 61use crate::revset::Revset; 62use crate::revset::RevsetEvaluationError; 63use crate::store::Store; 64 65#[derive(Debug)] 66struct MutableGraphEntry { 67 commit_id: CommitId, 68 change_id: ChangeId, 69 generation_number: u32, 70 parent_positions: SmallIndexPositionsVec, 71} 72 73pub(super) struct MutableIndexSegment { 74 parent_file: Option<Arc<ReadonlyIndexSegment>>, 75 num_parent_commits: u32, 76 commit_id_length: usize, 77 change_id_length: usize, 78 graph: Vec<MutableGraphEntry>, 79 commit_lookup: BTreeMap<CommitId, LocalPosition>, 80 change_lookup: BTreeMap<ChangeId, SmallLocalPositionsVec>, 81} 82 83impl MutableIndexSegment { 84 pub(super) fn full(commit_id_length: usize, change_id_length: usize) -> Self { 85 Self { 86 parent_file: None, 87 num_parent_commits: 0, 88 commit_id_length, 89 change_id_length, 90 graph: vec![], 91 commit_lookup: BTreeMap::new(), 92 change_lookup: BTreeMap::new(), 93 } 94 } 95 96 pub(super) fn incremental(parent_file: Arc<ReadonlyIndexSegment>) -> Self { 97 let num_parent_commits = parent_file.as_composite().num_commits(); 98 let commit_id_length = parent_file.commit_id_length(); 99 let change_id_length = parent_file.change_id_length(); 100 Self { 101 parent_file: Some(parent_file), 102 num_parent_commits, 103 commit_id_length, 104 change_id_length, 105 graph: vec![], 106 commit_lookup: BTreeMap::new(), 107 change_lookup: BTreeMap::new(), 108 } 109 } 110 111 pub(super) fn as_composite(&self) -> &CompositeIndex { 112 CompositeIndex::new(self) 113 } 114 115 pub(super) fn add_commit(&mut self, commit: &Commit) { 116 self.add_commit_data( 117 commit.id().clone(), 118 commit.change_id().clone(), 119 commit.parent_ids(), 120 ); 121 } 122 123 pub(super) fn add_commit_data( 124 &mut self, 125 commit_id: CommitId, 126 change_id: ChangeId, 127 parent_ids: &[CommitId], 128 ) { 129 if self.as_composite().has_id(&commit_id) { 130 return; 131 } 132 let mut entry = MutableGraphEntry { 133 commit_id, 134 change_id, 135 generation_number: 0, 136 parent_positions: SmallVec::new(), 137 }; 138 for parent_id in parent_ids { 139 let parent_entry = self 140 .as_composite() 141 .entry_by_id(parent_id) 142 .expect("parent commit is not indexed"); 143 entry.generation_number = max( 144 entry.generation_number, 145 parent_entry.generation_number() + 1, 146 ); 147 entry.parent_positions.push(parent_entry.position()); 148 } 149 let local_pos = LocalPosition(u32::try_from(self.graph.len()).unwrap()); 150 self.commit_lookup 151 .insert(entry.commit_id.clone(), local_pos); 152 self.change_lookup 153 .entry(entry.change_id.clone()) 154 // positions are inherently sorted 155 .and_modify(|positions| positions.push(local_pos)) 156 .or_insert(smallvec![local_pos]); 157 self.graph.push(entry); 158 } 159 160 pub(super) fn add_commits_from(&mut self, other_segment: &DynIndexSegment) { 161 let other = CompositeIndex::new(other_segment); 162 for pos in other_segment.num_parent_commits()..other.num_commits() { 163 let entry = other.entry_by_pos(IndexPosition(pos)); 164 let parent_ids = entry.parents().map(|entry| entry.commit_id()).collect_vec(); 165 self.add_commit_data(entry.commit_id(), entry.change_id(), &parent_ids); 166 } 167 } 168 169 pub(super) fn merge_in(&mut self, other: Arc<ReadonlyIndexSegment>) { 170 let mut maybe_own_ancestor = self.parent_file.clone(); 171 let mut maybe_other_ancestor = Some(other); 172 let mut files_to_add = vec![]; 173 loop { 174 if maybe_other_ancestor.is_none() { 175 break; 176 } 177 let other_ancestor = maybe_other_ancestor.as_ref().unwrap(); 178 if maybe_own_ancestor.is_none() { 179 files_to_add.push(other_ancestor.clone()); 180 maybe_other_ancestor = other_ancestor.parent_file().cloned(); 181 continue; 182 } 183 let own_ancestor = maybe_own_ancestor.as_ref().unwrap(); 184 if own_ancestor.name() == other_ancestor.name() { 185 break; 186 } 187 if own_ancestor.as_composite().num_commits() 188 < other_ancestor.as_composite().num_commits() 189 { 190 files_to_add.push(other_ancestor.clone()); 191 maybe_other_ancestor = other_ancestor.parent_file().cloned(); 192 } else { 193 maybe_own_ancestor = own_ancestor.parent_file().cloned(); 194 } 195 } 196 197 for file in files_to_add.iter().rev() { 198 self.add_commits_from(file.as_ref()); 199 } 200 } 201 202 fn serialize_parent_filename(&self, buf: &mut Vec<u8>) { 203 if let Some(parent_file) = &self.parent_file { 204 buf.extend( 205 u32::try_from(parent_file.name().len()) 206 .unwrap() 207 .to_le_bytes(), 208 ); 209 buf.extend_from_slice(parent_file.name().as_bytes()); 210 } else { 211 buf.extend(0_u32.to_le_bytes()); 212 } 213 } 214 215 fn serialize_local_entries(&self, buf: &mut Vec<u8>) { 216 assert_eq!(self.graph.len(), self.commit_lookup.len()); 217 debug_assert_eq!( 218 self.graph.len(), 219 self.change_lookup.values().flatten().count() 220 ); 221 222 let num_commits = u32::try_from(self.graph.len()).unwrap(); 223 buf.extend(num_commits.to_le_bytes()); 224 let num_change_ids = u32::try_from(self.change_lookup.len()).unwrap(); 225 buf.extend(num_change_ids.to_le_bytes()); 226 // We'll write the actual values later 227 let parent_overflow_offset = buf.len(); 228 buf.extend(0_u32.to_le_bytes()); 229 let change_overflow_offset = buf.len(); 230 buf.extend(0_u32.to_le_bytes()); 231 232 // Positions of change ids in the sorted table 233 let change_id_pos_map: HashMap<&ChangeId, u32> = self 234 .change_lookup 235 .keys() 236 .enumerate() 237 .map(|(i, change_id)| (change_id, u32::try_from(i).unwrap())) 238 .collect(); 239 240 let mut parent_overflow = vec![]; 241 for entry in &self.graph { 242 buf.extend(entry.generation_number.to_le_bytes()); 243 244 match entry.parent_positions.as_slice() { 245 [] => { 246 buf.extend((!0_u32).to_le_bytes()); 247 buf.extend((!0_u32).to_le_bytes()); 248 } 249 [IndexPosition(pos1)] => { 250 assert!(*pos1 < OVERFLOW_FLAG); 251 buf.extend(pos1.to_le_bytes()); 252 buf.extend((!0_u32).to_le_bytes()); 253 } 254 [IndexPosition(pos1), IndexPosition(pos2)] => { 255 assert!(*pos1 < OVERFLOW_FLAG); 256 assert!(*pos2 < OVERFLOW_FLAG); 257 buf.extend(pos1.to_le_bytes()); 258 buf.extend(pos2.to_le_bytes()); 259 } 260 positions => { 261 let overflow_pos = u32::try_from(parent_overflow.len()).unwrap(); 262 let num_parents = u32::try_from(positions.len()).unwrap(); 263 assert!(overflow_pos < OVERFLOW_FLAG); 264 assert!(num_parents < OVERFLOW_FLAG); 265 buf.extend((!overflow_pos).to_le_bytes()); 266 buf.extend((!num_parents).to_le_bytes()); 267 parent_overflow.extend_from_slice(positions); 268 } 269 } 270 271 buf.extend(change_id_pos_map[&entry.change_id].to_le_bytes()); 272 273 assert_eq!(entry.commit_id.as_bytes().len(), self.commit_id_length); 274 buf.extend_from_slice(entry.commit_id.as_bytes()); 275 } 276 277 for LocalPosition(pos) in self.commit_lookup.values() { 278 buf.extend(pos.to_le_bytes()); 279 } 280 281 for change_id in self.change_lookup.keys() { 282 assert_eq!(change_id.as_bytes().len(), self.change_id_length); 283 buf.extend_from_slice(change_id.as_bytes()); 284 } 285 286 let mut change_overflow = vec![]; 287 for positions in self.change_lookup.values() { 288 match positions.as_slice() { 289 [] => panic!("change id lookup entry must not be empty"), 290 // Optimize for imported commits 291 [LocalPosition(pos1)] => { 292 assert!(*pos1 < OVERFLOW_FLAG); 293 buf.extend(pos1.to_le_bytes()); 294 } 295 positions => { 296 let overflow_pos = u32::try_from(change_overflow.len()).unwrap(); 297 assert!(overflow_pos < OVERFLOW_FLAG); 298 buf.extend((!overflow_pos).to_le_bytes()); 299 change_overflow.extend_from_slice(positions); 300 } 301 } 302 } 303 304 let num_parent_overflow = u32::try_from(parent_overflow.len()).unwrap(); 305 buf[parent_overflow_offset..][..4].copy_from_slice(&num_parent_overflow.to_le_bytes()); 306 for IndexPosition(pos) in parent_overflow { 307 buf.extend(pos.to_le_bytes()); 308 } 309 310 let num_change_overflow = u32::try_from(change_overflow.len()).unwrap(); 311 buf[change_overflow_offset..][..4].copy_from_slice(&num_change_overflow.to_le_bytes()); 312 for LocalPosition(pos) in change_overflow { 313 buf.extend(pos.to_le_bytes()); 314 } 315 } 316 317 /// If the MutableIndex has more than half the commits of its parent 318 /// ReadonlyIndex, return MutableIndex with the commits from both. This 319 /// is done recursively, so the stack of index files has O(log n) files. 320 fn maybe_squash_with_ancestors(self) -> MutableIndexSegment { 321 let mut num_new_commits = self.num_local_commits(); 322 let mut files_to_squash = vec![]; 323 let mut base_parent_file = None; 324 for parent_file in self.as_composite().ancestor_files_without_local() { 325 // TODO: We should probably also squash if the parent file has less than N 326 // commits, regardless of how many (few) are in `self`. 327 if 2 * num_new_commits < parent_file.num_local_commits() { 328 base_parent_file = Some(parent_file.clone()); 329 break; 330 } 331 num_new_commits += parent_file.num_local_commits(); 332 files_to_squash.push(parent_file.clone()); 333 } 334 335 if files_to_squash.is_empty() { 336 return self; 337 } 338 339 let mut squashed = if let Some(parent_file) = base_parent_file { 340 MutableIndexSegment::incremental(parent_file) 341 } else { 342 MutableIndexSegment::full(self.commit_id_length, self.change_id_length) 343 }; 344 for parent_file in files_to_squash.iter().rev() { 345 squashed.add_commits_from(parent_file.as_ref()); 346 } 347 squashed.add_commits_from(&self); 348 squashed 349 } 350 351 pub(super) fn save_in(self, dir: &Path) -> io::Result<Arc<ReadonlyIndexSegment>> { 352 if self.num_local_commits() == 0 && self.parent_file.is_some() { 353 return Ok(self.parent_file.unwrap()); 354 } 355 356 let mut buf = Vec::new(); 357 buf.extend(INDEX_SEGMENT_FILE_FORMAT_VERSION.to_le_bytes()); 358 self.serialize_parent_filename(&mut buf); 359 let local_entries_offset = buf.len(); 360 self.serialize_local_entries(&mut buf); 361 let mut hasher = Blake2b512::new(); 362 hasher.update(&buf); 363 let index_file_id_hex = hex::encode(hasher.finalize()); 364 let index_file_path = dir.join(&index_file_id_hex); 365 366 let mut temp_file = NamedTempFile::new_in(dir)?; 367 let file = temp_file.as_file_mut(); 368 file.write_all(&buf)?; 369 persist_content_addressed_temp_file(temp_file, index_file_path)?; 370 371 Ok(ReadonlyIndexSegment::load_with_parent_file( 372 &mut &buf[local_entries_offset..], 373 index_file_id_hex, 374 self.parent_file, 375 self.commit_id_length, 376 self.change_id_length, 377 ) 378 .expect("in-memory index data should be valid and readable")) 379 } 380} 381 382impl IndexSegment for MutableIndexSegment { 383 fn num_parent_commits(&self) -> u32 { 384 self.num_parent_commits 385 } 386 387 fn num_local_commits(&self) -> u32 { 388 self.graph.len().try_into().unwrap() 389 } 390 391 fn parent_file(&self) -> Option<&Arc<ReadonlyIndexSegment>> { 392 self.parent_file.as_ref() 393 } 394 395 fn name(&self) -> Option<String> { 396 None 397 } 398 399 fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option<LocalPosition> { 400 self.commit_lookup.get(commit_id).copied() 401 } 402 403 fn resolve_neighbor_commit_ids( 404 &self, 405 commit_id: &CommitId, 406 ) -> (Option<CommitId>, Option<CommitId>) { 407 let (prev_id, next_id) = resolve_neighbor_ids(&self.commit_lookup, commit_id); 408 (prev_id.cloned(), next_id.cloned()) 409 } 410 411 fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> { 412 let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes()); 413 resolve_id_prefix(&self.commit_lookup, prefix, &min_bytes_prefix).map(|(id, _)| id.clone()) 414 } 415 416 fn resolve_neighbor_change_ids( 417 &self, 418 change_id: &ChangeId, 419 ) -> (Option<ChangeId>, Option<ChangeId>) { 420 let (prev_id, next_id) = resolve_neighbor_ids(&self.change_lookup, change_id); 421 (prev_id.cloned(), next_id.cloned()) 422 } 423 424 fn resolve_change_id_prefix( 425 &self, 426 prefix: &HexPrefix, 427 ) -> PrefixResolution<(ChangeId, SmallLocalPositionsVec)> { 428 let min_bytes_prefix = ChangeId::from_bytes(prefix.min_prefix_bytes()); 429 resolve_id_prefix(&self.change_lookup, prefix, &min_bytes_prefix) 430 .map(|(id, positions)| (id.clone(), positions.clone())) 431 } 432 433 fn generation_number(&self, local_pos: LocalPosition) -> u32 { 434 self.graph[local_pos.0 as usize].generation_number 435 } 436 437 fn commit_id(&self, local_pos: LocalPosition) -> CommitId { 438 self.graph[local_pos.0 as usize].commit_id.clone() 439 } 440 441 fn change_id(&self, local_pos: LocalPosition) -> ChangeId { 442 self.graph[local_pos.0 as usize].change_id.clone() 443 } 444 445 fn num_parents(&self, local_pos: LocalPosition) -> u32 { 446 self.graph[local_pos.0 as usize] 447 .parent_positions 448 .len() 449 .try_into() 450 .unwrap() 451 } 452 453 fn parent_positions(&self, local_pos: LocalPosition) -> SmallIndexPositionsVec { 454 self.graph[local_pos.0 as usize].parent_positions.clone() 455 } 456} 457 458/// In-memory mutable records for the on-disk commit index backend. 459pub struct DefaultMutableIndex(MutableIndexSegment); 460 461impl DefaultMutableIndex { 462 pub(crate) fn full(commit_id_length: usize, change_id_length: usize) -> Self { 463 let mutable_segment = MutableIndexSegment::full(commit_id_length, change_id_length); 464 DefaultMutableIndex(mutable_segment) 465 } 466 467 pub(super) fn incremental(parent_file: Arc<ReadonlyIndexSegment>) -> Self { 468 let mutable_segment = MutableIndexSegment::incremental(parent_file); 469 DefaultMutableIndex(mutable_segment) 470 } 471 472 #[cfg(test)] 473 pub(crate) fn add_commit_data( 474 &mut self, 475 commit_id: CommitId, 476 change_id: ChangeId, 477 parent_ids: &[CommitId], 478 ) { 479 self.0.add_commit_data(commit_id, change_id, parent_ids); 480 } 481 482 pub(super) fn squash_and_save_in(self, dir: &Path) -> io::Result<Arc<ReadonlyIndexSegment>> { 483 self.0.maybe_squash_with_ancestors().save_in(dir) 484 } 485} 486 487impl AsCompositeIndex for DefaultMutableIndex { 488 fn as_composite(&self) -> &CompositeIndex { 489 self.0.as_composite() 490 } 491} 492 493impl Index for DefaultMutableIndex { 494 fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> usize { 495 self.as_composite() 496 .shortest_unique_commit_id_prefix_len(commit_id) 497 } 498 499 fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> { 500 self.as_composite().resolve_commit_id_prefix(prefix) 501 } 502 503 fn has_id(&self, commit_id: &CommitId) -> bool { 504 self.as_composite().has_id(commit_id) 505 } 506 507 fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> bool { 508 self.as_composite().is_ancestor(ancestor_id, descendant_id) 509 } 510 511 fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec<CommitId> { 512 self.as_composite().common_ancestors(set1, set2) 513 } 514 515 fn all_heads_for_gc( 516 &self, 517 ) -> Result<Box<dyn Iterator<Item = CommitId> + '_>, AllHeadsForGcUnsupported> { 518 Ok(Box::new(self.as_composite().all_heads())) 519 } 520 521 fn heads( 522 &self, 523 candidates: &mut dyn Iterator<Item = &CommitId>, 524 ) -> Result<Vec<CommitId>, IndexError> { 525 self.as_composite().heads(candidates) 526 } 527 528 fn evaluate_revset<'index>( 529 &'index self, 530 expression: &ResolvedExpression, 531 store: &Arc<Store>, 532 ) -> Result<Box<dyn Revset + 'index>, RevsetEvaluationError> { 533 self.as_composite().evaluate_revset(expression, store) 534 } 535} 536 537impl MutableIndex for DefaultMutableIndex { 538 fn as_any(&self) -> &dyn Any { 539 self 540 } 541 542 fn into_any(self: Box<Self>) -> Box<dyn Any> { 543 Box::new(*self) 544 } 545 546 fn as_index(&self) -> &dyn Index { 547 self 548 } 549 550 fn change_id_index( 551 &self, 552 heads: &mut dyn Iterator<Item = &CommitId>, 553 ) -> Box<dyn ChangeIdIndex + '_> { 554 Box::new(ChangeIdIndexImpl::new(self, heads)) 555 } 556 557 fn add_commit(&mut self, commit: &Commit) { 558 self.0.add_commit(commit); 559 } 560 561 fn merge_in(&mut self, other: &dyn ReadonlyIndex) { 562 let other = other 563 .as_any() 564 .downcast_ref::<DefaultReadonlyIndex>() 565 .expect("index to merge in must be a DefaultReadonlyIndex"); 566 self.0.merge_in(other.as_segment().clone()); 567 } 568} 569 570fn resolve_neighbor_ids<'a, K: Ord, V>( 571 lookup_table: &'a BTreeMap<K, V>, 572 id: &K, 573) -> (Option<&'a K>, Option<&'a K>) { 574 let prev_id = lookup_table 575 .range((Bound::Unbounded, Bound::Excluded(id))) 576 .next_back() 577 .map(|(id, _)| id); 578 let next_id = lookup_table 579 .range((Bound::Excluded(id), Bound::Unbounded)) 580 .next() 581 .map(|(id, _)| id); 582 (prev_id, next_id) 583} 584 585fn resolve_id_prefix<'a, K: ObjectId + Ord, V>( 586 lookup_table: &'a BTreeMap<K, V>, 587 prefix: &HexPrefix, 588 min_bytes_prefix: &K, 589) -> PrefixResolution<(&'a K, &'a V)> { 590 let mut matches = lookup_table 591 .range((Bound::Included(min_bytes_prefix), Bound::Unbounded)) 592 .take_while(|&(id, _)| prefix.matches(id)) 593 .fuse(); 594 match (matches.next(), matches.next()) { 595 (Some(entry), None) => PrefixResolution::SingleMatch(entry), 596 (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch, 597 (None, _) => PrefixResolution::NoMatch, 598 } 599}