just playing with tangled
at globpattern 464 lines 17 kB view raw
1// Copyright 2024 The Jujutsu Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15//! Methods that allow annotation (attribution and blame) for a file in a 16//! repository. 17//! 18//! TODO: Add support for different blame layers with a trait in the future. 19//! Like commit metadata and more. 20 21use std::collections::hash_map; 22use std::collections::HashMap; 23use std::iter; 24use std::ops::Range; 25use std::rc::Rc; 26 27use bstr::BStr; 28use bstr::BString; 29use itertools::Itertools as _; 30use pollster::FutureExt as _; 31 32use crate::backend::BackendError; 33use crate::backend::CommitId; 34use crate::commit::Commit; 35use crate::conflicts::materialize_merge_result_to_bytes; 36use crate::conflicts::materialize_tree_value; 37use crate::conflicts::ConflictMarkerStyle; 38use crate::conflicts::MaterializedTreeValue; 39use crate::diff::Diff; 40use crate::diff::DiffHunkKind; 41use crate::fileset::FilesetExpression; 42use crate::graph::GraphEdge; 43use crate::graph::GraphEdgeType; 44use crate::merged_tree::MergedTree; 45use crate::repo::Repo; 46use crate::repo_path::RepoPath; 47use crate::revset::ResolvedRevsetExpression; 48use crate::revset::RevsetEvaluationError; 49use crate::revset::RevsetExpression; 50use crate::revset::RevsetFilterPredicate; 51use crate::store::Store; 52 53/// Annotation results for a specific file 54#[derive(Clone, Debug)] 55pub struct FileAnnotation { 56 line_map: OriginalLineMap, 57 text: BString, 58} 59 60impl FileAnnotation { 61 /// Returns iterator over `(commit_id, line)`s. 62 /// 63 /// For each line, `Ok(commit_id)` points to the originator commit of the 64 /// line. If no originator commit was found within the domain, 65 /// `Err(commit_id)` should be set. It points to the root (or boundary) 66 /// commit where the line exists. 67 /// 68 /// The `line` includes newline character. 69 pub fn lines(&self) -> impl Iterator<Item = (Result<&CommitId, &CommitId>, &BStr)> { 70 itertools::zip_eq(&self.line_map, self.text.split_inclusive(|b| *b == b'\n')) 71 .map(|(commit_id, line)| (commit_id.as_ref(), line.as_ref())) 72 } 73 74 /// Returns iterator over `(commit_id, line_range)`s. 75 /// 76 /// See [`Self::lines()`] for `commit_id`s. 77 /// 78 /// The `line_range` is a slice range in the file `text`. Consecutive ranges 79 /// having the same `commit_id` are not compacted. 80 pub fn line_ranges( 81 &self, 82 ) -> impl Iterator<Item = (Result<&CommitId, &CommitId>, Range<usize>)> { 83 let ranges = self 84 .text 85 .split_inclusive(|b| *b == b'\n') 86 .scan(0, |total, line| { 87 let start = *total; 88 *total += line.len(); 89 Some(start..*total) 90 }); 91 itertools::zip_eq(&self.line_map, ranges) 92 .map(|(commit_id, range)| (commit_id.as_ref(), range)) 93 } 94 95 /// Returns iterator over compacted `(commit_id, line_range)`s. 96 /// 97 /// Consecutive ranges having the same `commit_id` are merged into one. 98 pub fn compact_line_ranges( 99 &self, 100 ) -> impl Iterator<Item = (Result<&CommitId, &CommitId>, Range<usize>)> { 101 let mut ranges = self.line_ranges(); 102 let mut acc = ranges.next(); 103 iter::from_fn(move || { 104 let (acc_commit_id, acc_range) = acc.as_mut()?; 105 for (cur_commit_id, cur_range) in ranges.by_ref() { 106 if *acc_commit_id == cur_commit_id { 107 acc_range.end = cur_range.end; 108 } else { 109 return acc.replace((cur_commit_id, cur_range)); 110 } 111 } 112 acc.take() 113 }) 114 } 115 116 /// File content at the starting commit. 117 pub fn text(&self) -> &BStr { 118 self.text.as_ref() 119 } 120} 121 122/// A map from commits to file line mappings and contents. 123type CommitSourceMap = HashMap<CommitId, Source>; 124 125/// Line mapping and file content at a certain commit. 126#[derive(Clone, Debug)] 127struct Source { 128 /// Mapping of line numbers in the file at the current commit to the 129 /// original file, sorted by the line numbers at the current commit. 130 line_map: Vec<(usize, usize)>, 131 /// File content at the current commit. 132 text: BString, 133} 134 135impl Source { 136 fn new(text: BString) -> Self { 137 Source { 138 line_map: Vec::new(), 139 text, 140 } 141 } 142 143 fn load(commit: &Commit, file_path: &RepoPath) -> Result<Self, BackendError> { 144 let tree = commit.tree()?; 145 let text = get_file_contents(commit.store(), file_path, &tree)?; 146 Ok(Self::new(text)) 147 } 148 149 fn fill_line_map(&mut self) { 150 let lines = self.text.split_inclusive(|b| *b == b'\n'); 151 self.line_map = lines.enumerate().map(|(i, _)| (i, i)).collect(); 152 } 153} 154 155/// List of commit IDs that originated lines, indexed by line numbers in the 156/// original file. 157type OriginalLineMap = Vec<Result<CommitId, CommitId>>; 158 159/// Get line by line annotations for a specific file path in the repo. 160/// 161/// The `domain` expression narrows the range of ancestors to search. It will be 162/// intersected as `domain & ::starting_commit & files(file_path)`. The 163/// `starting_commit` is assumed to be included in the `domain`. 164/// 165/// If the file is not found, returns empty results. 166pub fn get_annotation_for_file( 167 repo: &dyn Repo, 168 starting_commit: &Commit, 169 domain: &Rc<ResolvedRevsetExpression>, 170 file_path: &RepoPath, 171) -> Result<FileAnnotation, RevsetEvaluationError> { 172 let source = Source::load(starting_commit, file_path)?; 173 compute_file_annotation(repo, starting_commit.id(), domain, file_path, source) 174} 175 176/// Get line by line annotations for a specific file path starting with the 177/// given content. 178/// 179/// The file content at the `starting_commit` is set to `starting_text`. This is 180/// typically one of the file contents in the conflict or merged-parent tree. 181/// 182/// See [`get_annotation_for_file()`] for the other arguments. 183pub fn get_annotation_with_file_content( 184 repo: &dyn Repo, 185 starting_commit_id: &CommitId, 186 domain: &Rc<ResolvedRevsetExpression>, 187 file_path: &RepoPath, 188 starting_text: impl Into<Vec<u8>>, 189) -> Result<FileAnnotation, RevsetEvaluationError> { 190 let source = Source::new(BString::new(starting_text.into())); 191 compute_file_annotation(repo, starting_commit_id, domain, file_path, source) 192} 193 194fn compute_file_annotation( 195 repo: &dyn Repo, 196 starting_commit_id: &CommitId, 197 domain: &Rc<ResolvedRevsetExpression>, 198 file_path: &RepoPath, 199 mut source: Source, 200) -> Result<FileAnnotation, RevsetEvaluationError> { 201 source.fill_line_map(); 202 let text = source.text.clone(); 203 let line_map = process_commits(repo, starting_commit_id, source, domain, file_path)?; 204 Ok(FileAnnotation { line_map, text }) 205} 206 207/// Starting at the starting commit, compute changes at that commit relative to 208/// it's direct parents, updating the mappings as we go. We return the final 209/// original line map that represents where each line of the original came from. 210fn process_commits( 211 repo: &dyn Repo, 212 starting_commit_id: &CommitId, 213 starting_source: Source, 214 domain: &Rc<ResolvedRevsetExpression>, 215 file_name: &RepoPath, 216) -> Result<OriginalLineMap, RevsetEvaluationError> { 217 let predicate = RevsetFilterPredicate::File(FilesetExpression::file_path(file_name.to_owned())); 218 // TODO: If the domain isn't a contiguous range, changes masked out by it 219 // might not be caught by the closest ancestor revision. For example, 220 // domain=merges() would pick up almost nothing because merge revisions 221 // are usually empty. Perhaps, we want to query `files(file_path, 222 // within_sub_graph=domain)`, not `domain & files(file_path)`. 223 let ancestors = RevsetExpression::commit(starting_commit_id.clone()).ancestors(); 224 let revset = RevsetExpression::commit(starting_commit_id.clone()) 225 .union(&domain.intersection(&ancestors).filtered(predicate)) 226 .evaluate(repo)?; 227 228 let mut original_line_map = 229 vec![Err(starting_commit_id.clone()); starting_source.line_map.len()]; 230 let mut commit_source_map = HashMap::from([(starting_commit_id.clone(), starting_source)]); 231 232 for node in revset.iter_graph() { 233 let (commit_id, edge_list) = node?; 234 process_commit( 235 repo, 236 file_name, 237 &mut original_line_map, 238 &mut commit_source_map, 239 &commit_id, 240 &edge_list, 241 )?; 242 if commit_source_map.is_empty() { 243 // No more lines to propagate to ancestors. 244 break; 245 } 246 } 247 Ok(original_line_map) 248} 249 250/// For a given commit, for each parent, we compare the version in the parent 251/// tree with the current version, updating the mappings for any lines in 252/// common. If the parent doesn't have the file, we skip it. 253fn process_commit( 254 repo: &dyn Repo, 255 file_name: &RepoPath, 256 original_line_map: &mut OriginalLineMap, 257 commit_source_map: &mut CommitSourceMap, 258 current_commit_id: &CommitId, 259 edges: &[GraphEdge<CommitId>], 260) -> Result<(), BackendError> { 261 let Some(mut current_source) = commit_source_map.remove(current_commit_id) else { 262 return Ok(()); 263 }; 264 265 for parent_edge in edges { 266 let parent_commit_id = &parent_edge.target; 267 let parent_source = match commit_source_map.entry(parent_commit_id.clone()) { 268 hash_map::Entry::Occupied(entry) => entry.into_mut(), 269 hash_map::Entry::Vacant(entry) => { 270 let commit = repo.store().get_commit(entry.key())?; 271 entry.insert(Source::load(&commit, file_name)?) 272 } 273 }; 274 275 // For two versions of the same file, for all the lines in common, 276 // overwrite the new mapping in the results for the new commit. Let's 277 // say I have a file in commit A and commit B. We know that according to 278 // local line_map, in commit A, line 3 corresponds to line 7 of the 279 // original file. Now, line 3 in Commit A corresponds to line 6 in 280 // commit B. Then, we update local line_map to say that "Commit B line 6 281 // goes to line 7 of the original file". We repeat this for all lines in 282 // common in the two commits. 283 let mut current_lines = current_source.line_map.iter().copied().peekable(); 284 let mut new_current_line_map = Vec::new(); 285 let mut new_parent_line_map = Vec::new(); 286 copy_same_lines_with( 287 &current_source.text, 288 &parent_source.text, 289 |current_start, parent_start, count| { 290 new_current_line_map 291 .extend(current_lines.peeking_take_while(|&(cur, _)| cur < current_start)); 292 while let Some((current, original)) = 293 current_lines.next_if(|&(cur, _)| cur < current_start + count) 294 { 295 let parent = parent_start + (current - current_start); 296 new_parent_line_map.push((parent, original)); 297 } 298 }, 299 ); 300 new_current_line_map.extend(current_lines); 301 current_source.line_map = new_current_line_map; 302 parent_source.line_map = if parent_source.line_map.is_empty() { 303 new_parent_line_map 304 } else { 305 itertools::merge(parent_source.line_map.iter().copied(), new_parent_line_map).collect() 306 }; 307 // If an omitted parent had the file, leave these lines unresolved. The 308 // origin of the unresolved lines is represented as Err(root_commit_id). 309 if parent_edge.edge_type == GraphEdgeType::Missing { 310 for (_, original_line_number) in parent_source.line_map.drain(..) { 311 original_line_map[original_line_number] = Err(current_commit_id.clone()); 312 } 313 } 314 if parent_source.line_map.is_empty() { 315 commit_source_map.remove(parent_commit_id); 316 } 317 } 318 319 // Once we've looked at all parents of a commit, any leftover lines must be 320 // original to the current commit, so we save this information in 321 // original_line_map. 322 for (_, original_line_number) in current_source.line_map { 323 original_line_map[original_line_number] = Ok(current_commit_id.clone()); 324 } 325 326 Ok(()) 327} 328 329/// For two files, calls `copy(current_start, parent_start, count)` for each 330/// range of contiguous lines in common (e.g. line 8-10 maps to line 9-11.) 331fn copy_same_lines_with( 332 current_contents: &[u8], 333 parent_contents: &[u8], 334 mut copy: impl FnMut(usize, usize, usize), 335) { 336 let diff = Diff::by_line([current_contents, parent_contents]); 337 let mut current_line_counter: usize = 0; 338 let mut parent_line_counter: usize = 0; 339 for hunk in diff.hunks() { 340 match hunk.kind { 341 DiffHunkKind::Matching => { 342 let count = hunk.contents[0].split_inclusive(|b| *b == b'\n').count(); 343 copy(current_line_counter, parent_line_counter, count); 344 current_line_counter += count; 345 parent_line_counter += count; 346 } 347 DiffHunkKind::Different => { 348 let current_output = hunk.contents[0]; 349 let parent_output = hunk.contents[1]; 350 current_line_counter += current_output.split_inclusive(|b| *b == b'\n').count(); 351 parent_line_counter += parent_output.split_inclusive(|b| *b == b'\n').count(); 352 } 353 } 354 } 355} 356 357fn get_file_contents( 358 store: &Store, 359 path: &RepoPath, 360 tree: &MergedTree, 361) -> Result<BString, BackendError> { 362 let file_value = tree.path_value(path)?; 363 let effective_file_value = materialize_tree_value(store, path, file_value).block_on()?; 364 match effective_file_value { 365 MaterializedTreeValue::File { mut reader, id, .. } => { 366 let mut file_contents = Vec::new(); 367 reader 368 .read_to_end(&mut file_contents) 369 .map_err(|e| BackendError::ReadFile { 370 path: path.to_owned(), 371 id, 372 source: Box::new(e), 373 })?; 374 Ok(file_contents.into()) 375 } 376 MaterializedTreeValue::FileConflict { contents, .. } => Ok( 377 materialize_merge_result_to_bytes(&contents, ConflictMarkerStyle::default()), 378 ), 379 _ => Ok(BString::default()), 380 } 381} 382 383#[cfg(test)] 384mod tests { 385 use super::*; 386 387 #[test] 388 fn test_lines_iterator_empty() { 389 let annotation = FileAnnotation { 390 line_map: vec![], 391 text: "".into(), 392 }; 393 assert_eq!(annotation.lines().collect_vec(), vec![]); 394 assert_eq!(annotation.line_ranges().collect_vec(), vec![]); 395 assert_eq!(annotation.compact_line_ranges().collect_vec(), vec![]); 396 } 397 398 #[test] 399 fn test_lines_iterator_with_content() { 400 let commit_id1 = CommitId::from_hex("111111"); 401 let commit_id2 = CommitId::from_hex("222222"); 402 let commit_id3 = CommitId::from_hex("333333"); 403 let annotation = FileAnnotation { 404 line_map: vec![ 405 Ok(commit_id1.clone()), 406 Ok(commit_id2.clone()), 407 Ok(commit_id3.clone()), 408 ], 409 text: "foo\n\nbar\n".into(), 410 }; 411 assert_eq!( 412 annotation.lines().collect_vec(), 413 vec![ 414 (Ok(&commit_id1), "foo\n".as_ref()), 415 (Ok(&commit_id2), "\n".as_ref()), 416 (Ok(&commit_id3), "bar\n".as_ref()), 417 ] 418 ); 419 assert_eq!( 420 annotation.line_ranges().collect_vec(), 421 vec![ 422 (Ok(&commit_id1), 0..4), 423 (Ok(&commit_id2), 4..5), 424 (Ok(&commit_id3), 5..9), 425 ] 426 ); 427 assert_eq!( 428 annotation.compact_line_ranges().collect_vec(), 429 vec![ 430 (Ok(&commit_id1), 0..4), 431 (Ok(&commit_id2), 4..5), 432 (Ok(&commit_id3), 5..9), 433 ] 434 ); 435 } 436 437 #[test] 438 fn test_lines_iterator_compaction() { 439 let commit_id1 = CommitId::from_hex("111111"); 440 let commit_id2 = CommitId::from_hex("222222"); 441 let commit_id3 = CommitId::from_hex("333333"); 442 let annotation = FileAnnotation { 443 line_map: vec![ 444 Ok(commit_id1.clone()), 445 Ok(commit_id1.clone()), 446 Ok(commit_id2.clone()), 447 Ok(commit_id1.clone()), 448 Ok(commit_id3.clone()), 449 Ok(commit_id3.clone()), 450 Ok(commit_id3.clone()), 451 ], 452 text: "\n".repeat(7).into(), 453 }; 454 assert_eq!( 455 annotation.compact_line_ranges().collect_vec(), 456 vec![ 457 (Ok(&commit_id1), 0..2), 458 (Ok(&commit_id2), 2..3), 459 (Ok(&commit_id1), 3..4), 460 (Ok(&commit_id3), 4..7), 461 ] 462 ); 463 } 464}