just playing with tangled
at docs-prerelease 450 lines 17 kB view raw
1// Copyright 2024 The Jujutsu Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15//! Methods that allow annotation (attribution and blame) for a file in a 16//! repository. 17//! 18//! TODO: Add support for different blame layers with a trait in the future. 19//! Like commit metadata and more. 20 21use std::collections::hash_map; 22use std::collections::HashMap; 23use std::iter; 24use std::ops::Range; 25use std::rc::Rc; 26 27use bstr::BStr; 28use bstr::BString; 29use itertools::Itertools as _; 30use pollster::FutureExt; 31 32use crate::backend::BackendError; 33use crate::backend::CommitId; 34use crate::commit::Commit; 35use crate::conflicts::materialize_merge_result_to_bytes; 36use crate::conflicts::materialize_tree_value; 37use crate::conflicts::ConflictMarkerStyle; 38use crate::conflicts::MaterializedTreeValue; 39use crate::diff::Diff; 40use crate::diff::DiffHunkKind; 41use crate::fileset::FilesetExpression; 42use crate::graph::GraphEdge; 43use crate::graph::GraphEdgeType; 44use crate::merged_tree::MergedTree; 45use crate::repo::Repo; 46use crate::repo_path::RepoPath; 47use crate::revset::ResolvedRevsetExpression; 48use crate::revset::RevsetEvaluationError; 49use crate::revset::RevsetExpression; 50use crate::revset::RevsetFilterPredicate; 51use crate::store::Store; 52 53/// Annotation results for a specific file 54#[derive(Clone, Debug)] 55pub struct FileAnnotation { 56 line_map: OriginalLineMap, 57 text: BString, 58} 59 60impl FileAnnotation { 61 /// Returns iterator over `(commit_id, line)`s. 62 /// 63 /// For each line, the `commit_id` points to the originator commit of the 64 /// line. The `line` includes newline character. 65 pub fn lines(&self) -> impl Iterator<Item = (Option<&CommitId>, &BStr)> { 66 itertools::zip_eq(&self.line_map, self.text.split_inclusive(|b| *b == b'\n')) 67 .map(|(commit_id, line)| (commit_id.as_ref(), line.as_ref())) 68 } 69 70 /// Returns iterator over `(commit_id, line_range)`s. 71 /// 72 /// For each line, the `commit_id` points to the originator commit of the 73 /// line. The `line_range` is a slice range in the file `text`. Consecutive 74 /// ranges having the same `commit_id` are not compacted. 75 pub fn line_ranges(&self) -> impl Iterator<Item = (Option<&CommitId>, Range<usize>)> { 76 let ranges = self 77 .text 78 .split_inclusive(|b| *b == b'\n') 79 .scan(0, |total, line| { 80 let start = *total; 81 *total += line.len(); 82 Some(start..*total) 83 }); 84 itertools::zip_eq(&self.line_map, ranges) 85 .map(|(commit_id, range)| (commit_id.as_ref(), range)) 86 } 87 88 /// Returns iterator over compacted `(commit_id, line_range)`s. 89 /// 90 /// Consecutive ranges having the same `commit_id` are merged into one. 91 pub fn compact_line_ranges(&self) -> impl Iterator<Item = (Option<&CommitId>, Range<usize>)> { 92 let mut ranges = self.line_ranges(); 93 let mut acc = ranges.next(); 94 iter::from_fn(move || { 95 let (acc_commit_id, acc_range) = acc.as_mut()?; 96 for (cur_commit_id, cur_range) in ranges.by_ref() { 97 if *acc_commit_id == cur_commit_id { 98 acc_range.end = cur_range.end; 99 } else { 100 return acc.replace((cur_commit_id, cur_range)); 101 } 102 } 103 acc.take() 104 }) 105 } 106 107 /// File content at the starting commit. 108 pub fn text(&self) -> &BStr { 109 self.text.as_ref() 110 } 111} 112 113/// A map from commits to file line mappings and contents. 114type CommitSourceMap = HashMap<CommitId, Source>; 115 116/// Line mapping and file content at a certain commit. 117#[derive(Clone, Debug)] 118struct Source { 119 /// Mapping of line numbers in the file at the current commit to the 120 /// original file, sorted by the line numbers at the current commit. 121 line_map: Vec<(usize, usize)>, 122 /// File content at the current commit. 123 text: BString, 124} 125 126impl Source { 127 fn new(text: BString) -> Self { 128 Source { 129 line_map: Vec::new(), 130 text, 131 } 132 } 133 134 fn load(commit: &Commit, file_path: &RepoPath) -> Result<Self, BackendError> { 135 let tree = commit.tree()?; 136 let text = get_file_contents(commit.store(), file_path, &tree)?; 137 Ok(Self::new(text)) 138 } 139 140 fn fill_line_map(&mut self) { 141 let lines = self.text.split_inclusive(|b| *b == b'\n'); 142 self.line_map = lines.enumerate().map(|(i, _)| (i, i)).collect(); 143 } 144} 145 146/// List of commit IDs that originated lines, indexed by line numbers in the 147/// original file. 148type OriginalLineMap = Vec<Option<CommitId>>; 149 150/// Get line by line annotations for a specific file path in the repo. 151/// 152/// The `domain` expression narrows the range of ancestors to search. It will be 153/// intersected as `domain & ::starting_commit & files(file_path)`. The 154/// `starting_commit` is assumed to be included in the `domain`. 155/// 156/// If the file is not found, returns empty results. 157pub fn get_annotation_for_file( 158 repo: &dyn Repo, 159 starting_commit: &Commit, 160 domain: &Rc<ResolvedRevsetExpression>, 161 file_path: &RepoPath, 162) -> Result<FileAnnotation, RevsetEvaluationError> { 163 let source = Source::load(starting_commit, file_path)?; 164 compute_file_annotation(repo, starting_commit.id(), domain, file_path, source) 165} 166 167/// Get line by line annotations for a specific file path starting with the 168/// given content. 169/// 170/// The file content at the `starting_commit` is set to `starting_text`. This is 171/// typically one of the file contents in the conflict or merged-parent tree. 172/// 173/// See [`get_annotation_for_file()`] for the other arguments. 174pub fn get_annotation_with_file_content( 175 repo: &dyn Repo, 176 starting_commit_id: &CommitId, 177 domain: &Rc<ResolvedRevsetExpression>, 178 file_path: &RepoPath, 179 starting_text: impl Into<Vec<u8>>, 180) -> Result<FileAnnotation, RevsetEvaluationError> { 181 let source = Source::new(BString::new(starting_text.into())); 182 compute_file_annotation(repo, starting_commit_id, domain, file_path, source) 183} 184 185fn compute_file_annotation( 186 repo: &dyn Repo, 187 starting_commit_id: &CommitId, 188 domain: &Rc<ResolvedRevsetExpression>, 189 file_path: &RepoPath, 190 mut source: Source, 191) -> Result<FileAnnotation, RevsetEvaluationError> { 192 source.fill_line_map(); 193 let text = source.text.clone(); 194 let line_map = process_commits(repo, starting_commit_id, source, domain, file_path)?; 195 Ok(FileAnnotation { line_map, text }) 196} 197 198/// Starting at the starting commit, compute changes at that commit relative to 199/// it's direct parents, updating the mappings as we go. We return the final 200/// original line map that represents where each line of the original came from. 201fn process_commits( 202 repo: &dyn Repo, 203 starting_commit_id: &CommitId, 204 starting_source: Source, 205 domain: &Rc<ResolvedRevsetExpression>, 206 file_name: &RepoPath, 207) -> Result<OriginalLineMap, RevsetEvaluationError> { 208 let predicate = RevsetFilterPredicate::File(FilesetExpression::file_path(file_name.to_owned())); 209 // TODO: If the domain isn't a contiguous range, changes masked out by it 210 // might not be caught by the closest ancestor revision. For example, 211 // domain=merges() would pick up almost nothing because merge revisions 212 // are usually empty. Perhaps, we want to query `files(file_path, 213 // within_sub_graph=domain)`, not `domain & files(file_path)`. 214 let ancestors = RevsetExpression::commit(starting_commit_id.clone()).ancestors(); 215 let revset = RevsetExpression::commit(starting_commit_id.clone()) 216 .union(&domain.intersection(&ancestors).filtered(predicate)) 217 .evaluate(repo)?; 218 219 let mut original_line_map = vec![None; starting_source.line_map.len()]; 220 let mut commit_source_map = HashMap::from([(starting_commit_id.clone(), starting_source)]); 221 222 for node in revset.iter_graph() { 223 let (commit_id, edge_list) = node?; 224 process_commit( 225 repo, 226 file_name, 227 &mut original_line_map, 228 &mut commit_source_map, 229 &commit_id, 230 &edge_list, 231 )?; 232 if commit_source_map.is_empty() { 233 // No more lines to propagate to ancestors. 234 break; 235 } 236 } 237 Ok(original_line_map) 238} 239 240/// For a given commit, for each parent, we compare the version in the parent 241/// tree with the current version, updating the mappings for any lines in 242/// common. If the parent doesn't have the file, we skip it. 243fn process_commit( 244 repo: &dyn Repo, 245 file_name: &RepoPath, 246 original_line_map: &mut OriginalLineMap, 247 commit_source_map: &mut CommitSourceMap, 248 current_commit_id: &CommitId, 249 edges: &[GraphEdge<CommitId>], 250) -> Result<(), BackendError> { 251 let Some(mut current_source) = commit_source_map.remove(current_commit_id) else { 252 return Ok(()); 253 }; 254 255 for parent_edge in edges { 256 let parent_commit_id = &parent_edge.target; 257 let parent_source = match commit_source_map.entry(parent_commit_id.clone()) { 258 hash_map::Entry::Occupied(entry) => entry.into_mut(), 259 hash_map::Entry::Vacant(entry) => { 260 let commit = repo.store().get_commit(entry.key())?; 261 entry.insert(Source::load(&commit, file_name)?) 262 } 263 }; 264 265 // For two versions of the same file, for all the lines in common, 266 // overwrite the new mapping in the results for the new commit. Let's 267 // say I have a file in commit A and commit B. We know that according to 268 // local line_map, in commit A, line 3 corresponds to line 7 of the 269 // original file. Now, line 3 in Commit A corresponds to line 6 in 270 // commit B. Then, we update local line_map to say that "Commit B line 6 271 // goes to line 7 of the original file". We repeat this for all lines in 272 // common in the two commits. 273 let mut current_lines = current_source.line_map.iter().copied().peekable(); 274 let mut new_current_line_map = Vec::new(); 275 let mut new_parent_line_map = Vec::new(); 276 copy_same_lines_with( 277 &current_source.text, 278 &parent_source.text, 279 |current_start, parent_start, count| { 280 new_current_line_map 281 .extend(current_lines.peeking_take_while(|&(cur, _)| cur < current_start)); 282 while let Some((current, original)) = 283 current_lines.next_if(|&(cur, _)| cur < current_start + count) 284 { 285 let parent = parent_start + (current - current_start); 286 new_parent_line_map.push((parent, original)); 287 } 288 }, 289 ); 290 new_current_line_map.extend(current_lines); 291 current_source.line_map = new_current_line_map; 292 parent_source.line_map = if parent_source.line_map.is_empty() { 293 new_parent_line_map 294 } else { 295 itertools::merge(parent_source.line_map.iter().copied(), new_parent_line_map).collect() 296 }; 297 // If an omitted parent had the file, leave these lines unresolved. 298 // TODO: These unresolved lines could be copied to the original_line_map 299 // as Err(commit_id) or something instead of None. 300 if parent_source.line_map.is_empty() || parent_edge.edge_type == GraphEdgeType::Missing { 301 commit_source_map.remove(parent_commit_id); 302 } 303 } 304 305 // Once we've looked at all parents of a commit, any leftover lines must be 306 // original to the current commit, so we save this information in 307 // original_line_map. 308 for (_, original_line_number) in current_source.line_map { 309 original_line_map[original_line_number] = Some(current_commit_id.clone()); 310 } 311 312 Ok(()) 313} 314 315/// For two files, calls `copy(current_start, parent_start, count)` for each 316/// range of contiguous lines in common (e.g. line 8-10 maps to line 9-11.) 317fn copy_same_lines_with( 318 current_contents: &[u8], 319 parent_contents: &[u8], 320 mut copy: impl FnMut(usize, usize, usize), 321) { 322 let diff = Diff::by_line([current_contents, parent_contents]); 323 let mut current_line_counter: usize = 0; 324 let mut parent_line_counter: usize = 0; 325 for hunk in diff.hunks() { 326 match hunk.kind { 327 DiffHunkKind::Matching => { 328 let count = hunk.contents[0].split_inclusive(|b| *b == b'\n').count(); 329 copy(current_line_counter, parent_line_counter, count); 330 current_line_counter += count; 331 parent_line_counter += count; 332 } 333 DiffHunkKind::Different => { 334 let current_output = hunk.contents[0]; 335 let parent_output = hunk.contents[1]; 336 current_line_counter += current_output.split_inclusive(|b| *b == b'\n').count(); 337 parent_line_counter += parent_output.split_inclusive(|b| *b == b'\n').count(); 338 } 339 } 340 } 341} 342 343fn get_file_contents( 344 store: &Store, 345 path: &RepoPath, 346 tree: &MergedTree, 347) -> Result<BString, BackendError> { 348 let file_value = tree.path_value(path)?; 349 let effective_file_value = materialize_tree_value(store, path, file_value).block_on()?; 350 match effective_file_value { 351 MaterializedTreeValue::File { mut reader, id, .. } => { 352 let mut file_contents = Vec::new(); 353 reader 354 .read_to_end(&mut file_contents) 355 .map_err(|e| BackendError::ReadFile { 356 path: path.to_owned(), 357 id, 358 source: Box::new(e), 359 })?; 360 Ok(file_contents.into()) 361 } 362 MaterializedTreeValue::FileConflict { contents, .. } => Ok( 363 materialize_merge_result_to_bytes(&contents, ConflictMarkerStyle::default()), 364 ), 365 _ => Ok(BString::default()), 366 } 367} 368 369#[cfg(test)] 370mod tests { 371 use super::*; 372 373 #[test] 374 fn test_lines_iterator_empty() { 375 let annotation = FileAnnotation { 376 line_map: vec![], 377 text: "".into(), 378 }; 379 assert_eq!(annotation.lines().collect_vec(), vec![]); 380 assert_eq!(annotation.line_ranges().collect_vec(), vec![]); 381 assert_eq!(annotation.compact_line_ranges().collect_vec(), vec![]); 382 } 383 384 #[test] 385 fn test_lines_iterator_with_content() { 386 let commit_id1 = CommitId::from_hex("111111"); 387 let commit_id2 = CommitId::from_hex("222222"); 388 let commit_id3 = CommitId::from_hex("333333"); 389 let annotation = FileAnnotation { 390 line_map: vec![ 391 Some(commit_id1.clone()), 392 Some(commit_id2.clone()), 393 Some(commit_id3.clone()), 394 ], 395 text: "foo\n\nbar\n".into(), 396 }; 397 assert_eq!( 398 annotation.lines().collect_vec(), 399 vec![ 400 (Some(&commit_id1), "foo\n".as_ref()), 401 (Some(&commit_id2), "\n".as_ref()), 402 (Some(&commit_id3), "bar\n".as_ref()), 403 ] 404 ); 405 assert_eq!( 406 annotation.line_ranges().collect_vec(), 407 vec![ 408 (Some(&commit_id1), 0..4), 409 (Some(&commit_id2), 4..5), 410 (Some(&commit_id3), 5..9), 411 ] 412 ); 413 assert_eq!( 414 annotation.compact_line_ranges().collect_vec(), 415 vec![ 416 (Some(&commit_id1), 0..4), 417 (Some(&commit_id2), 4..5), 418 (Some(&commit_id3), 5..9), 419 ] 420 ); 421 } 422 423 #[test] 424 fn test_lines_iterator_compaction() { 425 let commit_id1 = CommitId::from_hex("111111"); 426 let commit_id2 = CommitId::from_hex("222222"); 427 let commit_id3 = CommitId::from_hex("333333"); 428 let annotation = FileAnnotation { 429 line_map: vec![ 430 Some(commit_id1.clone()), 431 Some(commit_id1.clone()), 432 Some(commit_id2.clone()), 433 Some(commit_id1.clone()), 434 Some(commit_id3.clone()), 435 Some(commit_id3.clone()), 436 Some(commit_id3.clone()), 437 ], 438 text: "\n".repeat(7).into(), 439 }; 440 assert_eq!( 441 annotation.compact_line_ranges().collect_vec(), 442 vec![ 443 (Some(&commit_id1), 0..2), 444 (Some(&commit_id2), 2..3), 445 (Some(&commit_id1), 3..4), 446 (Some(&commit_id3), 4..7), 447 ] 448 ); 449 } 450}