just playing with tangled
at ig/vimdiffwarn 323 lines 13 kB view raw
1// Copyright 2024 The Jujutsu Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15//! API for transforming file content, for example to apply formatting, and 16//! propagate those changes across revisions. 17 18use std::collections::HashMap; 19use std::collections::HashSet; 20use std::sync::mpsc::channel; 21 22use futures::StreamExt as _; 23use itertools::Itertools as _; 24use jj_lib::backend::BackendError; 25use jj_lib::backend::CommitId; 26use jj_lib::backend::FileId; 27use jj_lib::backend::TreeValue; 28use jj_lib::matchers::Matcher; 29use jj_lib::merged_tree::MergedTree; 30use jj_lib::merged_tree::MergedTreeBuilder; 31use jj_lib::merged_tree::TreeDiffEntry; 32use jj_lib::repo::MutableRepo; 33use jj_lib::repo::Repo as _; 34use jj_lib::repo_path::RepoPathBuf; 35use jj_lib::revset::RevsetExpression; 36use jj_lib::revset::RevsetIteratorExt as _; 37use jj_lib::store::Store; 38use jj_lib::tree::Tree; 39use pollster::FutureExt as _; 40use rayon::iter::IntoParallelIterator as _; 41use rayon::prelude::ParallelIterator as _; 42 43use crate::revset::RevsetEvaluationError; 44 45/// Represents a file whose content may be transformed by a FileFixer. 46// TODO: Add the set of changed line/byte ranges, so those can be passed into code formatters via 47// flags. This will help avoid introducing unrelated changes when working on code with out of date 48// formatting. 
49#[derive(Debug, PartialEq, Eq, Hash, Clone)] 50pub struct FileToFix { 51 /// Unique identifier for the file content. 52 pub file_id: FileId, 53 54 /// The path is provided to allow the FileFixer to potentially: 55 /// - Choose different behaviors for different file names, extensions, etc. 56 /// - Update parts of the file's content that should be derived from the 57 /// file's path. 58 pub repo_path: RepoPathBuf, 59} 60 61/// Error fixing files. 62#[derive(Debug, thiserror::Error)] 63pub enum FixError { 64 /// Error while contacting the Backend. 65 #[error(transparent)] 66 Backend(#[from] BackendError), 67 /// Error resolving commit ancestry. 68 #[error(transparent)] 69 RevsetEvaluation(#[from] RevsetEvaluationError), 70 /// Error occurred while reading/writing file content. 71 #[error(transparent)] 72 IO(#[from] std::io::Error), 73 /// Error occurred while processing the file content. 74 #[error(transparent)] 75 FixContent(Box<dyn std::error::Error + Send + Sync>), 76} 77 78/// Fixes a set of files. 79/// 80/// Fixing a file is implementation dependent. For example it may format source 81/// code using a code formatter. 82pub trait FileFixer { 83 /// Fixes a set of files. Stores the resulting file content (for modified 84 /// files). 85 /// 86 /// Returns a map describing the subset of `files_to_fix` that resulted in 87 /// changed file content (unchanged files should not be present in the map), 88 /// pointing to the new FileId for the file. 89 /// 90 /// TODO: Better error handling so we can tell the user what went wrong with 91 /// each failed input. 92 fn fix_files<'a>( 93 &self, 94 store: &Store, 95 files_to_fix: &'a HashSet<FileToFix>, 96 ) -> Result<HashMap<&'a FileToFix, FileId>, FixError>; 97} 98 99/// Aggregate information about the outcome of the file fixer. 100#[derive(Debug, Default)] 101pub struct FixSummary { 102 /// The commits that were rewritten. Maps old commit id to new commit id. 
103 pub rewrites: HashMap<CommitId, CommitId>, 104 105 /// The number of commits that had files that were passed to the file fixer. 106 pub num_checked_commits: i32, 107 /// The number of new commits created due to file content changed by the 108 /// fixer. 109 pub num_fixed_commits: i32, 110} 111 112/// A [FileFixer] that applies fix_fn to each file, in parallel. 113/// 114/// The implementation is currently based on [rayon]. 115// TODO: Consider switching to futures, or document the decision not to. We 116// don't need threads unless the threads will be doing more than waiting for 117// pipes. 118pub struct ParallelFileFixer<T> { 119 fix_fn: T, 120} 121 122impl<T> ParallelFileFixer<T> 123where 124 T: Fn(&Store, &FileToFix) -> Result<Option<FileId>, FixError> + Sync + Send, 125{ 126 /// Creates a ParallelFileFixer. 127 pub fn new(fix_fn: T) -> Self { 128 Self { fix_fn } 129 } 130} 131 132impl<T> FileFixer for ParallelFileFixer<T> 133where 134 T: Fn(&Store, &FileToFix) -> Result<Option<FileId>, FixError> + Sync + Send, 135{ 136 /// Applies `fix_fn()` to the inputs and stores the resulting file content. 137 fn fix_files<'a>( 138 &self, 139 store: &Store, 140 files_to_fix: &'a HashSet<FileToFix>, 141 ) -> Result<HashMap<&'a FileToFix, FileId>, FixError> { 142 let (updates_tx, updates_rx) = channel(); 143 files_to_fix.into_par_iter().try_for_each_init( 144 || updates_tx.clone(), 145 |updates_tx, file_to_fix| -> Result<(), FixError> { 146 let result = (self.fix_fn)(store, file_to_fix)?; 147 match result { 148 Some(new_file_id) => { 149 updates_tx.send((file_to_fix, new_file_id)).unwrap(); 150 Ok(()) 151 } 152 None => Ok(()), 153 } 154 }, 155 )?; 156 drop(updates_tx); 157 let mut result = HashMap::new(); 158 while let Ok((file_to_fix, new_file_id)) = updates_rx.recv() { 159 result.insert(file_to_fix, new_file_id); 160 } 161 Ok(result) 162 } 163} 164 165/// Updates files with formatting fixes or other changes, using the given 166/// FileFixer. 
///
/// The primary use case is to apply the results of automatic code formatting
/// tools to revisions that may not be properly formatted yet. It can also be
/// used to modify files with other tools like `sed` or `sort`.
///
/// After the FileFixer is done, descendants are also updated, which ensures
/// that the fixes are not lost. This will never result in new conflicts. Files
/// with existing conflicts are updated on all sides of the conflict, which
/// can potentially increase or decrease the number of conflict markers.
///
/// `root_commits` selects the commits (and, implicitly, their descendants)
/// whose files are candidates for fixing; `matcher` restricts which paths are
/// considered. If `include_unchanged_files` is true, every matching file in
/// each tree is fixed, not just files changed relative to their parents.
/// Returns a [FixSummary] recording the rewrites and the number of commits
/// checked/fixed.
pub fn fix_files(
    root_commits: Vec<CommitId>,
    matcher: &dyn Matcher,
    include_unchanged_files: bool,
    repo_mut: &mut MutableRepo,
    file_fixer: &impl FileFixer,
) -> Result<FixSummary, FixError> {
    let mut summary = FixSummary::default();

    // Collect all of the unique `FileToFix`s we're going to use. file_fixer should
    // be deterministic, and should not consider outside information, so it is
    // safe to deduplicate inputs that correspond to multiple files or commits.
    // This is typically more efficient, but it does prevent certain use cases
    // like providing commit IDs as inputs to be inserted into files. We also
    // need to record the mapping between files-to-fix and paths/commits, to
    // efficiently rewrite the commits later.
    //
    // If a path is being fixed in a particular commit, it must also be fixed in all
    // that commit's descendants. We do this as a way of propagating changes,
    // under the assumption that it is more useful than performing a rebase and
    // risking merge conflicts. In the case of code formatters, rebasing wouldn't
    // reliably produce well formatted code anyway. Deduplicating inputs helps
    // to prevent quadratic growth in the number of tool executions required for
    // doing this in long chains of commits with disjoint sets of modified files.
    let commits: Vec<_> = RevsetExpression::commits(root_commits.clone())
        .descendants()
        .evaluate(repo_mut)?
        .iter()
        .commits(repo_mut.store())
        .try_collect()?;
    tracing::debug!(
        ?root_commits,
        ?commits,
        "looking for files to fix in commits:"
    );

    let mut unique_files_to_fix: HashSet<FileToFix> = HashSet::new();
    let mut commit_paths: HashMap<CommitId, HashSet<RepoPathBuf>> = HashMap::new();
    // Iterate in reverse so that a commit's parents are processed before the
    // commit itself: the loop below looks up the parents' fixed paths in
    // `commit_paths`, which must already be populated. (This assumes the
    // revset yields descendants before ancestors — TODO confirm.)
    for commit in commits.iter().rev() {
        let mut paths: HashSet<RepoPathBuf> = HashSet::new();

        // If --include-unchanged-files, we always fix every matching file in the tree.
        // Otherwise, we fix the matching changed files in this commit, plus any that
        // were fixed in ancestors, so we don't lose those changes. We do this
        // instead of rebasing onto those changes, to avoid merge conflicts.
        let parent_tree = if include_unchanged_files {
            // Diffing against an empty tree makes every matching file in the
            // commit's tree appear "changed".
            MergedTree::resolved(Tree::empty(repo_mut.store().clone(), RepoPathBuf::root()))
        } else {
            for parent_id in commit.parent_ids() {
                if let Some(parent_paths) = commit_paths.get(parent_id) {
                    paths.extend(parent_paths.iter().cloned());
                }
            }
            commit.parent_tree(repo_mut)?
        };
        // TODO: handle copy tracking
        let mut diff_stream = parent_tree.diff_stream(&commit.tree()?, &matcher);
        async {
            while let Some(TreeDiffEntry {
                path: repo_path,
                values,
            }) = diff_stream.next().await
            {
                let (_before, after) = values?;
                // Deleted files have no file content to fix, and they have no terms in `after`,
                // so we don't add any files-to-fix for them. Conflicted files produce one
                // file-to-fix for each side of the conflict.
                for term in after.into_iter().flatten() {
                    // We currently only support fixing the content of normal files, so we skip
                    // directories and symlinks, and we ignore the executable bit.
                    if let TreeValue::File { id, executable: _ } = term {
                        // TODO: Skip the file if its content is larger than some configured size,
                        // preferably without actually reading it yet.
                        let file_to_fix = FileToFix {
                            file_id: id.clone(),
                            repo_path: repo_path.clone(),
                        };
                        unique_files_to_fix.insert(file_to_fix.clone());
                        paths.insert(repo_path.clone());
                    }
                }
            }
            Ok::<(), BackendError>(())
        }
        .block_on()?;

        commit_paths.insert(commit.id().clone(), paths);
    }

    tracing::debug!(
        ?include_unchanged_files,
        ?unique_files_to_fix,
        "invoking file fixer on these files:"
    );

    // Fix all of the chosen inputs.
    let fixed_file_ids = file_fixer.fix_files(repo_mut.store().as_ref(), &unique_files_to_fix)?;
    tracing::debug!(?fixed_file_ids, "file fixer fixed these files:");

    // Substitute the fixed file IDs into all of the affected commits. Currently,
    // fixes cannot delete or rename files, change the executable bit, or modify
    // other parts of the commit like the description.
    repo_mut.transform_descendants(root_commits, |mut rewriter| {
        // TODO: Build the trees in parallel before `transform_descendants()` and only
        // keep the tree IDs in memory, so we can pass them to the rewriter.
        let old_commit_id = rewriter.old_commit().id().clone();
        // Safe: every descendant of `root_commits` was visited by the
        // collection loop above, so an entry must exist for this commit.
        let repo_paths = commit_paths.get(&old_commit_id).unwrap();
        let old_tree = rewriter.old_commit().tree()?;
        let mut tree_builder = MergedTreeBuilder::new(old_tree.id().clone());
        let mut changes = 0;
        for repo_path in repo_paths {
            let old_value = old_tree.path_value(repo_path)?;
            // Replace each conflict side that is a normal file with its fixed
            // content, if the fixer changed it; everything else is kept as-is.
            let new_value = old_value.map(|old_term| {
                if let Some(TreeValue::File { id, executable }) = old_term {
                    let file_to_fix = FileToFix {
                        file_id: id.clone(),
                        repo_path: repo_path.clone(),
                    };
                    if let Some(new_id) = fixed_file_ids.get(&file_to_fix) {
                        return Some(TreeValue::File {
                            id: new_id.clone(),
                            executable: *executable,
                        });
                    }
                }
                old_term.clone()
            });
            if new_value != old_value {
                tree_builder.set_or_remove(repo_path.clone(), new_value);
                changes += 1;
            }
        }
        summary.num_checked_commits += 1;
        // Only write a new commit when at least one path actually changed;
        // otherwise the commit is left alone.
        if changes > 0 {
            summary.num_fixed_commits += 1;
            let new_tree = tree_builder.write_tree(rewriter.mut_repo().store())?;
            let builder = rewriter.reparent();
            let new_commit = builder.set_tree_id(new_tree).write()?;
            summary
                .rewrites
                .insert(old_commit_id, new_commit.id().clone());
        }
        Ok(())
    })?;

    tracing::debug!(?summary);
    Ok(summary)
}