just playing with tangled
at globpattern 456 lines 16 kB view raw
// Copyright 2021 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![allow(missing_docs)]

use std::fs;
use std::io;
use std::iter;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;

use ignore::gitignore;
use thiserror::Error;

/// Errors produced while loading or parsing `.gitignore` content.
#[derive(Debug, Error)]
pub enum GitIgnoreError {
    /// Reading the ignore file at `path` from disk failed.
    #[error("Failed to read ignore patterns from file {path}")]
    ReadFile { path: PathBuf, source: io::Error },
    /// A line of the ignore content was not valid UTF-8.
    ///
    /// `line_num_for_display` is 1-based, and `line` holds a lossy decoding of
    /// the offending bytes so that it can be shown in the message.
    #[error("Invalid UTF-8 for ignore pattern in {path} on line #{line_num_for_display}: {line}")]
    InvalidUtf8 {
        path: PathBuf,
        line_num_for_display: usize,
        line: String,
        source: std::str::Utf8Error,
    },
    /// The `ignore` crate rejected a pattern line or failed to build the
    /// matcher for the content associated with `path`.
    #[error("Failed to parse ignore patterns from file {path}")]
    Underlying {
        path: PathBuf,
        source: ignore::Error,
    },
}

/// Models the effective contents of multiple .gitignore files.
///
/// Each value holds the matcher for one set of patterns plus an optional link
/// to the previously chained value; lookups consult this matcher first and
/// then follow the `parent` links.
#[derive(Debug)]
pub struct GitIgnoreFile {
    // Previously chained patterns, consulted when `matcher` has no verdict.
    parent: Option<Arc<GitIgnoreFile>>,
    // Compiled patterns contributed by this link of the chain.
    matcher: gitignore::Gitignore,
}

impl GitIgnoreFile {
    /// Returns a chain root that has no parent and contains no patterns.
    pub fn empty() -> Arc<GitIgnoreFile> {
        Arc::new(GitIgnoreFile {
            parent: None,
            matcher: gitignore::Gitignore::empty(),
        })
    }

    /// Concatenates new `.gitignore` content at the `prefix` directory.
    ///
    /// The `prefix` should be a slash-separated path relative to the workspace
    /// root.
64 pub fn chain( 65 self: &Arc<GitIgnoreFile>, 66 prefix: &str, 67 ignore_path: &Path, 68 input: &[u8], 69 ) -> Result<Arc<GitIgnoreFile>, GitIgnoreError> { 70 let mut builder = gitignore::GitignoreBuilder::new(prefix); 71 for (i, input_line) in input.split(|b| *b == b'\n').enumerate() { 72 let line = 73 std::str::from_utf8(input_line).map_err(|err| GitIgnoreError::InvalidUtf8 { 74 path: ignore_path.to_path_buf(), 75 line_num_for_display: i + 1, 76 line: String::from_utf8_lossy(input_line).to_string(), 77 source: err, 78 })?; 79 // The `from` argument doesn't provide any diagnostics or correctness, so it is 80 // not required. It only allows retrieving the path from the `Glob` later, which 81 // we never do. 82 builder 83 .add_line(None, line) 84 .map_err(|err| GitIgnoreError::Underlying { 85 path: ignore_path.to_path_buf(), 86 source: err, 87 })?; 88 } 89 let matcher = builder.build().map_err(|err| GitIgnoreError::Underlying { 90 path: ignore_path.to_path_buf(), 91 source: err, 92 })?; 93 let parent = if self.matcher.is_empty() { 94 self.parent.clone() // omit the empty root 95 } else { 96 Some(self.clone()) 97 }; 98 Ok(Arc::new(GitIgnoreFile { parent, matcher })) 99 } 100 101 /// Concatenates new `.gitignore` file at the `prefix` directory. 102 /// 103 /// The `prefix` should be a slash-separated path relative to the workspace 104 /// root. 
105 pub fn chain_with_file( 106 self: &Arc<GitIgnoreFile>, 107 prefix: &str, 108 file: PathBuf, 109 ) -> Result<Arc<GitIgnoreFile>, GitIgnoreError> { 110 if file.is_file() { 111 let buf = fs::read(&file).map_err(|err| GitIgnoreError::ReadFile { 112 path: file.clone(), 113 source: err, 114 })?; 115 self.chain(prefix, &file, &buf) 116 } else { 117 Ok(self.clone()) 118 } 119 } 120 121 fn matches_helper(&self, path: &str, is_dir: bool) -> bool { 122 iter::successors(Some(self), |file| file.parent.as_deref()) 123 .find_map(|file| { 124 // TODO: the documentation warns that 125 // `matched_path_or_any_parents` is slower than `matched`; 126 // ideally, we would switch to that. 127 match file.matcher.matched_path_or_any_parents(path, is_dir) { 128 ignore::Match::None => None, 129 ignore::Match::Ignore(_) => Some(true), 130 ignore::Match::Whitelist(_) => Some(false), 131 } 132 }) 133 .unwrap_or_default() 134 } 135 136 /// Returns whether specified path (not just file!) should be ignored. This 137 /// method does not directly define which files should not be tracked in 138 /// the repository. Instead, it performs a simple matching against the 139 /// last applicable .gitignore line. The effective set of paths 140 /// ignored in the repository should take into account that all (untracked) 141 /// files within a ignored directory should be ignored unconditionally. 142 /// The code in this file does not take that into account. 143 pub fn matches(&self, path: &str) -> bool { 144 //If path ends with slash, consider it as a directory. 
145 let (path, is_dir) = match path.strip_suffix('/') { 146 Some(path) => (path, true), 147 None => (path, false), 148 }; 149 self.matches_helper(path, is_dir) 150 } 151} 152 153#[cfg(test)] 154mod tests { 155 156 use super::*; 157 158 fn matches(input: &[u8], path: &str) -> bool { 159 let file = GitIgnoreFile::empty() 160 .chain("", Path::new(""), input) 161 .unwrap(); 162 file.matches(path) 163 } 164 165 #[test] 166 fn test_gitignore_empty_file() { 167 let file = GitIgnoreFile::empty(); 168 assert!(!file.matches("foo")); 169 } 170 171 #[test] 172 fn test_gitignore_empty_file_with_prefix() { 173 let file = GitIgnoreFile::empty() 174 .chain("dir/", Path::new(""), b"") 175 .unwrap(); 176 assert!(!file.matches("dir/foo")); 177 } 178 179 #[test] 180 fn test_gitignore_literal() { 181 let file = GitIgnoreFile::empty() 182 .chain("", Path::new(""), b"foo\n") 183 .unwrap(); 184 assert!(file.matches("foo")); 185 assert!(file.matches("dir/foo")); 186 assert!(file.matches("dir/subdir/foo")); 187 assert!(!file.matches("food")); 188 assert!(!file.matches("dir/food")); 189 } 190 191 #[test] 192 fn test_gitignore_literal_with_prefix() { 193 let file = GitIgnoreFile::empty() 194 .chain("./dir/", Path::new(""), b"foo\n") 195 .unwrap(); 196 assert!(file.matches("dir/foo")); 197 assert!(file.matches("dir/subdir/foo")); 198 } 199 200 #[test] 201 fn test_gitignore_pattern_same_as_prefix() { 202 let file = GitIgnoreFile::empty() 203 .chain("dir/", Path::new(""), b"dir\n") 204 .unwrap(); 205 assert!(file.matches("dir/dir")); 206 // We don't want the "dir" pattern to apply to the parent directory 207 assert!(!file.matches("dir/foo")); 208 } 209 210 #[test] 211 fn test_gitignore_rooted_literal() { 212 let file = GitIgnoreFile::empty() 213 .chain("", Path::new(""), b"/foo\n") 214 .unwrap(); 215 assert!(file.matches("foo")); 216 assert!(!file.matches("dir/foo")); 217 } 218 219 #[test] 220 fn test_gitignore_rooted_literal_with_prefix() { 221 let file = GitIgnoreFile::empty() 222 
.chain("dir/", Path::new(""), b"/foo\n") 223 .unwrap(); 224 assert!(file.matches("dir/foo")); 225 assert!(!file.matches("dir/subdir/foo")); 226 } 227 228 #[test] 229 fn test_gitignore_deep_dir() { 230 let file = GitIgnoreFile::empty() 231 .chain("", Path::new(""), b"/dir1/dir2/dir3\n") 232 .unwrap(); 233 assert!(!file.matches("foo")); 234 assert!(!file.matches("dir1/foo")); 235 assert!(!file.matches("dir1/dir2/foo")); 236 assert!(file.matches("dir1/dir2/dir3/foo")); 237 assert!(file.matches("dir1/dir2/dir3/dir4/foo")); 238 } 239 240 #[test] 241 fn test_gitignore_deep_dir_chained() { 242 // Prefix is relative to root, not to parent file 243 let file = GitIgnoreFile::empty() 244 .chain("", Path::new(""), b"/dummy\n") 245 .unwrap() 246 .chain("dir1/", Path::new(""), b"/dummy\n") 247 .unwrap() 248 .chain("dir1/dir2/", Path::new(""), b"/dir3\n") 249 .unwrap(); 250 assert!(!file.matches("foo")); 251 assert!(!file.matches("dir1/foo")); 252 assert!(!file.matches("dir1/dir2/foo")); 253 assert!(file.matches("dir1/dir2/dir3/foo")); 254 assert!(file.matches("dir1/dir2/dir3/dir4/foo")); 255 } 256 257 #[test] 258 fn test_gitignore_match_only_dir() { 259 let file = GitIgnoreFile::empty() 260 .chain("", Path::new(""), b"/dir/\n") 261 .unwrap(); 262 assert!(!file.matches("dir")); 263 assert!(file.matches("dir/foo")); 264 assert!(file.matches("dir/subdir/foo")); 265 } 266 267 #[test] 268 fn test_gitignore_unusual_symbols() { 269 assert!(matches(b"\\*\n", "*")); 270 assert!(!matches(b"\\*\n", "foo")); 271 assert!(matches(b"\\!\n", "!")); 272 assert!(matches(b"\\?\n", "?")); 273 assert!(!matches(b"\\?\n", "x")); 274 assert!(matches(b"\\w\n", "w")); 275 assert!(GitIgnoreFile::empty() 276 .chain("", Path::new(""), b"\\\n") 277 .is_err()); 278 } 279 280 #[test] 281 #[cfg(not(target_os = "windows"))] 282 fn test_gitignore_backslash_path() { 283 assert!(!matches(b"/foo/bar", "/foo\\bar")); 284 assert!(!matches(b"/foo/bar", "/foo/bar\\")); 285 286 assert!(!matches(b"/foo/bar/", 
"/foo\\bar/")); 287 assert!(!matches(b"/foo/bar/", "/foo\\bar\\/")); 288 289 // Invalid escapes are treated like literal backslashes 290 assert!(!matches(b"\\w\n", "\\w")); 291 assert!(matches(b"\\\\ \n", "\\ ")); 292 assert!(matches(b"\\\\\\ \n", "\\ ")); 293 } 294 295 #[test] 296 #[cfg(target_os = "windows")] 297 /// ignore crate consider backslashes as a directory divider only on 298 /// Windows. 299 fn test_gitignore_backslash_path() { 300 assert!(matches(b"/foo/bar", "/foo\\bar")); 301 assert!(matches(b"/foo/bar", "/foo/bar\\")); 302 303 assert!(matches(b"/foo/bar/", "/foo\\bar/")); 304 assert!(matches(b"/foo/bar/", "/foo\\bar\\/")); 305 306 assert!(matches(b"\\w\n", "\\w")); 307 assert!(!matches(b"\\\\ \n", "\\ ")); 308 assert!(!matches(b"\\\\\\ \n", "\\ ")); 309 } 310 311 #[test] 312 fn test_gitignore_whitespace() { 313 assert!(!matches(b" \n", " ")); 314 assert!(matches(b"\\ \n", " ")); 315 assert!(!matches(b"\\\\ \n", " ")); 316 assert!(matches(b" a\n", " a")); 317 assert!(matches(b"a b\n", "a b")); 318 assert!(matches(b"a b \n", "a b")); 319 assert!(!matches(b"a b \n", "a b ")); 320 assert!(matches(b"a b\\ \\ \n", "a b ")); 321 // Trail CRs at EOL is ignored 322 assert!(matches(b"a\r\n", "a")); 323 assert!(!matches(b"a\r\n", "a\r")); 324 assert!(!matches(b"a\r\r\n", "a\r")); 325 assert!(matches(b"a\r\r\n", "a")); 326 assert!(!matches(b"a\r\r\n", "a\r\r")); 327 assert!(matches(b"a\r\r\n", "a")); 328 assert!(matches(b"\ra\n", "\ra")); 329 assert!(!matches(b"\ra\n", "a")); 330 assert!(GitIgnoreFile::empty() 331 .chain("", Path::new(""), b"a b \\ \n") 332 .is_err()); 333 } 334 335 #[test] 336 fn test_gitignore_glob() { 337 assert!(!matches(b"*.o\n", "foo")); 338 assert!(matches(b"*.o\n", "foo.o")); 339 assert!(!matches(b"foo.?\n", "foo")); 340 assert!(!matches(b"foo.?\n", "foo.")); 341 assert!(matches(b"foo.?\n", "foo.o")); 342 } 343 344 #[test] 345 fn test_gitignore_range() { 346 assert!(!matches(b"foo.[az]\n", "foo")); 347 assert!(matches(b"foo.[az]\n", 
"foo.a")); 348 assert!(!matches(b"foo.[az]\n", "foo.g")); 349 assert!(matches(b"foo.[az]\n", "foo.z")); 350 assert!(!matches(b"foo.[a-z]\n", "foo")); 351 assert!(matches(b"foo.[a-z]\n", "foo.a")); 352 assert!(matches(b"foo.[a-z]\n", "foo.g")); 353 assert!(matches(b"foo.[a-z]\n", "foo.z")); 354 assert!(matches(b"foo.[0-9a-fA-F]\n", "foo.5")); 355 assert!(matches(b"foo.[0-9a-fA-F]\n", "foo.c")); 356 assert!(matches(b"foo.[0-9a-fA-F]\n", "foo.E")); 357 assert!(!matches(b"foo.[0-9a-fA-F]\n", "foo._")); 358 } 359 360 #[test] 361 fn test_gitignore_leading_dir_glob() { 362 assert!(matches(b"**/foo\n", "foo")); 363 assert!(matches(b"**/foo\n", "dir1/dir2/foo")); 364 assert!(matches(b"**/foo\n", "foo/file")); 365 assert!(matches(b"**/dir/foo\n", "dir/foo")); 366 assert!(matches(b"**/dir/foo\n", "dir1/dir2/dir/foo")); 367 } 368 369 #[test] 370 fn test_gitignore_leading_dir_glob_with_prefix() { 371 let file = GitIgnoreFile::empty() 372 .chain("dir1/dir2/", Path::new(""), b"**/foo\n") 373 .unwrap(); 374 assert!(file.matches("dir1/dir2/foo")); 375 assert!(!file.matches("dir1/dir2/bar")); 376 assert!(file.matches("dir1/dir2/sub1/sub2/foo")); 377 assert!(!file.matches("dir1/dir2/sub1/sub2/bar")); 378 } 379 380 #[test] 381 fn test_gitignore_trailing_dir_glob() { 382 assert!(!matches(b"abc/**\n", "abc")); 383 assert!(matches(b"abc/**\n", "abc/file")); 384 assert!(matches(b"abc/**\n", "abc/dir/file")); 385 } 386 387 #[test] 388 fn test_gitignore_internal_dir_glob() { 389 assert!(matches(b"a/**/b\n", "a/b")); 390 assert!(matches(b"a/**/b\n", "a/x/b")); 391 assert!(matches(b"a/**/b\n", "a/x/y/b")); 392 assert!(!matches(b"a/**/b\n", "ax/y/b")); 393 assert!(!matches(b"a/**/b\n", "a/x/yb")); 394 assert!(!matches(b"a/**/b\n", "ab")); 395 } 396 397 #[test] 398 fn test_gitignore_internal_dir_glob_not_really() { 399 assert!(!matches(b"a/x**y/b\n", "a/b")); 400 assert!(matches(b"a/x**y/b\n", "a/xy/b")); 401 assert!(matches(b"a/x**y/b\n", "a/xzzzy/b")); 402 } 403 404 #[test] 405 fn 
test_gitignore_line_ordering() { 406 assert!(matches(b"foo\n!foo/bar\n", "foo")); 407 assert!(!matches(b"foo\n!foo/bar\n", "foo/bar")); 408 assert!(matches(b"foo\n!foo/bar\n", "foo/baz")); 409 assert!(matches(b"foo\n!foo/bar\nfoo/bar/baz", "foo")); 410 assert!(!matches(b"foo\n!foo/bar\nfoo/bar/baz", "foo/bar")); 411 assert!(matches(b"foo\n!foo/bar\nfoo/bar/baz", "foo/bar/baz")); 412 assert!(!matches(b"foo\n!foo/bar\nfoo/bar/baz", "foo/bar/quux")); 413 assert!(!matches(b"foo/*\n!foo/bar", "foo/bar")); 414 } 415 416 #[test] 417 fn test_gitignore_file_ordering() { 418 let file1 = GitIgnoreFile::empty() 419 .chain("", Path::new(""), b"/foo\n") 420 .unwrap(); 421 let file2 = file1.chain("foo/", Path::new(""), b"!/bar").unwrap(); 422 let file3 = file2.chain("foo/bar/", Path::new(""), b"/baz").unwrap(); 423 assert!(file1.matches("foo")); 424 assert!(file1.matches("foo/bar")); 425 assert!(!file2.matches("foo/bar")); 426 assert!(!file2.matches("foo/bar/baz")); 427 assert!(file2.matches("foo/baz")); 428 assert!(file3.matches("foo/bar/baz")); 429 assert!(!file3.matches("foo/bar/qux")); 430 } 431 432 #[test] 433 fn test_gitignore_negative_parent_directory() { 434 // The following script shows that Git ignores the file: 435 // 436 // ```bash 437 // $ rm -rf test-repo && \ 438 // git init test-repo &>/dev/null && \ 439 // cd test-repo && \ 440 // printf 'A/B.*\n!/A/\n' >.gitignore && \ 441 // mkdir A && \ 442 // touch A/B.ext && \ 443 // git check-ignore A/B.ext 444 // A/B.ext 445 // ``` 446 let ignore = GitIgnoreFile::empty() 447 .chain("", Path::new(""), b"foo/bar.*\n!/foo/\n") 448 .unwrap(); 449 assert!(ignore.matches("foo/bar.ext")); 450 451 let ignore = GitIgnoreFile::empty() 452 .chain("", Path::new(""), b"!/foo/\nfoo/bar.*\n") 453 .unwrap(); 454 assert!(ignore.matches("foo/bar.ext")); 455 } 456}