/* Git fork, at reftables-rust -- 2018 lines, 51 kB (view raw) */
1#define DISABLE_SIGN_COMPARE_WARNINGS 2 3#include "git-compat-util.h" 4#include "config.h" 5#include "gettext.h" 6#include "grep.h" 7#include "hex.h" 8#include "odb.h" 9#include "pretty.h" 10#include "userdiff.h" 11#include "xdiff-interface.h" 12#include "diff.h" 13#include "diffcore.h" 14#include "quote.h" 15#include "help.h" 16 17static int grep_source_load(struct grep_source *gs); 18static int grep_source_is_binary(struct grep_source *gs, 19 struct index_state *istate); 20 21static void std_output(struct grep_opt *opt UNUSED, const void *buf, size_t size) 22{ 23 fwrite(buf, size, 1, stdout); 24} 25 26static const char *color_grep_slots[] = { 27 [GREP_COLOR_CONTEXT] = "context", 28 [GREP_COLOR_FILENAME] = "filename", 29 [GREP_COLOR_FUNCTION] = "function", 30 [GREP_COLOR_LINENO] = "lineNumber", 31 [GREP_COLOR_COLUMNNO] = "column", 32 [GREP_COLOR_MATCH_CONTEXT] = "matchContext", 33 [GREP_COLOR_MATCH_SELECTED] = "matchSelected", 34 [GREP_COLOR_SELECTED] = "selected", 35 [GREP_COLOR_SEP] = "separator", 36}; 37 38static int parse_pattern_type_arg(const char *opt, const char *arg) 39{ 40 if (!strcmp(arg, "default")) 41 return GREP_PATTERN_TYPE_UNSPECIFIED; 42 else if (!strcmp(arg, "basic")) 43 return GREP_PATTERN_TYPE_BRE; 44 else if (!strcmp(arg, "extended")) 45 return GREP_PATTERN_TYPE_ERE; 46 else if (!strcmp(arg, "fixed")) 47 return GREP_PATTERN_TYPE_FIXED; 48 else if (!strcmp(arg, "perl")) 49 return GREP_PATTERN_TYPE_PCRE; 50 die("bad %s argument: %s", opt, arg); 51} 52 53define_list_config_array_extra(color_grep_slots, {"match"}); 54 55/* 56 * Read the configuration file once and store it in 57 * the grep_defaults template. 
58 */ 59int grep_config(const char *var, const char *value, 60 const struct config_context *ctx, void *cb) 61{ 62 struct grep_opt *opt = cb; 63 const char *slot; 64 65 if (userdiff_config(var, value) < 0) 66 return -1; 67 68 if (!strcmp(var, "grep.extendedregexp")) { 69 opt->extended_regexp_option = git_config_bool(var, value); 70 return 0; 71 } 72 73 if (!strcmp(var, "grep.patterntype")) { 74 opt->pattern_type_option = parse_pattern_type_arg(var, value); 75 return 0; 76 } 77 78 if (!strcmp(var, "grep.linenumber")) { 79 opt->linenum = git_config_bool(var, value); 80 return 0; 81 } 82 if (!strcmp(var, "grep.column")) { 83 opt->columnnum = git_config_bool(var, value); 84 return 0; 85 } 86 87 if (!strcmp(var, "grep.fullname")) { 88 opt->relative = !git_config_bool(var, value); 89 return 0; 90 } 91 92 if (!strcmp(var, "color.grep")) 93 opt->color = git_config_colorbool(var, value); 94 if (!strcmp(var, "color.grep.match")) { 95 if (grep_config("color.grep.matchcontext", value, ctx, cb) < 0) 96 return -1; 97 if (grep_config("color.grep.matchselected", value, ctx, cb) < 0) 98 return -1; 99 } else if (skip_prefix(var, "color.grep.", &slot)) { 100 int i = LOOKUP_CONFIG(color_grep_slots, slot); 101 char *color; 102 103 if (i < 0) 104 return -1; 105 color = opt->colors[i]; 106 if (!value) 107 return config_error_nonbool(var); 108 return color_parse(value, color); 109 } 110 return 0; 111} 112 113void grep_init(struct grep_opt *opt, struct repository *repo) 114{ 115 struct grep_opt blank = GREP_OPT_INIT; 116 memcpy(opt, &blank, sizeof(*opt)); 117 118 opt->repo = repo; 119 opt->pattern_tail = &opt->pattern_list; 120 opt->header_tail = &opt->header_list; 121} 122 123static struct grep_pat *create_grep_pat(const char *pat, size_t patlen, 124 const char *origin, int no, 125 enum grep_pat_token t, 126 enum grep_header_field field) 127{ 128 struct grep_pat *p = xcalloc(1, sizeof(*p)); 129 p->pattern = xmemdupz(pat, patlen); 130 p->patternlen = patlen; 131 p->origin = origin; 132 
p->no = no; 133 p->token = t; 134 p->field = field; 135 return p; 136} 137 138static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p) 139{ 140 **tail = p; 141 *tail = &p->next; 142 p->next = NULL; 143 144 switch (p->token) { 145 case GREP_PATTERN: /* atom */ 146 case GREP_PATTERN_HEAD: 147 case GREP_PATTERN_BODY: 148 for (;;) { 149 struct grep_pat *new_pat; 150 size_t len = 0; 151 char *cp = p->pattern + p->patternlen, *nl = NULL; 152 while (++len <= p->patternlen) { 153 if (*(--cp) == '\n') { 154 nl = cp; 155 break; 156 } 157 } 158 if (!nl) 159 break; 160 new_pat = create_grep_pat(nl + 1, len - 1, p->origin, 161 p->no, p->token, p->field); 162 new_pat->next = p->next; 163 if (!p->next) 164 *tail = &new_pat->next; 165 p->next = new_pat; 166 *nl = '\0'; 167 p->patternlen -= len; 168 } 169 break; 170 default: 171 break; 172 } 173} 174 175void append_header_grep_pattern(struct grep_opt *opt, 176 enum grep_header_field field, const char *pat) 177{ 178 struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0, 179 GREP_PATTERN_HEAD, field); 180 if (field == GREP_HEADER_REFLOG) 181 opt->use_reflog_filter = 1; 182 do_append_grep_pat(&opt->header_tail, p); 183} 184 185void append_grep_pattern(struct grep_opt *opt, const char *pat, 186 const char *origin, int no, enum grep_pat_token t) 187{ 188 append_grep_pat(opt, pat, strlen(pat), origin, no, t); 189} 190 191void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, 192 const char *origin, int no, enum grep_pat_token t) 193{ 194 struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0); 195 do_append_grep_pat(&opt->pattern_tail, p); 196} 197 198struct grep_opt *grep_opt_dup(const struct grep_opt *opt) 199{ 200 struct grep_pat *pat; 201 struct grep_opt *ret = xmalloc(sizeof(struct grep_opt)); 202 *ret = *opt; 203 204 ret->pattern_list = NULL; 205 ret->pattern_tail = &ret->pattern_list; 206 207 for(pat = opt->pattern_list; pat != NULL; pat = pat->next) 208 { 209 
if(pat->token == GREP_PATTERN_HEAD) 210 append_header_grep_pattern(ret, pat->field, 211 pat->pattern); 212 else 213 append_grep_pat(ret, pat->pattern, pat->patternlen, 214 pat->origin, pat->no, pat->token); 215 } 216 217 return ret; 218} 219 220static NORETURN void compile_regexp_failed(const struct grep_pat *p, 221 const char *error) 222{ 223 char where[1024]; 224 225 if (p->no) 226 xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no); 227 else if (p->origin) 228 xsnprintf(where, sizeof(where), "%s, ", p->origin); 229 else 230 where[0] = 0; 231 232 die("%s'%s': %s", where, p->pattern, error); 233} 234 235static int is_fixed(const char *s, size_t len) 236{ 237 size_t i; 238 239 for (i = 0; i < len; i++) { 240 if (is_regex_special(s[i])) 241 return 0; 242 } 243 244 return 1; 245} 246 247#ifdef USE_LIBPCRE2 248#define GREP_PCRE2_DEBUG_MALLOC 0 249 250static void *pcre2_malloc(PCRE2_SIZE size, void *memory_data UNUSED) 251{ 252 void *pointer = malloc(size); 253#if GREP_PCRE2_DEBUG_MALLOC 254 static int count = 1; 255 fprintf(stderr, "PCRE2:%p -> #%02d: alloc(%lu)\n", pointer, count++, size); 256#endif 257 return pointer; 258} 259 260static void pcre2_free(void *pointer, void *memory_data UNUSED) 261{ 262#if GREP_PCRE2_DEBUG_MALLOC 263 static int count = 1; 264 if (pointer) 265 fprintf(stderr, "PCRE2:%p -> #%02d: free()\n", pointer, count++); 266#endif 267 free(pointer); 268} 269 270static int pcre2_jit_functional(void) 271{ 272 static int jit_working = -1; 273 pcre2_code *code; 274 size_t off; 275 int err; 276 277 if (jit_working != -1) 278 return jit_working; 279 280 /* 281 * Try to JIT compile a simple pattern to probe if the JIT is 282 * working in general. It might fail for systems where creating 283 * memory mappings for runtime code generation is restricted. 
284 */ 285 code = pcre2_compile((PCRE2_SPTR)".", 1, 0, &err, &off, NULL); 286 if (!code) 287 return 0; 288 289 jit_working = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE) == 0; 290 pcre2_code_free(code); 291 292 return jit_working; 293} 294 295static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) 296{ 297 int error; 298 PCRE2_UCHAR errbuf[256]; 299 PCRE2_SIZE erroffset; 300 int options = PCRE2_MULTILINE; 301 int jitret; 302 int patinforet; 303 size_t jitsizearg; 304 int literal = !opt->ignore_case && (p->fixed || p->is_fixed); 305 306 /* 307 * Call pcre2_general_context_create() before calling any 308 * other pcre2_*(). It sets up our malloc()/free() functions 309 * with which everything else is allocated. 310 */ 311 p->pcre2_general_context = pcre2_general_context_create( 312 pcre2_malloc, pcre2_free, NULL); 313 if (!p->pcre2_general_context) 314 die("Couldn't allocate PCRE2 general context"); 315 316 if (opt->ignore_case) { 317 if (!opt->ignore_locale && has_non_ascii(p->pattern)) { 318 p->pcre2_tables = pcre2_maketables(p->pcre2_general_context); 319 p->pcre2_compile_context = pcre2_compile_context_create(p->pcre2_general_context); 320 pcre2_set_character_tables(p->pcre2_compile_context, 321 p->pcre2_tables); 322 } 323 options |= PCRE2_CASELESS; 324 } 325 if (!opt->ignore_locale && is_utf8_locale() && !literal) 326 options |= (PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_INVALID_UTF); 327 328#ifndef GIT_PCRE2_VERSION_10_35_OR_HIGHER 329 /* 330 * Work around a JIT bug related to invalid Unicode character handling 331 * fixed in 10.35: 332 * https://github.com/PCRE2Project/pcre2/commit/c21bd977547d 333 */ 334 options &= ~PCRE2_UCP; 335#endif 336 337#ifndef GIT_PCRE2_VERSION_10_36_OR_HIGHER 338 /* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */ 339 if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS)) 340 options |= PCRE2_NO_START_OPTIMIZE; 341#endif 342 343 p->pcre2_pattern = 
pcre2_compile((PCRE2_SPTR)p->pattern, 344 p->patternlen, options, &error, &erroffset, 345 p->pcre2_compile_context); 346 347 if (p->pcre2_pattern) { 348 p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, p->pcre2_general_context); 349 if (!p->pcre2_match_data) 350 die("Couldn't allocate PCRE2 match data"); 351 } else { 352 pcre2_get_error_message(error, errbuf, sizeof(errbuf)); 353 compile_regexp_failed(p, (const char *)&errbuf); 354 } 355 356 pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on); 357 if (p->pcre2_jit_on) { 358 jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE); 359 if (jitret == PCRE2_ERROR_NOMEMORY && !pcre2_jit_functional()) { 360 /* 361 * Even though pcre2_config(PCRE2_CONFIG_JIT, ...) 362 * indicated JIT support, the library might still 363 * fail to generate JIT code for various reasons, 364 * e.g. when SELinux's 'deny_execmem' or PaX's 365 * MPROTECT prevent creating W|X memory mappings. 366 * 367 * Instead of faling hard, fall back to interpreter 368 * mode, just as if the pattern was prefixed with 369 * '(*NO_JIT)'. 370 */ 371 p->pcre2_jit_on = 0; 372 return; 373 } else if (jitret) { 374 int need_clip = p->patternlen > 64; 375 int clip_len = need_clip ? 64 : p->patternlen; 376 die("Couldn't JIT the PCRE2 pattern '%.*s'%s, got '%d'%s", 377 clip_len, p->pattern, need_clip ? "..." : "", jitret, 378 pcre2_jit_functional() 379 ? "\nPerhaps prefix (*NO_JIT) to your pattern?" 380 : ""); 381 } 382 383 /* 384 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just 385 * tells us whether the library itself supports JIT, 386 * but to see whether we're going to be actually using 387 * JIT we need to extract PCRE2_INFO_JITSIZE from the 388 * pattern *after* we do pcre2_jit_compile() above. 389 * 390 * This is because if the pattern contains the 391 * (*NO_JIT) verb (see pcre2syntax(3)) 392 * pcre2_jit_compile() will exit early with 0. 
If we 393 * then proceed to call pcre2_jit_match() further down 394 * the line instead of pcre2_match() we'll either 395 * segfault (pre PCRE 10.31) or run into a fatal error 396 * (post PCRE2 10.31) 397 */ 398 patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg); 399 if (patinforet) 400 BUG("pcre2_pattern_info() failed: %d", patinforet); 401 if (jitsizearg == 0) { 402 p->pcre2_jit_on = 0; 403 return; 404 } 405 } 406} 407 408static int pcre2match(struct grep_pat *p, const char *line, const char *eol, 409 regmatch_t *match, int eflags) 410{ 411 int ret, flags = 0; 412 PCRE2_SIZE *ovector; 413 PCRE2_UCHAR errbuf[256]; 414 415 if (eflags & REG_NOTBOL) 416 flags |= PCRE2_NOTBOL; 417 418 if (p->pcre2_jit_on) 419 ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line, 420 eol - line, 0, flags, p->pcre2_match_data, 421 NULL); 422 else 423 ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line, 424 eol - line, 0, flags, p->pcre2_match_data, 425 NULL); 426 427 if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) { 428 pcre2_get_error_message(ret, errbuf, sizeof(errbuf)); 429 die("%s failed with error code %d: %s", 430 (p->pcre2_jit_on ? 
"pcre2_jit_match" : "pcre2_match"), ret, 431 errbuf); 432 } 433 if (ret > 0) { 434 ovector = pcre2_get_ovector_pointer(p->pcre2_match_data); 435 ret = 0; 436 match->rm_so = (int)ovector[0]; 437 match->rm_eo = (int)ovector[1]; 438 } 439 440 return ret; 441} 442 443static void free_pcre2_pattern(struct grep_pat *p) 444{ 445 pcre2_compile_context_free(p->pcre2_compile_context); 446 pcre2_code_free(p->pcre2_pattern); 447 pcre2_match_data_free(p->pcre2_match_data); 448#ifdef GIT_PCRE2_VERSION_10_34_OR_HIGHER 449 pcre2_maketables_free(p->pcre2_general_context, p->pcre2_tables); 450#else 451 free((void *)p->pcre2_tables); 452#endif 453 pcre2_general_context_free(p->pcre2_general_context); 454} 455#else /* !USE_LIBPCRE2 */ 456static void compile_pcre2_pattern(struct grep_pat *p UNUSED, 457 const struct grep_opt *opt UNUSED) 458{ 459 die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE"); 460} 461 462static int pcre2match(struct grep_pat *p UNUSED, const char *line UNUSED, 463 const char *eol UNUSED, regmatch_t *match UNUSED, 464 int eflags UNUSED) 465{ 466 return 1; 467} 468 469static void free_pcre2_pattern(struct grep_pat *p UNUSED) 470{ 471} 472 473static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) 474{ 475 struct strbuf sb = STRBUF_INIT; 476 int err; 477 int regflags = 0; 478 479 basic_regex_quote_buf(&sb, p->pattern); 480 if (opt->ignore_case) 481 regflags |= REG_ICASE; 482 err = regcomp(&p->regexp, sb.buf, regflags); 483 strbuf_release(&sb); 484 if (err) { 485 char errbuf[1024]; 486 regerror(err, &p->regexp, errbuf, sizeof(errbuf)); 487 compile_regexp_failed(p, errbuf); 488 } 489} 490#endif /* !USE_LIBPCRE2 */ 491 492static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) 493{ 494 int err; 495 int regflags = REG_NEWLINE; 496 497 if (opt->pattern_type_option == GREP_PATTERN_TYPE_UNSPECIFIED) 498 opt->pattern_type_option = (opt->extended_regexp_option 499 ? 
GREP_PATTERN_TYPE_ERE 500 : GREP_PATTERN_TYPE_BRE); 501 502 p->word_regexp = opt->word_regexp; 503 p->ignore_case = opt->ignore_case; 504 p->fixed = opt->pattern_type_option == GREP_PATTERN_TYPE_FIXED; 505 506 if (opt->pattern_type_option != GREP_PATTERN_TYPE_PCRE && 507 memchr(p->pattern, 0, p->patternlen)) 508 die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2")); 509 510 p->is_fixed = is_fixed(p->pattern, p->patternlen); 511#ifdef USE_LIBPCRE2 512 if (!p->fixed && !p->is_fixed) { 513 const char *no_jit = "(*NO_JIT)"; 514 const int no_jit_len = strlen(no_jit); 515 if (starts_with(p->pattern, no_jit) && 516 is_fixed(p->pattern + no_jit_len, 517 p->patternlen - no_jit_len)) 518 p->is_fixed = 1; 519 } 520#endif 521 if (p->fixed || p->is_fixed) { 522#ifdef USE_LIBPCRE2 523 if (p->is_fixed) { 524 compile_pcre2_pattern(p, opt); 525 } else { 526 /* 527 * E.g. t7811-grep-open.sh relies on the 528 * pattern being restored. 529 */ 530 char *old_pattern = p->pattern; 531 size_t old_patternlen = p->patternlen; 532 struct strbuf sb = STRBUF_INIT; 533 534 /* 535 * There is the PCRE2_LITERAL flag, but it's 536 * only in PCRE v2 10.30 and later. Needing to 537 * ifdef our way around that and dealing with 538 * it + PCRE2_MULTILINE being an error is more 539 * complex than just quoting this ourselves. 
540 */ 541 strbuf_add(&sb, "\\Q", 2); 542 strbuf_add(&sb, p->pattern, p->patternlen); 543 strbuf_add(&sb, "\\E", 2); 544 545 p->pattern = sb.buf; 546 p->patternlen = sb.len; 547 compile_pcre2_pattern(p, opt); 548 p->pattern = old_pattern; 549 p->patternlen = old_patternlen; 550 strbuf_release(&sb); 551 } 552#else /* !USE_LIBPCRE2 */ 553 compile_fixed_regexp(p, opt); 554#endif /* !USE_LIBPCRE2 */ 555 return; 556 } 557 558 if (opt->pattern_type_option == GREP_PATTERN_TYPE_PCRE) { 559 compile_pcre2_pattern(p, opt); 560 return; 561 } 562 563 if (p->ignore_case) 564 regflags |= REG_ICASE; 565 if (opt->pattern_type_option == GREP_PATTERN_TYPE_ERE) 566 regflags |= REG_EXTENDED; 567 err = regcomp(&p->regexp, p->pattern, regflags); 568 if (err) { 569 char errbuf[1024]; 570 regerror(err, &p->regexp, errbuf, 1024); 571 compile_regexp_failed(p, errbuf); 572 } 573} 574 575static struct grep_expr *grep_not_expr(struct grep_expr *expr) 576{ 577 struct grep_expr *z = xcalloc(1, sizeof(*z)); 578 z->node = GREP_NODE_NOT; 579 z->u.unary = expr; 580 return z; 581} 582 583static struct grep_expr *grep_binexp(enum grep_expr_node kind, 584 struct grep_expr *left, 585 struct grep_expr *right) 586{ 587 struct grep_expr *z = xcalloc(1, sizeof(*z)); 588 z->node = kind; 589 z->u.binary.left = left; 590 z->u.binary.right = right; 591 return z; 592} 593 594static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right) 595{ 596 return grep_binexp(GREP_NODE_OR, left, right); 597} 598 599static struct grep_expr *grep_and_expr(struct grep_expr *left, struct grep_expr *right) 600{ 601 return grep_binexp(GREP_NODE_AND, left, right); 602} 603 604static struct grep_expr *compile_pattern_or(struct grep_pat **); 605static struct grep_expr *compile_pattern_atom(struct grep_pat **list) 606{ 607 struct grep_pat *p; 608 struct grep_expr *x; 609 610 p = *list; 611 if (!p) 612 return NULL; 613 switch (p->token) { 614 case GREP_PATTERN: /* atom */ 615 case GREP_PATTERN_HEAD: 616 case 
GREP_PATTERN_BODY: 617 CALLOC_ARRAY(x, 1); 618 x->node = GREP_NODE_ATOM; 619 x->u.atom = p; 620 *list = p->next; 621 return x; 622 case GREP_OPEN_PAREN: 623 *list = p->next; 624 x = compile_pattern_or(list); 625 if (!*list || (*list)->token != GREP_CLOSE_PAREN) 626 die("unmatched ( for expression group"); 627 *list = (*list)->next; 628 return x; 629 default: 630 return NULL; 631 } 632} 633 634static struct grep_expr *compile_pattern_not(struct grep_pat **list) 635{ 636 struct grep_pat *p; 637 struct grep_expr *x; 638 639 p = *list; 640 if (!p) 641 return NULL; 642 switch (p->token) { 643 case GREP_NOT: 644 if (!p->next) 645 die("--not not followed by pattern expression"); 646 *list = p->next; 647 x = compile_pattern_not(list); 648 if (!x) 649 die("--not followed by non pattern expression"); 650 return grep_not_expr(x); 651 default: 652 return compile_pattern_atom(list); 653 } 654} 655 656static struct grep_expr *compile_pattern_and(struct grep_pat **list) 657{ 658 struct grep_pat *p; 659 struct grep_expr *x, *y; 660 661 x = compile_pattern_not(list); 662 p = *list; 663 if (p && p->token == GREP_AND) { 664 if (!x) 665 die("--and not preceded by pattern expression"); 666 if (!p->next) 667 die("--and not followed by pattern expression"); 668 *list = p->next; 669 y = compile_pattern_and(list); 670 if (!y) 671 die("--and not followed by pattern expression"); 672 return grep_and_expr(x, y); 673 } 674 return x; 675} 676 677static struct grep_expr *compile_pattern_or(struct grep_pat **list) 678{ 679 struct grep_pat *p; 680 struct grep_expr *x, *y; 681 682 x = compile_pattern_and(list); 683 p = *list; 684 if (x && p && p->token != GREP_CLOSE_PAREN) { 685 y = compile_pattern_or(list); 686 if (!y) 687 die("not a pattern expression %s", p->pattern); 688 return grep_or_expr(x, y); 689 } 690 return x; 691} 692 693static struct grep_expr *compile_pattern_expr(struct grep_pat **list) 694{ 695 return compile_pattern_or(list); 696} 697 698static struct grep_expr 
*grep_true_expr(void) 699{ 700 struct grep_expr *z = xcalloc(1, sizeof(*z)); 701 z->node = GREP_NODE_TRUE; 702 return z; 703} 704 705static struct grep_expr *prep_header_patterns(struct grep_opt *opt) 706{ 707 struct grep_pat *p; 708 struct grep_expr *header_expr; 709 struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]); 710 enum grep_header_field fld; 711 712 if (!opt->header_list) 713 return NULL; 714 715 for (p = opt->header_list; p; p = p->next) { 716 if (p->token != GREP_PATTERN_HEAD) 717 BUG("a non-header pattern in grep header list."); 718 if (p->field < GREP_HEADER_FIELD_MIN || 719 GREP_HEADER_FIELD_MAX <= p->field) 720 BUG("unknown header field %d", p->field); 721 compile_regexp(p, opt); 722 } 723 724 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) 725 header_group[fld] = NULL; 726 727 for (p = opt->header_list; p; p = p->next) { 728 struct grep_expr *h; 729 struct grep_pat *pp = p; 730 731 h = compile_pattern_atom(&pp); 732 if (!h || pp != p->next) 733 BUG("malformed header expr"); 734 if (!header_group[p->field]) { 735 header_group[p->field] = h; 736 continue; 737 } 738 header_group[p->field] = grep_or_expr(h, header_group[p->field]); 739 } 740 741 header_expr = NULL; 742 743 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) { 744 if (!header_group[fld]) 745 continue; 746 if (!header_expr) 747 header_expr = grep_true_expr(); 748 header_expr = grep_or_expr(header_group[fld], header_expr); 749 } 750 return header_expr; 751} 752 753static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y) 754{ 755 struct grep_expr *z = x; 756 757 while (x) { 758 assert(x->node == GREP_NODE_OR); 759 if (x->u.binary.right && 760 x->u.binary.right->node == GREP_NODE_TRUE) { 761 free(x->u.binary.right); 762 x->u.binary.right = y; 763 break; 764 } 765 x = x->u.binary.right; 766 } 767 return z; 768} 769 770void compile_grep_patterns(struct grep_opt *opt) 771{ 772 struct grep_pat *p; 773 struct grep_expr *header_expr = prep_header_patterns(opt); 774 
int extended = 0; 775 776 for (p = opt->pattern_list; p; p = p->next) { 777 switch (p->token) { 778 case GREP_PATTERN: /* atom */ 779 case GREP_PATTERN_HEAD: 780 case GREP_PATTERN_BODY: 781 compile_regexp(p, opt); 782 break; 783 default: 784 extended = 1; 785 break; 786 } 787 } 788 789 if (opt->all_match || opt->no_body_match || header_expr) 790 extended = 1; 791 else if (!extended) 792 return; 793 794 p = opt->pattern_list; 795 if (p) 796 opt->pattern_expression = compile_pattern_expr(&p); 797 if (p) 798 die("incomplete pattern expression group: %s", p->pattern); 799 800 if (opt->no_body_match && opt->pattern_expression) 801 opt->pattern_expression = grep_not_expr(opt->pattern_expression); 802 803 if (!header_expr) 804 return; 805 806 if (!opt->pattern_expression) 807 opt->pattern_expression = header_expr; 808 else if (opt->all_match) 809 opt->pattern_expression = grep_splice_or(header_expr, 810 opt->pattern_expression); 811 else 812 opt->pattern_expression = grep_or_expr(opt->pattern_expression, 813 header_expr); 814 opt->all_match = 1; 815} 816 817static void free_pattern_expr(struct grep_expr *x) 818{ 819 switch (x->node) { 820 case GREP_NODE_TRUE: 821 case GREP_NODE_ATOM: 822 break; 823 case GREP_NODE_NOT: 824 free_pattern_expr(x->u.unary); 825 break; 826 case GREP_NODE_AND: 827 case GREP_NODE_OR: 828 free_pattern_expr(x->u.binary.left); 829 free_pattern_expr(x->u.binary.right); 830 break; 831 } 832 free(x); 833} 834 835static void free_grep_pat(struct grep_pat *pattern) 836{ 837 struct grep_pat *p, *n; 838 839 for (p = pattern; p; p = n) { 840 n = p->next; 841 switch (p->token) { 842 case GREP_PATTERN: /* atom */ 843 case GREP_PATTERN_HEAD: 844 case GREP_PATTERN_BODY: 845 if (p->pcre2_pattern) 846 free_pcre2_pattern(p); 847 else 848 regfree(&p->regexp); 849 break; 850 default: 851 break; 852 } 853 free(p->pattern); 854 free(p); 855 } 856} 857 858void free_grep_patterns(struct grep_opt *opt) 859{ 860 free_grep_pat(opt->pattern_list); 861 
free_grep_pat(opt->header_list); 862 863 if (opt->pattern_expression) 864 free_pattern_expr(opt->pattern_expression); 865} 866 867static const char *end_of_line(const char *cp, unsigned long *left) 868{ 869 unsigned long l = *left; 870 while (l && *cp != '\n') { 871 l--; 872 cp++; 873 } 874 *left = l; 875 return cp; 876} 877 878static int word_char(char ch) 879{ 880 return isalnum(ch) || ch == '_'; 881} 882 883static void output_color(struct grep_opt *opt, const void *data, size_t size, 884 const char *color) 885{ 886 if (want_color(opt->color) && color && color[0]) { 887 opt->output(opt, color, strlen(color)); 888 opt->output(opt, data, size); 889 opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET)); 890 } else 891 opt->output(opt, data, size); 892} 893 894static void output_sep(struct grep_opt *opt, char sign) 895{ 896 if (opt->null_following_name) 897 opt->output(opt, "\0", 1); 898 else 899 output_color(opt, &sign, 1, opt->colors[GREP_COLOR_SEP]); 900} 901 902static void show_name(struct grep_opt *opt, const char *name) 903{ 904 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]); 905 opt->output(opt, opt->null_following_name ? 
"\0" : "\n", 1); 906} 907 908static int patmatch(struct grep_pat *p, 909 const char *line, const char *eol, 910 regmatch_t *match, int eflags) 911{ 912 if (p->pcre2_pattern) 913 return !pcre2match(p, line, eol, match, eflags); 914 915 switch (regexec_buf(&p->regexp, line, eol - line, 1, match, eflags)) { 916 case 0: 917 return 1; 918 case REG_NOMATCH: 919 return 0; 920 default: 921 return -1; 922 } 923} 924 925static void strip_timestamp(const char *bol, const char **eol_p) 926{ 927 const char *eol = *eol_p; 928 929 while (bol < --eol) { 930 if (*eol != '>') 931 continue; 932 *eol_p = ++eol; 933 break; 934 } 935} 936 937static struct { 938 const char *field; 939 size_t len; 940} header_field[] = { 941 { "author ", 7 }, 942 { "committer ", 10 }, 943 { "reflog ", 7 }, 944}; 945 946static int headerless_match_one_pattern(struct grep_pat *p, 947 const char *bol, const char *eol, 948 enum grep_context ctx, 949 regmatch_t *pmatch, int eflags) 950{ 951 int hit = 0; 952 const char *start = bol; 953 954 if ((p->token != GREP_PATTERN) && 955 ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD))) 956 return 0; 957 958 again: 959 hit = patmatch(p, bol, eol, pmatch, eflags); 960 if (hit < 0) 961 hit = 0; 962 963 if (hit && p->word_regexp) { 964 if ((pmatch[0].rm_so < 0) || 965 (eol - bol) < pmatch[0].rm_so || 966 (pmatch[0].rm_eo < 0) || 967 (eol - bol) < pmatch[0].rm_eo) 968 die("regexp returned nonsense"); 969 970 /* Match beginning must be either beginning of the 971 * line, or at word boundary (i.e. the last char must 972 * not be a word char). Similarly, match end must be 973 * either end of the line, or at word boundary 974 * (i.e. the next char must not be a word char). 975 */ 976 if ( ((pmatch[0].rm_so == 0) || 977 !word_char(bol[pmatch[0].rm_so-1])) && 978 ((pmatch[0].rm_eo == (eol-bol)) || 979 !word_char(bol[pmatch[0].rm_eo])) ) 980 ; 981 else 982 hit = 0; 983 984 /* Words consist of at least one character. 
*/ 985 if (pmatch->rm_so == pmatch->rm_eo) 986 hit = 0; 987 988 if (!hit && pmatch[0].rm_so + bol + 1 < eol) { 989 /* There could be more than one match on the 990 * line, and the first match might not be 991 * strict word match. But later ones could be! 992 * Forward to the next possible start, i.e. the 993 * next position following a non-word char. 994 */ 995 bol = pmatch[0].rm_so + bol + 1; 996 while (word_char(bol[-1]) && bol < eol) 997 bol++; 998 eflags |= REG_NOTBOL; 999 if (bol < eol) 1000 goto again; 1001 } 1002 } 1003 if (hit) { 1004 pmatch[0].rm_so += bol - start; 1005 pmatch[0].rm_eo += bol - start; 1006 } 1007 return hit; 1008} 1009 1010static int match_one_pattern(struct grep_pat *p, 1011 const char *bol, const char *eol, 1012 enum grep_context ctx, regmatch_t *pmatch, 1013 int eflags) 1014{ 1015 const char *field; 1016 size_t len; 1017 1018 if (p->token == GREP_PATTERN_HEAD) { 1019 assert(p->field < ARRAY_SIZE(header_field)); 1020 field = header_field[p->field].field; 1021 len = header_field[p->field].len; 1022 if (strncmp(bol, field, len)) 1023 return 0; 1024 bol += len; 1025 1026 switch (p->field) { 1027 case GREP_HEADER_AUTHOR: 1028 case GREP_HEADER_COMMITTER: 1029 strip_timestamp(bol, &eol); 1030 break; 1031 default: 1032 break; 1033 } 1034 } 1035 1036 return headerless_match_one_pattern(p, bol, eol, ctx, pmatch, eflags); 1037} 1038 1039 1040static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, 1041 const char *bol, const char *eol, 1042 enum grep_context ctx, ssize_t *col, 1043 ssize_t *icol, int collect_hits) 1044{ 1045 int h = 0; 1046 1047 switch (x->node) { 1048 case GREP_NODE_TRUE: 1049 h = 1; 1050 break; 1051 case GREP_NODE_ATOM: 1052 { 1053 regmatch_t tmp; 1054 h = match_one_pattern(x->u.atom, bol, eol, ctx, 1055 &tmp, 0); 1056 if (h && (*col < 0 || tmp.rm_so < *col)) 1057 *col = tmp.rm_so; 1058 } 1059 if (x->u.atom->token == GREP_PATTERN_BODY) 1060 opt->body_hit |= h; 1061 break; 1062 case GREP_NODE_NOT: 1063 /* 1064 * Upon 
visiting a GREP_NODE_NOT, col and icol become swapped. 1065 */ 1066 h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col, 1067 0); 1068 break; 1069 case GREP_NODE_AND: 1070 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col, 1071 icol, 0); 1072 if (h || opt->columnnum) { 1073 /* 1074 * Don't short-circuit AND when given --column, since a 1075 * NOT earlier in the tree may turn this into an OR. In 1076 * this case, see the below comment. 1077 */ 1078 h &= match_expr_eval(opt, x->u.binary.right, bol, eol, 1079 ctx, col, icol, 0); 1080 } 1081 break; 1082 case GREP_NODE_OR: 1083 if (!(collect_hits || opt->columnnum)) { 1084 /* 1085 * Don't short-circuit OR when given --column (or 1086 * collecting hits) to ensure we don't skip a later 1087 * child that would produce an earlier match. 1088 */ 1089 return (match_expr_eval(opt, x->u.binary.left, bol, eol, 1090 ctx, col, icol, 0) || 1091 match_expr_eval(opt, x->u.binary.right, bol, 1092 eol, ctx, col, icol, 0)); 1093 } 1094 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col, 1095 icol, 0); 1096 if (collect_hits) 1097 x->u.binary.left->hit |= h; 1098 h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col, 1099 icol, collect_hits); 1100 break; 1101 default: 1102 die("Unexpected node type (internal error) %d", x->node); 1103 } 1104 if (collect_hits) 1105 x->hit |= h; 1106 return h; 1107} 1108 1109static int match_expr(struct grep_opt *opt, 1110 const char *bol, const char *eol, 1111 enum grep_context ctx, ssize_t *col, 1112 ssize_t *icol, int collect_hits) 1113{ 1114 struct grep_expr *x = opt->pattern_expression; 1115 return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits); 1116} 1117 1118static int match_line(struct grep_opt *opt, 1119 const char *bol, const char *eol, 1120 ssize_t *col, ssize_t *icol, 1121 enum grep_context ctx, int collect_hits) 1122{ 1123 struct grep_pat *p; 1124 int hit = 0; 1125 1126 if (opt->pattern_expression) 1127 return match_expr(opt, bol, 
eol, ctx, col, icol, 1128 collect_hits); 1129 1130 /* we do not call with collect_hits without being extended */ 1131 for (p = opt->pattern_list; p; p = p->next) { 1132 regmatch_t tmp; 1133 if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) { 1134 hit |= 1; 1135 if (!opt->columnnum) { 1136 /* 1137 * Without --column, any single match on a line 1138 * is enough to know that it needs to be 1139 * printed. With --column, scan _all_ patterns 1140 * to find the earliest. 1141 */ 1142 break; 1143 } 1144 if (*col < 0 || tmp.rm_so < *col) 1145 *col = tmp.rm_so; 1146 } 1147 } 1148 return hit; 1149} 1150 1151static int match_next_pattern(struct grep_pat *p, 1152 const char *bol, const char *eol, 1153 enum grep_context ctx, 1154 regmatch_t *pmatch, int eflags) 1155{ 1156 regmatch_t match; 1157 1158 if (!headerless_match_one_pattern(p, bol, eol, ctx, &match, eflags)) 1159 return 0; 1160 if (match.rm_so < 0 || match.rm_eo < 0) 1161 return 0; 1162 if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) { 1163 if (match.rm_so > pmatch->rm_so) 1164 return 1; 1165 if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo) 1166 return 1; 1167 } 1168 pmatch->rm_so = match.rm_so; 1169 pmatch->rm_eo = match.rm_eo; 1170 return 1; 1171} 1172 1173int grep_next_match(struct grep_opt *opt, 1174 const char *bol, const char *eol, 1175 enum grep_context ctx, regmatch_t *pmatch, 1176 enum grep_header_field field, int eflags) 1177{ 1178 struct grep_pat *p; 1179 int hit = 0; 1180 1181 pmatch->rm_so = pmatch->rm_eo = -1; 1182 if (bol < eol) { 1183 for (p = ((ctx == GREP_CONTEXT_HEAD) 1184 ? 
opt->header_list : opt->pattern_list); 1185 p; p = p->next) { 1186 switch (p->token) { 1187 case GREP_PATTERN_HEAD: 1188 if ((field != GREP_HEADER_FIELD_MAX) && 1189 (p->field != field)) 1190 continue; 1191 /* fall thru */ 1192 case GREP_PATTERN: /* atom */ 1193 case GREP_PATTERN_BODY: 1194 hit |= match_next_pattern(p, bol, eol, ctx, 1195 pmatch, eflags); 1196 break; 1197 default: 1198 break; 1199 } 1200 } 1201 } 1202 return hit; 1203} 1204 1205static void show_line_header(struct grep_opt *opt, const char *name, 1206 unsigned lno, ssize_t cno, char sign) 1207{ 1208 if (opt->heading && opt->last_shown == 0) { 1209 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]); 1210 opt->output(opt, "\n", 1); 1211 } 1212 opt->last_shown = lno; 1213 1214 if (!opt->heading && opt->pathname) { 1215 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]); 1216 output_sep(opt, sign); 1217 } 1218 if (opt->linenum) { 1219 char buf[32]; 1220 xsnprintf(buf, sizeof(buf), "%d", lno); 1221 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]); 1222 output_sep(opt, sign); 1223 } 1224 /* 1225 * Treat 'cno' as the 1-indexed offset from the start of a non-context 1226 * line to its first match. Otherwise, 'cno' is 0 indicating that we are 1227 * being called with a context line. 
1228 */ 1229 if (opt->columnnum && cno) { 1230 char buf[32]; 1231 xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno); 1232 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_COLUMNNO]); 1233 output_sep(opt, sign); 1234 } 1235} 1236 1237static void show_line(struct grep_opt *opt, 1238 const char *bol, const char *eol, 1239 const char *name, unsigned lno, ssize_t cno, char sign) 1240{ 1241 int rest = eol - bol; 1242 const char *match_color = NULL; 1243 const char *line_color = NULL; 1244 1245 if (opt->file_break && opt->last_shown == 0) { 1246 if (opt->show_hunk_mark) 1247 opt->output(opt, "\n", 1); 1248 } else if (opt->pre_context || opt->post_context || opt->funcbody) { 1249 if (opt->last_shown == 0) { 1250 if (opt->show_hunk_mark) { 1251 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]); 1252 opt->output(opt, "\n", 1); 1253 } 1254 } else if (lno > opt->last_shown + 1) { 1255 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]); 1256 opt->output(opt, "\n", 1); 1257 } 1258 } 1259 if (!opt->only_matching) { 1260 /* 1261 * In case the line we're being called with contains more than 1262 * one match, leave printing each header to the loop below. 
1263 */ 1264 show_line_header(opt, name, lno, cno, sign); 1265 } 1266 if (want_color(opt->color) || opt->only_matching) { 1267 regmatch_t match; 1268 enum grep_context ctx = GREP_CONTEXT_BODY; 1269 int eflags = 0; 1270 1271 if (want_color(opt->color)) { 1272 if (sign == ':') 1273 match_color = opt->colors[GREP_COLOR_MATCH_SELECTED]; 1274 else 1275 match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT]; 1276 if (sign == ':') 1277 line_color = opt->colors[GREP_COLOR_SELECTED]; 1278 else if (sign == '-') 1279 line_color = opt->colors[GREP_COLOR_CONTEXT]; 1280 else if (sign == '=') 1281 line_color = opt->colors[GREP_COLOR_FUNCTION]; 1282 } 1283 while (grep_next_match(opt, bol, eol, ctx, &match, 1284 GREP_HEADER_FIELD_MAX, eflags)) { 1285 if (match.rm_so == match.rm_eo) 1286 break; 1287 1288 if (opt->only_matching) 1289 show_line_header(opt, name, lno, cno, sign); 1290 else 1291 output_color(opt, bol, match.rm_so, line_color); 1292 output_color(opt, bol + match.rm_so, 1293 match.rm_eo - match.rm_so, match_color); 1294 if (opt->only_matching) 1295 opt->output(opt, "\n", 1); 1296 bol += match.rm_eo; 1297 cno += match.rm_eo; 1298 rest -= match.rm_eo; 1299 eflags = REG_NOTBOL; 1300 } 1301 } 1302 if (!opt->only_matching) { 1303 output_color(opt, bol, rest, line_color); 1304 opt->output(opt, "\n", 1); 1305 } 1306} 1307 1308int grep_use_locks; 1309 1310/* 1311 * This lock protects access to the gitattributes machinery, which is 1312 * not thread-safe. 
1313 */ 1314pthread_mutex_t grep_attr_mutex; 1315 1316static inline void grep_attr_lock(void) 1317{ 1318 if (grep_use_locks) 1319 pthread_mutex_lock(&grep_attr_mutex); 1320} 1321 1322static inline void grep_attr_unlock(void) 1323{ 1324 if (grep_use_locks) 1325 pthread_mutex_unlock(&grep_attr_mutex); 1326} 1327 1328static int match_funcname(struct grep_opt *opt, struct grep_source *gs, 1329 const char *bol, const char *eol) 1330{ 1331 xdemitconf_t *xecfg = opt->priv; 1332 if (xecfg && !xecfg->find_func) { 1333 grep_source_load_driver(gs, opt->repo->index); 1334 if (gs->driver->funcname.pattern) { 1335 const struct userdiff_funcname *pe = &gs->driver->funcname; 1336 xdiff_set_find_func(xecfg, pe->pattern, pe->cflags); 1337 } else { 1338 xecfg = opt->priv = NULL; 1339 } 1340 } 1341 1342 if (xecfg) { 1343 char buf[1]; 1344 return xecfg->find_func(bol, eol - bol, buf, 1, 1345 xecfg->find_func_priv) >= 0; 1346 } 1347 1348 if (bol == eol) 1349 return 0; 1350 if (isalpha(*bol) || *bol == '_' || *bol == '$') 1351 return 1; 1352 return 0; 1353} 1354 1355static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs, 1356 const char *bol, unsigned lno) 1357{ 1358 while (bol > gs->buf) { 1359 const char *eol = --bol; 1360 1361 while (bol > gs->buf && bol[-1] != '\n') 1362 bol--; 1363 lno--; 1364 1365 if (lno <= opt->last_shown) 1366 break; 1367 1368 if (match_funcname(opt, gs, bol, eol)) { 1369 show_line(opt, bol, eol, gs->name, lno, 0, '='); 1370 break; 1371 } 1372 } 1373} 1374 1375static int is_empty_line(const char *bol, const char *eol); 1376 1377static void show_pre_context(struct grep_opt *opt, struct grep_source *gs, 1378 const char *bol, const char *end, unsigned lno) 1379{ 1380 unsigned cur = lno, from = 1, funcname_lno = 0, orig_from; 1381 int funcname_needed = !!opt->funcname, comment_needed = 0; 1382 1383 if (opt->pre_context < lno) 1384 from = lno - opt->pre_context; 1385 if (from <= opt->last_shown) 1386 from = opt->last_shown + 1; 1387 orig_from = 
from; 1388 if (opt->funcbody) { 1389 if (match_funcname(opt, gs, bol, end)) 1390 comment_needed = 1; 1391 else 1392 funcname_needed = 1; 1393 from = opt->last_shown + 1; 1394 } 1395 1396 /* Rewind. */ 1397 while (bol > gs->buf && cur > from) { 1398 const char *next_bol = bol; 1399 const char *eol = --bol; 1400 1401 while (bol > gs->buf && bol[-1] != '\n') 1402 bol--; 1403 cur--; 1404 if (comment_needed && (is_empty_line(bol, eol) || 1405 match_funcname(opt, gs, bol, eol))) { 1406 comment_needed = 0; 1407 from = orig_from; 1408 if (cur < from) { 1409 cur++; 1410 bol = next_bol; 1411 break; 1412 } 1413 } 1414 if (funcname_needed && match_funcname(opt, gs, bol, eol)) { 1415 funcname_lno = cur; 1416 funcname_needed = 0; 1417 if (opt->funcbody) 1418 comment_needed = 1; 1419 else 1420 from = orig_from; 1421 } 1422 } 1423 1424 /* We need to look even further back to find a function signature. */ 1425 if (opt->funcname && funcname_needed) 1426 show_funcname_line(opt, gs, bol, cur); 1427 1428 /* Back forward. */ 1429 while (cur < lno) { 1430 const char *eol = bol, sign = (cur == funcname_lno) ? 
'=' : '-'; 1431 1432 while (*eol != '\n') 1433 eol++; 1434 show_line(opt, bol, eol, gs->name, cur, 0, sign); 1435 bol = eol + 1; 1436 cur++; 1437 } 1438} 1439 1440static int should_lookahead(struct grep_opt *opt) 1441{ 1442 struct grep_pat *p; 1443 1444 if (opt->pattern_expression) 1445 return 0; /* punt for too complex stuff */ 1446 if (opt->invert) 1447 return 0; 1448 for (p = opt->pattern_list; p; p = p->next) { 1449 if (p->token != GREP_PATTERN) 1450 return 0; /* punt for "header only" and stuff */ 1451 } 1452 return 1; 1453} 1454 1455static int look_ahead(struct grep_opt *opt, 1456 unsigned long *left_p, 1457 unsigned *lno_p, 1458 const char **bol_p) 1459{ 1460 unsigned lno = *lno_p; 1461 const char *bol = *bol_p; 1462 struct grep_pat *p; 1463 const char *sp, *last_bol; 1464 regoff_t earliest = -1; 1465 1466 for (p = opt->pattern_list; p; p = p->next) { 1467 int hit; 1468 regmatch_t m; 1469 1470 hit = patmatch(p, bol, bol + *left_p, &m, 0); 1471 if (hit < 0) 1472 return -1; 1473 if (!hit || m.rm_so < 0 || m.rm_eo < 0) 1474 continue; 1475 if (earliest < 0 || m.rm_so < earliest) 1476 earliest = m.rm_so; 1477 } 1478 1479 if (earliest < 0) { 1480 *bol_p = bol + *left_p; 1481 *left_p = 0; 1482 return 1; 1483 } 1484 for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--) 1485 ; /* find the beginning of the line */ 1486 last_bol = sp; 1487 1488 for (sp = bol; sp < last_bol; sp++) { 1489 if (*sp == '\n') 1490 lno++; 1491 } 1492 *left_p -= last_bol - bol; 1493 *bol_p = last_bol; 1494 *lno_p = lno; 1495 return 0; 1496} 1497 1498static int fill_textconv_grep(struct repository *r, 1499 struct userdiff_driver *driver, 1500 struct grep_source *gs) 1501{ 1502 struct diff_filespec *df; 1503 char *buf; 1504 size_t size; 1505 1506 if (!driver || !driver->textconv) 1507 return grep_source_load(gs); 1508 1509 /* 1510 * The textconv interface is intimately tied to diff_filespecs, so we 1511 * have to pretend to be one. 
If we could unify the grep_source 1512 * and diff_filespec structs, this mess could just go away. 1513 */ 1514 df = alloc_filespec(gs->path); 1515 switch (gs->type) { 1516 case GREP_SOURCE_OID: 1517 fill_filespec(df, gs->identifier, 1, 0100644); 1518 break; 1519 case GREP_SOURCE_FILE: 1520 fill_filespec(df, null_oid(r->hash_algo), 0, 0100644); 1521 break; 1522 default: 1523 BUG("attempt to textconv something without a path?"); 1524 } 1525 1526 /* 1527 * fill_textconv is not remotely thread-safe; it modifies the global 1528 * diff tempfile structure, writes to the_repo's odb and might 1529 * internally call thread-unsafe functions such as the 1530 * prepare_packed_git() lazy-initializator. Because of the last two, we 1531 * must ensure mutual exclusion between this call and the object reading 1532 * API, thus we use obj_read_lock() here. 1533 * 1534 * TODO: allowing text conversion to run in parallel with object 1535 * reading operations might increase performance in the multithreaded 1536 * non-worktreee git-grep with --textconv. 1537 */ 1538 obj_read_lock(); 1539 size = fill_textconv(r, driver, df, &buf); 1540 obj_read_unlock(); 1541 free_filespec(df); 1542 1543 /* 1544 * The normal fill_textconv usage by the diff machinery would just keep 1545 * the textconv'd buf separate from the diff_filespec. But much of the 1546 * grep code passes around a grep_source and assumes that its "buf" 1547 * pointer is the beginning of the thing we are searching. So let's 1548 * install our textconv'd version into the grep_source, taking care not 1549 * to leak any existing buffer. 
1550 */ 1551 grep_source_clear_data(gs); 1552 gs->buf = buf; 1553 gs->size = size; 1554 1555 return 0; 1556} 1557 1558static int is_empty_line(const char *bol, const char *eol) 1559{ 1560 while (bol < eol && isspace(*bol)) 1561 bol++; 1562 return bol == eol; 1563} 1564 1565static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits) 1566{ 1567 const char *bol; 1568 const char *peek_bol = NULL; 1569 unsigned long left; 1570 unsigned lno = 1; 1571 unsigned last_hit = 0; 1572 int binary_match_only = 0; 1573 unsigned count = 0; 1574 int try_lookahead = 0; 1575 int show_function = 0; 1576 struct userdiff_driver *textconv = NULL; 1577 enum grep_context ctx = GREP_CONTEXT_HEAD; 1578 xdemitconf_t xecfg; 1579 1580 if (!opt->status_only && gs->name == NULL) 1581 BUG("grep call which could print a name requires " 1582 "grep_source.name be non-NULL"); 1583 1584 if (!opt->output) 1585 opt->output = std_output; 1586 1587 if (opt->pre_context || opt->post_context || opt->file_break || 1588 opt->funcbody) { 1589 /* Show hunk marks, except for the first file. */ 1590 if (opt->last_shown) 1591 opt->show_hunk_mark = 1; 1592 /* 1593 * If we're using threads then we can't easily identify 1594 * the first file. Always put hunk marks in that case 1595 * and skip the very first one later in work_done(). 1596 */ 1597 if (opt->output != std_output) 1598 opt->show_hunk_mark = 1; 1599 } 1600 opt->last_shown = 0; 1601 1602 if (opt->allow_textconv) { 1603 grep_source_load_driver(gs, opt->repo->index); 1604 /* 1605 * We might set up the shared textconv cache data here, which 1606 * is not thread-safe. Also, get_oid_with_context() and 1607 * parse_object() might be internally called. As they are not 1608 * currently thread-safe and might be racy with object reading, 1609 * obj_read_lock() must be called. 
1610 */ 1611 grep_attr_lock(); 1612 obj_read_lock(); 1613 textconv = userdiff_get_textconv(opt->repo, gs->driver); 1614 obj_read_unlock(); 1615 grep_attr_unlock(); 1616 } 1617 1618 /* 1619 * We know the result of a textconv is text, so we only have to care 1620 * about binary handling if we are not using it. 1621 */ 1622 if (!textconv) { 1623 switch (opt->binary) { 1624 case GREP_BINARY_DEFAULT: 1625 if (grep_source_is_binary(gs, opt->repo->index)) 1626 binary_match_only = 1; 1627 break; 1628 case GREP_BINARY_NOMATCH: 1629 if (grep_source_is_binary(gs, opt->repo->index)) 1630 return 0; /* Assume unmatch */ 1631 break; 1632 case GREP_BINARY_TEXT: 1633 break; 1634 default: 1635 BUG("unknown binary handling mode"); 1636 } 1637 } 1638 1639 memset(&xecfg, 0, sizeof(xecfg)); 1640 opt->priv = &xecfg; 1641 1642 try_lookahead = should_lookahead(opt); 1643 1644 if (fill_textconv_grep(opt->repo, textconv, gs) < 0) 1645 return 0; 1646 1647 bol = gs->buf; 1648 left = gs->size; 1649 while (left) { 1650 const char *eol; 1651 int hit; 1652 ssize_t cno; 1653 ssize_t col = -1, icol = -1; 1654 1655 /* 1656 * look_ahead() skips quickly to the line that possibly 1657 * has the next hit; don't call it if we need to do 1658 * something more than just skipping the current line 1659 * in response to an unmatch for the current line. E.g. 1660 * inside a post-context window, we will show the current 1661 * line as a context around the previous hit when it 1662 * doesn't hit. 
1663 */ 1664 if (try_lookahead 1665 && !(last_hit 1666 && (show_function || 1667 lno <= last_hit + opt->post_context))) { 1668 hit = look_ahead(opt, &left, &lno, &bol); 1669 if (hit < 0) 1670 try_lookahead = 0; 1671 else if (hit) 1672 break; 1673 } 1674 eol = end_of_line(bol, &left); 1675 1676 if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol)) 1677 ctx = GREP_CONTEXT_BODY; 1678 1679 hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits); 1680 1681 if (collect_hits) 1682 goto next_line; 1683 1684 /* "grep -v -e foo -e bla" should list lines 1685 * that do not have either, so inversion should 1686 * be done outside. 1687 */ 1688 if (opt->invert) 1689 hit = !hit; 1690 if (opt->unmatch_name_only) { 1691 if (hit) 1692 return 0; 1693 goto next_line; 1694 } 1695 if (hit && (opt->max_count < 0 || count < opt->max_count)) { 1696 count++; 1697 if (opt->status_only) 1698 return 1; 1699 if (opt->name_only) { 1700 show_name(opt, gs->name); 1701 return 1; 1702 } 1703 if (opt->count) 1704 goto next_line; 1705 if (binary_match_only) { 1706 opt->output(opt, "Binary file ", 12); 1707 output_color(opt, gs->name, strlen(gs->name), 1708 opt->colors[GREP_COLOR_FILENAME]); 1709 opt->output(opt, " matches\n", 9); 1710 return 1; 1711 } 1712 /* Hit at this line. If we haven't shown the 1713 * pre-context lines, we would need to show them. 1714 */ 1715 if (opt->pre_context || opt->funcbody) 1716 show_pre_context(opt, gs, bol, eol, lno); 1717 else if (opt->funcname) 1718 show_funcname_line(opt, gs, bol, lno); 1719 cno = opt->invert ? icol : col; 1720 if (cno < 0) { 1721 /* 1722 * A negative cno indicates that there was no 1723 * match on the line. We are thus inverted and 1724 * being asked to show all lines that _don't_ 1725 * match a given expression. Therefore, set cno 1726 * to 0 to suggest the whole line matches. 
1727 */ 1728 cno = 0; 1729 } 1730 show_line(opt, bol, eol, gs->name, lno, cno + 1, ':'); 1731 last_hit = lno; 1732 if (opt->funcbody) 1733 show_function = 1; 1734 goto next_line; 1735 } 1736 if (show_function && (!peek_bol || peek_bol < bol)) { 1737 unsigned long peek_left = left; 1738 const char *peek_eol = eol; 1739 1740 /* 1741 * Trailing empty lines are not interesting. 1742 * Peek past them to see if they belong to the 1743 * body of the current function. 1744 */ 1745 peek_bol = bol; 1746 while (is_empty_line(peek_bol, peek_eol)) { 1747 peek_bol = peek_eol + 1; 1748 peek_eol = end_of_line(peek_bol, &peek_left); 1749 } 1750 1751 if (peek_bol >= gs->buf + gs->size || 1752 match_funcname(opt, gs, peek_bol, peek_eol)) 1753 show_function = 0; 1754 } 1755 if (show_function || 1756 (last_hit && lno <= last_hit + opt->post_context)) { 1757 /* If the last hit is within the post context, 1758 * we need to show this line. 1759 */ 1760 show_line(opt, bol, eol, gs->name, lno, col + 1, '-'); 1761 } 1762 1763 next_line: 1764 bol = eol + 1; 1765 if (!left) 1766 break; 1767 left--; 1768 lno++; 1769 } 1770 1771 if (collect_hits) 1772 return 0; 1773 1774 if (opt->status_only) 1775 return opt->unmatch_name_only; 1776 if (opt->unmatch_name_only) { 1777 /* We did not see any hit, so we want to show this */ 1778 show_name(opt, gs->name); 1779 return 1; 1780 } 1781 1782 xdiff_clear_find_func(&xecfg); 1783 opt->priv = NULL; 1784 1785 /* NEEDSWORK: 1786 * The real "grep -c foo *.c" gives many "bar.c:0" lines, 1787 * which feels mostly useless but sometimes useful. Maybe 1788 * make it another option? For now suppress them. 
1789 */ 1790 if (opt->count && count) { 1791 char buf[32]; 1792 if (opt->pathname) { 1793 output_color(opt, gs->name, strlen(gs->name), 1794 opt->colors[GREP_COLOR_FILENAME]); 1795 output_sep(opt, ':'); 1796 } 1797 xsnprintf(buf, sizeof(buf), "%u\n", count); 1798 opt->output(opt, buf, strlen(buf)); 1799 return 1; 1800 } 1801 return !!last_hit; 1802} 1803 1804static void clr_hit_marker(struct grep_expr *x) 1805{ 1806 /* All-hit markers are meaningful only at the very top level 1807 * OR node. 1808 */ 1809 while (1) { 1810 x->hit = 0; 1811 if (x->node != GREP_NODE_OR) 1812 return; 1813 x->u.binary.left->hit = 0; 1814 x = x->u.binary.right; 1815 } 1816} 1817 1818static int chk_hit_marker(struct grep_expr *x) 1819{ 1820 /* Top level nodes have hit markers. See if they all are hits */ 1821 while (1) { 1822 if (x->node != GREP_NODE_OR) 1823 return x->hit; 1824 if (!x->u.binary.left->hit) 1825 return 0; 1826 x = x->u.binary.right; 1827 } 1828} 1829 1830int grep_source(struct grep_opt *opt, struct grep_source *gs) 1831{ 1832 /* 1833 * we do not have to do the two-pass grep when we do not check 1834 * buffer-wide "all-match". 1835 */ 1836 if (!opt->all_match && !opt->no_body_match) 1837 return grep_source_1(opt, gs, 0); 1838 1839 /* Otherwise the toplevel "or" terms hit a bit differently. 1840 * We first clear hit markers from them. 
1841 */ 1842 clr_hit_marker(opt->pattern_expression); 1843 opt->body_hit = 0; 1844 grep_source_1(opt, gs, 1); 1845 1846 if (opt->all_match && !chk_hit_marker(opt->pattern_expression)) 1847 return 0; 1848 if (opt->no_body_match && opt->body_hit) 1849 return 0; 1850 1851 return grep_source_1(opt, gs, 0); 1852} 1853 1854static void grep_source_init_buf(struct grep_source *gs, 1855 const char *buf, 1856 unsigned long size) 1857{ 1858 gs->type = GREP_SOURCE_BUF; 1859 gs->name = NULL; 1860 gs->path = NULL; 1861 gs->buf = buf; 1862 gs->size = size; 1863 gs->driver = NULL; 1864 gs->identifier = NULL; 1865} 1866 1867int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size) 1868{ 1869 struct grep_source gs; 1870 int r; 1871 1872 grep_source_init_buf(&gs, buf, size); 1873 1874 r = grep_source(opt, &gs); 1875 1876 grep_source_clear(&gs); 1877 return r; 1878} 1879 1880void grep_source_init_file(struct grep_source *gs, const char *name, 1881 const char *path) 1882{ 1883 gs->type = GREP_SOURCE_FILE; 1884 gs->name = xstrdup_or_null(name); 1885 gs->path = xstrdup_or_null(path); 1886 gs->buf = NULL; 1887 gs->size = 0; 1888 gs->driver = NULL; 1889 gs->identifier = xstrdup(path); 1890} 1891 1892void grep_source_init_oid(struct grep_source *gs, const char *name, 1893 const char *path, const struct object_id *oid, 1894 struct repository *repo) 1895{ 1896 gs->type = GREP_SOURCE_OID; 1897 gs->name = xstrdup_or_null(name); 1898 gs->path = xstrdup_or_null(path); 1899 gs->buf = NULL; 1900 gs->size = 0; 1901 gs->driver = NULL; 1902 gs->identifier = oiddup(oid); 1903 gs->repo = repo; 1904} 1905 1906void grep_source_clear(struct grep_source *gs) 1907{ 1908 FREE_AND_NULL(gs->name); 1909 FREE_AND_NULL(gs->path); 1910 FREE_AND_NULL(gs->identifier); 1911 grep_source_clear_data(gs); 1912} 1913 1914void grep_source_clear_data(struct grep_source *gs) 1915{ 1916 switch (gs->type) { 1917 case GREP_SOURCE_FILE: 1918 case GREP_SOURCE_OID: 1919 /* these types own the buffer */ 1920 
free((char *)gs->buf); 1921 gs->buf = NULL; 1922 gs->size = 0; 1923 break; 1924 case GREP_SOURCE_BUF: 1925 /* leave user-provided buf intact */ 1926 break; 1927 } 1928} 1929 1930static int grep_source_load_oid(struct grep_source *gs) 1931{ 1932 enum object_type type; 1933 1934 gs->buf = odb_read_object(gs->repo->objects, gs->identifier, 1935 &type, &gs->size); 1936 if (!gs->buf) 1937 return error(_("'%s': unable to read %s"), 1938 gs->name, 1939 oid_to_hex(gs->identifier)); 1940 return 0; 1941} 1942 1943static int grep_source_load_file(struct grep_source *gs) 1944{ 1945 const char *filename = gs->identifier; 1946 struct stat st; 1947 char *data; 1948 size_t size; 1949 int i; 1950 1951 if (lstat(filename, &st) < 0) { 1952 err_ret: 1953 if (errno != ENOENT) 1954 error_errno(_("failed to stat '%s'"), filename); 1955 return -1; 1956 } 1957 if (!S_ISREG(st.st_mode)) 1958 return -1; 1959 size = xsize_t(st.st_size); 1960 i = open(filename, O_RDONLY); 1961 if (i < 0) 1962 goto err_ret; 1963 data = xmallocz(size); 1964 if (st.st_size != read_in_full(i, data, size)) { 1965 error_errno(_("'%s': short read"), filename); 1966 close(i); 1967 free(data); 1968 return -1; 1969 } 1970 close(i); 1971 1972 gs->buf = data; 1973 gs->size = size; 1974 return 0; 1975} 1976 1977static int grep_source_load(struct grep_source *gs) 1978{ 1979 if (gs->buf) 1980 return 0; 1981 1982 switch (gs->type) { 1983 case GREP_SOURCE_FILE: 1984 return grep_source_load_file(gs); 1985 case GREP_SOURCE_OID: 1986 return grep_source_load_oid(gs); 1987 case GREP_SOURCE_BUF: 1988 return gs->buf ? 
0 : -1; 1989 } 1990 BUG("invalid grep_source type to load"); 1991} 1992 1993void grep_source_load_driver(struct grep_source *gs, 1994 struct index_state *istate) 1995{ 1996 if (gs->driver) 1997 return; 1998 1999 grep_attr_lock(); 2000 if (gs->path) 2001 gs->driver = userdiff_find_by_path(istate, gs->path); 2002 if (!gs->driver) 2003 gs->driver = userdiff_find_by_name("default"); 2004 grep_attr_unlock(); 2005} 2006 2007static int grep_source_is_binary(struct grep_source *gs, 2008 struct index_state *istate) 2009{ 2010 grep_source_load_driver(gs, istate); 2011 if (gs->driver->binary != -1) 2012 return gs->driver->binary; 2013 2014 if (!grep_source_load(gs)) 2015 return buffer_is_binary(gs->buf, gs->size); 2016 2017 return 0; 2018}