t/chainlint.pl at reftables-rust · freshlybakedca.ke/git

freshlybakedca.ke / git
fork atom
Git fork
fork atom
git / t / chainlint.pl
at reftables-rust 883 lines 26 kB view raw
wrap content
Eric Sunshine chainlint: reduce annotation noise-factor 2y ago
a13ff419
  1#!/usr/bin/env perl
  2#
  3# Copyright (c) 2021-2022 Eric Sunshine <sunshine@sunshineco.com>
  4#
  5# This tool scans shell scripts for test definitions and checks those tests for
  6# problems, such as broken &&-chains, which might hide bugs in the tests
  7# themselves or in behaviors being exercised by the tests.
  8#
  9# Input arguments are pathnames of shell scripts containing test definitions,
 10# or globs referencing a collection of scripts. For each problem discovered,
 11# the pathname of the script containing the test is printed along with the test
 12# name and the test body with a `?!LINT: ...?!` annotation at the location of
 13# each detected problem, where "..." is an explanation of the problem. Returns
 14# zero if no problems are discovered, otherwise non-zero.
 15
 16use warnings;
 17use strict;
 18use Config;
 19use File::Glob;
 20use Getopt::Long;
 21
 22my $jobs = -1;
 23my $show_stats;
 24my $emit_all;
 25
 26# Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3
 27# "Token Recognition" of POSIX chapter 2 "Shell Command Language". Although
 28# similar to lexical analyzers for other languages, this one differs in a few
 29# substantial ways due to quirks of the shell command language.
 30#
 31# For instance, in many languages, newline is just whitespace like space or
 32# TAB, but in shell a newline is a command separator, thus a distinct lexical
 33# token. A newline is significant and returned as a distinct token even at the
 34# end of a shell comment.
 35#
 36# In other languages, `1+2` would typically be scanned as three tokens
 37# (`1`, `+`, and `2`), but in shell it is a single token. However, the similar
 38# `1 + 2`, which embeds whitepace, is scanned as three token in shell, as well.
 39# In shell, several characters with special meaning lose that meaning when not
 40# surrounded by whitespace. For instance, the negation operator `!` is special
 41# when standing alone surrounded by whitespace; whereas in `foo!uucp` it is
 42# just a plain character in the longer token "foo!uucp". In many other
 43# languages, `"string"/foo:'string'` might be scanned as five tokens ("string",
 44# `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token.
 45#
 46# The lexical analyzer for the shell command language is also somewhat unusual
 47# in that it recursively invokes the parser to handle the body of `$(...)`
 48# expressions which can contain arbitrary shell code. Such expressions may be
 49# encountered both inside and outside of double-quoted strings.
 50#
 51# The lexical analyzer is responsible for consuming shell here-doc bodies which
 52# extend from the line following a `<<TAG` operator until a line consisting
 53# solely of `TAG`. Here-doc consumption begins when a newline is encountered.
 54# It is legal for multiple here-doc `<<TAG` operators to be present on a single
 55# line, in which case their bodies must be present one following the next, and
 56# are consumed in the (left-to-right) order the `<<TAG` operators appear on the
 57# line. A special complication is that the bodies of all here-docs must be
 58# consumed when the newline is encountered even if the parse context depth has
 59# changed. For instance, in `cat <<A && x=$(cat <<B &&\n`, bodies of here-docs
 60# "A" and "B" must be consumed even though "A" was introduced outside the
 61# recursive parse context in which "B" was introduced and in which the newline
 62# is encountered.
 63package Lexer;
 64
 65sub new {
 66	my ($class, $parser, $s) = @_;
 67	bless {
 68		parser => $parser,
 69		buff => $s,
 70		lineno => 1,
 71		heretags => []
 72	} => $class;
 73}
 74
 75sub scan_heredoc_tag {
 76	my $self = shift @_;
 77	${$self->{buff}} =~ /\G(-?)/gc;
 78	my $indented = $1;
 79	my $token = $self->scan_token();
 80	return "<<$indented" unless $token;
 81	my $tag = $token->[0];
 82	$tag =~ s/['"\\]//g;
 83	$$token[0] = $indented ? "\t$tag" : "$tag";
 84	push(@{$self->{heretags}}, $token);
 85	return "<<$indented$tag";
 86}
 87
 88sub scan_op {
 89	my ($self, $c) = @_;
 90	my $b = $self->{buff};
 91	return $c unless $$b =~ /\G(.)/sgc;
 92	my $cc = $c . $1;
 93	return scan_heredoc_tag($self) if $cc eq '<<';
 94	return $cc if $cc =~ /^(?:&&|\|\||>>|;;|<&|>&|<>|>\|)$/;
 95	pos($$b)--;
 96	return $c;
 97}
 98
 99sub scan_sqstring {
100	my $self = shift @_;
101	${$self->{buff}} =~ /\G([^']*'|.*\z)/sgc;
102	my $s = $1;
103	$self->{lineno} += () = $s =~ /\n/sg;
104	return "'" . $s;
105}
106
107sub scan_dqstring {
108	my $self = shift @_;
109	my $b = $self->{buff};
110	my $s = '"';
111	while (1) {
112		# slurp up non-special characters
113		$s .= $1 if $$b =~ /\G([^"\$\\]+)/gc;
114		# handle special characters
115		last unless $$b =~ /\G(.)/sgc;
116		my $c = $1;
117		$s .= '"', last if $c eq '"';
118		$s .= '$' . $self->scan_dollar(), next if $c eq '$';
119		if ($c eq '\\') {
120			$s .= '\\', last unless $$b =~ /\G(.)/sgc;
121			$c = $1;
122			$self->{lineno}++, next if $c eq "\n"; # line splice
123			# backslash escapes only $, `, ", \ in dq-string
124			$s .= '\\' unless $c =~ /^[\$`"\\]$/;
125			$s .= $c;
126			next;
127		}
128		die("internal error scanning dq-string '$c'\n");
129	}
130	$self->{lineno} += () = $s =~ /\n/sg;
131	return $s;
132}
133
134sub scan_balanced {
135	my ($self, $c1, $c2) = @_;
136	my $b = $self->{buff};
137	my $depth = 1;
138	my $s = $c1;
139	while ($$b =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) {
140		$s .= $1;
141		$depth++, next if $s =~ /\Q$c1\E$/;
142		$depth--;
143		last if $depth == 0;
144	}
145	$self->{lineno} += () = $s =~ /\n/sg;
146	return $s;
147}
148
149sub scan_subst {
150	my $self = shift @_;
151	my @tokens = $self->{parser}->parse(qr/^\)$/);
152	$self->{parser}->next_token(); # closing ")"
153	return @tokens;
154}
155
156sub scan_dollar {
157	my $self = shift @_;
158	my $b = $self->{buff};
159	return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...))
160	return '(' . join(' ', map {$_->[0]} $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...)
161	return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...}
162	return $1 if $$b =~ /\G(\w+)/gc; # $var
163	return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc.
164	return '';
165}
166
167sub swallow_heredocs {
168	my $self = shift @_;
169	my $b = $self->{buff};
170	my $tags = $self->{heretags};
171	while (my $tag = shift @$tags) {
172		my $start = pos($$b);
173		my $indent = $$tag[0] =~ s/^\t// ? '\\s*' : '';
174		$$b =~ /(?:\G|\n)$indent\Q$$tag[0]\E(?:\n|\z)/gc;
175		if (pos($$b) > $start) {
176			my $body = substr($$b, $start, pos($$b) - $start);
177			$self->{parser}->{heredocs}->{$$tag[0]} = {
178				content => substr($body, 0, length($body) - length($&)),
179				start_line => $self->{lineno},
180		        };
181			$self->{lineno} += () = $body =~ /\n/sg;
182			next;
183		}
184		push(@{$self->{parser}->{problems}}, ['HEREDOC', $tag]);
185		$$b =~ /(?:\G|\n).*\z/gc; # consume rest of input
186		my $body = substr($$b, $start, pos($$b) - $start);
187		$self->{lineno} += () = $body =~ /\n/sg;
188		last;
189	}
190}
191
192sub scan_token {
193	my $self = shift @_;
194	my $b = $self->{buff};
195	my $token = '';
196	my ($start, $startln);
197RESTART:
198	$startln = $self->{lineno};
199	$$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline)
200	$start = pos($$b) || 0;
201	$self->{lineno}++, return ["\n", $start, pos($$b), $startln, $startln] if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment
202	while (1) {
203		# slurp up non-special characters
204		$token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc;
205		# handle special characters
206		last unless $$b =~ /\G(.)/sgc;
207		my $c = $1;
208		pos($$b)--, last if $c =~ /^[ \t]$/; # whitespace ends token
209		pos($$b)--, last if length($token) && $c =~ /^[;&|<>(){}\n]$/;
210		$token .= $self->scan_sqstring(), next if $c eq "'";
211		$token .= $self->scan_dqstring(), next if $c eq '"';
212		$token .= $c . $self->scan_dollar(), next if $c eq '$';
213		$self->{lineno}++, $self->swallow_heredocs(), $token = $c, last if $c eq "\n";
214		$token = $self->scan_op($c), last if $c =~ /^[;&|<>]$/;
215		$token = $c, last if $c =~ /^[(){}]$/;
216		if ($c eq '\\') {
217			$token .= '\\', last unless $$b =~ /\G(.)/sgc;
218			$c = $1;
219			$self->{lineno}++, next if $c eq "\n" && length($token); # line splice
220			$self->{lineno}++, goto RESTART if $c eq "\n"; # line splice
221			$token .= '\\' . $c;
222			next;
223		}
224		die("internal error scanning character '$c'\n");
225	}
226	return length($token) ? [$token, $start, pos($$b), $startln, $self->{lineno}] : undef;
227}
228
229# ShellParser parses POSIX shell scripts (with minor extensions for Bash). It
230# is a recursive descent parser very roughly modeled after section 2.10 "Shell
231# Grammar" of POSIX chapter 2 "Shell Command Language".
232package ShellParser;
233
234sub new {
235	my ($class, $s) = @_;
236	my $self = bless {
237		buff => [],
238		stop => [],
239		output => [],
240		heredocs => {},
241		insubshell => 0,
242	} => $class;
243	$self->{lexer} = Lexer->new($self, $s);
244	return $self;
245}
246
247sub next_token {
248	my $self = shift @_;
249	return pop(@{$self->{buff}}) if @{$self->{buff}};
250	return $self->{lexer}->scan_token();
251}
252
253sub untoken {
254	my $self = shift @_;
255	push(@{$self->{buff}}, @_);
256}
257
258sub peek {
259	my $self = shift @_;
260	my $token = $self->next_token();
261	return undef unless defined($token);
262	$self->untoken($token);
263	return $token;
264}
265
266sub stop_at {
267	my ($self, $token) = @_;
268	return 1 unless defined($token);
269	my $stop = ${$self->{stop}}[-1] if @{$self->{stop}};
270	return defined($stop) && $token->[0] =~ $stop;
271}
272
273sub expect {
274	my ($self, $expect) = @_;
275	my $token = $self->next_token();
276	return $token if defined($token) && $token->[0] eq $expect;
277	push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token->[0] : "<end-of-input>") . "'\n");
278	$self->untoken($token) if defined($token);
279	return ();
280}
281
282sub optional_newlines {
283	my $self = shift @_;
284	my @tokens;
285	while (my $token = $self->peek()) {
286		last unless $token->[0] eq "\n";
287		push(@tokens, $self->next_token());
288	}
289	return @tokens;
290}
291
292sub parse_group {
293	my $self = shift @_;
294	return ($self->parse(qr/^}$/),
295		$self->expect('}'));
296}
297
298sub parse_subshell {
299	my $self = shift @_;
300	$self->{insubshell}++;
301	my @tokens = ($self->parse(qr/^\)$/),
302		      $self->expect(')'));
303	$self->{insubshell}--;
304	return @tokens;
305}
306
307sub parse_case_pattern {
308	my $self = shift @_;
309	my @tokens;
310	while (defined(my $token = $self->next_token())) {
311		push(@tokens, $token);
312		last if $token->[0] eq ')';
313	}
314	return @tokens;
315}
316
317sub parse_case {
318	my $self = shift @_;
319	my @tokens;
320	push(@tokens,
321	     $self->next_token(), # subject
322	     $self->optional_newlines(),
323	     $self->expect('in'),
324	     $self->optional_newlines());
325	while (1) {
326		my $token = $self->peek();
327		last unless defined($token) && $token->[0] ne 'esac';
328		push(@tokens,
329		     $self->parse_case_pattern(),
330		     $self->optional_newlines(),
331		     $self->parse(qr/^(?:;;|esac)$/)); # item body
332		$token = $self->peek();
333		last unless defined($token) && $token->[0] ne 'esac';
334		push(@tokens,
335		     $self->expect(';;'),
336		     $self->optional_newlines());
337	}
338	push(@tokens, $self->expect('esac'));
339	return @tokens;
340}
341
342sub parse_for {
343	my $self = shift @_;
344	my @tokens;
345	push(@tokens,
346	     $self->next_token(), # variable
347	     $self->optional_newlines());
348	my $token = $self->peek();
349	if (defined($token) && $token->[0] eq 'in') {
350		push(@tokens,
351		     $self->expect('in'),
352		     $self->optional_newlines());
353	}
354	push(@tokens,
355	     $self->parse(qr/^do$/), # items
356	     $self->expect('do'),
357	     $self->optional_newlines(),
358	     $self->parse_loop_body(),
359	     $self->expect('done'));
360	return @tokens;
361}
362
363sub parse_if {
364	my $self = shift @_;
365	my @tokens;
366	while (1) {
367		push(@tokens,
368		     $self->parse(qr/^then$/), # if/elif condition
369		     $self->expect('then'),
370		     $self->optional_newlines(),
371		     $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body
372		my $token = $self->peek();
373		last unless defined($token) && $token->[0] eq 'elif';
374		push(@tokens, $self->expect('elif'));
375	}
376	my $token = $self->peek();
377	if (defined($token) && $token->[0] eq 'else') {
378		push(@tokens,
379		     $self->expect('else'),
380		     $self->optional_newlines(),
381		     $self->parse(qr/^fi$/)); # else body
382	}
383	push(@tokens, $self->expect('fi'));
384	return @tokens;
385}
386
387sub parse_loop_body {
388	my $self = shift @_;
389	return $self->parse(qr/^done$/);
390}
391
392sub parse_loop {
393	my $self = shift @_;
394	return ($self->parse(qr/^do$/), # condition
395		$self->expect('do'),
396		$self->optional_newlines(),
397		$self->parse_loop_body(),
398		$self->expect('done'));
399}
400
401sub parse_func {
402	my $self = shift @_;
403	return ($self->expect('('),
404		$self->expect(')'),
405		$self->optional_newlines(),
406		$self->parse_cmd()); # body
407}
408
409sub parse_bash_array_assignment {
410	my $self = shift @_;
411	my @tokens = $self->expect('(');
412	while (defined(my $token = $self->next_token())) {
413		push(@tokens, $token);
414		last if $token->[0] eq ')';
415	}
416	return @tokens;
417}
418
419my %compound = (
420	'{' => \&parse_group,
421	'(' => \&parse_subshell,
422	'case' => \&parse_case,
423	'for' => \&parse_for,
424	'if' => \&parse_if,
425	'until' => \&parse_loop,
426	'while' => \&parse_loop);
427
428sub parse_cmd {
429	my $self = shift @_;
430	my $cmd = $self->next_token();
431	return () unless defined($cmd);
432	return $cmd if $cmd->[0] eq "\n";
433
434	my $token;
435	my @tokens = $cmd;
436	if ($cmd->[0] eq '!') {
437		push(@tokens, $self->parse_cmd());
438		return @tokens;
439	} elsif (my $f = $compound{$cmd->[0]}) {
440		push(@tokens, $self->$f());
441	} elsif (defined($token = $self->peek()) && $token->[0] eq '(') {
442		if ($cmd->[0] !~ /\w=$/) {
443			push(@tokens, $self->parse_func());
444			return @tokens;
445		}
446		my @array = $self->parse_bash_array_assignment();
447		$tokens[-1]->[0] .= join(' ', map {$_->[0]} @array);
448		$tokens[-1]->[2] = $array[$#array][2] if @array;
449	}
450
451	while (defined(my $token = $self->next_token())) {
452		$self->untoken($token), last if $self->stop_at($token);
453		push(@tokens, $token);
454		last if $token->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/;
455	}
456	push(@tokens, $self->next_token()) if $tokens[-1]->[0] ne "\n" && defined($token = $self->peek()) && $token->[0] eq "\n";
457	return @tokens;
458}
459
460sub accumulate {
461	my ($self, $tokens, $cmd) = @_;
462	push(@$tokens, @$cmd);
463}
464
465sub parse {
466	my ($self, $stop) = @_;
467	push(@{$self->{stop}}, $stop);
468	goto DONE if $self->stop_at($self->peek());
469	my @tokens;
470	while (my @cmd = $self->parse_cmd()) {
471		$self->accumulate(\@tokens, \@cmd);
472		last if $self->stop_at($self->peek());
473	}
474DONE:
475	pop(@{$self->{stop}});
476	return @tokens;
477}
478
479# TestParser is a subclass of ShellParser which, beyond parsing shell script
480# code, is also imbued with semantic knowledge of test construction, and checks
481# tests for common problems (such as broken &&-chains) which might hide bugs in
482# the tests themselves or in behaviors being exercised by the tests. As such,
483# TestParser is only called upon to parse test bodies, not the top-level
484# scripts in which the tests are defined.
485package TestParser;
486
487use base 'ShellParser';
488
489sub new {
490	my $class = shift @_;
491	my $self = $class->SUPER::new(@_);
492	$self->{problems} = [];
493	return $self;
494}
495
496sub find_non_nl {
497	my $tokens = shift @_;
498	my $n = shift @_;
499	$n = $#$tokens if !defined($n);
500	$n-- while $n >= 0 && $$tokens[$n]->[0] eq "\n";
501	return $n;
502}
503
504sub ends_with {
505	my ($tokens, $needles) = @_;
506	my $n = find_non_nl($tokens);
507	for my $needle (reverse(@$needles)) {
508		return undef if $n < 0;
509		$n = find_non_nl($tokens, $n), next if $needle eq "\n";
510		return undef if $$tokens[$n]->[0] !~ $needle;
511		$n--;
512	}
513	return 1;
514}
515
516sub match_ending {
517	my ($tokens, $endings) = @_;
518	for my $needles (@$endings) {
519		next if @$tokens < scalar(grep {$_ ne "\n"} @$needles);
520		return 1 if ends_with($tokens, $needles);
521	}
522	return undef;
523}
524
525sub parse_loop_body {
526	my $self = shift @_;
527	my @tokens = $self->SUPER::parse_loop_body(@_);
528	# did loop signal failure via "|| return" or "|| exit"?
529	return @tokens if !@tokens || grep {$_->[0] =~ /^(?:return|exit|\$\?)$/} @tokens;
530	# did loop upstream of a pipe signal failure via "|| echo 'impossible
531	# text'" as the final command in the loop body?
532	return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]);
533	# flag missing "return/exit" handling explicit failure in loop body
534	my $n = find_non_nl(\@tokens);
535	push(@{$self->{problems}}, [$self->{insubshell} ? 'LOOPEXIT' : 'LOOPRETURN', $tokens[$n]]);
536	return @tokens;
537}
538
539my @safe_endings = (
540	[qr/^(?:&&|\|\||\||&)$/],
541	[qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/],
542	[qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/],
543	[qr/^(?:exit|return|continue)$/],
544	[qr/^(?:exit|return|continue)$/, qr/^;$/]);
545
546sub accumulate {
547	my ($self, $tokens, $cmd) = @_;
548	my $problems = $self->{problems};
549
550	# no previous command to check for missing "&&"
551	goto DONE unless @$tokens;
552
553	# new command is empty line; can't yet check if previous is missing "&&"
554	goto DONE if @$cmd == 1 && $$cmd[0]->[0] eq "\n";
555
556	# did previous command end with "&&", "|", "|| return" or similar?
557	goto DONE if match_ending($tokens, \@safe_endings);
558
559	# if this command handles "$?" specially, then okay for previous
560	# command to be missing "&&"
561	for my $token (@$cmd) {
562		goto DONE if $token->[0] =~ /\$\?/;
563	}
564
565	# if this command is "false", "return 1", or "exit 1" (which signal
566	# failure explicitly), then okay for all preceding commands to be
567	# missing "&&"
568	if ($$cmd[0]->[0] =~ /^(?:false|return|exit)$/) {
569		@$problems = grep {$_->[0] ne 'AMP'} @$problems;
570		goto DONE;
571	}
572
573	# flag missing "&&" at end of previous command
574	my $n = find_non_nl($tokens);
575	push(@$problems, ['AMP', $tokens->[$n]]) unless $n < 0;
576
577DONE:
578	$self->SUPER::accumulate($tokens, $cmd);
579}
580
581# ScriptParser is a subclass of ShellParser which identifies individual test
582# definitions within test scripts, and passes each test body through TestParser
583# to identify possible problems. ShellParser detects test definitions not only
584# at the top-level of test scripts but also within compound commands such as
585# loops and function definitions.
586package ScriptParser;
587
588use base 'ShellParser';
589
590sub new {
591	my $class = shift @_;
592	my $self = $class->SUPER::new(@_);
593	$self->{ntests} = 0;
594	$self->{nerrs} = 0;
595	return $self;
596}
597
598# extract the raw content of a token, which may be a single string or a
599# composition of multiple strings and non-string character runs; for instance,
600# `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d`
601sub unwrap {
602	my $token = (@_ ? shift @_ : $_)->[0];
603	# simple case: 'sqstring' or "dqstring"
604	return $token if $token =~ s/^'([^']*)'$/$1/;
605	return $token if $token =~ s/^"([^"]*)"$/$1/;
606
607	# composite case
608	my ($s, $q, $escaped);
609	while (1) {
610		# slurp up non-special characters
611		$s .= $1 if $token =~ /\G([^\\'"]*)/gc;
612		# handle special characters
613		last unless $token =~ /\G(.)/sgc;
614		my $c = $1;
615		$q = undef, next if defined($q) && $c eq $q;
616		$q = $c, next if !defined($q) && $c =~ /^['"]$/;
617		if ($c eq '\\') {
618			last unless $token =~ /\G(.)/sgc;
619			$c = $1;
620			$s .= '\\' if $c eq "\n"; # preserve line splice
621		}
622		$s .= $c;
623	}
624	return $s
625}
626
627sub format_problem {
628	local $_ = shift;
629	/^AMP$/ && return "missing '&&'";
630	/^LOOPRETURN$/ && return "missing '|| return 1'";
631	/^LOOPEXIT$/ && return "missing '|| exit 1'";
632	/^HEREDOC$/ && return 'unclosed heredoc';
633	die("unrecognized problem type '$_'\n");
634}
635
636sub check_test {
637	my $self = shift @_;
638	my $title = unwrap(shift @_);
639	my $body = shift @_;
640	my $lineno = $body->[3];
641	$body = unwrap($body);
642	if ($body eq '-') {
643		my $herebody = shift @_;
644		$body = $herebody->{content};
645		$lineno = $herebody->{start_line};
646	}
647	$self->{ntests}++;
648	my $parser = TestParser->new(\$body);
649	my @tokens = $parser->parse();
650	my $problems = $parser->{problems};
651	$self->{nerrs} += @$problems;
652	return unless $emit_all || @$problems;
653	my $c = main::fd_colors(1);
654	my ($erropen, $errclose) = -t 1 ? ("$c->{rev}$c->{red}", $c->{reset}) : ('?!', '?!');
655	my $start = 0;
656	my $checked = '';
657	for (sort {$a->[1]->[2] <=> $b->[1]->[2]} @$problems) {
658		my ($label, $token) = @$_;
659		my $pos = $token->[2];
660		my $err = format_problem($label);
661		$checked .= substr($body, $start, $pos - $start);
662		$checked .= ' ' unless $checked =~ /\s$/;
663		$checked .= "${erropen}LINT: $err$errclose";
664		$checked .= ' ' unless $pos >= length($body) ||
665		    substr($body, $pos, 1) =~ /^\s/;
666		$start = $pos;
667	}
668	$checked .= substr($body, $start);
669	$checked =~ s/^/$lineno++ . ' '/mge;
670	$checked =~ s/^\d+ \n//;
671	$checked =~ s/^\d+/$c->{dim}$&$c->{reset}/mg;
672	$checked .= "\n" unless $checked =~ /\n$/;
673	push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked");
674}
675
676sub parse_cmd {
677	my $self = shift @_;
678	my @tokens = $self->SUPER::parse_cmd();
679	return @tokens unless @tokens && $tokens[0]->[0] =~ /^test_expect_(?:success|failure)$/;
680	my $n = $#tokens;
681	$n-- while $n >= 0 && $tokens[$n]->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/;
682	my $herebody;
683	if ($n >= 2 && $tokens[$n-1]->[0] eq '-' && $tokens[$n]->[0] =~ /^<<-?(.+)$/) {
684		$herebody = $self->{heredocs}->{$1};
685		$n--;
686	}
687	$self->check_test($tokens[1], $tokens[2], $herebody) if $n == 2; # title body
688	$self->check_test($tokens[2], $tokens[3], $herebody) if $n > 2;  # prereq title body
689	return @tokens;
690}
691
692# main contains high-level functionality for processing command-line switches,
693# feeding input test scripts to ScriptParser, and reporting results.
694package main;
695
696my $getnow = sub { return time(); };
697my $interval = sub { return time() - shift; };
698if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) {
699	$getnow = sub { return [Time::HiRes::gettimeofday()]; };
700	$interval = sub { return Time::HiRes::tv_interval(shift); };
701}
702
703# Restore TERM if test framework set it to "dumb" so 'tput' will work; do this
704# outside of get_colors() since under 'ithreads' all threads use %ENV of main
705# thread and ignore %ENV changes in subthreads.
706$ENV{TERM} = $ENV{USER_TERM} if $ENV{USER_TERM};
707
708my @NOCOLORS = (bold => '', rev => '', dim => '', reset => '', blue => '', green => '', red => '');
709my %COLORS = ();
710sub get_colors {
711	return \%COLORS if %COLORS;
712	if (exists($ENV{NO_COLOR})) {
713		%COLORS = @NOCOLORS;
714		return \%COLORS;
715	}
716	if ($ENV{TERM} =~ /xterm|xterm-\d+color|xterm-new|xterm-direct|nsterm|nsterm-\d+color|nsterm-direct/) {
717		%COLORS = (bold  => "\e[1m",
718			   rev   => "\e[7m",
719			   dim   => "\e[2m",
720			   reset => "\e[0m",
721			   blue  => "\e[34m",
722			   green => "\e[32m",
723			   red   => "\e[31m");
724		return \%COLORS;
725	}
726	if (system("tput sgr0 >/dev/null 2>&1") == 0 &&
727	    system("tput bold >/dev/null 2>&1") == 0 &&
728	    system("tput rev  >/dev/null 2>&1") == 0 &&
729	    system("tput dim  >/dev/null 2>&1") == 0 &&
730	    system("tput setaf 1 >/dev/null 2>&1") == 0) {
731		%COLORS = (bold  => `tput bold`,
732			   rev   => `tput rev`,
733			   dim   => `tput dim`,
734			   reset => `tput sgr0`,
735			   blue  => `tput setaf 4`,
736			   green => `tput setaf 2`,
737			   red   => `tput setaf 1`);
738		return \%COLORS;
739	}
740	%COLORS = @NOCOLORS;
741	return \%COLORS;
742}
743
744my %FD_COLORS = ();
745sub fd_colors {
746	my $fd = shift;
747	return $FD_COLORS{$fd} if exists($FD_COLORS{$fd});
748	$FD_COLORS{$fd} = -t $fd ? get_colors() : {@NOCOLORS};
749	return $FD_COLORS{$fd};
750}
751
752sub ncores {
753	# Windows
754	if (exists($ENV{NUMBER_OF_PROCESSORS})) {
755		my $ncpu = $ENV{NUMBER_OF_PROCESSORS};
756		return $ncpu > 0 ? $ncpu : 1;
757	}
758	# Linux / MSYS2 / Cygwin / WSL
759	if (open my $fh, '<', '/proc/cpuinfo') {
760		my $cpuinfo = do { local $/; <$fh> };
761		close($fh);
762		if ($cpuinfo =~ /^n?cpus active\s*:\s*(\d+)/m) {
763			return $1 if $1 > 0;
764		}
765		my @matches = ($cpuinfo =~ /^(processor|CPU)[\s\d]*:/mg);
766		return @matches ? scalar(@matches) : 1;
767	}
768	# macOS & BSD
769	if ($^O =~ /(?:^darwin$|bsd)/) {
770		my $ncpu = qx/sysctl -n hw.ncpu/;
771		return $ncpu > 0 ? $ncpu : 1;
772	}
773	return 1;
774}
775
776sub show_stats {
777	my ($start_time, $stats) = @_;
778	my $walltime = $interval->($start_time);
779	my ($usertime) = times();
780	my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0);
781	my $c = fd_colors(2);
782	print(STDERR $c->{green});
783	for (@$stats) {
784		my ($worker, $nscripts, $ntests, $nerrs) = @$_;
785		print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n");
786		$total_workers++;
787		$total_scripts += $nscripts;
788		$total_tests += $ntests;
789		$total_errs += $nerrs;
790	}
791	printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)$c->{reset}\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime);
792}
793
794sub check_script {
795	my ($id, $next_script, $emit) = @_;
796	my ($nscripts, $ntests, $nerrs) = (0, 0, 0);
797	while (my $path = $next_script->()) {
798		$nscripts++;
799		my $fh;
800		unless (open($fh, "<:unix:crlf", $path)) {
801			$emit->("?!ERR?! $path: $!\n");
802			next;
803		}
804		my $s = do { local $/; <$fh> };
805		close($fh);
806		my $parser = ScriptParser->new(\$s);
807		1 while $parser->parse_cmd();
808		if (@{$parser->{output}}) {
809			my $c = fd_colors(1);
810			my $s = join('', @{$parser->{output}});
811			$emit->("$c->{bold}$c->{blue}# chainlint: $path$c->{reset}\n" . $s);
812		}
813		$ntests += $parser->{ntests};
814		$nerrs += $parser->{nerrs};
815	}
816	return [$id, $nscripts, $ntests, $nerrs];
817}
818
819sub exit_code {
820	my $stats = shift @_;
821	for (@$stats) {
822		my ($worker, $nscripts, $ntests, $nerrs) = @$_;
823		return 1 if $nerrs;
824	}
825	return 0;
826}
827
828Getopt::Long::Configure(qw{bundling});
829GetOptions(
830	"emit-all!" => \$emit_all,
831	"jobs|j=i" => \$jobs,
832	"stats|show-stats!" => \$show_stats) or die("option error\n");
833$jobs = ncores() if $jobs < 1;
834
835my $start_time = $getnow->();
836my @stats;
837
838my @scripts;
839push(@scripts, File::Glob::bsd_glob($_)) for (@ARGV);
840unless (@scripts) {
841	show_stats($start_time, \@stats) if $show_stats;
842	exit;
843}
844$jobs = @scripts if @scripts < $jobs;
845
846unless ($jobs > 1 &&
847	$Config{useithreads} && eval {
848	require threads; threads->import();
849	require Thread::Queue; Thread::Queue->import();
850	1;
851	}) {
852	push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); }));
853	show_stats($start_time, \@stats) if $show_stats;
854	exit(exit_code(\@stats));
855}
856
857my $script_queue = Thread::Queue->new();
858my $output_queue = Thread::Queue->new();
859
860sub next_script { return $script_queue->dequeue(); }
861sub emit { $output_queue->enqueue(@_); }
862
863sub monitor {
864	while (my $s = $output_queue->dequeue()) {
865		print($s);
866	}
867}
868
869my $mon = threads->create({'context' => 'void'}, \&monitor);
870threads->create({'context' => 'list'}, \&check_script, $_, \&next_script, \&emit) for 1..$jobs;
871
872$script_queue->enqueue(@scripts);
873$script_queue->end();
874
875for (threads->list()) {
876	push(@stats, $_->join()) unless $_ == $mon;
877}
878
879$output_queue->end();
880$mon->join();
881
882show_stats($start_time, \@stats) if $show_stats;
883exit(exit_code(\@stats));