Git fork

userdiff: extend Bash pattern to cover more shell function forms

The previous function regex required explicit matching of function
bodies using `{`, `(`, `((`, or `[[`, which caused several issues:

- It failed to capture valid functions where `{` was on the next line
due to line continuation (`\`).
- It did not recognize functions with a single-command body, such as
`x () echo hello`.

Replacing the function body matching logic with `.*$` ensures
that everything on the function definition line is captured.

Additionally, the word regex is refined to better recognize shell
syntax, including additional parameter expansion operators and
command-line options.

Signed-off-by: Moumita Dhar <dhar61595@gmail.com>
Acked-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

authored by

Moumita Dhar and committed by
Junio C Hamano
ea8a71b4 cb96e169

+128 -8
+4
t/t4018/bash-bashism-style-complete-line-capture
··· 1 + function myfunc # RIGHT 2 + { 3 + echo 'ChangeMe' 4 + }
+4
t/t4018/bash-posix-style-complete-line-capture
··· 1 + func() { # RIGHT 2 + 3 + ChangeMe 4 + }
+3
t/t4018/bash-posix-style-single-command-function
··· 1 + RIGHT() echo "hello" 2 + 3 + ChangeMe
+1
t/t4034-diff-words.sh
··· 320 320 321 321 test_language_driver ada 322 322 test_language_driver bibtex 323 + test_language_driver bash 323 324 test_language_driver cpp 324 325 test_language_driver csharp 325 326 test_language_driver css
+36
t/t4034/bash/expect
··· 1 + <BOLD>diff --git a/pre b/post<RESET> 2 + <BOLD>index 09ac008..60ba6a2 100644<RESET> 3 + <BOLD>--- a/pre<RESET> 4 + <BOLD>+++ b/post<RESET> 5 + <CYAN>@@ -1,31 +1,31 @@<RESET> 6 + <RED>my_var<RESET><GREEN>new_var<RESET>=10 7 + x=<RED>123<RESET><GREEN>456<RESET> 8 + echo <RED>$1<RESET><GREEN>$2<RESET> 9 + echo <RED>$USER<RESET><GREEN>$USERNAME<RESET> 10 + ${<RED>HOME<RESET><GREEN>HOMEDIR<RESET>} 11 + ((a<RED>+<RESET><GREEN>+=<RESET>b)) 12 + ((a<RED>*<RESET><GREEN>*=<RESET>b)) 13 + ((a<RED>/<RESET><GREEN>/=<RESET>b)) 14 + ((a<RED>%<RESET><GREEN>%=<RESET>b)) 15 + ((a<RED>|<RESET><GREEN>|=<RESET>b)) 16 + ((a<RED>^<RESET><GREEN>^=<RESET>b)) 17 + ((a<RED>=<RESET><GREEN>==<RESET>b)) 18 + ((a<RED>!<RESET><GREEN>!=<RESET>b)) 19 + ((a<RED><<RESET><GREEN><=<RESET>b)) 20 + ((a<RED>><RESET><GREEN>>=<RESET>b)) 21 + $((a<RED><<RESET><GREEN><<<RESET>b)) 22 + $((a<RED>><RESET><GREEN>>><RESET>b)) 23 + $((a<RED>&<RESET><GREEN>&&<RESET>b)) 24 + $((a<RED>|<RESET><GREEN>||<RESET>b)) 25 + ${a<RED>:<RESET><GREEN>:-<RESET>b} 26 + ${a<RED>:<RESET><GREEN>:=<RESET>b} 27 + ${a<RED>:<RESET><GREEN>:+<RESET>b} 28 + ${a<RED>:<RESET><GREEN>:?<RESET>b} 29 + ${a<RED>#<RESET><GREEN>##<RESET>*/} 30 + ${a<RED>%<RESET><GREEN>%%<RESET>.*} 31 + ${a<RED>^<RESET><GREEN>^^<RESET>} 32 + ${a<RED>,<RESET><GREEN>,,<RESET>} 33 + ${<GREEN>!<RESET>a} 34 + ${a[<RED>*<RESET><GREEN>@<RESET>]} 35 + ls <RED>-a<RESET><GREEN>-x<RESET> 36 + ls <RED>--all<RESET><GREEN>--color<RESET>
+31
t/t4034/bash/post
··· 1 + new_var=10 2 + x=456 3 + echo $2 4 + echo $USERNAME 5 + ${HOMEDIR} 6 + ((a+=b)) 7 + ((a*=b)) 8 + ((a/=b)) 9 + ((a%=b)) 10 + ((a|=b)) 11 + ((a^=b)) 12 + ((a==b)) 13 + ((a!=b)) 14 + ((a<=b)) 15 + ((a>=b)) 16 + $((a<<b)) 17 + $((a>>b)) 18 + $((a&&b)) 19 + $((a||b)) 20 + ${a:-b} 21 + ${a:=b} 22 + ${a:+b} 23 + ${a:?b} 24 + ${a##*/} 25 + ${a%%.*} 26 + ${a^^} 27 + ${a,,} 28 + ${!a} 29 + ${a[@]} 30 + ls -x 31 + ls --color
+31
t/t4034/bash/pre
··· 1 + my_var=10 2 + x=123 3 + echo $1 4 + echo $USER 5 + ${HOME} 6 + ((a+b)) 7 + ((a*b)) 8 + ((a/b)) 9 + ((a%b)) 10 + ((a|b)) 11 + ((a^b)) 12 + ((a=b)) 13 + ((a!b)) 14 + ((a<b)) 15 + ((a>b)) 16 + $((a<b)) 17 + $((a>b)) 18 + $((a&b)) 19 + $((a|b)) 20 + ${a:b} 21 + ${a:b} 22 + ${a:b} 23 + ${a:b} 24 + ${a#*/} 25 + ${a%.*} 26 + ${a^} 27 + ${a,} 28 + ${a} 29 + ${a[*]} 30 + ls -a 31 + ls --all
+18 -8
userdiff.c
··· 59 59 "(" 60 60 "(" 61 61 /* POSIX identifier with mandatory parentheses */ 62 - "[a-zA-Z_][a-zA-Z0-9_]*[ \t]*\\([ \t]*\\))" 62 + "([a-zA-Z_][a-zA-Z0-9_]*[ \t]*\\([ \t]*\\))" 63 63 "|" 64 64 /* Bashism identifier with optional parentheses */ 65 - "(function[ \t]+[a-zA-Z_][a-zA-Z0-9_]*(([ \t]*\\([ \t]*\\))|([ \t]+))" 65 + "(function[ \t]+[a-zA-Z_][a-zA-Z0-9_]*(([ \t]*\\([ \t]*\\))|([ \t]+)))" 66 66 ")" 67 - /* Optional whitespace */ 68 - "[ \t]*" 69 - /* Compound command starting with `{`, `(`, `((` or `[[` */ 70 - "(\\{|\\(\\(?|\\[\\[)" 67 + /* Everything after the function header is captured */ 68 + ".*$" 71 69 /* End of captured text */ 72 70 ")", 73 71 /* -- */ 74 - /* Characters not in the default $IFS value */ 75 - "[^ \t]+"), 72 + /* Identifiers: variable and function names */ 73 + "[a-zA-Z_][a-zA-Z0-9_]*" 74 + /* Shell variables: $VAR, ${VAR} */ 75 + "|\\$[a-zA-Z0-9_]+|\\$\\{" 76 + /*Command list separators and redirection operators */ 77 + "|\\|\\||&&|<<|>>" 78 + /* Operators ending in '=' (comparison + compound assignment) */ 79 + "|==|!=|<=|>=|[-+*/%&|^]=" 80 + /* Additional parameter expansion operators */ 81 + "|:=|:-|:\\+|:\\?|##|%%|\\^\\^|,," 82 + /* Command-line options (to avoid splitting -option) */ 83 + "|[-a-zA-Z0-9_]+" 84 + /* Brackets and grouping symbols */ 85 + "|\\(|\\)|\\{|\\}|\\[|\\]"), 76 86 PATTERNS("bibtex", 77 87 "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", 78 88 /* -- */