Weighs the soul of incoming HTTP requests to stop AI crawlers
at main 779 lines 21 kB view raw
# Spell-check ignore patterns: one regular expression per line; lines starting with `#` are comments.
# NOTE(review): the comments further down refer to check-spelling, so the text matched by these patterns is presumably skipped by that tool.

# Repeated letters
#\b([a-z])\g{-1}{2,}\b

# marker to ignore all code on line
^.*/\* #no-spell-check-line \*/.*$
# marker to ignore all code on line
^.*\bno-spell-check(?:-line|)(?:\s.*|)$

# https://cspell.org/configuration/document-settings/
# cspell inline
^.*\b[Cc][Ss][Pp][Ee][Ll]{2}:\s*[Dd][Ii][Ss][Aa][Bb][Ll][Ee]-[Ll][Ii][Nn][Ee]\b

# copyright
Copyright (?:\([Cc]\)|)(?:[-\d, ]|and)+(?: [A-Z][a-z]+ [A-Z][a-z]+,?)+

# patch hunk comments
^@@ -\d+(?:,\d+|) \+\d+(?:,\d+|) @@ .*
# git index header
index (?:[0-9a-z]{7,40},|)[0-9a-z]{7,40}\.\.[0-9a-z]{7,40}

# file permissions
['"`\s][-bcdLlpsw](?:[-r][-w][-Ssx]){2}[-r][-w][-SsTtx]\+?['"`\s]

# css fonts
\bfont(?:-family|):[^;}]+

# css url wrappings
\burl\([^)]+\)

# cid urls
(['"])cid:.*?\g{-1}

# data url in parens
\(data:(?:[^) ][^)]*?|)(?:[A-Z]{3,}|[A-Z][a-z]{2,}|[a-z]{3,})[^)]*\)
# data url in quotes
([`'"])data:(?:[^ `'"].*?|)(?:[A-Z]{3,}|[A-Z][a-z]{2,}|[a-z]{3,}).*\g{-1}
# data url
\bdata:[-a-zA-Z=;:/0-9+]*,\S*

# https/http/file urls
(?:\b(?:https?|ftp|file)://)[-A-Za-z0-9+&@#/*%?=~_|!:,.;]+[-A-Za-z0-9+&@#/*%=~_|]

# mailto urls
mailto:[-a-zA-Z=;:/?%&0-9+@._]{3,}

# magnet urls
magnet:[?=:\w]+

# magnet urls (double-quoted)
"magnet:[^"]+"

# obs:
"obs:[^"]*"

# The `\b` here means a break, it's the fancy way to handle urls, but it makes things harder to read
# In this example's content, I'm using a number of different ways to match things to show various approaches
# asciinema
\basciinema\.org/a/[0-9a-zA-Z]+

# asciinema v2
^\[\d+\.\d+, "[io]", ".*"\]$

# apple
\bdeveloper\.apple\.com/[-\w?=/]+
# Apple music
\bembed\.music\.apple\.com/fr/playlist/usr-share/[-\w.]+

# appveyor api
\bci\.appveyor\.com/api/projects/status/[0-9a-z]+
# appveyor project
\bci\.appveyor\.com/project/(?:[^/\s"]*/){2}builds?/\d+/job/[0-9a-z]+

# Amazon

# Amazon
\bamazon\.com/[-\w]+/(?:dp/[0-9A-Z]+|)
# AWS ARN
arn:aws:[-/:\w]+
# AWS S3
\b\w*\.s3[^.]*\.amazonaws\.com/[-\w/&#%_?:=]*
# AWS execute-api
\b[0-9a-z]{10}\.execute-api\.[-0-9a-z]+\.amazonaws\.com\b
# AWS ELB
\b\w+\.[-0-9a-z]+\.elb\.amazonaws\.com\b
# AWS SNS
\bsns\.[-0-9a-z]+\.amazonaws\.com/[-\w/&#%_?:=]*
# AWS VPC
vpc-\w+

# While you could try to match `http://` and `https://` by using `s?` in `https?://`, sometimes there
# (NOTE(review): the sentence above is truncated in this file; restore the remainder if it is known)
# YouTube url
\b(?:(?:www\.|)youtube\.com|youtu\.be)/(?:channel/|embed/|user/|playlist\?list=|watch\?v=|v/|)[-a-zA-Z0-9?&=_%]*
# YouTube music
\bmusic\.youtube\.com/youtubei/v1/browse(?:[?&]\w+=[-a-zA-Z0-9?&=_]*)
# YouTube tag
<\s*youtube\s+id=['"][-a-zA-Z0-9?_]*['"]
# YouTube image
\bimg\.youtube\.com/vi/[-a-zA-Z0-9?&=_]*
# Google Accounts
\baccounts\.google\.com/[-_/?=.:;+%&0-9a-zA-Z]*
# Google Analytics
\bgoogle-analytics\.com/collect.[-0-9a-zA-Z?%=&_.~]*
# Google APIs
\bgoogleapis\.(?:com|dev)/[a-z]+/(?:v\d+/|)[a-z]+/[-@:./?=\w+|&]+
# Google Artifact Registry
\.pkg\.dev(?:/[-\w]+)+(?::[-\w]+|)
# Google Storage
\b[-a-zA-Z0-9.]*\bstorage\d*\.googleapis\.com(?:/\S*|)
# Google Calendar
\bcalendar\.google\.com/calendar(?:/u/\d+|)/embed\?src=[@./?=\w&%]+
\w+\@group\.calendar\.google\.com\b
# Google DataStudio
\bdatastudio\.google\.com/(?:(?:c/|)u/\d+/|)(?:embed/|)(?:open|reporting|datasources|s)/[-0-9a-zA-Z]+(?:/page/[-0-9a-zA-Z]+|)
# The leading `/` here is as opposed to the `\b` above
# ... a short way to match `https://` or `http://` since most urls have one of those prefixes
# Google Docs
/docs\.google\.com/[a-z]+/(?:ccc\?key=\w+|(?:u/\d+|d/(?:e/|)[0-9a-zA-Z_-]+/)?(?:edit\?[-\w=#.]*|/\?[\w=&]*|))
# Google Drive
\bdrive\.google\.com/(?:file/d/|open)[-0-9a-zA-Z_?=]*
# Google Groups
\bgroups\.google\.com(?:/[a-z]+/(?:#!|)[^/\s"]+)*
# Google Maps
\bmaps\.google\.com/maps\?[\w&;=]*
# Google themes
themes\.googleusercontent\.com/static/fonts/[^/\s"]+/v\d+/[^.]+.
# Google CDN
\bclients2\.google(?:usercontent|)\.com[-0-9a-zA-Z/.]*
# Goo.gl
/goo\.gl/[a-zA-Z0-9]+
# Google Chrome Store
\bchrome\.google\.com/webstore/detail/[-\w]*(?:/\w*|)
# Google Books
\bgoogle\.(?:\w{2,4})/books(?:/\w+)*\?[-\w\d=&#.]*
# Google Fonts
\bfonts\.(?:googleapis|gstatic)\.com/[-/?=:;+&0-9a-zA-Z]*
# Google Forms
\bforms\.gle/\w+
# Google Scholar
\bscholar\.google\.com/citations\?user=[A-Za-z0-9_]+
# Google Colab Research Drive
\bcolab\.research\.google\.com/drive/[-0-9a-zA-Z_?=]*
# Google Cloud regions
(?:us|(?:north|south)america|europe|asia|australia|me|africa)-(?:north|south|east|west|central){1,2}\d+

# GitHub SHAs (api)
\bapi\.github\.com/repos(?:/[^/\s"]+){3}/[0-9a-f]+\b
# GitHub SHAs (markdown)
(?:\[`?[0-9a-f]+`?\]\(https:/|)/(?:www\.|)github\.com(?:/[^/\s"]+){2,}(?:/[^/\s")]+)(?:[0-9a-f]+(?:[-0-9a-zA-Z/#.]*|)\b|)
# GitHub SHAs
\bgithub\.com(?:/[^/\s"]+){2}[@#][0-9a-f]+\b
# GitHub SHA refs (link text must be a prefix of the commit hash in the URL)
\[([0-9a-f]+)\]\(https://(?:www\.|)github\.com/[-\w]+/[-\w]+/commit/\g{-1}[0-9a-f]*
# GitHub wiki
\bgithub\.com/(?:[^/]+/){2}wiki/(?:(?:[^/]+/|)_history|[^/]+(?:/_compare|)/[0-9a-f.]{40,})\b
# githubusercontent
/[-a-z0-9]+\.githubusercontent\.com/[-a-zA-Z0-9?&=_\/.]*
# githubassets
\bgithubassets\.com/[0-9a-f]+(?:[-/\w.]+)
# gist github
\bgist\.github\.com/[^/\s"]+/[0-9a-f]+
# git.io
\bgit\.io/[0-9a-zA-Z]+
# GitHub JSON
"node_id": "[-a-zA-Z=;:/0-9+_]*"
# Contributor
\[[^\]]+\]\(https://github\.com/[^/\s"]+/?\)
# GHSA
GHSA(?:-[0-9a-z]{4}){3}

# GitHub actions
\buses:\s+[-\w.]+/[-\w./]+@[-\w.]+

# GitLab commit
\bgitlab\.[^/\s"]*/\S+/\S+/commit/[0-9a-f]{7,16}#[0-9a-f]{40}\b
# GitLab merge requests
\bgitlab\.[^/\s"]*/\S+/\S+/-/merge_requests/\d+/diffs#[0-9a-f]{40}\b
# GitLab uploads
\bgitlab\.[^/\s"]*/uploads/[-a-zA-Z=;:/0-9+]*
# GitLab commits
\bgitlab\.[^/\s"]*/(?:[^/\s"]+/){2}commits?/[0-9a-f]+\b
# #includes
^\s*#include\s*(?:<.*?>|".*?")

# #pragma lib
^\s*#pragma comment\(lib, ".*?"\)

# binance
accounts\.binance\.com/[a-z/]*oauth/authorize\?[-0-9a-zA-Z&%]*

# bitbucket diff
\bapi\.bitbucket\.org/\d+\.\d+/repositories/(?:[^/\s"]+/){2}diff(?:stat|)(?:/[^/\s"]+){2}:[0-9a-f]+
# bitbucket repositories commits
\bapi\.bitbucket\.org/\d+\.\d+/repositories/(?:[^/\s"]+/){2}commits?/[0-9a-f]+
# bitbucket commits
\bbitbucket\.org/(?:[^/\s"]+/){2}commits?/[0-9a-f]+

# bit.ly
\bbit\.ly/\w+

# bitrise
\bapp\.bitrise\.io/app/[0-9a-f]*/[\w.?=&]*

# bootstrapcdn.com
\bbootstrapcdn\.com/[-./\w]+

# cdnjs.cloudflare.com
\bcdnjs\.cloudflare\.com/[./\w]+

# circleci
\bcircleci\.com/gh(?:/[^/\s"]+){1,5}.[a-z]+\?[-0-9a-zA-Z=&]+

# gitter
\bgitter\.im(?:/[^/\s"]+){2}\?at=[0-9a-f]+

# gravatar
\bgravatar\.com/avatar/[0-9a-f]+

# ibm
[a-z.]*ibm\.com/[-_#=:%!?~.\\/\d\w]*

# imgur
\bimgur\.com/[^.]+

# Internet Archive
\barchive\.org/web/\d+/(?:[-\w.?,'/\\+&%$#_:]*)

# discord
/discord(?:app\.com|\.gg)/(?:invite/)?[a-zA-Z0-9]{7,}

# Disqus
\bdisqus\.com/[-\w/%.()!?&=_]*

# medium link
\blink\.medium\.com/[a-zA-Z0-9]+
# medium
\bmedium\.com/@?[^/\s"]+/[-\w]+

# microsoft
\b(?:https?://|)(?:(?:(?:blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%]*
# powerbi
\bapp\.powerbi\.com/reportEmbed/[^"' ]*
# vs devops
\bvisualstudio\.com(?::443|)/[-\w/?=%&.]*
# microsoft store
\bmicrosoft\.com/store/apps/\w+

# mvnrepository.com
\bmvnrepository\.com/[-0-9a-z./]+

# now.sh
/[0-9a-z-.]+\.now\.sh\b

# oracle
\bdocs\.oracle\.com/[-0-9a-zA-Z./_?#&=]*

# chromatic.com
/\S+\.chromatic\.com\S*[")]

# codacy
\bapi\.codacy\.com/project/badge/Grade/[0-9a-f]+

# compai
\bcompai\.pub/v1/png/[0-9a-f]+
# mailgun api
\.api\.mailgun\.net/v3/domains/[0-9a-z]+\.mailgun\.org/messages/[0-9a-zA-Z=@]*
# mailgun (sub-domain of mailgun.org)
\b[0-9a-z]+\.mailgun\.org

# /message-id/
/message-id/[-\w@./%]+

# Reddit
\breddit\.com/r/[/\w_]*

# requestb.in
\brequestb\.in/[0-9a-z]+

# sched
\b[a-z0-9]+\.sched\.com\b

# Slack url
slack://[a-zA-Z0-9?&=]+
# Slack
\bslack\.com/[-0-9a-zA-Z/_~?&=.]*
# Slack edge
\bslack-edge\.com/[-a-zA-Z0-9?&=%./]+
# Slack images
\bslack-imgs\.com/[-a-zA-Z0-9?&=%.]+

# shields.io
\bshields\.io/[-\w/%?=&.:+;,]*

# stackexchange -- https://stackexchange.com/feeds/sites
\b(?:askubuntu|serverfault|stack(?:exchange|overflow)|superuser)\.com/(?:questions/\w+/[-\w]+|a/)

# Sentry
[0-9a-f]{32}\@o\d+\.ingest\.sentry\.io\b

# Twitter markdown
\[@[^[/\]:]*?\]\(https://twitter\.com/[^/\s"')]*(?:/status/\d+(?:\?[-_0-9a-zA-Z&=]*|)|)\)
# Twitter hashtag
\btwitter\.com/hashtag/[\w?_=&]*
# Twitter status
\btwitter\.com/[^/\s"')]*(?:/status/\d+(?:\?[-_0-9a-zA-Z&=]*|)|)
# Twitter profile images
\btwimg\.com/profile_images/[_\w./]*
# Twitter media
\btwimg\.com/media/[-_\w./?=]*
# Twitter link shortened
\bt\.co/\w+

# facebook
\bfburl\.com/[0-9a-z_]+
# facebook CDN
\bfbcdn\.net/[\w/.,]*
# facebook watch
\bfb\.watch/[0-9A-Za-z]+

# dropbox
\bdropbox\.com/sh?/[^/\s"]+/[-0-9A-Za-z_.%?=&;]+

# ipfs protocol
ipfs://[0-9a-zA-Z]{3,}
# ipfs url
/ipfs/[0-9a-zA-Z]{3,}

# w3
\bw3\.org/[-0-9a-zA-Z/#.]+

# loom
\bloom\.com/embed/[0-9a-f]+

# regex101
\bregex101\.com/r/[^/\s"]+/\d+

# figma
\bfigma\.com/file(?:/[0-9a-zA-Z]+/)+

# freecodecamp.org
\bfreecodecamp\.org/[-\w/.]+

# image.tmdb.org
\bimage\.tmdb\.org/[/\w.]+

# mermaid
\bmermaid\.ink/img/[-\w]+|\bmermaid-js\.github\.io/mermaid-live-editor/#/edit/[-\w]+

# Wikipedia
\ben\.wikipedia\.org/wiki/[-\w%.#]+

# gitweb
[^"\s]+/gitweb/\S+;h=[0-9a-f]+

# HyperKitty lists
/archives/list/[^@/]+@[^/\s"]*/message/[^/\s"]*/

# lists
/thread\.html/[^"\s]+

# list-management
\blist-manage\.com/subscribe(?:[?&](?:u|id)=[0-9a-f]+)+

# kubectl.kubernetes.io/last-applied-configuration
"kubectl\.kubernetes\.io/last-applied-configuration": ".*"

# pgp
\bgnupg\.net/pks/lookup[?&=0-9a-zA-Z]*

# Spotify
\bopen\.spotify\.com/embed/playlist/\w+

# Mastodon
\bmastodon\.[-a-z.]*/(?:media/|@)[?&=0-9a-zA-Z_]*

# scastie
\bscastie\.scala-lang\.org/[^/]+/\w+

# images.unsplash.com
\bimages\.unsplash\.com/(?:(?:flagged|reserve)/|)[-\w./%?=%&.;]+

# pastebin
\bpastebin\.com/[\w/]+

# heroku
\b\w+\.heroku\.com/source/archive/\w+

# quip
\b\w+\.quip\.com/\w+(?:(?:#|/issues/)\w+)?

# badgen.net
\bbadgen\.net/badge/[^")\]'\s]+

# statuspage.io
\w+\.statuspage\.io\b

# media.giphy.com
\bmedia\.giphy\.com/media/[^/]+/[\w.?&=]+

# tinyurl
\btinyurl\.com/\w+

# codepen
\bcodepen\.io/[\w/]+

# registry.npmjs.org
\bregistry\.npmjs\.org/(?:@[^/"']+/|)[^/"']+/-/[-\w@.]+

# getopts
\bgetopts\s+(?:"[^"]+"|'[^']+')

# ANSI color codes
(?:\\(?:u00|x)1[Bb]|\\03[1-7]|\x1b|\\u\{1[Bb]\})\[\d+(?:;\d+)*m

# URL escaped characters
%[0-9A-F][A-F](?=[A-Za-z])
# lower URL escaped characters
%[0-9a-f][a-f](?=[a-z]{2,})
# IPv6
\b(?:[0-9a-fA-F]{0,4}:){3,7}[0-9a-fA-F]{0,4}\b
# c99 hex digits (not the full format, just one I've seen)
0x[0-9a-fA-F](?:\.[0-9a-fA-F]*|)[pP]
# Punycode
\bxn--[-0-9a-z]+
# sha
sha\d+:[0-9a-f]*?[a-f]{3,}[0-9a-f]*
# sha-... -- uses a fancy capture
(\\?['"]|&quot;)[0-9a-f]{40,}\g{-1}
# hex runs
\b[0-9a-fA-F]{16,}\b
# hex in url queries
=[0-9a-fA-F]*?(?:[A-F]{3,}|[a-f]{3,})[0-9a-fA-F]*?&
# ssh
(?:ssh-\S+|-nistp256) [-a-zA-Z=;:/0-9+]{12,}

# PGP
\b(?:[0-9A-F]{4} ){9}[0-9A-F]{4}\b
# GPG keys
\b(?:[0-9A-F]{4} ){5}(?: [0-9A-F]{4}){5}\b
# Well known gpg keys
\.well-known/openpgpkey/[\w./]+

# pki
-----BEGIN.*-----END

# pki (base64)
LS0tLS1CRUdJT.*

# C# includes
^\s*using [^;]+;

# uuid:
\b[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}\b
# hex digits including css/html color classes:
(?:[\\0][xX]|\\u|[uU]\+|#x?|%23|&H)[0-9_a-fA-FgGrR]*?[a-fA-FgGrR]{2,}[0-9_a-fA-FgGrR]*(?:[uUlL]{0,3}|[iu]\d+)\b

# integrity
integrity=(['"])(?:\s*sha\d+-[-a-zA-Z=;:/0-9+]{40,})+\g{-1}

# https://www.gnu.org/software/groff/manual/groff.html
# man troff content
\\f[BCIPR]
# '/"
\\\([ad]q

# .desktop mime types
^MimeTypes?=.*$
# .desktop localized entries
^[A-Z][a-z]+\[[a-z]+\]=.*$
# Localized .desktop content
Name\[[^\]]+\]=.*

# IServiceProvider / isAThing
(?:(?:\b|_|(?<=[a-z]))I|(?:\b|_)(?:nsI|isA))(?=(?:[A-Z][a-z]{2,})+(?:[A-Z\d]|\b))

# crypt
(['"])\$2[ayb]\$.{56}\g{-1}

# apache/old crypt
(['"]|)\$+(?:apr|)1\$+.{8}\$+.{22}\g{-1}

# sha1 hash
\{SHA\}[-a-zA-Z=;:/0-9+]{3,}

# machine learning (?)
\b(?i)ml(?=[a-z]{2,})

# python
#\b(?i)py(?!gments|gmy|lon|ramid|ro|th)(?=[a-z]{2,})

# scrypt / argon
\$(?:scrypt|argon\d+[di]*)\$\S+

# go.sum
\bh1:\S+

# imports
^import\s+(?:(?:static|type)\s+|)(?:[\w.]|\{\s*\w*?(?:,\s*(?:\w*|\*))+\s*\})+

# scala modules
("[^"]+"\s*%%?\s*){2,3}"[^"]+"

# container images
image: [-\w./:@]+

# Docker images
^\s*(?i)FROM\s+\S+:\S+(?:\s+AS\s+\S+|)

# `docker images` REPOSITORY TAG IMAGE ID CREATED SIZE
\s*\S+/\S+\s+\S+\s+[0-9a-f]{8,}\s+\d+\s+(?:hour|day|week)s ago\s+[\d.]+[KMGT]B

# Intel intrinsics
_mm_(?!dd)\w+

# Input to GitHub JSON
content: (['"])[-a-zA-Z=;:/0-9+]*=\g{-1}

# This does not cover multiline strings, if your repository has them,
# you'll want to remove the `(?=.*?")` suffix.
# The `(?=.*?")` suffix should limit the false positives rate
# printf
%(?:(?:(?:hh?|ll?|[jzt])?[diuoxn]|l?[cs]|L?[fega]|p)(?=[a-z]{2,})|(?:X|L?[FEGA])(?=[a-zA-Z]{2,}))(?!%)(?=[_a-zA-Z]+(?!%)\b)(?=.*?['"])

# Alternative printf
# %s
%(?:s(?=[a-z]{2,}))(?!%)(?=[_a-zA-Z]+(?!%[^s])\b)(?=.*?['"])

# Python string prefix / binary prefix
# Note that there's a high false positive rate, remove the `?=` and search for the regex to see if the matches seem like reasonable strings
(?<!['"])\b(?:B|BR|Br|F|FR|Fr|R|RB|RF|Rb|Rf|U|UR|Ur|b|bR|br|f|fR|fr|r|rB|rF|rb|rf|u|uR|ur)['"](?=[A-Z]{3,}|[A-Z][a-z]{2,}|[a-z]{3,})

# Regular expressions for (P|p)assword
\([A-Z]\|[a-z]\)[a-z]+

# JavaScript regular expressions
# javascript test regex
/.{3,}/[gim]*\.test\(
# javascript match regex
\.match\(/[^/\s"]{3,}/[gim]*\s*
# javascript match regex (pattern starting with `\b`)
\.match\(/\\[b].{3,}?/[gim]*\s*\)(?:;|$)
# javascript regex (literal starting with `\b` at the start of a line)
^\s*/\\[b].{3,}?/[gim]*\s*(?:\)(?:;|$)|,$)
# javascript replace regex
\.replace\(/[^/\s"]{3,}/[gim]*\s*,
# assign regex
= /[^*].*?(?:[a-z]{3,}|[A-Z]{3,}|[A-Z][a-z]{2,}).*/[gim]*(?=\W|$)
# perl regex test
[!=]~ (?:/.*/|m\{.*?\}|m<.*?>|m([|!/@#,;']).*?\g{-1})

# perl qr regex
(?<!\$)\bqr(?:\{.*?\}|<.*?>|\(.*?\)|([|!/@#,;']).*?\g{-1})

# perl run
perl(?:\s+-[a-zA-Z]\w*)+

# C network byte conversions
(?:\d|\bh)to(?!ken)(?=[a-z])|to(?=[adhiklpun]\()

# Go regular expressions
regexp?\.MustCompile\((?:`[^`]*`|".*"|'.*')\)

# regex choice
\(\?:[^)]+\|[^)]+\)

# proto
^\s*(\w+)\s\g{-1} =

# sed regular expressions
sed 's/(?:[^/]*?[a-zA-Z]{3,}[^/]*?/){2}

# node packages
(["'])@[^/'" ]+/[^/'" ]+\g{-1}

# go install
go install(?:\s+[a-z]+\.[-@\w/.]+)+

# pom.xml
<(?:group|artifact)Id>.*?<

# jetbrains schema https://youtrack.jetbrains.com/issue/RSRP-489571
# (the `shemas` spelling is part of the text being matched, do not "correct" it)
urn:shemas-jetbrains-com

# Debian changelog severity
[-\w]+ \(.*\) (?:\w+|baseline|unstable|experimental); urgency=(?:low|medium|high|emergency|critical)\b

# kubernetes pod status lists
# https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase
\w+(?:-\w+)+\s+\d+/\d+\s+(?:Running|Pending|Succeeded|Failed|Unknown)\s+

# kubectl - pods in CrashLoopBackOff
\w+-[0-9a-f]+-\w+\s+\d+/\d+\s+CrashLoopBackOff\s+

# kubernetes applications
\.apps/[-\w]+

# kubernetes object suffix
-[0-9a-f]{10}-\w{5}\s

# kubernetes crd patterns
^\s*pattern: .*$

# posthog secrets
([`'"])phc_[^"',]+\g{-1}

# xcode

# xcodeproject scenes
(?:Controller|destination|(?:first|second)Item|ID|id)="\w{3}-\w{2}-\w{3}"

# xcode api botches (misspelled identifier matched verbatim)
customObjectInstantitationMethod

# msvc api botches (misspelled identifier matched verbatim)
PrependWithABINamepsace

# configure flags
.* \| --\w{2,}.*?(?=\w+\s\w+)

# font awesome classes
\.fa-[-a-z0-9]+

# bearer auth (quoted)
(['"])[Bb]ear[e][r] .{3,}?\g{-1}

# bearer auth (unquoted, optional `:` after the keyword)
\b[Bb]ear[e][r]:? [-a-zA-Z=;:/0-9+.]{3,}

# basic auth (quoted)
(['"])[Bb]asic [-a-zA-Z=;:/0-9+]{3,}\g{-1}

# basic auth (unquoted, following `: `)
: [Bb]asic [-a-zA-Z=;:/0-9+.]{3,}

# base64 encoded content
([`'"])[-a-zA-Z=;:/0-9+]{3,}=\g{-1}
# base64 encoded content in xml/sgml
>[-a-zA-Z=;:/0-9+]{3,}=</
# base64 encoded content, possibly wrapped in mime
#(?:^|[\s=;:?])[-a-zA-Z=;:/0-9+]{50,}(?:[\s=;:?]|$)
# base64 encoded json
\beyJ[-a-zA-Z=;:/0-9+]+
# base64 encoded pkcs
\bMII[-a-zA-Z=;:/0-9+]+

# uuencoded
#[!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_]{40,}

# DNS rr data
(?:\d+\s+){3}(?:[-+/=.\w]{2,}\s*){1,2}

# encoded-word
=\?[-a-zA-Z0-9"*%]+\?[BQ]\?[^?]{0,75}\?=

# numerator
\bnumer\b(?=.*denom)

# Time Zones
\b(?:Africa|Atlantic|America|Antarctica|Arctic|Asia|Australia|Europe|Indian|Pacific)(?:/[-\w]+)+

# linux kernel info
^(?:bugs|flags|Features)\s+:.*

# systemd mode
systemd.*?running in system mode \([-+].*\)$

# Lorem
# Update Lorem based on your content (requires `ge` and `w` from https://github.com/jsoref/spelling; and `review` from https://github.com/check-spelling/check-spelling/wiki/Looking-for-items-locally )
# grep '^[^#].*lorem' .github/actions/spelling/patterns.txt|perl -pne 's/.*i..\?://;s/\).*//' |tr '|' "\n"|sort -f |xargs -n1 ge|perl -pne 's/^[^:]*://'|sort -u|w|sed -e 's/ .*//'|w|review -
# Warning, while `(?i)` is very neat and fancy, if you have some binary files that aren't proper unicode, you might run into:
# ... Operation "substitution (s///)" returns its argument for non-Unicode code point 0x1C19AE (the code point will vary).
# ... You could manually change `(?i)X...` to use `[Xx]...`
# ... or you could add the files to your `excludes` file (a version after 0.0.19 should identify the file path)
(?:(?:\w|\s|[,.])*\b(?i)(?:amet|consectetur|cursus|dolor|eros|ipsum|lacus|libero|ligula|lorem|magna|neque|nulla|suscipit|tempus)\b(?:\w|\s|[,.])*)

# Non-English
# Even repositories expecting pure English content can unintentionally have Non-English content... People will occasionally mistakenly enter [homoglyphs](https://en.wikipedia.org/wiki/Homoglyph) which are essentially typos, and using this pattern will mean check-spelling will not complain about them.
#
# If the content to be checked should be written in English and the only Non-English items will be people's names, then you can consider adding this.
#
# Alternatively, if you're using check-spelling v0.0.25+, and you would like to _check_ the Non-English content for spelling errors, you can. For information on how to do so, see:
# https://docs.check-spelling.dev/Feature:-Configurable-word-characters.html#unicode
[a-zA-Z]*[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź][a-zA-Z]{3}[a-zA-ZÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź]*|[a-zA-Z]{3,}[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź]|[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź][a-zA-Z]{3,}

# highlighted letters
\[[A-Z]\][a-z]+

# French
# This corpus only had capital letters, but you probably want lowercase ones as well.
\b[LN]'+[a-z]{2,}\b

# latex (check-spelling >= 0.0.22)
\\\w{2,}\{

# American Mathematical Society (AMS) / Doxygen
TeX/AMS

# File extensions
\*\.[+\w]+,

# eslint
"varsIgnorePattern": ".+"

# nolint
nolint:\s*[\w,]+

# Windows short paths
[/\\][^/\\]{5,6}~\d{1,2}(?=[/\\])

# Windows Resources with accelerators
\b[A-Z]&[a-z]+\b(?!;)

# signed off by
(?i)Signed-off-by: .*

# cygwin paths
/cygdrive/[a-zA-Z]/(?:Program Files(?: \(.*?\)| ?)(?:/[-+.~\\/()\w ]+)*|[-+.~\\/()\w])+

# in check-spelling@v0.0.22+, printf markers aren't automatically consumed
# printf markers
(?<!\\)\\[nrt](?=[a-z]{2,})
# alternate printf markers if you run into latex and friends
(?<!\\)\\[nrt](?=[a-z]{2,})(?=.*['"`])

# Markdown anchor links
\(#\S*?[a-zA-Z]\S*?\)

# apache (`a2en` / `a2dis` command prefixes)
a2(?:en|dis)

# weak e-tag
W/"[^"]+"

# authors/credits
^\*(?: [A-Z](?:\w+|\.)){2,} (?=\[|$)

# the negative lookahead here is to allow catching 'templatesz' as a misspelling
# but to otherwise recognize a Windows path with \templates\foo.template or similar:
\\(?:necessary|r(?:elease|eport|esolve[dr]?|esult)|t(?:arget|emplates?))(?![a-z])
# ignore long runs of a single character:
\b([A-Za-z])\g{-1}{3,}\b

# version suffix <word>v#
(?:(?<=[A-Z]{2})V|(?<=[a-z]{2}|[A-Z]{2})v)\d+(?:\b|(?=[a-zA-Z_]))

# Compiler flags (Unix, Java/Scala)
# Use if you have things like `-Pdocker` and want to treat them as `docker`
#(?:^|[\t ,>"'`=(#])-(?:(?:J-|)[DPWXY]|[Llf])(?=[A-Z]{2,}|[A-Z][a-z]|[a-z]{2,})

# Compiler flags (Windows / PowerShell)
# This is a subset of the more general compiler flags pattern.
# It avoids matching `-Path` to prevent it from being treated as `ath`
#(?:^|[\t ,"'`=(#])-(?:[DPL](?=[A-Z]{2,})|[WXYlf](?=[A-Z]{2,}|[A-Z][a-z]|[a-z]{2,}))

# Compiler flags (linker)
,-B

# libraries (`lib`/`Lib` prefix; the lookahead excludes words such as liberal, liberty, librarian, library)
(?:\b|_)[Ll]ib(?:re(?=office)|)(?!era[lt]|ero|erty|rar(?:i(?:an|es)|y))(?=[a-z])

# WWNN/WWPN (NAA identifiers)
\b(?:0x)?10[0-9a-f]{14}\b|\b(?:0x|3)?[25][0-9a-f]{15}\b|\b(?:0x|3)?6[0-9a-f]{31}\b

# iSCSI iqn (approximate regex)
\biqn\.[0-9]{4}-[0-9]{2}(?:[\.-][a-z][a-z0-9]*)*\b

# curl arguments
\b(?:\\n|)curl(?:\.exe|)(?:\s+-[a-zA-Z]{1,2}\b)*(?:\s+-[a-zA-Z]{3,})(?:\s+-[a-zA-Z]+)*
# set arguments (option clusters after `bash`, `sh` or `set`)
\b(?:bash|sh|set)(?:\s+[-+][abefimouxE]{1,2})*\s+[-+][abefimouxE]{3,}(?:\s+[-+][abefimouxE]+)*
# tar arguments
\b(?:\\n|)g?tar(?:\.exe|)(?:(?:\s+--[-a-zA-Z]+|\s+-[a-zA-Z]+|\s[ABGJMOPRSUWZacdfh-pr-xz]+\b)(?:=[^ ]*|))+
# tput arguments -- https://man7.org/linux/man-pages/man5/terminfo.5.html -- technically they can be more than 5 chars long...
\btput\s+(?:(?:-[SV]|-T\s*\w+)\s+)*\w{3,5}\b
# macOS temp folders
/var/folders/\w\w/[+\w]+/(?:T|-Caches-)/
# github runner temp folders
/home/runner/work/_temp/[-_/a-z0-9]+