Weighs the soul of incoming HTTP requests to stop AI crawlers
1# Repeated letters
2#\b([a-z])\g{-1}{2,}\b
3
4# marker to ignore all code on line
5^.*/\* #no-spell-check-line \*/.*$
6# marker to ignore all code on line
7^.*\bno-spell-check(?:-line|)(?:\s.*|)$
8
9# https://cspell.org/configuration/document-settings/
10# cspell inline
11^.*\b[Cc][Ss][Pp][Ee][Ll]{2}:\s*[Dd][Ii][Ss][Aa][Bb][Ll][Ee]-[Ll][Ii][Nn][Ee]\b
12
13# copyright
14Copyright (?:\([Cc]\)|)(?:[-\d, ]|and)+(?: [A-Z][a-z]+ [A-Z][a-z]+,?)+
15
16# patch hunk comments
17^@@ -\d+(?:,\d+|) \+\d+(?:,\d+|) @@ .*
18# git index header
19index (?:[0-9a-z]{7,40},|)[0-9a-z]{7,40}\.\.[0-9a-z]{7,40}
20
21# file permissions
22['"`\s][-bcdLlpsw](?:[-r][-w][-Ssx]){2}[-r][-w][-SsTtx]\+?['"`\s]
23
24# css fonts
25\bfont(?:-family|):[^;}]+
26
27# css url wrappings
28\burl\([^)]+\)
29
30# cid urls
31(['"])cid:.*?\g{-1}
32
33# data url in parens
34\(data:(?:[^) ][^)]*?|)(?:[A-Z]{3,}|[A-Z][a-z]{2,}|[a-z]{3,})[^)]*\)
35# data url in quotes
36([`'"])data:(?:[^ `'"].*?|)(?:[A-Z]{3,}|[A-Z][a-z]{2,}|[a-z]{3,}).*\g{-1}
37# data url
38\bdata:[-a-zA-Z=;:/0-9+]*,\S*
39
40# https/http/file urls
41(?:\b(?:https?|ftp|file)://)[-A-Za-z0-9+&@#/*%?=~_|!:,.;]+[-A-Za-z0-9+&@#/*%=~_|]
42
43# mailto urls
44mailto:[-a-zA-Z=;:/?%&0-9+@._]{3,}
45
46# magnet urls
47magnet:[?=:\w]+
48
49# magnet urls (quoted)
50"magnet:[^"]+"
51
52# obs:
53"obs:[^"]*"
54
55# The `\b` here means a break, it's the fancy way to handle urls, but it makes things harder to read
56# In this example's content, I'm using a number of different ways to match things to show various approaches
57# asciinema
58\basciinema\.org/a/[0-9a-zA-Z]+
59
60# asciinema v2
61^\[\d+\.\d+, "[io]", ".*"\]$
62
63# apple
64\bdeveloper\.apple\.com/[-\w?=/]+
65# Apple music
66\bembed\.music\.apple\.com/fr/playlist/usr-share/[-\w.]+
67
68# appveyor api
69\bci\.appveyor\.com/api/projects/status/[0-9a-z]+
70# appveyor project
71\bci\.appveyor\.com/project/(?:[^/\s"]*/){2}builds?/\d+/job/[0-9a-z]+
72
73# Amazon
74
75# Amazon
76\bamazon\.com/[-\w]+/(?:dp/[0-9A-Z]+|)
77# AWS ARN
78arn:aws:[-/:\w]+
79# AWS S3
80\b\w*\.s3[^.]*\.amazonaws\.com/[-\w/&#%_?:=]*
81# AWS execute-api
82\b[0-9a-z]{10}\.execute-api\.[-0-9a-z]+\.amazonaws\.com\b
83# AWS ELB
84\b\w+\.[-0-9a-z]+\.elb\.amazonaws\.com\b
85# AWS SNS (host of the form sns.<region>.amazonaws.com/...; every dot is literal)
86\bsns\.[-0-9a-z]+\.amazonaws\.com/[-\w/&#%_?:=]*
87# AWS VPC
88vpc-\w+
89
90# While you could try to match `http://` and `https://` by using `s?` in `https?://`, sometimes there is no scheme at all, so the patterns below anchor on `\b` before the host name instead
91# YouTube url (youtube.com with optional www., or the youtu.be short host)
92\b(?:(?:www\.|)youtube\.com|youtu\.be)/(?:channel/|embed/|user/|playlist\?list=|watch\?v=|v/|)[-a-zA-Z0-9?&=_%]*
93# YouTube music
94\bmusic\.youtube\.com/youtubei/v1/browse(?:[?&]\w+=[-a-zA-Z0-9?&=_]*)
95# YouTube tag
96<\s*youtube\s+id=['"][-a-zA-Z0-9?_]*['"]
97# YouTube image
98\bimg\.youtube\.com/vi/[-a-zA-Z0-9?&=_]*
99# Google Accounts (literal host accounts.google.com followed by any path/query)
100\baccounts\.google\.com/[-_/?=.:;+%&0-9a-zA-Z]*
101# Google Analytics
102\bgoogle-analytics\.com/collect.[-0-9a-zA-Z?%=&_.~]*
103# Google APIs
104\bgoogleapis\.(?:com|dev)/[a-z]+/(?:v\d+/|)[a-z]+/[-@:./?=\w+|&]+
105# Google Artifact Registry
106\.pkg\.dev(?:/[-\w]+)+(?::[-\w]+|)
107# Google Storage
108\b[-a-zA-Z0-9.]*\bstorage\d*\.googleapis\.com(?:/\S*|)
109# Google Calendar
110\bcalendar\.google\.com/calendar(?:/u/\d+|)/embed\?src=[@./?=\w&%]+
111\w+\@group\.calendar\.google\.com\b
112# Google DataStudio
113\bdatastudio\.google\.com/(?:(?:c/|)u/\d+/|)(?:embed/|)(?:open|reporting|datasources|s)/[-0-9a-zA-Z]+(?:/page/[-0-9a-zA-Z]+|)
114# The leading `/` here is as opposed to the `\b` above
115# ... a short way to match `https://` or `http://` since most urls have one of those prefixes
116# Google Docs
117/docs\.google\.com/[a-z]+/(?:ccc\?key=\w+|(?:u/\d+|d/(?:e/|)[0-9a-zA-Z_-]+/)?(?:edit\?[-\w=#.]*|/\?[\w=&]*|))
118# Google Drive
119\bdrive\.google\.com/(?:file/d/|open)[-0-9a-zA-Z_?=]*
120# Google Groups
121\bgroups\.google\.com(?:/[a-z]+/(?:#!|)[^/\s"]+)*
122# Google Maps
123\bmaps\.google\.com/maps\?[\w&;=]*
124# Google themes
125themes\.googleusercontent\.com/static/fonts/[^/\s"]+/v\d+/[^.]+.
126# Google CDN
127\bclients2\.google(?:usercontent|)\.com[-0-9a-zA-Z/.]*
128# Goo.gl
129/goo\.gl/[a-zA-Z0-9]+
130# Google Chrome Store
131\bchrome\.google\.com/webstore/detail/[-\w]*(?:/\w*|)
132# Google Books
133\bgoogle\.(?:\w{2,4})/books(?:/\w+)*\?[-\w\d=&#.]*
134# Google Fonts
135\bfonts\.(?:googleapis|gstatic)\.com/[-/?=:;+&0-9a-zA-Z]*
136# Google Forms
137\bforms\.gle/\w+
138# Google Scholar
139\bscholar\.google\.com/citations\?user=[A-Za-z0-9_]+
140# Google Colab Research Drive
141\bcolab\.research\.google\.com/drive/[-0-9a-zA-Z_?=]*
142# Google Cloud regions
143(?:us|(?:north|south)america|europe|asia|australia|me|africa)-(?:north|south|east|west|central){1,2}\d+
144
145# GitHub SHAs (api) -- api.github.com/repos/<three path segments>/<hex id>
146\bapi\.github\.com/repos(?:/[^/\s"]+){3}/[0-9a-f]+\b
147# GitHub SHAs (markdown)
148(?:\[`?[0-9a-f]+`?\]\(https:/|)/(?:www\.|)github\.com(?:/[^/\s"]+){2,}(?:/[^/\s")]+)(?:[0-9a-f]+(?:[-0-9a-zA-Z/#.]*|)\b|)
149# GitHub SHAs
150\bgithub\.com(?:/[^/\s"]+){2}[@#][0-9a-f]+\b
151# GitHub SHA refs (markdown link whose hex text is the start of the linked commit id)
152\[([0-9a-f]+)\]\(https://(?:www\.|)github\.com/[-\w]+/[-\w]+/commit/\g{-1}[0-9a-f]*
153# GitHub wiki
154\bgithub\.com/(?:[^/]+/){2}wiki/(?:(?:[^/]+/|)_history|[^/]+(?:/_compare|)/[0-9a-f.]{40,})\b
155# githubusercontent
156/[-a-z0-9]+\.githubusercontent\.com/[-a-zA-Z0-9?&=_\/.]*
157# githubassets (literal host githubassets.com, a hex-leading path, then the rest of the path)
158\bgithubassets\.com/[0-9a-f]+(?:[-/\w.]+)
159# gist github
160\bgist\.github\.com/[^/\s"]+/[0-9a-f]+
161# git.io
162\bgit\.io/[0-9a-zA-Z]+
163# GitHub JSON
164"node_id": "[-a-zA-Z=;:/0-9+_]*"
165# Contributor
166\[[^\]]+\]\(https://github\.com/[^/\s"]+/?\)
167# GHSA
168GHSA(?:-[0-9a-z]{4}){3}
169
170# GitHub actions
171\buses:\s+[-\w.]+/[-\w./]+@[-\w.]+
172
173# GitLab commit
174\bgitlab\.[^/\s"]*/\S+/\S+/commit/[0-9a-f]{7,16}#[0-9a-f]{40}\b
175# GitLab merge requests
176\bgitlab\.[^/\s"]*/\S+/\S+/-/merge_requests/\d+/diffs#[0-9a-f]{40}\b
177# GitLab uploads
178\bgitlab\.[^/\s"]*/uploads/[-a-zA-Z=;:/0-9+]*
179# GitLab commits
180\bgitlab\.[^/\s"]*/(?:[^/\s"]+/){2}commits?/[0-9a-f]+\b
181
182# #includes
183^\s*#include\s*(?:<.*?>|".*?")
184
185# #pragma lib
186^\s*#pragma comment\(lib, ".*?"\)
187
188# binance
189accounts\.binance\.com/[a-z/]*oauth/authorize\?[-0-9a-zA-Z&%]*
190
191# bitbucket diff
192\bapi\.bitbucket\.org/\d+\.\d+/repositories/(?:[^/\s"]+/){2}diff(?:stat|)(?:/[^/\s"]+){2}:[0-9a-f]+
193# bitbucket repositories commits
194\bapi\.bitbucket\.org/\d+\.\d+/repositories/(?:[^/\s"]+/){2}commits?/[0-9a-f]+
195# bitbucket commits
196\bbitbucket\.org/(?:[^/\s"]+/){2}commits?/[0-9a-f]+
197
198# bit.ly
199\bbit\.ly/\w+
200
201# bitrise
202\bapp\.bitrise\.io/app/[0-9a-f]*/[\w.?=&]*
203
204# bootstrapcdn.com
205\bbootstrapcdn\.com/[-./\w]+
206
207# cdn.cloudflare.com
208\bcdnjs\.cloudflare\.com/[./\w]+
209
210# circleci
211\bcircleci\.com/gh(?:/[^/\s"]+){1,5}.[a-z]+\?[-0-9a-zA-Z=&]+
212
213# gitter
214\bgitter\.im(?:/[^/\s"]+){2}\?at=[0-9a-f]+
215
216# gravatar
217\bgravatar\.com/avatar/[0-9a-f]+
218
219# ibm
220[a-z.]*ibm\.com/[-_#=:%!?~.\\/\d\w]*
221
222# imgur
223\bimgur\.com/[^.]+
224
225# Internet Archive
226\barchive\.org/web/\d+/(?:[-\w.?,'/\\+&%$#_:]*)
227
228# discord
229/discord(?:app\.com|\.gg)/(?:invite/)?[a-zA-Z0-9]{7,}
230
231# Disqus
232\bdisqus\.com/[-\w/%.()!?&=_]*
233
234# medium link
235\blink\.medium\.com/[a-zA-Z0-9]+
236# medium
237\bmedium\.com/@?[^/\s"]+/[-\w]+
238
239# microsoft
240\b(?:https?://|)(?:(?:(?:blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%]*
241# powerbi
242\bapp\.powerbi\.com/reportEmbed/[^"' ]*
243# vs devops (literal host visualstudio.com, optional :443 port, then path/query)
244\bvisualstudio\.com(?::443|)/[-\w/?=%&.]*
245# microsoft store
246\bmicrosoft\.com/store/apps/\w+
247
248# mvnrepository.com
249\bmvnrepository\.com/[-0-9a-z./]+
250
251# now.sh
252/[0-9a-z-.]+\.now\.sh\b
253
254# oracle
255\bdocs\.oracle\.com/[-0-9a-zA-Z./_?#&=]*
256
257# chromatic.com (a subdomain of chromatic.com up to a closing `"` or `)`)
258/\S+\.chromatic\.com\S*[")]
259
260# codacy
261\bapi\.codacy\.com/project/badge/Grade/[0-9a-f]+
262
263# compai
264\bcompai\.pub/v1/png/[0-9a-f]+
265
266# mailgun api (messages endpoint for a <name>.mailgun.org domain)
267\.api\.mailgun\.net/v3/domains/[0-9a-z]+\.mailgun\.org/messages/[0-9a-zA-Z=@]*
268# mailgun (any <name>.mailgun.org host)
269\b[0-9a-z]+\.mailgun\.org
270
271# /message-id/
272/message-id/[-\w@./%]+
273
274# Reddit
275\breddit\.com/r/[/\w_]*
276
277# requestb.in
278\brequestb\.in/[0-9a-z]+
279
280# sched
281\b[a-z0-9]+\.sched\.com\b
282
283# Slack url
284slack://[a-zA-Z0-9?&=]+
285# Slack
286\bslack\.com/[-0-9a-zA-Z/_~?&=.]*
287# Slack edge
288\bslack-edge\.com/[-a-zA-Z0-9?&=%./]+
289# Slack images
290\bslack-imgs\.com/[-a-zA-Z0-9?&=%.]+
291
292# shields.io
293\bshields\.io/[-\w/%?=&.:+;,]*
294
295# stackexchange -- https://stackexchange.com/feeds/sites (question and short-answer links on the listed .com sites)
296\b(?:askubuntu|serverfault|stack(?:exchange|overflow)|superuser)\.com/(?:questions/\w+/[-\w]+|a/)
297
298# Sentry
299[0-9a-f]{32}\@o\d+\.ingest\.sentry\.io\b
300
301# Twitter markdown ([@handle](https://twitter.com/...) links, optionally to a status)
302\[@[^[/\]:]*?\]\(https://twitter\.com/[^/\s"')]*(?:/status/\d+(?:\?[-_0-9a-zA-Z&=]*|)|)\)
303# Twitter hashtag
304\btwitter\.com/hashtag/[\w?_=&]*
305# Twitter status
306\btwitter\.com/[^/\s"')]*(?:/status/\d+(?:\?[-_0-9a-zA-Z&=]*|)|)
307# Twitter profile images
308\btwimg\.com/profile_images/[_\w./]*
309# Twitter media
310\btwimg\.com/media/[-_\w./?=]*
311# Twitter link shortened
312\bt\.co/\w+
313
314# facebook
315\bfburl\.com/[0-9a-z_]+
316# facebook CDN
317\bfbcdn\.net/[\w/.,]*
318# facebook watch
319\bfb\.watch/[0-9A-Za-z]+
320
321# dropbox
322\bdropbox\.com/sh?/[^/\s"]+/[-0-9A-Za-z_.%?=&;]+
323
324# ipfs protocol
325ipfs://[0-9a-zA-Z]{3,}
326# ipfs url
327/ipfs/[0-9a-zA-Z]{3,}
328
329# w3
330\bw3\.org/[-0-9a-zA-Z/#.]+
331
332# loom
333\bloom\.com/embed/[0-9a-f]+
334
335# regex101
336\bregex101\.com/r/[^/\s"]+/\d+
337
338# figma
339\bfigma\.com/file(?:/[0-9a-zA-Z]+/)+
340
341# freecodecamp.org
342\bfreecodecamp\.org/[-\w/.]+
343
344# image.tmdb.org
345\bimage\.tmdb\.org/[/\w.]+
346
347# mermaid
348\bmermaid\.ink/img/[-\w]+|\bmermaid-js\.github\.io/mermaid-live-editor/#/edit/[-\w]+
349
350# Wikipedia
351\ben\.wikipedia\.org/wiki/[-\w%.#]+
352
353# gitweb
354[^"\s]+/gitweb/\S+;h=[0-9a-f]+
355
356# HyperKitty lists
357/archives/list/[^@/]+@[^/\s"]*/message/[^/\s"]*/
358
359# lists
360/thread\.html/[^"\s]+
361
362# list-management
363\blist-manage\.com/subscribe(?:[?&](?:u|id)=[0-9a-f]+)+
364
365# kubectl.kubernetes.io/last-applied-configuration
366"kubectl.kubernetes.io/last-applied-configuration": ".*"
367
368# pgp
369\bgnupg\.net/pks/lookup[?&=0-9a-zA-Z]*
370
371# Spotify
372\bopen\.spotify\.com/embed/playlist/\w+
373
374# Mastodon
375\bmastodon\.[-a-z.]*/(?:media/|@)[?&=0-9a-zA-Z_]*
376
377# scastie
378\bscastie\.scala-lang\.org/[^/]+/\w+
379
380# images.unsplash.com
381\bimages\.unsplash\.com/(?:(?:flagged|reserve)/|)[-\w./%?=%&.;]+
382
383# pastebin
384\bpastebin\.com/[\w/]+
385
386# heroku
387\b\w+\.heroku\.com/source/archive/\w+
388
389# quip
390\b\w+\.quip\.com/\w+(?:(?:#|/issues/)\w+)?
391
392# badgen.net
393\bbadgen\.net/badge/[^")\]'\s]+
394
395# statuspage.io
396\w+\.statuspage\.io\b
397
398# media.giphy.com
399\bmedia\.giphy\.com/media/[^/]+/[\w.?&=]+
400
401# tinyurl
402\btinyurl\.com/\w+
403
404# codepen
405\bcodepen\.io/[\w/]+
406
407# registry.npmjs.org
408\bregistry\.npmjs\.org/(?:@[^/"']+/|)[^/"']+/-/[-\w@.]+
409
410# getopts
411\bgetopts\s+(?:"[^"]+"|'[^']+')
412
413# ANSI color codes
414(?:\\(?:u00|x)1[Bb]|\\03[1-7]|\x1b|\\u\{1[Bb]\})\[\d+(?:;\d+)*m
415
416# URL escaped characters
417%[0-9A-F][A-F](?=[A-Za-z])
418# lower URL escaped characters
419%[0-9a-f][a-f](?=[a-z]{2,})
420# IPv6
421\b(?:[0-9a-fA-F]{0,4}:){3,7}[0-9a-fA-F]{0,4}\b
422# c99 hex digits (not the full format, just one I've seen)
4230x[0-9a-fA-F](?:\.[0-9a-fA-F]*|)[pP]
424# Punycode
425\bxn--[-0-9a-z]+
426# sha
427sha\d+:[0-9a-f]*?[a-f]{3,}[0-9a-f]*
428# sha-... -- uses a fancy capture
429(\\?['"]|")[0-9a-f]{40,}\g{-1}
430# hex runs
431\b[0-9a-fA-F]{16,}\b
432# hex in url queries
433=[0-9a-fA-F]*?(?:[A-F]{3,}|[a-f]{3,})[0-9a-fA-F]*?&
434# ssh
435(?:ssh-\S+|-nistp256) [-a-zA-Z=;:/0-9+]{12,}
436
437# PGP
438\b(?:[0-9A-F]{4} ){9}[0-9A-F]{4}\b
439# GPG keys
440\b(?:[0-9A-F]{4} ){5}(?: [0-9A-F]{4}){5}\b
441# Well known gpg keys (paths under the literal .well-known/openpgpkey/ directory)
442\.well-known/openpgpkey/[\w./]+
443
444# pki
445-----BEGIN.*-----END
446
447# pki (base64)
448LS0tLS1CRUdJT.*
449
450# C# includes
451^\s*using [^;]+;
452
453# uuid:
454\b[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}\b
455# hex digits including css/html color classes:
456(?:[\\0][xX]|\\u|[uU]\+|#x?|%23|&H)[0-9_a-fA-FgGrR]*?[a-fA-FgGrR]{2,}[0-9_a-fA-FgGrR]*(?:[uUlL]{0,3}|[iu]\d+)\b
457
458# integrity
459integrity=(['"])(?:\s*sha\d+-[-a-zA-Z=;:/0-9+]{40,})+\g{-1}
460
461# https://www.gnu.org/software/groff/manual/groff.html
462# man troff content
463\\f[BCIPR]
464# '/"
465\\\([ad]q
466
467# .desktop mime types
468^MimeTypes?=.*$
469# .desktop localized entries
470^[A-Z][a-z]+\[[a-z]+\]=.*$
471# Localized .desktop content
472Name\[[^\]]+\]=.*
473
474# IServiceProvider / isAThing
475(?:(?:\b|_|(?<=[a-z]))I|(?:\b|_)(?:nsI|isA))(?=(?:[A-Z][a-z]{2,})+(?:[A-Z\d]|\b))
476
477# crypt
478(['"])\$2[ayb]\$.{56}\g{-1}
479
480# apache/old crypt
481(['"]|)\$+(?:apr|)1\$+.{8}\$+.{22}\g{-1}
482
483# sha1 hash
484\{SHA\}[-a-zA-Z=;:/0-9+]{3,}
485
486# machine learning (?)
487\b(?i)ml(?=[a-z]{2,})
488
489# python
490#\b(?i)py(?!gments|gmy|lon|ramid|ro|th)(?=[a-z]{2,})
491
492# scrypt / argon
493\$(?:scrypt|argon\d+[di]*)\$\S+
494
495# go.sum
496\bh1:\S+
497
498# imports
499^import\s+(?:(?:static|type)\s+|)(?:[\w.]|\{\s*\w*?(?:,\s*(?:\w*|\*))+\s*\})+
500
501# scala modules
502("[^"]+"\s*%%?\s*){2,3}"[^"]+"
503
504# container images
505image: [-\w./:@]+
506
507# Docker images
508^\s*(?i)FROM\s+\S+:\S+(?:\s+AS\s+\S+|)
509
510# `docker images` REPOSITORY TAG IMAGE ID CREATED SIZE
511\s*\S+/\S+\s+\S+\s+[0-9a-f]{8,}\s+\d+\s+(?:hour|day|week)s ago\s+[\d.]+[KMGT]B
512
513# Intel intrinsics
514_mm_(?!dd)\w+
515
516# Input to GitHub JSON
517content: (['"])[-a-zA-Z=;:/0-9+]*=\g{-1}
518
519# This does not cover multiline strings, if your repository has them,
520# you'll want to remove the `(?=.*?")` suffix.
521# The `(?=.*?")` suffix should limit the false positives rate
522# printf
523%(?:(?:(?:hh?|ll?|[jzt])?[diuoxn]|l?[cs]|L?[fega]|p)(?=[a-z]{2,})|(?:X|L?[FEGA])(?=[a-zA-Z]{2,}))(?!%)(?=[_a-zA-Z]+(?!%)\b)(?=.*?['"])
524
525# Alternative printf
526# %s
527%(?:s(?=[a-z]{2,}))(?!%)(?=[_a-zA-Z]+(?!%[^s])\b)(?=.*?['"])
528
529# Python string prefix / binary prefix
530# Note that there's a high false positive rate, remove the `?=` and search for the regex to see if the matches seem like reasonable strings
531(?<!['"])\b(?:B|BR|Br|F|FR|Fr|R|RB|RF|Rb|Rf|U|UR|Ur|b|bR|br|f|fR|fr|r|rB|rF|rb|rf|u|uR|ur)['"](?=[A-Z]{3,}|[A-Z][a-z]{2,}|[a-z]{3,})
532
533# Regular expressions for (P|p)assword
534\([A-Z]\|[a-z]\)[a-z]+
535
536# JavaScript regular expressions
537# javascript test regex
538/.{3,}/[gim]*\.test\(
539# javascript match regex
540\.match\(/[^/\s"]{3,}/[gim]*\s*
541# javascript match regex
542\.match\(/\\[b].{3,}?/[gim]*\s*\)(?:;|$)
543# javascript regex
544^\s*/\\[b].{3,}?/[gim]*\s*(?:\)(?:;|$)|,$)
545# javascript replace regex
546\.replace\(/[^/\s"]{3,}/[gim]*\s*,
547# assign regex
548= /[^*].*?(?:[a-z]{3,}|[A-Z]{3,}|[A-Z][a-z]{2,}).*/[gim]*(?=\W|$)
549# perl regex test
550[!=]~ (?:/.*/|m\{.*?\}|m<.*?>|m([|!/@#,;']).*?\g{-1})
551
552# perl qr regex
553(?<!\$)\bqr(?:\{.*?\}|<.*?>|\(.*?\)|([|!/@#,;']).*?\g{-1})
554
555# perl run
556perl(?:\s+-[a-zA-Z]\w*)+
557
558# C network byte conversions
559(?:\d|\bh)to(?!ken)(?=[a-z])|to(?=[adhiklpun]\()
560
561# Go regular expressions
562regexp?\.MustCompile\((?:`[^`]*`|".*"|'.*')\)
563
564# regex choice
565\(\?:[^)]+\|[^)]+\)
566
567# proto
568^\s*(\w+)\s\g{-1} =
569
570# sed regular expressions
571sed 's/(?:[^/]*?[a-zA-Z]{3,}[^/]*?/){2}
572
573# node packages
574(["'])@[^/'" ]+/[^/'" ]+\g{-1}
575
576# go install
577go install(?:\s+[a-z]+\.[-@\w/.]+)+
578
579# pom.xml
580<(?:group|artifact)Id>.*?<
581
582# jetbrains schema https://youtrack.jetbrains.com/issue/RSRP-489571
583urn:shemas-jetbrains-com
584
585# Debian changelog severity
586[-\w]+ \(.*\) (?:\w+|baseline|unstable|experimental); urgency=(?:low|medium|high|emergency|critical)\b
587
588# kubernetes pod status lists
589# https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase
590\w+(?:-\w+)+\s+\d+/\d+\s+(?:Running|Pending|Succeeded|Failed|Unknown)\s+
591
592# kubectl - pods in CrashLoopBackOff
593\w+-[0-9a-f]+-\w+\s+\d+/\d+\s+CrashLoopBackOff\s+
594
595# kubernetes applications
596\.apps/[-\w]+
597
598# kubernetes object suffix
599-[0-9a-f]{10}-\w{5}\s
600
601# kubernetes crd patterns
602^\s*pattern: .*$
603
604# posthog secrets
605([`'"])phc_[^"',]+\g{-1}
606
607# xcode
608
609# xcodeproject scenes
610(?:Controller|destination|(?:first|second)Item|ID|id)="\w{3}-\w{2}-\w{3}"
611
612# xcode api botches
613customObjectInstantitationMethod
614
615# msvc api botches
616PrependWithABINamepsace
617
618# configure flags
619.* \| --\w{2,}.*?(?=\w+\s\w+)
620
621# font awesome classes
622\.fa-[-a-z0-9]+
623
624# bearer auth
625(['"])[Bb]ear[e][r] .{3,}?\g{-1}
626
627# bearer auth
628\b[Bb]ear[e][r]:? [-a-zA-Z=;:/0-9+.]{3,}
629
630# basic auth
631(['"])[Bb]asic [-a-zA-Z=;:/0-9+]{3,}\g{-1}
632
633# basic auth
634: [Bb]asic [-a-zA-Z=;:/0-9+.]{3,}
635
636# base64 encoded content
637([`'"])[-a-zA-Z=;:/0-9+]{3,}=\g{-1}
638# base64 encoded content in xml/sgml
639>[-a-zA-Z=;:/0-9+]{3,}=</
640# base64 encoded content, possibly wrapped in mime
641#(?:^|[\s=;:?])[-a-zA-Z=;:/0-9+]{50,}(?:[\s=;:?]|$)
642# base64 encoded json
643\beyJ[-a-zA-Z=;:/0-9+]+
644# base64 encoded pkcs
645\bMII[-a-zA-Z=;:/0-9+]+
646
647# uuencoded
648#[!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_]{40,}
649
650# DNS rr data
651(?:\d+\s+){3}(?:[-+/=.\w]{2,}\s*){1,2}
652
653# encoded-word
654=\?[-a-zA-Z0-9"*%]+\?[BQ]\?[^?]{0,75}\?=
655
656# numerator
657\bnumer\b(?=.*denom)
658
659# Time Zones
660\b(?:Africa|Atlantic|America|Antarctica|Arctic|Asia|Australia|Europe|Indian|Pacific)(?:/[-\w]+)+
661
662# linux kernel info
663^(?:bugs|flags|Features)\s+:.*
664
665# systemd mode
666systemd.*?running in system mode \([-+].*\)$
667
668# Lorem
669# Update Lorem based on your content (requires `ge` and `w` from https://github.com/jsoref/spelling; and `review` from https://github.com/check-spelling/check-spelling/wiki/Looking-for-items-locally )
670# grep '^[^#].*lorem' .github/actions/spelling/patterns.txt|perl -pne 's/.*i..\?://;s/\).*//' |tr '|' "\n"|sort -f |xargs -n1 ge|perl -pne 's/^[^:]*://'|sort -u|w|sed -e 's/ .*//'|w|review -
671# Warning, while `(?i)` is very neat and fancy, if you have some binary files that aren't proper unicode, you might run into:
672# ... Operation "substitution (s///)" returns its argument for non-Unicode code point 0x1C19AE (the code point will vary).
673# ... You could manually change `(?i)X...` to use `[Xx]...`
674# ... or you could add the files to your `excludes` file (a version after 0.0.19 should identify the file path)
675(?:(?:\w|\s|[,.])*\b(?i)(?:amet|consectetur|cursus|dolor|eros|ipsum|lacus|libero|ligula|lorem|magna|neque|nulla|suscipit|tempus)\b(?:\w|\s|[,.])*)
676
677# Non-English
678# Even repositories expecting pure English content can unintentionally have Non-English content... People will occasionally mistakenly enter [homoglyphs](https://en.wikipedia.org/wiki/Homoglyph) which are essentially typos, and using this pattern will mean check-spelling will not complain about them.
679#
680# If the content to be checked should be written in English and the only Non-English items will be people's names, then you can consider adding this.
681#
682# Alternatively, if you're using check-spelling v0.0.25+, and you would like to _check_ the Non-English content for spelling errors, you can. For information on how to do so, see:
683# https://docs.check-spelling.dev/Feature:-Configurable-word-characters.html#unicode
684[a-zA-Z]*[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź][a-zA-Z]{3}[a-zA-ZÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź]*|[a-zA-Z]{3,}[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź]|[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź][a-zA-Z]{3,}
685
686# highlighted letters
687\[[A-Z]\][a-z]+
688
689# French
690# This corpus only had capital letters, but you probably want lowercase ones as well.
691\b[LN]'+[a-z]{2,}\b
692
693# latex (check-spelling >= 0.0.22)
694\\\w{2,}\{
695
696# American Mathematical Society (AMS) / Doxygen
697TeX/AMS
698
699# File extensions
700\*\.[+\w]+,
701
702# eslint
703"varsIgnorePattern": ".+"
704
705# nolint
706nolint:\s*[\w,]+
707
708# Windows short paths
709[/\\][^/\\]{5,6}~\d{1,2}(?=[/\\])
710
711# Windows Resources with accelerators
712\b[A-Z]&[a-z]+\b(?!;)
713
714# signed off by
715(?i)Signed-off-by: .*
716
717# cygwin paths
718/cygdrive/[a-zA-Z]/(?:Program Files(?: \(.*?\)| ?)(?:/[-+.~\\/()\w ]+)*|[-+.~\\/()\w])+
719
720# in check-spelling@v0.0.22+, printf markers aren't automatically consumed
721# printf markers
722(?<!\\)\\[nrt](?=[a-z]{2,})
723# alternate printf markers if you run into latex and friends
724(?<!\\)\\[nrt](?=[a-z]{2,})(?=.*['"`])
725
726# Markdown anchor links
727\(#\S*?[a-zA-Z]\S*?\)
728
729# apache
730a2(?:en|dis)
731
732# weak e-tag
733W/"[^"]+"
734
735# authors/credits
736^\*(?: [A-Z](?:\w+|\.)){2,} (?=\[|$)
737
738# the negative lookahead here is to allow catching 'templatesz' as a misspelling
739# but to otherwise recognize a Windows path with \templates\foo.template or similar:
740\\(?:necessary|r(?:elease|eport|esolve[dr]?|esult)|t(?:arget|emplates?))(?![a-z])
741# ignore long runs of a single character:
742\b([A-Za-z])\g{-1}{3,}\b
743
744# version suffix <word>v#
745(?:(?<=[A-Z]{2})V|(?<=[a-z]{2}|[A-Z]{2})v)\d+(?:\b|(?=[a-zA-Z_]))
746
747# Compiler flags (Unix, Java/Scala)
748# Use if you have things like `-Pdocker` and want to treat them as `docker`
749#(?:^|[\t ,>"'`=(#])-(?:(?:J-|)[DPWXY]|[Llf])(?=[A-Z]{2,}|[A-Z][a-z]|[a-z]{2,})
750
751# Compiler flags (Windows / PowerShell)
752# This is a subset of the more general compiler flags pattern.
753# It avoids matching `-Path` to prevent it from being treated as `ath`
754#(?:^|[\t ,"'`=(#])-(?:[DPL](?=[A-Z]{2,})|[WXYlf](?=[A-Z]{2,}|[A-Z][a-z]|[a-z]{2,}))
755
756# Compiler flags (linker)
757,-B
758
759# libraries
760(?:\b|_)[Ll]ib(?:re(?=office)|)(?!era[lt]|ero|erty|rar(?:i(?:an|es)|y))(?=[a-z])
761
762# WWNN/WWPN (NAA identifiers)
763\b(?:0x)?10[0-9a-f]{14}\b|\b(?:0x|3)?[25][0-9a-f]{15}\b|\b(?:0x|3)?6[0-9a-f]{31}\b
764
765# iSCSI iqn (approximate regex)
766\biqn\.[0-9]{4}-[0-9]{2}(?:[\.-][a-z][a-z0-9]*)*\b
767
768# curl arguments
769\b(?:\\n|)curl(?:\.exe|)(?:\s+-[a-zA-Z]{1,2}\b)*(?:\s+-[a-zA-Z]{3,})(?:\s+-[a-zA-Z]+)*
770# set arguments
771\b(?:bash|sh|set)(?:\s+[-+][abefimouxE]{1,2})*\s+[-+][abefimouxE]{3,}(?:\s+[-+][abefimouxE]+)*
772# tar arguments
773\b(?:\\n|)g?tar(?:\.exe|)(?:(?:\s+--[-a-zA-Z]+|\s+-[a-zA-Z]+|\s[ABGJMOPRSUWZacdfh-pr-xz]+\b)(?:=[^ ]*|))+
774# tput arguments -- https://man7.org/linux/man-pages/man5/terminfo.5.html -- technically they can be more than 5 chars long...
775\btput\s+(?:(?:-[SV]|-T\s*\w+)\s+)*\w{3,5}\b
776# macOS temp folders
777/var/folders/\w\w/[+\w]+/(?:T|-Caches-)/
778# github runner temp folders
779/home/runner/work/_temp/[-_/a-z0-9]+