this repo has no description
1/*
2 * CONTENT WARNING
3 *
4 * This file contains explicit slurs and hateful language. We're sorry you have to see them.
5 *
6 * These words exist here for one reason: to ensure our moderation system correctly blocks them.
7 * We can't verify the filter catches the n-word without testing against the actual word.
8 * Euphemisms wouldn't prove the protection works.
9 *
10 * If reading this file has caused you distress, please know:
11 * - you are valued and welcome in this community
12 * - these words do not reflect the views of this project or its contributors
13 * - we maintain this code precisely because we believe everyone deserves an experience on the web that is free from this kinda language
14*/
15
16use regex::Regex;
17use std::sync::OnceLock;
18
// Process-wide cache of the compiled built-in slur patterns; filled on first
// use by `get_slur_regexes`.
static SLUR_REGEXES: OnceLock<Vec<Regex>> = OnceLock::new();
// Process-wide cache of the additional banned words parsed from the
// `PDS_BANNED_WORDS` environment variable; filled on first use by
// `get_extra_banned_words`.
static EXTRA_BANNED_WORDS: OnceLock<Vec<String>> = OnceLock::new();
21
/// Returns the built-in slur patterns, compiling them once on first call and
/// caching them in `SLUR_REGEXES`.
///
/// Every pattern spells one slur (plus common suffixes) as a sequence of
/// character classes. Each class lists a base letter together with accented
/// and look-alike code points and, in some positions, digit or symbol
/// substitutes, so lightly obfuscated spellings still match.
///
/// # Panics
///
/// Panics on first use if any pattern fails to compile (`unwrap`).
fn get_slur_regexes() -> &'static Vec<Regex> {
    SLUR_REGEXES.get_or_init(|| {
        vec![
            // Anti-Asian slur: five fixed letters with an optional plural `s`,
            // anchored with `\b` on both sides so it only matches as a whole word.
            Regex::new(r"\b[cĆćĈĉČčĊċÇçḈḉȻȼꞒꞓꟄꞔƇƈɕ][hĤĥȞȟḦḧḢḣḨḩḤḥḪḫH̱ẖĦħⱧⱨꞪɦꞕΗНн][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
            // Four-letter slur with a doubled `o`-like vowel (`{2}`) and optional
            // plural `s`. The leading `\b` keeps longer words that merely end in
            // the same letters (e.g. "raccoon") from matching.
            Regex::new(r"\b[cĆćĈĉČčĊċÇçḈḉȻȼꞒꞓꟄꞔƇƈɕ][ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOo0]{2}[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
            // Homophobic slur, short and long forms: the three-letter stem with
            // one or two `g`s, then an optional "vowel + t(t)" tail that may be
            // followed by an "-ry" / "-rie" ending, then an optional plural `s`.
            Regex::new(r"\b[fḞḟƑƒꞘꞙᵮᶂ][aÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa@4][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGg]{1,2}([ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOo0e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEeiÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][tŤťṪṫŢţṬṭȚțṰṱṮṯŦŧȾⱦƬƭƮʈT̈ẗᵵƫȶ]{1,2}([rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe])?)?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
            // Antisemitic slur: four-letter stem (second letter may be an `i`- or
            // `y`-like character), optional "-ry" / "-rie" ending, then any number
            // of trailing `s` characters (`*`), anchored as a whole word.
            Regex::new(r"\b[kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLlyÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ][kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]([rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe])?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]*\b").unwrap(),
            // N-word family, whole-word form: `n` + vowel + two `g`/`q`-like
            // characters, then an optional ending (several alternatives, including
            // "-let", a vowel with optional `r`, and "-nog") and an optional plural
            // `s`. The `\b` anchors keep words such as "bigger" or "niggardly"
            // from matching.
            Regex::new(r"\b[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLloÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOoІіa4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGgqꝖꝗꝘꝙɋʠ]{2}(l[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]t|[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEeaÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ]?|n[ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOo0][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGgqꝖꝗꝘꝙɋʠ]|[a4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa]?)?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
            // N-word family, unanchored form: no `\b` on either side, so this one
            // also matches when the word is embedded in a longer token. To limit
            // false positives it requires one of the explicit endings ("-let" or
            // `e`-like vowel + `r`) rather than making the ending optional.
            Regex::new(r"[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLloÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOoІіa4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGgqꝖꝗꝘꝙɋʠ]{2}(l[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]t|[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ])[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?").unwrap(),
            // Transphobic slur: `t` + `r` + one or more `a`-like vowels (`+`) +
            // one or two `n`s, followed by a mandatory "-ie", "-y" or "-er"
            // ending and an optional plural `s`. Requiring the ending keeps
            // unrelated words that share the stem (e.g. "tranquil") from matching.
            Regex::new(r"\b[tŤťṪṫŢţṬṭȚțṰṱṮṯŦŧȾⱦƬƭƮʈT̈ẗᵵƫȶ][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][aÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa4]+[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn]{1,2}([iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]|[yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ])[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
        ]
    })
}
35
36fn get_extra_banned_words() -> &'static Vec<String> {
37 EXTRA_BANNED_WORDS.get_or_init(|| {
38 std::env::var("PDS_BANNED_WORDS")
39 .unwrap_or_default()
40 .split(',')
41 .map(|s| s.trim().to_lowercase())
42 .filter(|s| !s.is_empty())
43 .collect()
44 })
45}
46
/// Returns `s` without any run of ASCII digits (`0`-`9`) at its end.
///
/// Digits elsewhere in the string and non-ASCII digits are left alone. A string
/// made up entirely of ASCII digits becomes the empty string.
fn strip_trailing_digits(s: &str) -> &str {
    // Walk backwards to the last character that is not an ASCII digit; the kept
    // prefix ends immediately after that character.
    let keep_len = s
        .char_indices()
        .rev()
        .find(|&(_, ch)| !ch.is_ascii_digit())
        .map_or(0, |(start, ch)| start + ch.len_utf8());
    &s[..keep_len]
}
50
/// Rewrites common leetspeak substitutions back to the letters they imitate.
///
/// Mapping: `4`/`@` → `a`, `3` → `e`, `1`/`!`/`|` → `i`, `0` → `o`,
/// `5`/`$` → `s`, `7` → `t`, `8` → `b`, `9` → `g`. Every other character,
/// including the digits `2` and `6`, is copied through unchanged.
fn normalize_leetspeak(s: &str) -> String {
    let mut plain = String::with_capacity(s.len());
    for symbol in s.chars() {
        let letter = match symbol {
            '@' | '4' => 'a',
            '3' => 'e',
            '!' | '|' | '1' => 'i',
            '0' => 'o',
            '$' | '5' => 's',
            '7' => 't',
            '8' => 'b',
            '9' => 'g',
            unchanged => unchanged,
        };
        plain.push(letter);
    }
    plain
}
66
67pub fn has_explicit_slur(text: &str) -> bool {
68 has_explicit_slur_with_extra_words(text, get_extra_banned_words())
69}
70
71fn has_explicit_slur_with_extra_words(text: &str, extra_words: &[String]) -> bool {
72 let text_lower = text.to_lowercase();
73 let normalized = text_lower.replace(['.', '-', '_'], "");
74 let stripped = strip_trailing_digits(&text_lower);
75 let normalized_stripped = strip_trailing_digits(&normalized);
76
77 let regexes = get_slur_regexes();
78 if regexes.iter().any(|r| {
79 r.is_match(&text_lower)
80 || r.is_match(&normalized)
81 || r.is_match(stripped)
82 || r.is_match(normalized_stripped)
83 }) {
84 return true;
85 }
86
87 if !extra_words.is_empty() {
88 let leet_normalized = normalize_leetspeak(&normalized);
89 let leet_stripped = normalize_leetspeak(strip_trailing_digits(&leet_normalized));
90 if extra_words.iter().any(|w| {
91 text_lower.contains(w)
92 || normalized.contains(w)
93 || stripped.contains(w)
94 || normalized_stripped.contains(w)
95 || leet_normalized.contains(w)
96 || leet_stripped.contains(w)
97 }) {
98 return true;
99 }
100 }
101 false
102}
103
// Unit tests for the slur filter.
//
// NOTE(review): most tests go through `has_explicit_slur`, which also consults
// the `PDS_BANNED_WORDS` environment variable; the negative assertions assume
// that variable is unset or does not contain the safe words used here.
#[cfg(test)]
mod tests {
    use super::*;
    use base64::Engine;

    /// Decodes a standard-alphabet base64 fixture into a UTF-8 string.
    ///
    /// Positive fixtures are stored base64-encoded, presumably to keep the raw
    /// words out of the test source. Panics if the input is not valid base64
    /// or does not decode to UTF-8.
    fn d(b64: &str) -> String {
        let bytes = base64::engine::general_purpose::STANDARD
            .decode(b64)
            .unwrap();
        String::from_utf8(bytes).unwrap()
    }

    // Singular, plural and upper/mixed-case forms of the first built-in pattern.
    #[test]
    fn test_chink_pattern() {
        assert!(has_explicit_slur(&d("Y2hpbms=")));
        assert!(has_explicit_slur(&d("Y2hpbmtz")));
        assert!(has_explicit_slur(&d("Q0hJTks=")));
        assert!(has_explicit_slur(&d("Q2hpbmtz")));
    }

    // Singular, plural and upper-case forms of the second built-in pattern.
    #[test]
    fn test_coon_pattern() {
        assert!(has_explicit_slur(&d("Y29vbg==")));
        assert!(has_explicit_slur(&d("Y29vbnM=")));
        assert!(has_explicit_slur(&d("Q09PTg==")));
    }

    // Short form, long form, plurals and the "-ry" derivative of the third pattern.
    #[test]
    fn test_fag_pattern() {
        assert!(has_explicit_slur(&d("ZmFn")));
        assert!(has_explicit_slur(&d("ZmFncw==")));
        assert!(has_explicit_slur(&d("ZmFnZ290")));
        assert!(has_explicit_slur(&d("ZmFnZ290cw==")));
        assert!(has_explicit_slur(&d("ZmFnZ290cnk=")));
    }

    // Singular, plural, upper-case and "-ry" forms of the fourth pattern.
    #[test]
    fn test_kike_pattern() {
        assert!(has_explicit_slur(&d("a2lrZQ==")));
        assert!(has_explicit_slur(&d("a2lrZXM=")));
        assert!(has_explicit_slur(&d("S0lLRQ==")));
        assert!(has_explicit_slur(&d("a2lrZXJ5")));
    }

    // "-er" and "-a" endings, with plurals and upper case, for the n-word patterns.
    #[test]
    fn test_nigger_pattern() {
        assert!(has_explicit_slur(&d("bmlnZ2Vy")));
        assert!(has_explicit_slur(&d("bmlnZ2Vycw==")));
        assert!(has_explicit_slur(&d("TklHR0VS")));
        assert!(has_explicit_slur(&d("bmlnZ2E=")));
        assert!(has_explicit_slur(&d("bmlnZ2Fz")));
    }

    // "-y" and "-ies" endings plus upper case for the last built-in pattern.
    #[test]
    fn test_tranny_pattern() {
        assert!(has_explicit_slur(&d("dHJhbm55")));
        assert!(has_explicit_slur(&d("dHJhbm5pZXM=")));
        assert!(has_explicit_slur(&d("VFJBTk5Z")));
    }

    // Letters separated by `.`, `-` or `_` must still be caught once the
    // separators are removed.
    #[test]
    fn test_normalization_bypass() {
        assert!(has_explicit_slur(&d("bi5pLmcuZy5lLnI=")));
        assert!(has_explicit_slur(&d("bi1pLWctZy1lLXI=")));
        assert!(has_explicit_slur(&d("bl9pX2dfZ19lX3I=")));
        assert!(has_explicit_slur(&d("Zi5hLmc=")));
        assert!(has_explicit_slur(&d("Zi1hLWc=")));
        assert!(has_explicit_slur(&d("Yy5oLmkubi5r")));
        assert!(has_explicit_slur(&d("a19pX2tfZQ==")));
    }

    // A numeric suffix must not hide the word; the last case combines
    // separators with trailing digits.
    #[test]
    fn test_trailing_digits_bypass() {
        assert!(has_explicit_slur(&d("ZmFnZ290MTIz")));
        assert!(has_explicit_slur(&d("bmlnZ2VyNjk=")));
        assert!(has_explicit_slur(&d("Y2hpbms0MjA=")));
        assert!(has_explicit_slur(&d("ZmFnMQ==")));
        assert!(has_explicit_slur(&d("a2lrZTIwMjQ=")));
        assert!(has_explicit_slur(&d("bl9pX2dfZ19lX3IxMjM=")));
    }

    // Slurs appearing as one word inside a longer sentence.
    #[test]
    fn test_embedded_in_sentence() {
        assert!(has_explicit_slur(&d("eW91IGFyZSBhIGZhZ2dvdA==")));
        assert!(has_explicit_slur(&d("c3R1cGlkIG5pZ2dlcg==")));
        assert!(has_explicit_slur(&d("Z28gYXdheSBjaGluaw==")));
    }

    // Ordinary words, including ones that share letter sequences with the
    // patterns ("trigger", "bigger", "figure"), must not be flagged.
    #[test]
    fn test_safe_words_not_matched() {
        assert!(!has_explicit_slur("hello"));
        assert!(!has_explicit_slur("world"));
        assert!(!has_explicit_slur("bluesky"));
        assert!(!has_explicit_slur("tranquil"));
        assert!(!has_explicit_slur("programmer"));
        assert!(!has_explicit_slur("trigger"));
        assert!(!has_explicit_slur("bigger"));
        assert!(!has_explicit_slur("digger"));
        assert!(!has_explicit_slur("figure"));
        assert!(!has_explicit_slur("configure"));
    }

    // Legitimate words that closely resemble a pattern must not be flagged.
    #[test]
    fn test_similar_but_safe_words() {
        assert!(!has_explicit_slur("niggardly"));
        assert!(!has_explicit_slur("raccoon"));
    }

    // Empty and whitespace-only input is never a match.
    #[test]
    fn test_empty_and_whitespace() {
        assert!(!has_explicit_slur(""));
        assert!(!has_explicit_slur(" "));
        assert!(!has_explicit_slur("\t\n"));
    }

    // Upper, title and alternating case all match because input is lowercased first.
    #[test]
    fn test_case_insensitive() {
        assert!(has_explicit_slur(&d("TklHR0VS")));
        assert!(has_explicit_slur(&d("TmlnZ2Vy")));
        assert!(has_explicit_slur(&d("TmlHZ0Vy")));
        assert!(has_explicit_slur(&d("RkFHR09U")));
        assert!(has_explicit_slur(&d("RmFnZ290")));
    }

    // Digit-for-letter substitutions are covered by the character classes of
    // the built-in patterns themselves.
    #[test]
    fn test_leetspeak_bypass() {
        assert!(has_explicit_slur(&d("ZjRnZ290")));
        assert!(has_explicit_slur(&d("ZjRnZzB0")));
        assert!(has_explicit_slur(&d("bjFnZ2Vy")));
        assert!(has_explicit_slur(&d("bjFnZzNy")));
        assert!(has_explicit_slur(&d("azFrZQ==")));
        assert!(has_explicit_slur(&d("Y2gxbms=")));
        assert!(has_explicit_slur(&d("dHI0bm55")));
    }

    // Direct checks of the leetspeak character mapping.
    #[test]
    fn test_normalize_leetspeak() {
        assert_eq!(normalize_leetspeak("h3llo"), "hello");
        assert_eq!(normalize_leetspeak("w0rld"), "world");
        assert_eq!(normalize_leetspeak("t3$t"), "test");
        assert_eq!(normalize_leetspeak("b4dw0rd"), "badword");
        assert_eq!(normalize_leetspeak("l33t5p34k"), "leetspeak");
        assert_eq!(normalize_leetspeak("@ss"), "ass");
        assert_eq!(normalize_leetspeak("sh!t"), "shit");
        assert_eq!(normalize_leetspeak("normal"), "normal");
    }

    // Exercises the extra-word list directly (bypassing the environment):
    // case, separators, trailing digits, leetspeak and substring matching,
    // followed by two negative cases.
    #[test]
    fn test_extra_banned_words() {
        let extra = vec!["badword".to_string(), "offensive".to_string()];

        assert!(has_explicit_slur_with_extra_words("badword", &extra));
        assert!(has_explicit_slur_with_extra_words("BADWORD", &extra));
        assert!(has_explicit_slur_with_extra_words("b.a.d.w.o.r.d", &extra));
        assert!(has_explicit_slur_with_extra_words("b-a-d-w-o-r-d", &extra));
        assert!(has_explicit_slur_with_extra_words("b_a_d_w_o_r_d", &extra));
        assert!(has_explicit_slur_with_extra_words("badword123", &extra));
        assert!(has_explicit_slur_with_extra_words("b4dw0rd", &extra));
        assert!(has_explicit_slur_with_extra_words("b4dw0rd789", &extra));
        assert!(has_explicit_slur_with_extra_words("b.4.d.w.0.r.d", &extra));
        assert!(has_explicit_slur_with_extra_words(
            "this contains badword here",
            &extra
        ));
        assert!(has_explicit_slur_with_extra_words("0ff3n$1v3", &extra));

        assert!(!has_explicit_slur_with_extra_words("goodword", &extra));
        assert!(!has_explicit_slur_with_extra_words("hello world", &extra));
    }
}