src/lib/content-filter.ts at main · atpota.to/flushes.app

The 1st decentralized social network for sharing when you're on the toilet. Post a "flush" today! Powered by the AT Protocol.
fork atom
flushes.app / src / lib / content-filter.ts
at main 160 lines 5.7 kB view raw
wrap content
damedotblog fix 8mo ago
2900b336
  /**
   * Sample block-list of terms rejected by the content filter.
   *
   * Edit this list to tune moderation. Entries are matched case-insensitively
   * by the functions in this module, which build regular expressions from them.
   * Order is preserved because sanitising applies the terms sequentially.
   */
  const BANNED_WORDS: string[] = [
    // Generic offensive terms
    'slur',

    // Hate speech related
    'racist',
    'bigot',
    'bigotry',
    'homophobic',
    'transphobic',

    // Profanity
    'fuck',
    'damn',
    'ass',
    'asshole',
    'bitch',

    // Violence
    'kill',
    'murder',
    'attack',
    'violence',
    'harm',
    'hurt',

    // Discrimination terms
    'retard',
    'retarded',
    'idiot',
    'stupid',
    'dumb',

    // Sexual content
    'penis',
    'vagina',
    'dick',
    'cock',
    'pussy',
    'sex',
    'masturbate',
    'orgasm',
    'horny',
    'erection',
    'blowjob',
    'handjob',

    // Spam-related
    'viagra',
    'cialis',
    'enlarge',
    'cryptocurrency',
    'bitcoin',
    'ethereum',
    'make money',
    'get rich',
    'earn fast',
    'pyramid',
    'scheme',

    // Links and promotion
    'discord.gg',
    'telegram.me',
  ];
 31
 // Whole-word patterns for explicit slurs, adapted from
 // https://github.com/Blank-Cheque/Slurs
 //
 // Every pattern is case-insensitive (`i`) so capitalised input cannot slip
 // through, and none uses the `g`/`y` flags, so `RegExp.prototype.test` is
 // stateless and the objects can safely be shared between calls.
 /* eslint-disable no-misleading-character-class */
 const EXPLICIT_SLUR_REGEXES = [
   // The final consonant is mandatory: making it optional flagged the
   // ordinary word "chin" and missed the plural form.
   /\bch[il1]nks?\b/i,                        // Anti-Asian slur
   /\bco{2}ns?\b/i,                           // Anti-Black slur
   /\bfag{1,2}(?:[oe]t?|[iy][ne]?)?s?\b/i,    // Anti-LGBTQ+ slur
   /\bk[iy]kes?\b/i,                          // Anti-Semitic slur
   /\bn[il1o]g{2}(?:[ae]r?|let|[no]g)?s?\b/i, // Anti-Black slur
   /\bn[il1o]g{2}[ae]s\b/i,                   // Anti-Black slur variation
   /\btrann[iy]e?s?\b/i,                      // Anti-transgender slur
 ];
 43
 /**
  * Checks whether a text contains a banned term or an explicit slur.
  *
  * Two views of the input are inspected:
  *  - the lowercased text as written, and
  *  - a normalised copy in which look-alike digits/symbols are mapped to
  *    letters, the separators `* . - _` are removed, and whitespace runs are
  *    collapsed.
  *
  * Checking both matters: normalisation alone turns "stupid!" into "stupidi"
  * and strips the dot that terms such as "discord.gg" rely on, so the
  * unnormalised view has to be checked as well.
  *
  * A term matches when it appears as a whole word, optionally with whitespace
  * between its letters ("s e x"). Terms of four characters or fewer also match
  * anywhere inside a longer word; this is deliberately aggressive and will
  * flag innocent words that merely contain a short term.
  *
  * @param text The text to check
  * @returns True if the text contains banned words, false otherwise
  */
 export function containsBannedWords(text: string): boolean {
   if (!text) return false;

   // Terms are data, not patterns: neutralise regex metacharacters before
   // interpolating them into a RegExp.
   const escapeRegExp = (value: string): string =>
     value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

   // Undo common obfuscation. Also applied to each term so that a term and
   // the normalised text are always compared in the same form.
   const normalize = (value: string): string =>
     value
       .replace(/0/g, 'o') // digits that resemble letters
       .replace(/1/g, 'i')
       .replace(/3/g, 'e')
       .replace(/4/g, 'a')
       .replace(/5/g, 's')
       .replace(/\$/g, 's') // symbols that resemble letters
       .replace(/@/g, 'a')
       .replace(/!/g, 'i')
       .replace(/[*.\-_]/g, '') // common censorship / separator characters
       .replace(/\s+/g, ' '); // collapse whitespace runs

   const lowered = text.toLowerCase();
   const normalized = normalize(lowered);

   // Run the slur patterns on both views so capitalised and leetspeak
   // spellings are caught as well.
   if (containsExplicitSlurs(lowered) || containsExplicitSlurs(normalized)) {
     return true;
   }

   const matchesTerm = (
     haystack: string,
     term: string,
     allowSubstring: boolean,
   ): boolean => {
     // An empty term would compile to /\b\b/ and match almost anything.
     if (!term) return false;

     // `\s*` between letters covers both the plain word and spaced-out
     // spellings, so no separate exact-match regex is needed.
     const spaced = Array.from(term, escapeRegExp).join('\\s*');
     if (new RegExp(`\\b${spaced}\\b`, 'i').test(haystack)) return true;

     return allowSubstring && haystack.includes(term);
   };

   return BANNED_WORDS.some((word) => {
     const term = word.toLowerCase();
     const isShort = word.length <= 4;
     return (
       matchesTerm(lowered, term, isShort) ||
       matchesTerm(normalized, normalize(term), isShort)
     );
   });
 }
 96
 /**
  * Masks banned terms in a text with asterisks.
  *
  * Only whole-word occurrences are masked, either written normally or with
  * whitespace between the letters ("s e x"). A term embedded in a longer word
  * is left untouched, so text that `containsBannedWords` flags through its
  * short-term substring rule may come back unchanged from this function.
  *
  * Terms are escaped before being turned into regular expressions, so a term
  * such as "discord.gg" only matches a literal dot.
  *
  * @param text The text to sanitize
  * @returns The text with each matched term replaced by one asterisk per
  *          non-whitespace character; falsy input is returned as-is
  */
 export function sanitizeText(text: string): string {
   if (!text) return text;

   // Terms are data, not patterns: neutralise regex metacharacters.
   const escapeRegExp = (value: string): string =>
     value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

   let sanitized = text;

   // Pass 1: whole-term matches, masked to the full length of the term.
   for (const word of BANNED_WORDS) {
     const exact = new RegExp(`\\b${escapeRegExp(word)}\\b`, 'gi');
     sanitized = sanitized.replace(exact, '*'.repeat(word.length));
   }

   // Pass 2: spaced-out spellings. Very short terms are skipped.
   for (const word of BANNED_WORDS) {
     if (word.length <= 2) continue;

     const spaced = Array.from(word, escapeRegExp).join('\\s*');
     const spacedRegex = new RegExp(`\\b${spaced}\\b`, 'gi');

     // Mask only the letters; the whitespace inside the match is dropped.
     sanitized = sanitized.replace(spacedRegex, (match) =>
       '*'.repeat(match.replace(/\s+/g, '').length),
     );
   }

   return sanitized;
 }
135
/**
 * Checks a text against the explicit slur patterns.
 *
 * The text is tested both as written and lowercased, so a capitalised
 * spelling (for example at the start of a sentence) is caught even by a
 * pattern that only lists lowercase letters.
 *
 * @param text The text to check
 * @returns True if the text contains any explicit slurs
 */
export function containsExplicitSlurs(text: string): boolean {
  if (!text) return false;

  const lowered = text.toLowerCase();
  return EXPLICIT_SLUR_REGEXES.some(
    (regex) => regex.test(text) || regex.test(lowered),
  );
}
146
/** Emojis accepted by `isAllowedEmoji`; compared by exact string equality. */
const ALLOWED_EMOJI_SET: ReadonlySet<string> = new Set<string>([
  '🚽', '🧻', '💩', '💨', '🚾', '🧼', '🪠',
  '🚻', '🩸', '💧', '💦', '😌', '😣', '🤢',
  '🤮', '🥴', '😮‍💨', '😳', '😵', '🌾', '🍦',
  '📱', '📖', '💭', '1️⃣', '2️⃣', '🟡', '🟤',
]);

/**
 * Tells whether the given string is exactly one of the allowed emojis.
 * @param emoji The emoji character to check
 * @returns True if the emoji is in our allowed list
 */
export function isAllowedEmoji(emoji: string): boolean {
  return ALLOWED_EMOJI_SET.has(emoji);
}