this repo has no description
at main 529 lines 21 kB view raw
// Email classifier using rule-based approach learned from labeled data

import type { EmailInput, ClassificationResult } from "./types.ts";

// ---------------------------------------------------------------------------
// Pattern tables
//
// All patterns are matched against lower-cased text, so they are written in
// lower case. None of them use the `g` or `y` flags, which keeps
// `RegExp.prototype.test` stateless and makes it safe to share the instances
// across calls instead of rebuilding them for every email.
// ---------------------------------------------------------------------------

/** Subject prefix that marks a message as a reply. */
const REPLY_SUBJECT = /^re:/i;

/** Phrases a sender uses when answering a message they received. */
const OUTREACH_RESPONSE_PATTERNS: readonly RegExp[] = [
  /\bthank\s+you\s+for\s+reaching\s+out\b/,
  /\bthanks\s+for\s+reaching\s+out\b/,
  /\bthank\s+you\s+for\s+(your\s+)?(email|inquiry|question|interest)\b/,
  /\bin\s+response\s+to\s+your\s+(email|inquiry|question)\b/,
];

/** Security, password and account-state alerts. */
const SECURITY_PATTERNS: readonly RegExp[] = [
  /\bpassword\s+(reset|change|update|expired)\b/,
  /\breset\s+your\s+password\b/,
  /\baccount\s+security\b/,
  /\bsecurity\s+alert\b/,
  /\bunusual\s+(sign[- ]?in|activity)\b/,
  /\bverification\s+code\b/,
  /\b(2fa|mfa|two[- ]factor)\b/,
  /\bcompromised\s+account\b/,
  /\baccount\s+(locked|suspended)\b/,
  /\bsuspicious\s+activity\b/,
];

/** Tuition-savings marketing; real security alerts won't talk about tuition savings. */
const SECURITY_MARKETING_EXCLUSION = /\bsaving.*\bon\s+tuition\b|\btuition.*\bsaving\b/;

/** Confirmations of something the student did (applied, enrolled). */
const STUDENT_ACTION_PATTERNS: readonly RegExp[] = [
  /\bapplication\s+(received|complete|submitted|confirmation)\b/,
  /\breceived\s+your\s+application\b/,
  /\bthank\s+you\s+for\s+(applying|submitting)\b/,
  /\benrollment\s+confirmation\b/,
  /\bconfirmation\s+(of|for)\s+(your\s+)?(application|enrollment)\b/,
  /\byour\s+application\s+(has\s+been|is)\s+(received|complete)\b/,
];

/** Any of these turns an apparent confirmation back into outreach/marketing. */
const STUDENT_ACTION_EXCLUSIONS: readonly RegExp[] = [
  // Marketing about "how to apply"
  /\bhow\s+to\s+apply\b|\bapply\s+now\b|\bstart\s+(your\s+)?application\b/,
  // "haven't received your application" or "we haven't received" (outreach)
  /\bhaven'?t\s+received\s+your\s+application\b|\bwe\s+haven'?t\s+received\b/,
  // "before you apply" or "help with before you apply" (outreach)
  /\bbefore\s+you\s+apply\b/,
];

/** Accepted-student portal, deposit and admission-decision language. */
const ACCEPTED_PATTERNS: readonly RegExp[] = [
  /\baccepted\s+(student\s+)?portal\b/,
  /\byour\s+(personalized\s+)?accepted\s+portal\b/,
  /\bdeposit\s+(today|now|by|to\s+reserve)\b/,
  /\breserve\s+your\s+(place|spot)\b/,
  /\bcongratulations.*\baccepted\b/,
  /\byou\s+(have\s+been|are|were)\s+accepted\b/,
  /\badmission\s+(decision|offer)\b/,
  /\benroll(ment)?\s+deposit\b/,
];

/** Single-pattern exclusions for the accepted-student rule. */
const ACCEPTED_EXCLUSIONS: readonly RegExp[] = [
  // Pre-admission and marketing
  /\bacceptance\s+rate\b|\bhigh\s+acceptance\b|\bpre[- ]admit(ted)?\b|\bautomatic\s+admission\b/,
  // "direct admit/admission" marketing that asks to complete profile
  /\bdirect\s+(admit(ted)?|admission)\b.*\b(complete|submit).*\bprofile\b|\b(complete|submit).*\bprofile\b.*\bdirect\s+(admit(ted)?|admission)\b/,
  // Marketing about future admission decisions
  /\byou\s+will\s+(also\s+)?receive\s+(an?\s+)?(accelerated\s+)?admission\s+decision\b/,
  /\breceive\s+an\s+admission\s+decision\s+within\b/,
  // "Priority Student" spam that asks to submit application
  /\bpriority\s+student\b.*\bsubmit.*application\b|\bsubmit.*\bpriority\s+student\s+application\b/,
  // Asking to submit ANY application (not accepted yet)
  /\bsubmit\s+(your\s+)?(the\s+)?application\b/,
  // "once you are accepted" - means they're not accepted yet
  /\bonce\s+you\s+(are|have\s+been)\s+accepted\b/,
  // "top candidate" spam asking to apply/start application
  /\btop\s+candidate\b.*\b(apply|start.*application|submit.*application)\b/,
  /\binvite\s+you\s+to\s+apply\b/,
  // Application deadline marketing (Early Decision/Action, priority deadlines, etc.)
  /\b(early\s+(decision|action)|priority)\b.*\b(deadline|apply|application)\b.*\b(approaching|by|extended)\b/,
  /\bapply\s+(by|now|right\s+away|today)\b|\bdeadline.*\b(december|january|february|march)\b/,
  // "Panther Priority Application" and similar marketing
  /\bpanther\s+priority\s+application\b|\bpriority\s+application\b/,
  // "deadline details" marketing spam
  /\bdeadline\s+details\b|\byour\s+deadline\b/,
  // "application deadline will be" (future deadline announcements)
  /\bapplication\s+deadline\s+will\s+be\b/,
  // "flip these pages" and similar exploratory marketing
  /\bflip\s+these\s+pages\b|\blearn\s+more\s+about\s+being\b/,
  // "want to make sure you're ready" deadline pressure
  /\b(want|wanted)\s+to\s+make\s+sure\s+you'?re\s+ready\b/,
  // "you have until midnight/tonight to apply" deadline pressure
  /\byou\s+have\s+until\b.*\b(midnight|tonight|today)\b.*\bto\s+apply\b/,
  // "I'm giving you until midnight/tonight" deadline extensions
  /\bgiving\s+you\s+until\b.*\b(midnight|tonight|today)\b.*\bto\s+apply\b/,
  // "apply by the [month] deadline" marketing
  /\bapply\s+by\s+the\s+(january|february|march|april|may|june|july|august|september|october|november|december)\b.*\bdeadline\b/,
  // Fee waiver deadline pressure
  /\bfee\s+waiver\b.*\b(ends|today|tonight|last\s+day)\b|\b(today|tonight).*\blast\s+day\s+for.*\bfee\s+waiver\b/,
  // "complete your application" with perks/benefits (priority status, no essay, etc.)
  /\bcomplete\s+your\s+application\b.*\b(priority|perks|benefits|no\s+application\s+fee|no\s+essay)\b/,
  // "apply for free" or "waiving your fee" marketing
  /\bapply\s+for\s+free\b|\bwaiving\s+your.*\bfee\b|\bwe'?re\s+waiving\s+your\b/,
  // "apply and enroll" combined deadline marketing
  /\bapply\s+and\s+enroll\b.*\bfree\b/,
  // "haven't received your application" or "we haven't received" (outreach)
  /\bhaven'?t\s+received\s+your\s+application\b|\bwe\s+haven'?t\s+received\b/,
];

/** "reserve your spot" is excluded only when event vocabulary is also present. */
const RESERVE_SPOT = /\breserve\s+your\s+spot\b/;
const EVENT_VOCABULARY = /\b(virtual|webinar|event|program|zoom|session)\b/;

/** "we're interested in you" is excluded only when the text also says "apply". */
const INTERESTED_IN_YOU = /\bwe'?re\s+interested\s+in\s+you\b/;
const APPLY_WORD = /\bapply\b/;

/** Course registration / dual-enrollment indicators. */
const DUAL_ENROLLMENT_PATTERNS: readonly RegExp[] = [
  /\bdual\s+enrollment\b/,
  /\bcourse\s+(registration|deletion|added|dropped)\b/,
  /\bspring\s+\d{4}\s+(course|on[- ]campus)\b/,
  /\bhow\s+to\s+register\b.*\b(course|class)/,
  /\bcedarville\s+university\).*\b(course|registration)\b/,
];

/** Marketing phrasing that cancels a dual-enrollment match. */
const DUAL_ENROLLMENT_EXCLUSIONS: readonly RegExp[] = [
  /\blearn\s+more\s+about\b|\binterested\s+in\b|\bconsider\s+joining\b/,
  // General "explore your academic interests" marketing
  /\bfreedom\s+to\s+explore\b.*\bacademic\s+interests\b|\bmajors,?\s+minors\s+and\s+more\b/,
];

/** Subject line inviting the student to apply for a scholarship. */
const SCHOLARSHIP_APPLY_SUBJECT = /\bapply\s+for\s+(the\s+)?.*\bscholarship\b/;

/** Markers of a specific scholarship (President's, Ministry, named scholarships). */
const NAMED_SCHOLARSHIP = /\bpresident'?s\b|\bministry\b|\bimpact\b/;

/** Any mention of the word "scholarship". */
const SCHOLARSHIP_MENTION = /\bscholarship\b/;

/** Scholarship is mentioned but has not actually been awarded. */
const SCHOLARSHIP_NOT_AWARDED_PATTERNS: readonly RegExp[] = [
  /\bscholarship\b.*\b(held|reserved)\s+for\s+you\b/,
  /\b(held|reserved)\s+for\s+you\b/,
  /\bconsider(ed|ation)\b.*\bscholarship\b/,
  /\bscholarship\b.*\bconsider(ed|ation)\b/,
  /\beligible\s+for\b.*\bscholarship\b/,
  /\bscholarship\b.*\beligible\b/,
  /\bmay\s+qualify\b.*\bscholarship\b/,
  /\bguaranteed\s+admission\b/,
  /\bpriority\s+consideration\b/,
  // Scholarship events/days (attend to get scholarship = not awarded)
  /\b(attend|register\s+for).*\bscholarship\s+(day|event|award\s+event)\b/,
  /\bscholarship\s+(day|event).*\b(attend|register)\b/,
  /\bsoar\s+(scholarship\s+award\s+)?event\b/,
  // Direct admission/scholarship forms to submit (not awarded yet)
  /\bdirect\s+admission\b.*\bscholarship\s+form\b/,
  /\bscholarship\s+form\b.*\bdirect\s+admission\b/,
  /\bsubmit\s+(your\s+)?.*\bscholarship\s+form\b/,
  // "Want to make sure you're ready" deadline pressure with scholarship mention
  /\b(want|wanted)\s+to\s+make\s+sure\s+you'?re\s+ready\b.*\bscholarship\b/,
  // Scholarship estimate (not actual award)
  /\bscholarship\s+estimate\b/,
  /\byou\s+have\s+not\s+(yet\s+)?seen\s+your.*\bscholarship\b/,
  /\bacademic\s+scholarship\s+estimate\b/,
  // "You've earned a scholarship" but says "pre-admission" (not actually awarded)
  /\bpre[- ]admission\b/,
  // Scholarship deadline approaching (apply for it, not awarded)
  /\bscholarship\s+deadline\s+(approaching|soon)\b/,
  // "upon admission" means not awarded yet
  /\bscholarship\b.*\bupon\s+admission\b|\bupon\s+admission\b.*\bscholarship\b/,
];

/** Scholarship has actually been awarded. */
const SCHOLARSHIP_AWARDED_PATTERNS: readonly RegExp[] = [
  /\bcongratulations\b.*\bscholarship\b/,
  /\byou\s+(have|received|are\s+awarded|won)\b.*\bscholarship\b/,
  /\bwe\s+(are\s+)?(pleased\s+to\s+)?award(ing)?\b.*\bscholarship\b/,
  /\bscholarship\s+(offer|award)\b/,
  /\breceived\s+a\s+scholarship\b/,
];

/** Financial aid offer / award letter is ready to review. */
const AID_READY_PATTERNS: readonly RegExp[] = [
  /\bfinancial\s+aid\b.*\boffer\b.*\b(ready|available)\b/,
  /\b(ready|available)\b.*\bfinancial\s+aid\b.*\boffer\b/,
  /\baward\s+letter\b.*\b(ready|available|posted|view)\b/,
  /\b(view|review)\s+(your\s+)?award\s+letter\b/,
  /\bfinancial\s+aid\s+package\b.*\b(ready|available|posted)\b/,
  /\byour\s+aid\s+is\s+ready\b/,
];

/** Aid applications and FAFSA reminders, which cancel an "aid ready" match. */
const AID_NOT_READY_PATTERNS: readonly RegExp[] = [
  /\blearn\s+more\s+about\b.*\bfinancial\s+aid\b/,
  /\bapply\b.*\b(for\s+)?financial\s+aid\b/,
  /\bfinancial\s+aid\b.*\bapplication\b/,
  /\bcomplete\s+(your\s+)?fafsa\b/,
  /\bconsidered\s+for\b.*\baid\b/,
  /\bpriority\s+(deadline|consideration)\b.*\bfinancial\s+aid\b/,
];

/** Strong indicators of marketing, newsletters and unsolicited outreach. */
const IRRELEVANT_PATTERNS: readonly RegExp[] = [
  // Newsletter/blog content
  /\bstudent\s+life\s+blog\b/,
  /\b(student\s+life\s+)?blog\s+(post|update)\b/,
  /\bnew\s+student\s+life\s+blog\b/,
  /\bnewsletter\b/,
  /\bweekly\s+(digest|update)\b/,

  // Marketing events
  /\bupcoming\s+events\b/,
  /\bjoin\s+us\s+(for|at|on\s+zoom)\b/,
  /\bopen\s+house\b/,
  /\bvirtual\s+tour\b/,
  /\bcampus\s+(visit|tour|event)\b/,
  /\bmeet\s+(the|our)\s+(students|faculty)\b/,

  // Generic outreach (not applied yet)
  /\bhaven'?t\s+applied.*yet\b/,
  /\bstill\s+time\s+to\s+apply\b/,
  /\bhow\s+is\s+your\s+college\s+search\b/,
  /\bstart\s+(your\s+)?college\s+search\b/,
  /\bexplore\s+(our\s+)?(programs|campus)\b/,

  // Unsolicited outreach patterns
  /\bi\s+hope\s+you\s+have\s+been\s+receiving\s+my\s+emails\b/,
  /\bam\s+i\s+reaching\b/,
  /\byou\s+are\s+on\s+.*\s+(radar|list)\b/,
  /\byou'?re\s+on\s+(our|my)\s+radar\b/,
  /\bi\s+want\s+to\s+make\s+sure\s+you\s+know\b/,
  /\byou'?re\s+invited\s+to\s+submit\b/,
  /\bi'?m\s+eager\s+to\s+consider\s+you\b/,
  /\bsubmit\s+your\s+.*\s+application\b/,
  /\bpriority\s+status\b.*\bsubmit.*application\b/,
  /\btop\s+candidate\b.*\binvite\s+you\s+to\s+apply\b/,
  /\binvite\s+you\s+to\s+apply\b/,

  // Priority deadline extensions (spam)
  /\bextended.*\bpriority\s+deadline\b/,
  /\bpriority\s+deadline.*\bextended\b/,

  // Summer camps/programs
  /\bsummer\s+(academy|camp|program)\b/,
  /\bsave\s+the\s+date\b/,

  // Ugly sweaters and other fluff
  /\bugly\s+sweater\b/,
  /\bit'?s\s+.+\s+season\b/,

  // FAFSA/scholarship info sessions (not actual aid offers)
  /\bjoin\s+us.*\b(virtual\s+program|zoom)\b.*\b(scholarship|financial\s+aid)\b/,
  /\blearn\s+more\b.*\b(scholarship|financial\s+aid)\s+(opportunities|options)\b/,
  /\b(scholarship|financial\s+aid)\s+(opportunities|options)\b.*\blearn\s+more\b/,
];

/** Outreach telling the student they have not applied. */
const NOT_APPLIED = /\bhaven'?t\s+applied\b/;

/** Returns true when at least one pattern in `patterns` matches `text`. */
function matchesAny(patterns: readonly RegExp[], text: string): boolean {
  return patterns.some((pattern) => pattern.test(text));
}

/**
 * Lower-cases one email field for matching.
 *
 * Anything that is not a string (missing, null, number, object, ...) is
 * treated as empty text instead of throwing on `.toLowerCase()`, so one
 * malformed field cannot abort classification.
 */
function normalizeField(value: unknown): string {
  if (typeof value === "string") return value.toLowerCase();
  if (value instanceof String) return value.toLowerCase();
  return "";
}

/**
 * Rule-based email relevance classifier.
 *
 * Rule groups run in a fixed order and the first group that produces a result
 * wins. A group returns `null` to pass the email on to the next group.
 */
export class EmailClassifier {
  /**
   * Classifies one email.
   *
   * @param email - Email whose `subject`, `body` and `from` fields are read.
   *   Non-string fields are treated as empty text.
   * @returns The result of the first matching rule group, an `invalid_input`
   *   result when `email` is not an object, or a low-confidence
   *   `default_not_relevant` result when nothing matches.
   */
  classify(email: EmailInput): ClassificationResult {
    // Defensive checks for Apps Script environment
    if (!email || typeof email !== "object") {
      return {
        pertains: false,
        reason: "Invalid email object",
        confidence: 0.0,
        matched_rules: ["invalid_input"],
      };
    }

    const subject = normalizeField(email.subject);
    const body = normalizeField(email.body);
    const from = normalizeField(email.from);
    const combined = `${subject} ${body}`;

    return (
      // CRITICAL RULES: Always relevant (security, passwords, account issues)
      this.checkSecurity(subject, body, combined) ??
      // RESPONSE TO STUDENT OUTREACH: Replies to emails the student sent
      this.checkStudentOutreach(subject, body, combined) ??
      // RESPONSE TO STUDENT ACTION: Application confirmations, enrollment confirmations
      this.checkStudentAction(subject, body, combined) ??
      // ACCEPTED STUDENT: Portal access, deposit reminders, accepted student info
      this.checkAccepted(subject, body, combined) ??
      // DUAL ENROLLMENT: Course registration, schedules, specific to enrolled students
      this.checkDualEnrollment(subject, body, combined, from) ??
      // SCHOLARSHIP AWARDED: Actually awarded/received (not eligible/apply/consideration)
      this.checkScholarship(subject, body, combined) ??
      // FINANCIAL AID READY: Explicit offers ready to review (not applications)
      this.checkFinancialAid(subject, body, combined) ??
      // DEFINITELY NOT RELEVANT: Marketing, newsletters, unsolicited outreach
      this.checkIrrelevant(subject, body, combined, from) ?? {
        // DEFAULT: If uncertain, mark as not relevant (fail-safe for spam)
        pertains: false,
        reason: "No clear relevance indicators found",
        confidence: 0.3,
        matched_rules: ["default_not_relevant"],
      }
    );
  }

  /**
   * Detects a reply to an email the student sent: the subject must start with
   * "re:" and the text must contain a "thank you for reaching out"-style phrase.
   *
   * @param subject - Lower-cased subject line.
   * @param body - Lower-cased body (not read directly; `combined` is used).
   * @param combined - Lower-cased subject and body joined by a space.
   * @returns A relevant result, or `null` when the rule does not apply.
   */
  private checkStudentOutreach(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!REPLY_SUBJECT.test(subject.trim())) return null;
    if (!matchesAny(OUTREACH_RESPONSE_PATTERNS, combined)) return null;

    return {
      pertains: true,
      reason: "Reply to student's outreach email",
      confidence: 0.95,
      matched_rules: ["student_outreach_reply"],
    };
  }

  /**
   * Detects security, password and account alerts.
   *
   * @param subject - Lower-cased subject line (not read directly).
   * @param body - Lower-cased body (not read directly).
   * @param combined - Lower-cased subject and body joined by a space.
   * @returns A relevant result with confidence 1.0, or `null` when no alert
   *   phrase matches or the text also talks about tuition savings.
   */
  private checkSecurity(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!matchesAny(SECURITY_PATTERNS, combined)) return null;

    // Make sure it's not just marketing mentioning "saving ... on tuition".
    if (SECURITY_MARKETING_EXCLUSION.test(combined)) return null;

    return {
      pertains: true,
      reason: "Security/password alert - always important",
      confidence: 1.0,
      matched_rules: ["security_alert"],
    };
  }

  /**
   * Detects confirmations of something the student did (application or
   * enrollment), rejecting "how to apply" marketing and "we haven't received
   * your application" outreach.
   *
   * @param subject - Lower-cased subject line (not read directly).
   * @param body - Lower-cased body (not read directly).
   * @param combined - Lower-cased subject and body joined by a space.
   * @returns A relevant result, or `null` when the rule does not apply.
   */
  private checkStudentAction(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!matchesAny(STUDENT_ACTION_PATTERNS, combined)) return null;
    if (matchesAny(STUDENT_ACTION_EXCLUSIONS, combined)) return null;

    return {
      pertains: true,
      reason: "Confirmation of student action (application/enrollment)",
      confidence: 0.95,
      matched_rules: ["student_action_confirmation"],
    };
  }

  /**
   * Detects accepted-student mail (portal, deposit, admission decision) and
   * filters out the many marketing variants that reuse the same vocabulary.
   *
   * @param subject - Lower-cased subject line (not read directly).
   * @param body - Lower-cased body (not read directly).
   * @param combined - Lower-cased subject and body joined by a space.
   * @returns A relevant result, or `null` when no accepted-student phrase
   *   matches or any exclusion matches.
   */
  private checkAccepted(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!matchesAny(ACCEPTED_PATTERNS, combined)) return null;
    if (matchesAny(ACCEPTED_EXCLUSIONS, combined)) return null;

    // "reserve your spot" for events/webinars (not enrollment)
    if (RESERVE_SPOT.test(combined) && EVENT_VOCABULARY.test(combined)) return null;

    // "we're interested in you" with apply language
    if (INTERESTED_IN_YOU.test(combined) && APPLY_WORD.test(combined)) return null;

    return {
      pertains: true,
      reason: "Accepted student portal/deposit information",
      confidence: 0.95,
      matched_rules: ["accepted_student"],
    };
  }

  /**
   * Detects dual-enrollment / course-registration mail about actual courses,
   * rejecting text that reads as marketing.
   *
   * @param subject - Lower-cased subject line (not read directly).
   * @param body - Lower-cased body (not read directly).
   * @param combined - Lower-cased subject and body joined by a space.
   * @param from - Lower-cased sender; currently unused by this rule.
   * @returns A relevant result, or `null` when the rule does not apply.
   */
  private checkDualEnrollment(subject: string, body: string, combined: string, from: string): ClassificationResult | null {
    if (!matchesAny(DUAL_ENROLLMENT_PATTERNS, combined)) return null;
    if (matchesAny(DUAL_ENROLLMENT_EXCLUSIONS, combined)) return null;

    return {
      pertains: true,
      reason: "Dual enrollment course information",
      confidence: 0.9,
      matched_rules: ["dual_enrollment"],
    };
  }

  /**
   * Classifies scholarship mail in three ordered steps:
   * 1. a subject inviting the student to apply for a specific scholarship is relevant;
   * 2. a scholarship that is only held/eligible/estimated is explicitly NOT relevant;
   * 3. a scholarship that was actually awarded is relevant.
   *
   * @param subject - Lower-cased subject line.
   * @param body - Lower-cased body (not read directly).
   * @param combined - Lower-cased subject and body joined by a space.
   * @returns A relevant or not-relevant result, or `null` when no step matches.
   */
  private checkScholarship(subject: string, body: string, combined: string): ClassificationResult | null {
    // Specific scholarship application opportunities come FIRST; this is
    // different from general "apply for scholarships" marketing.
    if (SCHOLARSHIP_APPLY_SUBJECT.test(subject) && NAMED_SCHOLARSHIP.test(combined)) {
      return {
        pertains: true,
        reason: "Scholarship application opportunity for accepted student",
        confidence: 0.75,
        matched_rules: ["scholarship_application_opportunity"],
      };
    }

    // Negative indicators are checked before the awarded patterns, and only
    // when the text mentions a scholarship at all.
    if (SCHOLARSHIP_MENTION.test(combined) && matchesAny(SCHOLARSHIP_NOT_AWARDED_PATTERNS, combined)) {
      return {
        pertains: false,
        reason: "Scholarship mentioned but not actually awarded (held/eligible/apply)",
        confidence: 0.9,
        matched_rules: ["scholarship_not_awarded"],
      };
    }

    // Positive indicators: actually awarded
    if (matchesAny(SCHOLARSHIP_AWARDED_PATTERNS, combined)) {
      return {
        pertains: true,
        reason: "Scholarship actually awarded",
        confidence: 0.95,
        matched_rules: ["scholarship_awarded"],
      };
    }

    return null;
  }

  /**
   * Detects a financial aid offer or award letter that is ready to review,
   * rejecting aid-application and FAFSA reminder mail.
   *
   * @param subject - Lower-cased subject line (not read directly).
   * @param body - Lower-cased body (not read directly).
   * @param combined - Lower-cased subject and body joined by a space.
   * @returns A relevant result, or `null` when the rule does not apply.
   */
  private checkFinancialAid(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!matchesAny(AID_READY_PATTERNS, combined)) return null;

    // Just application info
    if (matchesAny(AID_NOT_READY_PATTERNS, combined)) return null;

    return {
      pertains: true,
      reason: "Financial aid offer ready to review",
      confidence: 0.95,
      matched_rules: ["financial_aid_ready"],
    };
  }

  /**
   * Detects marketing, newsletters and unsolicited outreach.
   *
   * @param subject - Lower-cased subject line (not read directly).
   * @param body - Lower-cased body (not read directly).
   * @param combined - Lower-cased subject and body joined by a space.
   * @param from - Lower-cased sender; currently unused by this rule.
   * @returns A not-relevant result, or `null` when nothing matches.
   */
  private checkIrrelevant(subject: string, body: string, combined: string, from: string): ClassificationResult | null {
    if (matchesAny(IRRELEVANT_PATTERNS, combined)) {
      return {
        pertains: false,
        reason: "Marketing/newsletter/unsolicited outreach",
        confidence: 0.95,
        matched_rules: ["irrelevant_marketing"],
      };
    }

    // Haven't applied yet = not relevant
    if (NOT_APPLIED.test(combined)) {
      return {
        pertains: false,
        reason: "Unsolicited email where student has not applied",
        confidence: 0.95,
        matched_rules: ["not_applied"],
      };
    }

    return null;
  }
}

/**
 * Convenience function: classifies `email` with a fresh {@link EmailClassifier}.
 *
 * @param email - Email to classify.
 * @returns The classification result.
 */
export function classifyEmail(email: EmailInput): ClassificationResult {
  const classifier = new EmailClassifier();
  return classifier.classify(email);
}