this repo has no description
// Rule-based email classifier; the rules were derived from labeled data.
2
3import type { EmailInput, ClassificationResult } from "./types.ts";
4
/**
 * Rule-based classifier that decides whether an email pertains to the student.
 *
 * `classify` lower-cases the subject and body, joins them into one string and
 * runs an ordered list of rule groups against it. The first group that yields
 * a verdict wins; when none does, the email is reported as not relevant with
 * low confidence.
 *
 * Every rule table is a list of stateless regular expressions (none carries
 * the `g` or `y` flag), so the tables are built once as static members and
 * shared by all calls instead of being re-created on each classification.
 *
 * NOTE(review): no pattern uses the `s` flag, so `.*` never crosses a line
 * break. Rules that join two phrases with `.*` therefore only fire when both
 * phrases are on the same line of the text — confirm this is what is wanted
 * for multi-line bodies.
 */
export class EmailClassifier {
  // ---------------------------------------------------------------------
  // Student outreach replies
  // ---------------------------------------------------------------------

  /** Subject prefix that marks a reply. */
  private static readonly REPLY_PREFIX = /^re:/i;

  /** Phrases that acknowledge a message the student sent. */
  private static readonly OUTREACH_REPLY_PATTERNS: readonly RegExp[] = [
    /\bthank\s+you\s+for\s+reaching\s+out\b/,
    /\bthanks\s+for\s+reaching\s+out\b/,
    /\bthank\s+you\s+for\s+(your\s+)?(email|inquiry|question|interest)\b/,
    /\bin\s+response\s+to\s+your\s+(email|inquiry|question)\b/,
  ];

  // ---------------------------------------------------------------------
  // Security
  // ---------------------------------------------------------------------

  /** Password / account-security wording. */
  private static readonly SECURITY_PATTERNS: readonly RegExp[] = [
    /\bpassword\s+(reset|change|update|expired)\b/,
    /\breset\s+your\s+password\b/,
    /\baccount\s+security\b/,
    /\bsecurity\s+alert\b/,
    /\bunusual\s+(sign[- ]?in|activity)\b/,
    /\bverification\s+code\b/,
    /\b(2fa|mfa|two[- ]factor)\b/,
    /\bcompromised\s+account\b/,
    /\baccount\s+(locked|suspended)\b/,
    /\bsuspicious\s+activity\b/,
  ];

  /**
   * Tuition-savings marketing that must not be treated as a security alert.
   * The second alternative accepts "savings" as well as "saving"; with a bare
   * `\bsaving\b` the phrase "tuition savings" could never match.
   */
  private static readonly TUITION_SAVINGS_MARKETING =
    /\bsaving.*\bon\s+tuition\b|\btuition.*\bsavings?\b/;

  // ---------------------------------------------------------------------
  // Confirmations of something the student did
  // ---------------------------------------------------------------------

  /** Application / enrollment confirmation wording. */
  private static readonly ACTION_PATTERNS: readonly RegExp[] = [
    /\bapplication\s+(received|complete|submitted|confirmation)\b/,
    /\breceived\s+your\s+application\b/,
    /\bthank\s+you\s+for\s+(applying|submitting)\b/,
    /\benrollment\s+confirmation\b/,
    /\bconfirmation\s+(of|for)\s+(your\s+)?(application|enrollment)\b/,
    /\byour\s+application\s+(has\s+been|is)\s+(received|complete)\b/,
  ];

  /** Wording that shows a "confirmation" match is really outreach/marketing. */
  private static readonly ACTION_EXCLUSIONS: readonly RegExp[] = [
    // Marketing about "how to apply"
    /\bhow\s+to\s+apply\b|\bapply\s+now\b|\bstart\s+(your\s+)?application\b/,
    // "haven't received your application" / "we haven't received" (outreach)
    /\bhaven'?t\s+received\s+your\s+application\b|\bwe\s+haven'?t\s+received\b/,
    // "before you apply" (outreach)
    /\bbefore\s+you\s+apply\b/,
  ];

  // ---------------------------------------------------------------------
  // Accepted-student mail
  // ---------------------------------------------------------------------

  /** Portal access, deposit and acceptance wording. */
  private static readonly ACCEPTED_PATTERNS: readonly RegExp[] = [
    /\baccepted\s+(student\s+)?portal\b/,
    /\byour\s+(personalized\s+)?accepted\s+portal\b/,
    /\bdeposit\s+(today|now|by|to\s+reserve)\b/,
    /\breserve\s+your\s+(place|spot)\b/,
    /\bcongratulations.*\baccepted\b/,
    /\byou\s+(have\s+been|are|were)\s+accepted\b/,
    /\badmission\s+(decision|offer)\b/,
    /\benroll(ment)?\s+deposit\b/,
  ];

  /** Any single match here cancels an accepted-student verdict. */
  private static readonly ACCEPTED_EXCLUSIONS: readonly RegExp[] = [
    // Pre-admission and marketing
    /\bacceptance\s+rate\b|\bhigh\s+acceptance\b|\bpre[- ]admit(ted)?\b|\bautomatic\s+admission\b/,
    // "direct admit/admission" marketing that asks to complete a profile
    /\bdirect\s+(admit(ted)?|admission)\b.*\b(complete|submit).*\bprofile\b|\b(complete|submit).*\bprofile\b.*\bdirect\s+(admit(ted)?|admission)\b/,
    // Marketing about future admission decisions
    /\byou\s+will\s+(also\s+)?receive\s+(an?\s+)?(accelerated\s+)?admission\s+decision\b/,
    /\breceive\s+an\s+admission\s+decision\s+within\b/,
    // "Priority Student" spam that asks to submit an application
    /\bpriority\s+student\b.*\bsubmit.*application\b|\bsubmit.*\bpriority\s+student\s+application\b/,
    // Asking to submit ANY application (not accepted yet)
    /\bsubmit\s+(your\s+)?(the\s+)?application\b/,
    // "once you are accepted" means they are not accepted yet
    /\bonce\s+you\s+(are|have\s+been)\s+accepted\b/,
    // "top candidate" spam asking to apply/start an application
    /\btop\s+candidate\b.*\b(apply|start.*application|submit.*application)\b/,
    /\binvite\s+you\s+to\s+apply\b/,
    // Application deadline marketing (Early Decision/Action, priority deadlines, ...)
    /\b(early\s+(decision|action)|priority)\b.*\b(deadline|apply|application)\b.*\b(approaching|by|extended)\b/,
    /\bapply\s+(by|now|right\s+away|today)\b|\bdeadline.*\b(december|january|february|march)\b/,
    // "Panther Priority Application" and similar marketing
    /\bpanther\s+priority\s+application\b|\bpriority\s+application\b/,
    // "deadline details" marketing spam
    /\bdeadline\s+details\b|\byour\s+deadline\b/,
    // "application deadline will be" (future deadline announcements)
    /\bapplication\s+deadline\s+will\s+be\b/,
    // "flip these pages" and similar exploratory marketing
    /\bflip\s+these\s+pages\b|\blearn\s+more\s+about\s+being\b/,
    // "want to make sure you're ready" deadline pressure
    /\b(want|wanted)\s+to\s+make\s+sure\s+you'?re\s+ready\b/,
    // "you have until midnight/tonight to apply" deadline pressure
    /\byou\s+have\s+until\b.*\b(midnight|tonight|today)\b.*\bto\s+apply\b/,
    // "I'm giving you until midnight/tonight" deadline extensions
    /\bgiving\s+you\s+until\b.*\b(midnight|tonight|today)\b.*\bto\s+apply\b/,
    // "apply by the [month] deadline" marketing
    /\bapply\s+by\s+the\s+(january|february|march|april|may|june|july|august|september|october|november|december)\b.*\bdeadline\b/,
    // Fee waiver deadline pressure
    /\bfee\s+waiver\b.*\b(ends|today|tonight|last\s+day)\b|\b(today|tonight).*\blast\s+day\s+for.*\bfee\s+waiver\b/,
    // "complete your application" with perks/benefits
    /\bcomplete\s+your\s+application\b.*\b(priority|perks|benefits|no\s+application\s+fee|no\s+essay)\b/,
    // "apply for free" or "waiving your fee" marketing
    /\bapply\s+for\s+free\b|\bwaiving\s+your.*\bfee\b|\bwe'?re\s+waiving\s+your\b/,
    // "apply and enroll" combined deadline marketing
    /\bapply\s+and\s+enroll\b.*\bfree\b/,
    // "haven't received your application" / "we haven't received" (outreach)
    /\bhaven'?t\s+received\s+your\s+application\b|\bwe\s+haven'?t\s+received\b/,
  ];

  /** Exclusions that apply only when BOTH expressions of a pair match. */
  private static readonly ACCEPTED_PAIRED_EXCLUSIONS: ReadonlyArray<readonly [RegExp, RegExp]> = [
    // "reserve your spot" for events/webinars (not enrollment)
    [/\breserve\s+your\s+spot\b/, /\b(virtual|webinar|event|program|zoom|session)\b/],
    // "we're interested in you" together with apply language
    [/\bwe'?re\s+interested\s+in\s+you\b/, /\bapply\b/],
  ];

  // ---------------------------------------------------------------------
  // Dual enrollment
  // ---------------------------------------------------------------------

  /** Course registration / schedule wording. */
  private static readonly DUAL_ENROLLMENT_PATTERNS: readonly RegExp[] = [
    /\bdual\s+enrollment\b/,
    /\bcourse\s+(registration|deletion|added|dropped)\b/,
    /\bspring\s+\d{4}\s+(course|on[- ]campus)\b/,
    /\bhow\s+to\s+register\b.*\b(course|class)/,
    /\bcedarville\s+university\).*\b(course|registration)\b/,
  ];

  /** Marketing wording that cancels a dual-enrollment verdict. */
  private static readonly DUAL_ENROLLMENT_EXCLUSIONS: readonly RegExp[] = [
    /\blearn\s+more\s+about\b|\binterested\s+in\b|\bconsider\s+joining\b/,
    // General "explore your academic interests" marketing
    /\bfreedom\s+to\s+explore\b.*\bacademic\s+interests\b|\bmajors,?\s+minors\s+and\s+more\b/,
  ];

  // ---------------------------------------------------------------------
  // Scholarships
  // ---------------------------------------------------------------------

  /** Subject line inviting the student to apply for some scholarship. */
  private static readonly SCHOLARSHIP_APPLY_SUBJECT = /\bapply\s+for\s+(the\s+)?.*\bscholarship\b/;

  /** Markers of a specific, named scholarship (President's, Ministry, Impact). */
  private static readonly NAMED_SCHOLARSHIP = /\bpresident'?s\b|\bministry\b|\bimpact\b/;

  /** Plain mention of the word "scholarship". */
  private static readonly SCHOLARSHIP_MENTION = /\bscholarship\b/;

  /** Wording that shows a scholarship is mentioned but not actually awarded. */
  private static readonly SCHOLARSHIP_NOT_AWARDED_PATTERNS: readonly RegExp[] = [
    /\bscholarship\b.*\b(held|reserved)\s+for\s+you\b/,
    /\b(held|reserved)\s+for\s+you\b/,
    /\bconsider(ed|ation)\b.*\bscholarship\b/,
    /\bscholarship\b.*\bconsider(ed|ation)\b/,
    /\beligible\s+for\b.*\bscholarship\b/,
    /\bscholarship\b.*\beligible\b/,
    /\bmay\s+qualify\b.*\bscholarship\b/,
    /\bguaranteed\s+admission\b/,
    /\bpriority\s+consideration\b/,
    // Scholarship events/days (attend to get scholarship = not awarded)
    /\b(attend|register\s+for).*\bscholarship\s+(day|event|award\s+event)\b/,
    /\bscholarship\s+(day|event).*\b(attend|register)\b/,
    /\bsoar\s+(scholarship\s+award\s+)?event\b/,
    // Direct admission/scholarship forms to submit (not awarded yet)
    /\bdirect\s+admission\b.*\bscholarship\s+form\b/,
    /\bscholarship\s+form\b.*\bdirect\s+admission\b/,
    /\bsubmit\s+(your\s+)?.*\bscholarship\s+form\b/,
    // "Want to make sure you're ready" deadline pressure with scholarship mention
    /\b(want|wanted)\s+to\s+make\s+sure\s+you'?re\s+ready\b.*\bscholarship\b/,
    // Scholarship estimate (not actual award)
    /\bscholarship\s+estimate\b/,
    /\byou\s+have\s+not\s+(yet\s+)?seen\s+your.*\bscholarship\b/,
    /\bacademic\s+scholarship\s+estimate\b/,
    // "You've earned a scholarship" but says "pre-admission" (not actually awarded)
    /\bpre[- ]admission\b/,
    // Scholarship deadline approaching (apply for it, not awarded)
    /\bscholarship\s+deadline\s+(approaching|soon)\b/,
    // "upon admission" means not awarded yet
    /\bscholarship\b.*\bupon\s+admission\b|\bupon\s+admission\b.*\bscholarship\b/,
  ];

  /** Wording that shows a scholarship was actually awarded. */
  private static readonly SCHOLARSHIP_AWARDED_PATTERNS: readonly RegExp[] = [
    /\bcongratulations\b.*\bscholarship\b/,
    /\byou\s+(have|received|are\s+awarded|won)\b.*\bscholarship\b/,
    /\bwe\s+(are\s+)?(pleased\s+to\s+)?award(ing)?\b.*\bscholarship\b/,
    /\bscholarship\s+(offer|award)\b/,
    /\breceived\s+a\s+scholarship\b/,
  ];

  // ---------------------------------------------------------------------
  // Financial aid
  // ---------------------------------------------------------------------

  /** An aid offer / award letter is ready to view. */
  private static readonly AID_READY_PATTERNS: readonly RegExp[] = [
    /\bfinancial\s+aid\b.*\boffer\b.*\b(ready|available)\b/,
    /\b(ready|available)\b.*\bfinancial\s+aid\b.*\boffer\b/,
    /\baward\s+letter\b.*\b(ready|available|posted|view)\b/,
    /\b(view|review)\s+(your\s+)?award\s+letter\b/,
    /\bfinancial\s+aid\s+package\b.*\b(ready|available|posted)\b/,
    /\byour\s+aid\s+is\s+ready\b/,
  ];

  /** Aid applications and FAFSA reminders, which are not offers. */
  private static readonly AID_NOT_READY_PATTERNS: readonly RegExp[] = [
    /\blearn\s+more\s+about\b.*\bfinancial\s+aid\b/,
    /\bapply\b.*\b(for\s+)?financial\s+aid\b/,
    /\bfinancial\s+aid\b.*\bapplication\b/,
    /\bcomplete\s+(your\s+)?fafsa\b/,
    /\bconsidered\s+for\b.*\baid\b/,
    /\bpriority\s+(deadline|consideration)\b.*\bfinancial\s+aid\b/,
  ];

  // ---------------------------------------------------------------------
  // Marketing / newsletters / unsolicited outreach
  // ---------------------------------------------------------------------

  /** Strong indicators of marketing or spam. */
  private static readonly IRRELEVANT_PATTERNS: readonly RegExp[] = [
    // Newsletter/blog content
    /\bstudent\s+life\s+blog\b/,
    /\b(student\s+life\s+)?blog\s+(post|update)\b/,
    /\bnew\s+student\s+life\s+blog\b/,
    /\bnewsletter\b/,
    /\bweekly\s+(digest|update)\b/,

    // Marketing events
    /\bupcoming\s+events\b/,
    /\bjoin\s+us\s+(for|at|on\s+zoom)\b/,
    /\bopen\s+house\b/,
    /\bvirtual\s+tour\b/,
    /\bcampus\s+(visit|tour|event)\b/,
    /\bmeet\s+(the|our)\s+(students|faculty)\b/,

    // Generic outreach (not applied yet)
    /\bhaven'?t\s+applied.*yet\b/,
    /\bstill\s+time\s+to\s+apply\b/,
    /\bhow\s+is\s+your\s+college\s+search\b/,
    /\bstart\s+(your\s+)?college\s+search\b/,
    /\bexplore\s+(our\s+)?(programs|campus)\b/,

    // Unsolicited outreach patterns
    /\bi\s+hope\s+you\s+have\s+been\s+receiving\s+my\s+emails\b/,
    /\bam\s+i\s+reaching\b/,
    /\byou\s+are\s+on\s+.*\s+(radar|list)\b/,
    /\byou'?re\s+on\s+(our|my)\s+radar\b/,
    /\bi\s+want\s+to\s+make\s+sure\s+you\s+know\b/,
    /\byou'?re\s+invited\s+to\s+submit\b/,
    /\bi'?m\s+eager\s+to\s+consider\s+you\b/,
    /\bsubmit\s+your\s+.*\s+application\b/,
    /\bpriority\s+status\b.*\bsubmit.*application\b/,
    /\btop\s+candidate\b.*\binvite\s+you\s+to\s+apply\b/,
    /\binvite\s+you\s+to\s+apply\b/,

    // Priority deadline extensions (spam)
    /\bextended.*\bpriority\s+deadline\b/,
    /\bpriority\s+deadline.*\bextended\b/,

    // Summer camps/programs
    /\bsummer\s+(academy|camp|program)\b/,
    /\bsave\s+the\s+date\b/,

    // Ugly sweaters and other fluff
    /\bugly\s+sweater\b/,
    /\bit'?s\s+.+\s+season\b/,

    // FAFSA/scholarship info sessions (not actual aid offers)
    /\bjoin\s+us.*\b(virtual\s+program|zoom)\b.*\b(scholarship|financial\s+aid)\b/,
    /\blearn\s+more\b.*\b(scholarship|financial\s+aid)\s+(opportunities|options)\b/,
    /\b(scholarship|financial\s+aid)\s+(opportunities|options)\b.*\blearn\s+more\b/,
  ];

  /** The sender says the student has not applied. */
  private static readonly NOT_APPLIED = /\bhaven'?t\s+applied\b/;

  // ---------------------------------------------------------------------
  // Helpers
  // ---------------------------------------------------------------------

  /**
   * Lower-cases one email field. Missing or non-string values become "" so a
   * malformed field cannot make classification throw.
   */
  private static normalize(value: unknown): string {
    return typeof value === "string" ? value.toLowerCase() : "";
  }

  /** True when at least one expression in `patterns` matches `text`. */
  private static matchesAny(patterns: readonly RegExp[], text: string): boolean {
    return patterns.some((pattern) => pattern.test(text));
  }

  /** Builds a result object that records a single matched rule. */
  private static verdict(
    pertains: boolean,
    reason: string,
    confidence: number,
    rule: string
  ): ClassificationResult {
    return { pertains, reason, confidence, matched_rules: [rule] };
  }

  // ---------------------------------------------------------------------
  // Entry point
  // ---------------------------------------------------------------------

  /**
   * Classifies one email.
   *
   * @param email Object whose `subject`, `body` and `from` fields are read.
   *   Missing or non-string fields are treated as empty text.
   * @returns The verdict of the first rule group that matches, an
   *   `invalid_input` verdict when `email` is not an object, or a
   *   low-confidence `default_not_relevant` verdict when nothing matches.
   */
  classify(email: EmailInput): ClassificationResult {
    // Defensive check: callers may hand over null or a primitive.
    if (!email || typeof email !== "object") {
      return EmailClassifier.verdict(false, "Invalid email object", 0.0, "invalid_input");
    }

    const subject = EmailClassifier.normalize(email.subject);
    const body = EmailClassifier.normalize(email.body);
    const from = EmailClassifier.normalize(email.from);
    const combined = `${subject} ${body}`;

    // Order matters: the first group that returns a verdict decides.
    const checks: Array<() => ClassificationResult | null> = [
      // CRITICAL RULES: always relevant (security, passwords, account issues)
      () => this.checkSecurity(subject, body, combined),
      // RESPONSE TO STUDENT OUTREACH: replies to emails the student sent
      () => this.checkStudentOutreach(subject, body, combined),
      // RESPONSE TO STUDENT ACTION: application/enrollment confirmations
      () => this.checkStudentAction(subject, body, combined),
      // ACCEPTED STUDENT: portal access, deposit reminders
      () => this.checkAccepted(subject, body, combined),
      // DUAL ENROLLMENT: course registration, schedules
      () => this.checkDualEnrollment(subject, body, combined, from),
      // SCHOLARSHIP AWARDED: actually awarded (not eligible/apply/consideration)
      () => this.checkScholarship(subject, body, combined),
      // FINANCIAL AID READY: explicit offers ready to review
      () => this.checkFinancialAid(subject, body, combined),
      // DEFINITELY NOT RELEVANT: marketing, newsletters, unsolicited outreach
      () => this.checkIrrelevant(subject, body, combined, from),
    ];

    for (const check of checks) {
      const result = check();
      if (result) return result;
    }

    // DEFAULT: when uncertain, mark as not relevant (fail-safe for spam).
    return EmailClassifier.verdict(
      false,
      "No clear relevance indicators found",
      0.3,
      "default_not_relevant"
    );
  }

  // ---------------------------------------------------------------------
  // Rule groups. Each returns a verdict, or null to let the next group run.
  // ---------------------------------------------------------------------

  /**
   * Detects a reply to a message the student sent: the subject must start
   * with "re:" and the text must contain an acknowledgement phrase.
   */
  private checkStudentOutreach(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!EmailClassifier.REPLY_PREFIX.test(subject.trim())) return null;
    if (!EmailClassifier.matchesAny(EmailClassifier.OUTREACH_REPLY_PATTERNS, combined)) return null;

    return EmailClassifier.verdict(
      true,
      "Reply to student's outreach email",
      0.95,
      "student_outreach_reply"
    );
  }

  /**
   * Detects password/account-security mail. Text that also talks about
   * tuition savings is treated as marketing and yields null.
   */
  private checkSecurity(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.SECURITY_PATTERNS, combined)) return null;

    // Real security alerts won't talk about tuition savings.
    if (EmailClassifier.TUITION_SAVINGS_MARKETING.test(combined)) return null;

    return EmailClassifier.verdict(
      true,
      "Security/password alert - always important",
      1.0,
      "security_alert"
    );
  }

  /**
   * Detects confirmations of an application or enrollment, unless the text
   * also carries "how to apply"-style outreach wording.
   */
  private checkStudentAction(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.ACTION_PATTERNS, combined)) return null;
    if (EmailClassifier.matchesAny(EmailClassifier.ACTION_EXCLUSIONS, combined)) return null;

    return EmailClassifier.verdict(
      true,
      "Confirmation of student action (application/enrollment)",
      0.95,
      "student_action_confirmation"
    );
  }

  /**
   * Detects accepted-student mail (portal, deposit, acceptance). Any
   * exclusion — single pattern or matching pair — cancels the verdict.
   */
  private checkAccepted(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.ACCEPTED_PATTERNS, combined)) return null;
    if (EmailClassifier.matchesAny(EmailClassifier.ACCEPTED_EXCLUSIONS, combined)) return null;

    const pairedHit = EmailClassifier.ACCEPTED_PAIRED_EXCLUSIONS.some(
      ([first, second]) => first.test(combined) && second.test(combined)
    );
    if (pairedHit) return null;

    return EmailClassifier.verdict(
      true,
      "Accepted student portal/deposit information",
      0.95,
      "accepted_student"
    );
  }

  /**
   * Detects dual-enrollment course mail, unless the text reads like
   * marketing ("learn more about", "interested in", ...).
   */
  private checkDualEnrollment(subject: string, body: string, combined: string, from: string): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.DUAL_ENROLLMENT_PATTERNS, combined)) return null;
    if (EmailClassifier.matchesAny(EmailClassifier.DUAL_ENROLLMENT_EXCLUSIONS, combined)) return null;

    return EmailClassifier.verdict(
      true,
      "Dual enrollment course information",
      0.9,
      "dual_enrollment"
    );
  }

  /**
   * Scholarship rules, evaluated in this order:
   * 1. subject invites applying for a named scholarship -> relevant (0.75);
   * 2. scholarship mentioned with not-awarded wording -> not relevant;
   * 3. awarded wording -> relevant;
   * otherwise null.
   */
  private checkScholarship(subject: string, body: string, combined: string): ClassificationResult | null {
    // Specific application opportunities come first; they differ from
    // general "apply for scholarships" marketing.
    if (
      EmailClassifier.SCHOLARSHIP_APPLY_SUBJECT.test(subject) &&
      EmailClassifier.NAMED_SCHOLARSHIP.test(combined)
    ) {
      return EmailClassifier.verdict(
        true,
        "Scholarship application opportunity for accepted student",
        0.75,
        "scholarship_application_opportunity"
      );
    }

    // Negative indicators are checked before the awarded patterns.
    if (
      EmailClassifier.SCHOLARSHIP_MENTION.test(combined) &&
      EmailClassifier.matchesAny(EmailClassifier.SCHOLARSHIP_NOT_AWARDED_PATTERNS, combined)
    ) {
      return EmailClassifier.verdict(
        false,
        "Scholarship mentioned but not actually awarded (held/eligible/apply)",
        0.9,
        "scholarship_not_awarded"
      );
    }

    if (EmailClassifier.matchesAny(EmailClassifier.SCHOLARSHIP_AWARDED_PATTERNS, combined)) {
      return EmailClassifier.verdict(
        true,
        "Scholarship actually awarded",
        0.95,
        "scholarship_awarded"
      );
    }

    return null;
  }

  /**
   * Detects a financial-aid offer that is ready to review. Application or
   * FAFSA wording in the same text cancels the verdict.
   */
  private checkFinancialAid(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.AID_READY_PATTERNS, combined)) return null;
    if (EmailClassifier.matchesAny(EmailClassifier.AID_NOT_READY_PATTERNS, combined)) return null;

    return EmailClassifier.verdict(
      true,
      "Financial aid offer ready to review",
      0.95,
      "financial_aid_ready"
    );
  }

  /**
   * Detects marketing, newsletters and unsolicited outreach, then falls back
   * to a plain "haven't applied" check.
   */
  private checkIrrelevant(subject: string, body: string, combined: string, from: string): ClassificationResult | null {
    if (EmailClassifier.matchesAny(EmailClassifier.IRRELEVANT_PATTERNS, combined)) {
      return EmailClassifier.verdict(
        false,
        "Marketing/newsletter/unsolicited outreach",
        0.95,
        "irrelevant_marketing"
      );
    }

    // Haven't applied yet = not relevant
    if (EmailClassifier.NOT_APPLIED.test(combined)) {
      return EmailClassifier.verdict(
        false,
        "Unsolicited email where student has not applied",
        0.95,
        "not_applied"
      );
    }

    return null;
  }
}
524
/**
 * One-shot helper: classifies `email` with a freshly constructed
 * `EmailClassifier` and returns its result unchanged.
 */
export function classifyEmail(email: EmailInput): ClassificationResult {
  return new EmailClassifier().classify(email);
}