rss email digests over ssh because you're a cool kid herald.dunkirk.sh
go rss rss-reader ssh charm
at main 238 lines 7.7 kB view raw
1package email 2 3import ( 4 "bytes" 5 "embed" 6 "fmt" 7 htmltemplate "html/template" 8 "regexp" 9 "strings" 10 texttemplate "text/template" 11 "time" 12 13 "github.com/microcosm-cc/bluemonday" 14) 15 16//go:embed templates/* 17var templateFS embed.FS 18 19type DigestData struct { 20 ConfigName string 21 TotalItems int 22 FeedGroups []FeedGroup 23} 24 25type FeedGroup struct { 26 FeedName string 27 FeedURL string 28 Items []FeedItem 29} 30 31type FeedItem struct { 32 Title string 33 Link string 34 Content string 35 Published time.Time 36} 37 38// templateFeedItem is used for template rendering with sanitized HTML content 39type templateFeedItem struct { 40 Title string 41 Link string 42 Content string // Original content (unused, kept for compatibility) 43 PlainContent string // HTML-stripped content for text template 44 SanitizedContent htmltemplate.HTML // Sanitized HTML for HTML template 45 Published time.Time 46} 47 48// templateFeedGroup is used for template rendering with sanitized items 49type templateFeedGroup struct { 50 FeedName string 51 FeedURL string 52 Items []templateFeedItem 53} 54 55// emailUnsafeTags are HTML5 semantic tags not supported by most email clients (Gmail, Outlook, etc.) 56var emailUnsafeTags = regexp.MustCompile(`</?(?:article|section|nav|header|footer|aside|main|figure|figcaption|details|summary|mark|time|dialog)(?:\s[^>]*)?>`) 57 58// spanTags matches span tags (used to strip syntax highlighting noise from code blocks) 59var spanTags = regexp.MustCompile(`</?span(?:\s[^>]*)?>`) 60 61// preTagOpen matches opening pre tags to add styling 62var preTagOpen = regexp.MustCompile(`<pre(?:\s[^>]*)?>`) 63 64// codeBlockStyle is inline CSS for code blocks in emails 65const codeBlockStyle = `<pre style="background-color:#f5f5f5;padding:12px;border-radius:4px;overflow-x:auto;font-family:monospace;font-size:13px;line-height:1.4">` 66 67// sanitizeHTML sanitizes HTML content, allowing safe tags while stripping styles and unsafe elements 68func sanitizeHTML(html string) string { 69 sanitized := policy.Sanitize(html) 70 // Strip HTML5 semantic tags that email clients don't support 71 sanitized = emailUnsafeTags.ReplaceAllString(sanitized, "") 72 // Strip span tags (removes syntax highlighting noise from code blocks) 73 sanitized = spanTags.ReplaceAllString(sanitized, "") 74 // Add styling to pre tags for better code block appearance 75 sanitized = preTagOpen.ReplaceAllString(sanitized, codeBlockStyle) 76 return sanitized 77} 78 79// htmlTagRegex matches HTML tags for stripping 80var htmlTagRegex = regexp.MustCompile(`<[^>]*>`) 81 82// preBlockRegex matches pre blocks including content 83var preBlockRegex = regexp.MustCompile(`(?s)<pre[^>]*>(.*?)</pre>`) 84 85// whitespaceCollapse collapses multiple whitespace chars 86var whitespaceCollapse = regexp.MustCompile(`[ \t]+`) 87 88// multipleNewlines collapses 3+ newlines to 2 89var multipleNewlines = regexp.MustCompile(`\n{3,}`) 90 91// decodeEntities decodes common HTML entities 92func decodeEntities(text string) string { 93 text = strings.ReplaceAll(text, "&amp;", "&") 94 text = strings.ReplaceAll(text, "&lt;", "<") 95 text = strings.ReplaceAll(text, "&gt;", ">") 96 text = strings.ReplaceAll(text, "&quot;", "\"") 97 text = strings.ReplaceAll(text, "&#39;", "'") 98 text = strings.ReplaceAll(text, "&nbsp;", " ") 99 return text 100} 101 102// stripHTML removes all HTML tags and decodes entities for plain text output 103func stripHTML(html string) string { 104 // First sanitize to ensure we're working with clean HTML 105 sanitized := policy.Sanitize(html) 106 107 // Extract code blocks and replace with placeholders 108 var codeBlocks []string 109 sanitized = preBlockRegex.ReplaceAllStringFunc(sanitized, func(match string) string { 110 inner := preBlockRegex.FindStringSubmatch(match) 111 if len(inner) < 2 { 112 return match 113 } 114 code := inner[1] 115 // Strip any remaining tags (like spans for syntax highlighting) 116 code = htmlTagRegex.ReplaceAllString(code, "") 117 code = decodeEntities(code) 118 // Indent each line with 4 spaces 119 lines := strings.Split(strings.TrimSpace(code), "\n") 120 for i, line := range lines { 121 lines[i] = " " + line 122 } 123 codeBlocks = append(codeBlocks, strings.Join(lines, "\n")) 124 return fmt.Sprintf("\n\n__CODEBLOCK_%d__\n\n", len(codeBlocks)-1) 125 }) 126 127 // Strip all remaining HTML tags 128 text := htmlTagRegex.ReplaceAllString(sanitized, "") 129 // Decode entities 130 text = decodeEntities(text) 131 // Collapse horizontal whitespace (but preserve newlines for structure) 132 text = whitespaceCollapse.ReplaceAllString(text, " ") 133 // Collapse excessive newlines 134 text = multipleNewlines.ReplaceAllString(text, "\n\n") 135 136 // Restore code blocks 137 for i, block := range codeBlocks { 138 text = strings.ReplaceAll(text, fmt.Sprintf("__CODEBLOCK_%d__", i), block) 139 } 140 141 return strings.TrimSpace(text) 142} 143 144var ( 145 htmlTmpl *htmltemplate.Template 146 textTmpl *texttemplate.Template 147 policy *bluemonday.Policy 148) 149 150func init() { 151 var err error 152 htmlTmpl, err = htmltemplate.ParseFS(templateFS, "templates/digest.html") 153 if err != nil { 154 panic("failed to parse HTML template: " + err.Error()) 155 } 156 textTmpl, err = texttemplate.ParseFS(templateFS, "templates/digest.txt") 157 if err != nil { 158 panic("failed to parse text template: " + err.Error()) 159 } 160 161 // Initialize HTML sanitization policy 162 // UGCPolicy allows safe HTML tags but strips styles and unsafe elements 163 // This prevents XSS attacks while allowing basic formatting 164 policy = bluemonday.UGCPolicy() 165} 166 167func RenderDigest(data *DigestData, inline bool, daysUntilExpiry int, showUrgentBanner, showWarningBanner bool) (html string, text string, err error) { 168 // Convert FeedGroups to templateFeedGroups with sanitized HTML content 169 sanitizedGroups := make([]templateFeedGroup, len(data.FeedGroups)) 170 for i, group := range data.FeedGroups { 171 sanitizedItems := make([]templateFeedItem, len(group.Items)) 172 for j, item := range group.Items { 173 sanitizedItems[j] = templateFeedItem{ 174 Title: item.Title, 175 Link: item.Link, 176 Content: item.Content, 177 PlainContent: stripHTML(item.Content), 178 SanitizedContent: htmltemplate.HTML(sanitizeHTML(item.Content)), // #nosec G203 -- Content is sanitized by bluemonday before conversion 179 Published: item.Published, 180 } 181 } 182 sanitizedGroups[i] = templateFeedGroup{ 183 FeedName: group.FeedName, 184 FeedURL: group.FeedURL, 185 Items: sanitizedItems, 186 } 187 } 188 189 // Prepare template data for HTML template (with sanitized content) 190 htmlTmplData := struct { 191 ConfigName string 192 TotalItems int 193 FeedGroups []templateFeedGroup 194 Inline bool 195 DaysUntilExpiry int 196 ShowUrgentBanner bool 197 ShowWarningBanner bool 198 }{ 199 ConfigName: data.ConfigName, 200 TotalItems: data.TotalItems, 201 FeedGroups: sanitizedGroups, 202 Inline: inline, 203 DaysUntilExpiry: daysUntilExpiry, 204 ShowUrgentBanner: showUrgentBanner, 205 ShowWarningBanner: showWarningBanner, 206 } 207 208 // Prepare template data for text template (with plain text content) 209 textTmplData := struct { 210 ConfigName string 211 TotalItems int 212 FeedGroups []templateFeedGroup 213 Inline bool 214 DaysUntilExpiry int 215 ShowUrgentBanner bool 216 ShowWarningBanner bool 217 }{ 218 ConfigName: data.ConfigName, 219 TotalItems: data.TotalItems, 220 FeedGroups: sanitizedGroups, 221 Inline: inline, 222 DaysUntilExpiry: daysUntilExpiry, 223 ShowUrgentBanner: showUrgentBanner, 224 ShowWarningBanner: showWarningBanner, 225 } 226 227 var htmlBuf, textBuf bytes.Buffer 228 229 if err = htmlTmpl.Execute(&htmlBuf, htmlTmplData); err != nil { 230 return "", "", err 231 } 232 233 if err = textTmpl.Execute(&textBuf, textTmplData); err != nil { 234 return "", "", err 235 } 236 237 return htmlBuf.String(), textBuf.String(), nil 238}