rss email digests over ssh because you're a cool kid
herald.dunkirk.sh
go
rss
rss-reader
ssh
charm
1package email
2
3import (
4 "bytes"
5 "embed"
6 "fmt"
7 htmltemplate "html/template"
8 "regexp"
9 "strings"
10 texttemplate "text/template"
11 "time"
12
13 "github.com/microcosm-cc/bluemonday"
14)
15
// templateFS holds every file under templates/, embedded into the binary at
// build time. init parses templates/digest.html and templates/digest.txt
// from it.
//
//go:embed templates/*
var templateFS embed.FS
18
// DigestData is the input to RenderDigest: the feed groups that make up one
// digest plus the summary values handed to the templates alongside them.
type DigestData struct {
	ConfigName string      // passed to both templates unchanged
	TotalItems int         // passed to both templates unchanged; not recomputed from FeedGroups here
	FeedGroups []FeedGroup // every item's Content is sanitized by RenderDigest before rendering
}
24
// FeedGroup collects the items rendered together under one feed name and URL.
type FeedGroup struct {
	FeedName string     // copied verbatim into the template data
	FeedURL  string     // copied verbatim into the template data
	Items    []FeedItem // converted item by item into template-ready form by RenderDigest
}
30
// FeedItem is a single feed entry as supplied by the caller of RenderDigest.
type FeedItem struct {
	Title     string
	Link      string
	Content   string // treated as untrusted HTML: RenderDigest derives a sanitized HTML form and a plain-text form from it
	Published time.Time
}
37
// templateFeedItem is the per-item value handed to the templates. It mirrors
// FeedItem and adds the two derived renderings of Content that RenderDigest
// computes: a plain-text one and a sanitized-HTML one.
type templateFeedItem struct {
	Title            string
	Link             string
	Content          string            // Original, unprocessed content (unused, kept for compatibility)
	PlainContent     string            // Result of stripHTML(Content); intended for the text template
	SanitizedContent htmltemplate.HTML // Result of sanitizeHTML(Content); typed as HTML so html/template does not escape it again
	Published        time.Time
}
47
// templateFeedGroup is the per-feed value handed to the templates: the same
// name and URL as the source FeedGroup, with its items converted to
// templateFeedItem.
type templateFeedGroup struct {
	FeedName string
	FeedURL  string
	Items    []templateFeedItem
}
54
// Tag patterns applied by sanitizeHTML after the sanitization policy has run.
// Each one matches only the tag itself (opening or closing, with optional
// attributes), so removing a match keeps the text between the tags.
var (
	// emailUnsafeTags matches HTML5 semantic tags that most email clients
	// (Gmail, Outlook, etc.) do not support.
	emailUnsafeTags = regexp.MustCompile(`</?(?:article|section|nav|header|footer|aside|main|figure|figcaption|details|summary|mark|time|dialog)(?:\s[^>]*)?>`)

	// spanTags matches span tags; stripping them removes syntax-highlighting
	// noise from code blocks.
	spanTags = regexp.MustCompile(`</?span(?:\s[^>]*)?>`)

	// preTagOpen matches opening pre tags so they can be swapped for a
	// styled variant.
	preTagOpen = regexp.MustCompile(`<pre(?:\s[^>]*)?>`)
)

// codeBlockStyle is the opening pre tag, carrying inline CSS, that replaces
// every opening pre tag in HTML emails.
const codeBlockStyle = `<pre style="background-color:#f5f5f5;padding:12px;border-radius:4px;overflow-x:auto;font-family:monospace;font-size:13px;line-height:1.4">`
66
67// sanitizeHTML sanitizes HTML content, allowing safe tags while stripping styles and unsafe elements
68func sanitizeHTML(html string) string {
69 sanitized := policy.Sanitize(html)
70 // Strip HTML5 semantic tags that email clients don't support
71 sanitized = emailUnsafeTags.ReplaceAllString(sanitized, "")
72 // Strip span tags (removes syntax highlighting noise from code blocks)
73 sanitized = spanTags.ReplaceAllString(sanitized, "")
74 // Add styling to pre tags for better code block appearance
75 sanitized = preTagOpen.ReplaceAllString(sanitized, codeBlockStyle)
76 return sanitized
77}
78
// Patterns used by stripHTML when reducing sanitized HTML to plain text.
var (
	// htmlTagRegex matches any single HTML tag so it can be removed.
	htmlTagRegex = regexp.MustCompile(`<[^>]*>`)

	// preBlockRegex matches a whole pre block, newlines included, and
	// captures its inner content as group 1.
	preBlockRegex = regexp.MustCompile(`(?s)<pre[^>]*>(.*?)</pre>`)

	// whitespaceCollapse matches runs of spaces and tabs (never newlines).
	whitespaceCollapse = regexp.MustCompile(`[ \t]+`)

	// multipleNewlines matches runs of three or more newlines.
	multipleNewlines = regexp.MustCompile(`\n{3,}`)
)
90
// entityReplacer maps the HTML entities handled by decodeEntities to the
// characters they stand for. A strings.Replacer scans the input once and
// never re-examines text it has already produced, so "&amp;lt;" decodes to
// the literal text "&lt;" rather than being decoded a second time into "<".
var entityReplacer = strings.NewReplacer(
	"&amp;", "&",
	"&lt;", "<",
	"&gt;", ">",
	"&quot;", "\"",
	"&#34;", "\"", // numeric form of &quot;
	"&#39;", "'",
	"&nbsp;", " ",
)

// decodeEntities decodes common HTML entities (&amp;, &lt;, &gt;, &quot;,
// &#34;, &#39; and &nbsp;) in text and returns the result. Every entity is
// decoded exactly once; any other entity, and all other text, is returned
// unchanged. &nbsp; becomes an ordinary space.
func decodeEntities(text string) string {
	return entityReplacer.Replace(text)
}
101
102// stripHTML removes all HTML tags and decodes entities for plain text output
103func stripHTML(html string) string {
104 // First sanitize to ensure we're working with clean HTML
105 sanitized := policy.Sanitize(html)
106
107 // Extract code blocks and replace with placeholders
108 var codeBlocks []string
109 sanitized = preBlockRegex.ReplaceAllStringFunc(sanitized, func(match string) string {
110 inner := preBlockRegex.FindStringSubmatch(match)
111 if len(inner) < 2 {
112 return match
113 }
114 code := inner[1]
115 // Strip any remaining tags (like spans for syntax highlighting)
116 code = htmlTagRegex.ReplaceAllString(code, "")
117 code = decodeEntities(code)
118 // Indent each line with 4 spaces
119 lines := strings.Split(strings.TrimSpace(code), "\n")
120 for i, line := range lines {
121 lines[i] = " " + line
122 }
123 codeBlocks = append(codeBlocks, strings.Join(lines, "\n"))
124 return fmt.Sprintf("\n\n__CODEBLOCK_%d__\n\n", len(codeBlocks)-1)
125 })
126
127 // Strip all remaining HTML tags
128 text := htmlTagRegex.ReplaceAllString(sanitized, "")
129 // Decode entities
130 text = decodeEntities(text)
131 // Collapse horizontal whitespace (but preserve newlines for structure)
132 text = whitespaceCollapse.ReplaceAllString(text, " ")
133 // Collapse excessive newlines
134 text = multipleNewlines.ReplaceAllString(text, "\n\n")
135
136 // Restore code blocks
137 for i, block := range codeBlocks {
138 text = strings.ReplaceAll(text, fmt.Sprintf("__CODEBLOCK_%d__", i), block)
139 }
140
141 return strings.TrimSpace(text)
142}
143
// Rendering state shared by the functions in this file. All three are
// assigned in init.
var (
	htmlTmpl *htmltemplate.Template // parsed from templates/digest.html; executed by RenderDigest
	textTmpl *texttemplate.Template // parsed from templates/digest.txt; executed by RenderDigest
	policy   *bluemonday.Policy     // bluemonday.UGCPolicy(); applied by sanitizeHTML and stripHTML
)
149
150func init() {
151 var err error
152 htmlTmpl, err = htmltemplate.ParseFS(templateFS, "templates/digest.html")
153 if err != nil {
154 panic("failed to parse HTML template: " + err.Error())
155 }
156 textTmpl, err = texttemplate.ParseFS(templateFS, "templates/digest.txt")
157 if err != nil {
158 panic("failed to parse text template: " + err.Error())
159 }
160
161 // Initialize HTML sanitization policy
162 // UGCPolicy allows safe HTML tags but strips styles and unsafe elements
163 // This prevents XSS attacks while allowing basic formatting
164 policy = bluemonday.UGCPolicy()
165}
166
167func RenderDigest(data *DigestData, inline bool, daysUntilExpiry int, showUrgentBanner, showWarningBanner bool) (html string, text string, err error) {
168 // Convert FeedGroups to templateFeedGroups with sanitized HTML content
169 sanitizedGroups := make([]templateFeedGroup, len(data.FeedGroups))
170 for i, group := range data.FeedGroups {
171 sanitizedItems := make([]templateFeedItem, len(group.Items))
172 for j, item := range group.Items {
173 sanitizedItems[j] = templateFeedItem{
174 Title: item.Title,
175 Link: item.Link,
176 Content: item.Content,
177 PlainContent: stripHTML(item.Content),
178 SanitizedContent: htmltemplate.HTML(sanitizeHTML(item.Content)), // #nosec G203 -- Content is sanitized by bluemonday before conversion
179 Published: item.Published,
180 }
181 }
182 sanitizedGroups[i] = templateFeedGroup{
183 FeedName: group.FeedName,
184 FeedURL: group.FeedURL,
185 Items: sanitizedItems,
186 }
187 }
188
189 // Prepare template data for HTML template (with sanitized content)
190 htmlTmplData := struct {
191 ConfigName string
192 TotalItems int
193 FeedGroups []templateFeedGroup
194 Inline bool
195 DaysUntilExpiry int
196 ShowUrgentBanner bool
197 ShowWarningBanner bool
198 }{
199 ConfigName: data.ConfigName,
200 TotalItems: data.TotalItems,
201 FeedGroups: sanitizedGroups,
202 Inline: inline,
203 DaysUntilExpiry: daysUntilExpiry,
204 ShowUrgentBanner: showUrgentBanner,
205 ShowWarningBanner: showWarningBanner,
206 }
207
208 // Prepare template data for text template (with plain text content)
209 textTmplData := struct {
210 ConfigName string
211 TotalItems int
212 FeedGroups []templateFeedGroup
213 Inline bool
214 DaysUntilExpiry int
215 ShowUrgentBanner bool
216 ShowWarningBanner bool
217 }{
218 ConfigName: data.ConfigName,
219 TotalItems: data.TotalItems,
220 FeedGroups: sanitizedGroups,
221 Inline: inline,
222 DaysUntilExpiry: daysUntilExpiry,
223 ShowUrgentBanner: showUrgentBanner,
224 ShowWarningBanner: showWarningBanner,
225 }
226
227 var htmlBuf, textBuf bytes.Buffer
228
229 if err = htmlTmpl.Execute(&htmlBuf, htmlTmplData); err != nil {
230 return "", "", err
231 }
232
233 if err = textTmpl.Execute(&textBuf, textTmplData); err != nil {
234 return "", "", err
235 }
236
237 return htmlBuf.String(), textBuf.String(), nil
238}