tangled
alpha
login
or
join now
dunkirk.sh
/
herald
1
fork
atom
rss email digests over ssh because you're a cool kid
herald.dunkirk.sh
go
rss
rss-reader
ssh
charm
1
fork
atom
overview
issues
pulls
pipelines
feat: handle code blocks
dunkirk.sh
1 month ago
e88a96bc
bf4828ad
verified
This commit was signed with the committer's
known signature
.
dunkirk.sh
SSH Key Fingerprint:
SHA256:DqcG0RXYExE26KiWo3VxJnsxswN1QNfTBvB+bdSpk80=
+116
-10
2 changed files
expand all
collapse all
unified
split
email
render.go
render_test.go
+69
-10
email/render.go
···
3
3
import (
4
4
"bytes"
5
5
"embed"
6
6
+
"fmt"
6
7
htmltemplate "html/template"
7
8
"regexp"
8
9
"strings"
···
54
55
// emailUnsafeTags are HTML5 semantic tags not supported by most email clients (Gmail, Outlook, etc.)
55
56
var emailUnsafeTags = regexp.MustCompile(`</?(?:article|section|nav|header|footer|aside|main|figure|figcaption|details|summary|mark|time|dialog)(?:\s[^>]*)?>`)
56
57
58
58
+
// spanTags matches span tags (used to strip syntax highlighting noise from code blocks)
59
59
+
var spanTags = regexp.MustCompile(`</?span(?:\s[^>]*)?>`)
60
60
+
61
61
+
// preTagOpen matches opening pre tags to add styling
62
62
+
var preTagOpen = regexp.MustCompile(`<pre(?:\s[^>]*)?>`)
63
63
+
64
64
+
// codeBlockStyle is inline CSS for code blocks in emails
65
65
+
const codeBlockStyle = `<pre style="background-color:#f5f5f5;padding:12px;border-radius:4px;overflow-x:auto;font-family:monospace;font-size:13px;line-height:1.4">`
66
66
+
57
67
// sanitizeHTML sanitizes HTML content, allowing safe tags while stripping styles and unsafe elements
58
68
func sanitizeHTML(html string) string {
59
69
sanitized := policy.Sanitize(html)
60
70
// Strip HTML5 semantic tags that email clients don't support
61
61
-
return emailUnsafeTags.ReplaceAllString(sanitized, "")
71
71
+
sanitized = emailUnsafeTags.ReplaceAllString(sanitized, "")
72
72
+
// Strip span tags (removes syntax highlighting noise from code blocks)
73
73
+
sanitized = spanTags.ReplaceAllString(sanitized, "")
74
74
+
// Add styling to pre tags for better code block appearance
75
75
+
sanitized = preTagOpen.ReplaceAllString(sanitized, codeBlockStyle)
76
76
+
return sanitized
62
77
}
63
78
64
79
// htmlTagRegex matches HTML tags for stripping
65
80
var htmlTagRegex = regexp.MustCompile(`<[^>]*>`)
66
81
82
82
+
// preBlockRegex matches pre blocks including content
83
83
+
var preBlockRegex = regexp.MustCompile(`(?s)<pre[^>]*>(.*?)</pre>`)
84
84
+
85
85
+
// whitespaceCollapse collapses multiple whitespace chars
86
86
+
var whitespaceCollapse = regexp.MustCompile(`[ \t]+`)
87
87
+
88
88
+
// multipleNewlines collapses 3+ newlines to 2
89
89
+
var multipleNewlines = regexp.MustCompile(`\n{3,}`)
90
90
+
91
91
+
// decodeEntities decodes common HTML entities
92
92
+
func decodeEntities(text string) string {
93
93
+
text = strings.ReplaceAll(text, "&amp;", "&")
94
94
+
text = strings.ReplaceAll(text, "&lt;", "<")
95
95
+
text = strings.ReplaceAll(text, "&gt;", ">")
96
96
+
text = strings.ReplaceAll(text, "&quot;", "\"")
97
97
+
text = strings.ReplaceAll(text, "&#39;", "'")
98
98
+
text = strings.ReplaceAll(text, "&nbsp;", " ")
99
99
+
return text
100
100
+
}
101
101
+
67
102
// stripHTML removes all HTML tags and decodes entities for plain text output
68
103
func stripHTML(html string) string {
69
104
// First sanitize to ensure we're working with clean HTML
70
105
sanitized := policy.Sanitize(html)
106
106
+
107
107
+
// Extract code blocks and replace with placeholders
108
108
+
var codeBlocks []string
109
109
+
sanitized = preBlockRegex.ReplaceAllStringFunc(sanitized, func(match string) string {
110
110
+
inner := preBlockRegex.FindStringSubmatch(match)
111
111
+
if len(inner) < 2 {
112
112
+
return match
113
113
+
}
114
114
+
code := inner[1]
115
115
+
// Strip any remaining tags (like spans for syntax highlighting)
116
116
+
code = htmlTagRegex.ReplaceAllString(code, "")
117
117
+
code = decodeEntities(code)
118
118
+
// Indent each line with 4 spaces
119
119
+
lines := strings.Split(strings.TrimSpace(code), "\n")
120
120
+
for i, line := range lines {
121
121
+
lines[i] = "    " + line
122
122
+
}
123
123
+
codeBlocks = append(codeBlocks, strings.Join(lines, "\n"))
124
124
+
return fmt.Sprintf("\n\n__CODEBLOCK_%d__\n\n", len(codeBlocks)-1)
125
125
+
})
126
126
+
71
127
// Strip all remaining HTML tags
72
128
text := htmlTagRegex.ReplaceAllString(sanitized, "")
73
73
-
// Decode common HTML entities
74
74
-
text = strings.ReplaceAll(text, "&amp;", "&")
75
75
-
text = strings.ReplaceAll(text, "&lt;", "<")
76
76
-
text = strings.ReplaceAll(text, "&gt;", ">")
77
77
-
text = strings.ReplaceAll(text, "&quot;", "\"")
78
78
-
text = strings.ReplaceAll(text, "&#39;", "'")
79
79
-
text = strings.ReplaceAll(text, "&nbsp;", " ")
80
80
-
// Collapse multiple whitespace/newlines
81
81
-
text = regexp.MustCompile(`\s+`).ReplaceAllString(text, " ")
129
129
+
// Decode entities
130
130
+
text = decodeEntities(text)
131
131
+
// Collapse horizontal whitespace (but preserve newlines for structure)
132
132
+
text = whitespaceCollapse.ReplaceAllString(text, " ")
133
133
+
// Collapse excessive newlines
134
134
+
text = multipleNewlines.ReplaceAllString(text, "\n\n")
135
135
+
136
136
+
// Restore code blocks
137
137
+
for i, block := range codeBlocks {
138
138
+
text = strings.ReplaceAll(text, fmt.Sprintf("__CODEBLOCK_%d__", i), block)
139
139
+
}
140
140
+
82
141
return strings.TrimSpace(text)
83
142
}
84
143
+47
email/render_test.go
···
134
134
t.Error("Text content was not preserved after HTML stripping")
135
135
}
136
136
}
137
137
+
138
138
+
func TestRenderDigest_CodeBlockFormatting(t *testing.T) {
139
139
+
data := &DigestData{
140
140
+
ConfigName: "Test Config",
141
141
+
TotalItems: 1,
142
142
+
FeedGroups: []FeedGroup{
143
143
+
{
144
144
+
FeedName: "Test Feed",
145
145
+
FeedURL: "https://example.com/feed",
146
146
+
Items: []FeedItem{
147
147
+
{
148
148
+
Title: "Test Article",
149
149
+
Link: "https://example.com/article",
150
150
+
Content: `<p>Code example:</p><pre><span class="c1"># comment</span>
151
151
+
echo hello</pre><p>Done.</p>`,
152
152
+
Published: time.Now(),
153
153
+
},
154
154
+
},
155
155
+
},
156
156
+
},
157
157
+
}
158
158
+
159
159
+
htmlOutput, textOutput, err := RenderDigest(data, true, 30, false, false)
160
160
+
if err != nil {
161
161
+
t.Fatalf("RenderDigest failed: %v", err)
162
162
+
}
163
163
+
164
164
+
// HTML: verify code block has styling
165
165
+
if !strings.Contains(htmlOutput, `<pre style="background-color:#f5f5f5`) {
166
166
+
t.Error("HTML code block missing styling")
167
167
+
}
168
168
+
169
169
+
// HTML: verify syntax highlighting spans are stripped
170
170
+
if strings.Contains(htmlOutput, `class="c1"`) {
171
171
+
t.Error("Syntax highlighting classes should be stripped")
172
172
+
}
173
173
+
174
174
+
// Text: verify code is indented
175
175
+
if !strings.Contains(textOutput, "    # comment") {
176
176
+
t.Error("Text code block should be indented with 4 spaces")
177
177
+
}
178
178
+
179
179
+
// Text: verify no HTML tags in code block
180
180
+
if strings.Contains(textOutput, "<span") || strings.Contains(textOutput, "<pre") {
181
181
+
t.Error("Text output should not contain HTML tags")
182
182
+
}
183
183
+
}