this repo has no description
1// Package markup is an umbrella package for all markups and their renderers.
2package markup
3
4import (
5 "bytes"
6 "fmt"
7 "io"
8 "maps"
9 "net/url"
10 "path"
11 "regexp"
12 "slices"
13 "strings"
14
15 "github.com/alecthomas/chroma/v2"
16 chromahtml "github.com/alecthomas/chroma/v2/formatters/html"
17 "github.com/alecthomas/chroma/v2/styles"
18 "github.com/microcosm-cc/bluemonday"
19 "github.com/yuin/goldmark"
20 highlighting "github.com/yuin/goldmark-highlighting/v2"
21 "github.com/yuin/goldmark/ast"
22 "github.com/yuin/goldmark/extension"
23 "github.com/yuin/goldmark/parser"
24 "github.com/yuin/goldmark/renderer/html"
25 "github.com/yuin/goldmark/text"
26 "github.com/yuin/goldmark/util"
27 htmlparse "golang.org/x/net/html"
28
29 "tangled.sh/tangled.sh/core/appview/pages/repoinfo"
30)
31
// RendererType selects which set of markdown transformations is applied,
// based on the context the markdown comes from.
type RendererType int

const (
	// RendererTypeRepoMarkdown is for repository documentation markdown files.
	// It is the zero value of RendererType, so a RenderContext that leaves
	// the field unset renders in this mode.
	RendererTypeRepoMarkdown RendererType = iota
	// RendererTypeDefault is non-repo markdown, like issues/pulls/comments.
	RendererTypeDefault
)
41
// RenderContext holds the contextual data for rendering markdown.
// It can be initialized empty, and that'll skip any transformations.
//
// NOTE(review): for a non-nil but empty context RenderMarkdown still installs
// the AST transformer, and the zero RendererType is RendererTypeRepoMarkdown,
// so "skip any transformations" may not hold in every case — confirm.
type RenderContext struct {
	// CamoUrl is parsed during HTML post-processing: <img>/<source> elements
	// whose src host equals this URL's host are left untouched, all others
	// are rewritten.
	CamoUrl string
	// CamoSecret is not read in this part of the file; presumably it is the
	// secret used when building camo image URLs — TODO confirm.
	CamoSecret string
	// RepoInfo is embedded. Its promoted members (Knot, OwnerDid, Name, Ref,
	// CurrentDir, FullName) are used when rewriting relative links and images.
	repoinfo.RepoInfo
	// IsDev makes generated knot image URLs use http instead of https.
	IsDev bool
	// RendererType selects which AST transformations MarkdownTransformer runs.
	RendererType RendererType
	// Sanitizer carries the HTML sanitization policy used by SanitizeDefault.
	Sanitizer Sanitizer
}
52
// Sanitizer wraps the bluemonday policy used to sanitize rendered HTML.
// NewSanitizer returns one with the policy populated; the zero value holds a
// nil policy.
type Sanitizer struct {
	// defaultPolicy is the policy applied by RenderContext.SanitizeDefault.
	defaultPolicy *bluemonday.Policy
}
56
57func (rctx *RenderContext) RenderMarkdown(source string) string {
58 md := goldmark.New(
59 goldmark.WithExtensions(
60 extension.GFM,
61 highlighting.NewHighlighting(
62 highlighting.WithFormatOptions(
63 chromahtml.Standalone(false),
64 chromahtml.WithClasses(true),
65 ),
66 highlighting.WithCustomStyle(styles.Get("catppuccin-latte")),
67 ),
68 ),
69 goldmark.WithParserOptions(
70 parser.WithAutoHeadingID(),
71 ),
72 goldmark.WithRendererOptions(html.WithUnsafe()),
73 )
74
75 if rctx != nil {
76 var transformers []util.PrioritizedValue
77
78 transformers = append(transformers, util.Prioritized(&MarkdownTransformer{rctx: rctx}, 10000))
79
80 md.Parser().AddOptions(
81 parser.WithASTTransformers(transformers...),
82 )
83 }
84
85 var buf bytes.Buffer
86 if err := md.Convert([]byte(source), &buf); err != nil {
87 return source
88 }
89
90 var processed strings.Builder
91 if err := postProcess(rctx, strings.NewReader(buf.String()), &processed); err != nil {
92 return source
93 }
94
95 return processed.String()
96}
97
98func postProcess(ctx *RenderContext, input io.Reader, output io.Writer) error {
99 node, err := htmlparse.Parse(io.MultiReader(
100 strings.NewReader("<html><body>"),
101 input,
102 strings.NewReader("</body></html>"),
103 ))
104 if err != nil {
105 return fmt.Errorf("failed to parse html: %w", err)
106 }
107
108 if node.Type == htmlparse.DocumentNode {
109 node = node.FirstChild
110 }
111
112 visitNode(ctx, node)
113
114 newNodes := make([]*htmlparse.Node, 0, 5)
115
116 if node.Data == "html" {
117 node = node.FirstChild
118 for node != nil && node.Data != "body" {
119 node = node.NextSibling
120 }
121 }
122 if node != nil {
123 if node.Data == "body" {
124 child := node.FirstChild
125 for child != nil {
126 newNodes = append(newNodes, child)
127 child = child.NextSibling
128 }
129 } else {
130 newNodes = append(newNodes, node)
131 }
132 }
133
134 for _, node := range newNodes {
135 if err := htmlparse.Render(output, node); err != nil {
136 return fmt.Errorf("failed to render processed html: %w", err)
137 }
138 }
139
140 return nil
141}
142
143func visitNode(ctx *RenderContext, node *htmlparse.Node) {
144 switch node.Type {
145 case htmlparse.ElementNode:
146 if node.Data == "img" || node.Data == "source" {
147 for i, attr := range node.Attr {
148 if attr.Key != "src" {
149 continue
150 }
151
152 camoUrl, _ := url.Parse(ctx.CamoUrl)
153 dstUrl, _ := url.Parse(attr.Val)
154 if dstUrl.Host != camoUrl.Host {
155 attr.Val = ctx.imageFromKnotTransformer(attr.Val)
156 attr.Val = ctx.camoImageLinkTransformer(attr.Val)
157 node.Attr[i] = attr
158 }
159 }
160 }
161
162 for n := node.FirstChild; n != nil; n = n.NextSibling {
163 visitNode(ctx, n)
164 }
165 default:
166 }
167}
168
169func (rctx *RenderContext) SanitizeDefault(html string) string {
170 return rctx.Sanitizer.defaultPolicy.Sanitize(html)
171}
172
173func NewSanitizer() Sanitizer {
174 return Sanitizer{
175 defaultPolicy: defaultPolicy(),
176 }
177}
178func defaultPolicy() *bluemonday.Policy {
179 policy := bluemonday.UGCPolicy()
180
181 // Allow generally safe attributes
182 generalSafeAttrs := []string{
183 "abbr", "accept", "accept-charset",
184 "accesskey", "action", "align", "alt",
185 "aria-describedby", "aria-hidden", "aria-label", "aria-labelledby",
186 "axis", "border", "cellpadding", "cellspacing", "char",
187 "charoff", "charset", "checked",
188 "clear", "cols", "colspan", "color",
189 "compact", "coords", "datetime", "dir",
190 "disabled", "enctype", "for", "frame",
191 "headers", "height", "hreflang",
192 "hspace", "ismap", "label", "lang",
193 "maxlength", "media", "method",
194 "multiple", "name", "nohref", "noshade",
195 "nowrap", "open", "prompt", "readonly", "rel", "rev",
196 "rows", "rowspan", "rules", "scope",
197 "selected", "shape", "size", "span",
198 "start", "summary", "tabindex", "target",
199 "title", "type", "usemap", "valign", "value",
200 "vspace", "width", "itemprop",
201 }
202
203 generalSafeElements := []string{
204 "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt",
205 "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label",
206 "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary",
207 "details", "caption", "figure", "figcaption",
208 "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr",
209 }
210
211 policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
212
213 // video
214 policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")
215
216 // checkboxes
217 policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
218 policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
219
220 // for code blocks
221 policy.AllowAttrs("class").Matching(regexp.MustCompile(`chroma`)).OnElements("pre")
222 policy.AllowAttrs("class").Matching(regexp.MustCompile(strings.Join(slices.Collect(maps.Values(chroma.StandardTypes)), "|"))).OnElements("span")
223
224 // centering content
225 policy.AllowElements("center")
226
227 policy.AllowAttrs("align", "style", "width", "height").Globally()
228 policy.AllowStyles(
229 "margin",
230 "padding",
231 "text-align",
232 "font-weight",
233 "text-decoration",
234 "padding-left",
235 "padding-right",
236 "padding-top",
237 "padding-bottom",
238 "margin-left",
239 "margin-right",
240 "margin-top",
241 "margin-bottom",
242 )
243
244 return policy
245}
246
// MarkdownTransformer rewrites link and image destinations in a parsed
// markdown document; its Transform method has the shape goldmark expects of
// an AST transformer.
type MarkdownTransformer struct {
	// rctx supplies the renderer type and the repository details used for
	// rewriting. Transform dereferences it without a nil check.
	rctx *RenderContext
}
250
251func (a *MarkdownTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
252 _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
253 if !entering {
254 return ast.WalkContinue, nil
255 }
256
257 switch a.rctx.RendererType {
258 case RendererTypeRepoMarkdown:
259 switch n := n.(type) {
260 case *ast.Link:
261 a.rctx.relativeLinkTransformer(n)
262 case *ast.Image:
263 a.rctx.imageFromKnotAstTransformer(n)
264 a.rctx.camoImageLinkAstTransformer(n)
265 }
266 case RendererTypeDefault:
267 switch n := n.(type) {
268 case *ast.Image:
269 a.rctx.imageFromKnotAstTransformer(n)
270 a.rctx.camoImageLinkAstTransformer(n)
271 }
272 }
273
274 return ast.WalkContinue, nil
275 })
276}
277
278func (rctx *RenderContext) relativeLinkTransformer(link *ast.Link) {
279
280 dst := string(link.Destination)
281
282 if isAbsoluteUrl(dst) {
283 return
284 }
285
286 actualPath := rctx.actualPath(dst)
287
288 newPath := path.Join("/", rctx.RepoInfo.FullName(), "tree", rctx.RepoInfo.Ref, actualPath)
289 link.Destination = []byte(newPath)
290}
291
292func (rctx *RenderContext) imageFromKnotTransformer(dst string) string {
293 if isAbsoluteUrl(dst) {
294 return dst
295 }
296
297 scheme := "https"
298 if rctx.IsDev {
299 scheme = "http"
300 }
301
302 actualPath := rctx.actualPath(dst)
303
304 parsedURL := &url.URL{
305 Scheme: scheme,
306 Host: rctx.Knot,
307 Path: path.Join("/",
308 rctx.RepoInfo.OwnerDid,
309 rctx.RepoInfo.Name,
310 "raw",
311 url.PathEscape(rctx.RepoInfo.Ref),
312 actualPath),
313 }
314 newPath := parsedURL.String()
315 return newPath
316}
317
318func (rctx *RenderContext) imageFromKnotAstTransformer(img *ast.Image) {
319 dst := string(img.Destination)
320 img.Destination = []byte(rctx.imageFromKnotTransformer(dst))
321}
322
323// actualPath decides when to join the file path with the
324// current repository directory (essentially only when the link
325// destination is relative. if it's absolute then we assume the
326// user knows what they're doing.)
327func (rctx *RenderContext) actualPath(dst string) string {
328 if path.IsAbs(dst) {
329 return dst
330 }
331
332 return path.Join(rctx.CurrentDir, dst)
333}
334
// isAbsoluteUrl reports whether link parses as a URL that has a scheme.
// Anything that fails to parse counts as not absolute.
func isAbsoluteUrl(link string) bool {
	if u, err := url.Parse(link); err == nil {
		return u.IsAbs()
	}
	return false
}