this repo has no description
1// Package markup is an umbrella package for all markups and their renderers.
2package markup
3
4import (
5 "bytes"
6 "fmt"
7 "io"
8 "net/url"
9 "path"
10 "regexp"
11 "strings"
12
13 "github.com/microcosm-cc/bluemonday"
14 "github.com/yuin/goldmark"
15 "github.com/yuin/goldmark/ast"
16 "github.com/yuin/goldmark/extension"
17 "github.com/yuin/goldmark/parser"
18 "github.com/yuin/goldmark/renderer/html"
19 "github.com/yuin/goldmark/text"
20 "github.com/yuin/goldmark/util"
21 htmlparse "golang.org/x/net/html"
22
23 "tangled.sh/tangled.sh/core/appview/pages/repoinfo"
24)
25
// RendererType selects which set of transformations is applied to a
// document while it is being rendered.
type RendererType int

const (
	// RendererTypeRepoMarkdown is used for markdown files that are part of a
	// repository's documentation.
	RendererTypeRepoMarkdown RendererType = iota
	// RendererTypeDefault is used for markdown that does not live in a
	// repository, such as issues, pulls and comments.
	RendererTypeDefault
)
35
36// RenderContext holds the contextual data for rendering markdown.
37// It can be initialized empty, and that'll skip any transformations.
38type RenderContext struct {
39 CamoUrl string
40 CamoSecret string
41 repoinfo.RepoInfo
42 IsDev bool
43 RendererType RendererType
44 Sanitizer Sanitizer
45}
46
47type Sanitizer struct {
48 defaultPolicy *bluemonday.Policy
49}
50
51func (rctx *RenderContext) RenderMarkdown(source string) string {
52 md := goldmark.New(
53 goldmark.WithExtensions(extension.GFM),
54 goldmark.WithParserOptions(
55 parser.WithAutoHeadingID(),
56 ),
57 goldmark.WithRendererOptions(html.WithUnsafe()),
58 )
59
60 if rctx != nil {
61 var transformers []util.PrioritizedValue
62
63 transformers = append(transformers, util.Prioritized(&MarkdownTransformer{rctx: rctx}, 10000))
64
65 md.Parser().AddOptions(
66 parser.WithASTTransformers(transformers...),
67 )
68 }
69
70 var buf bytes.Buffer
71 if err := md.Convert([]byte(source), &buf); err != nil {
72 return source
73 }
74
75 var processed strings.Builder
76 if err := postProcess(rctx, strings.NewReader(buf.String()), &processed); err != nil {
77 return source
78 }
79
80 return processed.String()
81}
82
83func postProcess(ctx *RenderContext, input io.Reader, output io.Writer) error {
84 node, err := htmlparse.Parse(io.MultiReader(
85 strings.NewReader("<html><body>"),
86 input,
87 strings.NewReader("</body></html>"),
88 ))
89 if err != nil {
90 return fmt.Errorf("failed to parse html: %w", err)
91 }
92
93 if node.Type == htmlparse.DocumentNode {
94 node = node.FirstChild
95 }
96
97 visitNode(ctx, node)
98
99 newNodes := make([]*htmlparse.Node, 0, 5)
100
101 if node.Data == "html" {
102 node = node.FirstChild
103 for node != nil && node.Data != "body" {
104 node = node.NextSibling
105 }
106 }
107 if node != nil {
108 if node.Data == "body" {
109 child := node.FirstChild
110 for child != nil {
111 newNodes = append(newNodes, child)
112 child = child.NextSibling
113 }
114 } else {
115 newNodes = append(newNodes, node)
116 }
117 }
118
119 for _, node := range newNodes {
120 if err := htmlparse.Render(output, node); err != nil {
121 return fmt.Errorf("failed to render processed html: %w", err)
122 }
123 }
124
125 return nil
126}
127
128func visitNode(ctx *RenderContext, node *htmlparse.Node) {
129 switch node.Type {
130 case htmlparse.ElementNode:
131 if node.Data == "img" || node.Data == "source" {
132 for i, attr := range node.Attr {
133 if attr.Key != "src" {
134 continue
135 }
136
137 camoUrl, _ := url.Parse(ctx.CamoUrl)
138 dstUrl, _ := url.Parse(attr.Val)
139 if dstUrl.Host != camoUrl.Host {
140 attr.Val = ctx.imageFromKnotTransformer(attr.Val)
141 attr.Val = ctx.camoImageLinkTransformer(attr.Val)
142 node.Attr[i] = attr
143 }
144 }
145 }
146
147 for n := node.FirstChild; n != nil; n = n.NextSibling {
148 visitNode(ctx, n)
149 }
150 default:
151 }
152}
153
154func (rctx *RenderContext) SanitizeDefault(html string) string {
155 return rctx.Sanitizer.defaultPolicy.Sanitize(html)
156}
157
158func NewSanitizer() Sanitizer {
159 return Sanitizer{
160 defaultPolicy: defaultPolicy(),
161 }
162}
163func defaultPolicy() *bluemonday.Policy {
164 policy := bluemonday.UGCPolicy()
165
166 // Allow generally safe attributes
167 generalSafeAttrs := []string{
168 "abbr", "accept", "accept-charset",
169 "accesskey", "action", "align", "alt",
170 "aria-describedby", "aria-hidden", "aria-label", "aria-labelledby",
171 "axis", "border", "cellpadding", "cellspacing", "char",
172 "charoff", "charset", "checked",
173 "clear", "cols", "colspan", "color",
174 "compact", "coords", "datetime", "dir",
175 "disabled", "enctype", "for", "frame",
176 "headers", "height", "hreflang",
177 "hspace", "ismap", "label", "lang",
178 "maxlength", "media", "method",
179 "multiple", "name", "nohref", "noshade",
180 "nowrap", "open", "prompt", "readonly", "rel", "rev",
181 "rows", "rowspan", "rules", "scope",
182 "selected", "shape", "size", "span",
183 "start", "summary", "tabindex", "target",
184 "title", "type", "usemap", "valign", "value",
185 "vspace", "width", "itemprop",
186 }
187
188 generalSafeElements := []string{
189 "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt",
190 "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label",
191 "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary",
192 "details", "caption", "figure", "figcaption",
193 "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr",
194 }
195
196 policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
197
198 // video
199 policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")
200
201 // checkboxes
202 policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
203 policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
204
205 // centering content
206 policy.AllowElements("center")
207
208 policy.AllowAttrs("align", "style", "width", "height").Globally()
209 policy.AllowStyles(
210 "margin",
211 "padding",
212 "text-align",
213 "font-weight",
214 "text-decoration",
215 "padding-left",
216 "padding-right",
217 "padding-top",
218 "padding-bottom",
219 "margin-left",
220 "margin-right",
221 "margin-top",
222 "margin-bottom",
223 )
224
225 return policy
226}
227
228type MarkdownTransformer struct {
229 rctx *RenderContext
230}
231
232func (a *MarkdownTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
233 _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
234 if !entering {
235 return ast.WalkContinue, nil
236 }
237
238 switch a.rctx.RendererType {
239 case RendererTypeRepoMarkdown:
240 switch n := n.(type) {
241 case *ast.Link:
242 a.rctx.relativeLinkTransformer(n)
243 case *ast.Image:
244 a.rctx.imageFromKnotAstTransformer(n)
245 a.rctx.camoImageLinkAstTransformer(n)
246 }
247 case RendererTypeDefault:
248 switch n := n.(type) {
249 case *ast.Image:
250 a.rctx.imageFromKnotAstTransformer(n)
251 a.rctx.camoImageLinkAstTransformer(n)
252 }
253 }
254
255 return ast.WalkContinue, nil
256 })
257}
258
259func (rctx *RenderContext) relativeLinkTransformer(link *ast.Link) {
260
261 dst := string(link.Destination)
262
263 if isAbsoluteUrl(dst) {
264 return
265 }
266
267 actualPath := rctx.actualPath(dst)
268
269 newPath := path.Join("/", rctx.RepoInfo.FullName(), "tree", rctx.RepoInfo.Ref, actualPath)
270 link.Destination = []byte(newPath)
271}
272
273func (rctx *RenderContext) imageFromKnotTransformer(dst string) string {
274 if isAbsoluteUrl(dst) {
275 return dst
276 }
277
278 scheme := "https"
279 if rctx.IsDev {
280 scheme = "http"
281 }
282
283 actualPath := rctx.actualPath(dst)
284
285 parsedURL := &url.URL{
286 Scheme: scheme,
287 Host: rctx.Knot,
288 Path: path.Join("/",
289 rctx.RepoInfo.OwnerDid,
290 rctx.RepoInfo.Name,
291 "raw",
292 url.PathEscape(rctx.RepoInfo.Ref),
293 actualPath),
294 }
295 newPath := parsedURL.String()
296 return newPath
297}
298
299func (rctx *RenderContext) imageFromKnotAstTransformer(img *ast.Image) {
300 dst := string(img.Destination)
301 img.Destination = []byte(rctx.imageFromKnotTransformer(dst))
302}
303
304// actualPath decides when to join the file path with the
305// current repository directory (essentially only when the link
306// destination is relative. if it's absolute then we assume the
307// user knows what they're doing.)
308func (rctx *RenderContext) actualPath(dst string) string {
309 if path.IsAbs(dst) {
310 return dst
311 }
312
313 return path.Join(rctx.CurrentDir, dst)
314}
315
// isAbsoluteUrl reports whether link parses as a URL that has a scheme.
// A link that cannot be parsed is reported as not absolute.
func isAbsoluteUrl(link string) bool {
	if u, err := url.Parse(link); err == nil {
		return u.IsAbs()
	}
	return false
}