this repo has no description
// Package markup is an umbrella package for all markups and their renderers.
package markup

import (
	"bytes"
	"fmt"
	"io"
	"net/url"
	"path"
	"regexp"
	"strings"

	"github.com/microcosm-cc/bluemonday"
	"github.com/yuin/goldmark"
	"github.com/yuin/goldmark/ast"
	"github.com/yuin/goldmark/extension"
	"github.com/yuin/goldmark/parser"
	"github.com/yuin/goldmark/renderer/html"
	"github.com/yuin/goldmark/text"
	"github.com/yuin/goldmark/util"
	htmlparse "golang.org/x/net/html"

	"tangled.sh/tangled.sh/core/appview/pages/repoinfo"
)

// RendererType defines the type of renderer to use based on context.
type RendererType int

const (
	// RendererTypeRepoMarkdown is for repository documentation markdown files.
	RendererTypeRepoMarkdown RendererType = iota
	// RendererTypeDefault is non-repo markdown, like issues/pulls/comments.
	RendererTypeDefault
)

// RenderContext holds the contextual data for rendering markdown.
// It can be initialized empty, and that'll skip any transformations.
38type RenderContext struct { 39 CamoUrl string 40 CamoSecret string 41 repoinfo.RepoInfo 42 IsDev bool 43 RendererType RendererType 44 Sanitizer Sanitizer 45} 46 47type Sanitizer struct { 48 defaultPolicy *bluemonday.Policy 49} 50 51func (rctx *RenderContext) RenderMarkdown(source string) string { 52 md := goldmark.New( 53 goldmark.WithExtensions(extension.GFM), 54 goldmark.WithParserOptions( 55 parser.WithAutoHeadingID(), 56 ), 57 goldmark.WithRendererOptions(html.WithUnsafe()), 58 ) 59 60 if rctx != nil { 61 var transformers []util.PrioritizedValue 62 63 transformers = append(transformers, util.Prioritized(&MarkdownTransformer{rctx: rctx}, 10000)) 64 65 md.Parser().AddOptions( 66 parser.WithASTTransformers(transformers...), 67 ) 68 } 69 70 var buf bytes.Buffer 71 if err := md.Convert([]byte(source), &buf); err != nil { 72 return source 73 } 74 75 var processed strings.Builder 76 if err := postProcess(rctx, strings.NewReader(buf.String()), &processed); err != nil { 77 return source 78 } 79 80 return processed.String() 81} 82 83func postProcess(ctx *RenderContext, input io.Reader, output io.Writer) error { 84 node, err := htmlparse.Parse(io.MultiReader( 85 strings.NewReader("<html><body>"), 86 input, 87 strings.NewReader("</body></html>"), 88 )) 89 if err != nil { 90 return fmt.Errorf("failed to parse html: %w", err) 91 } 92 93 if node.Type == htmlparse.DocumentNode { 94 node = node.FirstChild 95 } 96 97 visitNode(ctx, node) 98 99 newNodes := make([]*htmlparse.Node, 0, 5) 100 101 if node.Data == "html" { 102 node = node.FirstChild 103 for node != nil && node.Data != "body" { 104 node = node.NextSibling 105 } 106 } 107 if node != nil { 108 if node.Data == "body" { 109 child := node.FirstChild 110 for child != nil { 111 newNodes = append(newNodes, child) 112 child = child.NextSibling 113 } 114 } else { 115 newNodes = append(newNodes, node) 116 } 117 } 118 119 for _, node := range newNodes { 120 if err := htmlparse.Render(output, node); err != nil { 121 return 
fmt.Errorf("failed to render processed html: %w", err) 122 } 123 } 124 125 return nil 126} 127 128func visitNode(ctx *RenderContext, node *htmlparse.Node) { 129 switch node.Type { 130 case htmlparse.ElementNode: 131 if node.Data == "img" || node.Data == "source" { 132 for i, attr := range node.Attr { 133 if attr.Key != "src" { 134 continue 135 } 136 137 camoUrl, _ := url.Parse(ctx.CamoUrl) 138 dstUrl, _ := url.Parse(attr.Val) 139 if dstUrl.Host != camoUrl.Host { 140 attr.Val = ctx.imageFromKnotTransformer(attr.Val) 141 attr.Val = ctx.camoImageLinkTransformer(attr.Val) 142 node.Attr[i] = attr 143 } 144 } 145 } 146 147 for n := node.FirstChild; n != nil; n = n.NextSibling { 148 visitNode(ctx, n) 149 } 150 default: 151 } 152} 153 154func (rctx *RenderContext) SanitizeDefault(html string) string { 155 return rctx.Sanitizer.defaultPolicy.Sanitize(html) 156} 157 158func NewSanitizer() Sanitizer { 159 return Sanitizer{ 160 defaultPolicy: defaultPolicy(), 161 } 162} 163func defaultPolicy() *bluemonday.Policy { 164 policy := bluemonday.UGCPolicy() 165 166 // Allow generally safe attributes 167 generalSafeAttrs := []string{ 168 "abbr", "accept", "accept-charset", 169 "accesskey", "action", "align", "alt", 170 "aria-describedby", "aria-hidden", "aria-label", "aria-labelledby", 171 "axis", "border", "cellpadding", "cellspacing", "char", 172 "charoff", "charset", "checked", 173 "clear", "cols", "colspan", "color", 174 "compact", "coords", "datetime", "dir", 175 "disabled", "enctype", "for", "frame", 176 "headers", "height", "hreflang", 177 "hspace", "ismap", "label", "lang", 178 "maxlength", "media", "method", 179 "multiple", "name", "nohref", "noshade", 180 "nowrap", "open", "prompt", "readonly", "rel", "rev", 181 "rows", "rowspan", "rules", "scope", 182 "selected", "shape", "size", "span", 183 "start", "summary", "tabindex", "target", 184 "title", "type", "usemap", "valign", "value", 185 "vspace", "width", "itemprop", 186 } 187 188 generalSafeElements := []string{ 189 "h1", 
"h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt", 190 "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label", 191 "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary", 192 "details", "caption", "figure", "figcaption", 193 "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr", 194 } 195 196 policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) 197 198 // video 199 policy.AllowAttrs("src", "autoplay", "controls").OnElements("video") 200 201 // checkboxes 202 policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") 203 policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") 204 205 // centering content 206 policy.AllowElements("center") 207 208 policy.AllowAttrs("align", "style", "width", "height").Globally() 209 policy.AllowStyles( 210 "margin", 211 "padding", 212 "text-align", 213 "font-weight", 214 "text-decoration", 215 "padding-left", 216 "padding-right", 217 "padding-top", 218 "padding-bottom", 219 "margin-left", 220 "margin-right", 221 "margin-top", 222 "margin-bottom", 223 ) 224 225 return policy 226} 227 228type MarkdownTransformer struct { 229 rctx *RenderContext 230} 231 232func (a *MarkdownTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { 233 _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { 234 if !entering { 235 return ast.WalkContinue, nil 236 } 237 238 switch a.rctx.RendererType { 239 case RendererTypeRepoMarkdown: 240 switch n := n.(type) { 241 case *ast.Link: 242 a.rctx.relativeLinkTransformer(n) 243 case *ast.Image: 244 a.rctx.imageFromKnotAstTransformer(n) 245 a.rctx.camoImageLinkAstTransformer(n) 246 } 247 case RendererTypeDefault: 248 switch n := n.(type) { 249 case *ast.Image: 250 
a.rctx.imageFromKnotAstTransformer(n) 251 a.rctx.camoImageLinkAstTransformer(n) 252 } 253 } 254 255 return ast.WalkContinue, nil 256 }) 257} 258 259func (rctx *RenderContext) relativeLinkTransformer(link *ast.Link) { 260 261 dst := string(link.Destination) 262 263 if isAbsoluteUrl(dst) { 264 return 265 } 266 267 actualPath := rctx.actualPath(dst) 268 269 newPath := path.Join("/", rctx.RepoInfo.FullName(), "tree", rctx.RepoInfo.Ref, actualPath) 270 link.Destination = []byte(newPath) 271} 272 273func (rctx *RenderContext) imageFromKnotTransformer(dst string) string { 274 if isAbsoluteUrl(dst) { 275 return dst 276 } 277 278 scheme := "https" 279 if rctx.IsDev { 280 scheme = "http" 281 } 282 283 actualPath := rctx.actualPath(dst) 284 285 parsedURL := &url.URL{ 286 Scheme: scheme, 287 Host: rctx.Knot, 288 Path: path.Join("/", 289 rctx.RepoInfo.OwnerDid, 290 rctx.RepoInfo.Name, 291 "raw", 292 url.PathEscape(rctx.RepoInfo.Ref), 293 actualPath), 294 } 295 newPath := parsedURL.String() 296 return newPath 297} 298 299func (rctx *RenderContext) imageFromKnotAstTransformer(img *ast.Image) { 300 dst := string(img.Destination) 301 img.Destination = []byte(rctx.imageFromKnotTransformer(dst)) 302} 303 304// actualPath decides when to join the file path with the 305// current repository directory (essentially only when the link 306// destination is relative. if it's absolute then we assume the 307// user knows what they're doing.) 308func (rctx *RenderContext) actualPath(dst string) string { 309 if path.IsAbs(dst) { 310 return dst 311 } 312 313 return path.Join(rctx.CurrentDir, dst) 314} 315 316func isAbsoluteUrl(link string) bool { 317 parsed, err := url.Parse(link) 318 if err != nil { 319 return false 320 } 321 return parsed.IsAbs() 322}