this repo has no description
// Package markup is an umbrella package for all markups and their renderers.
package markup

import (
	"bytes"
	"fmt"
	"io"
	"maps"
	"net/url"
	"path"
	"regexp"
	"slices"
	"strings"

	"github.com/alecthomas/chroma/v2"
	chromahtml "github.com/alecthomas/chroma/v2/formatters/html"
	"github.com/alecthomas/chroma/v2/styles"
	"github.com/microcosm-cc/bluemonday"
	"github.com/yuin/goldmark"
	highlighting "github.com/yuin/goldmark-highlighting/v2"
	"github.com/yuin/goldmark/ast"
	"github.com/yuin/goldmark/extension"
	"github.com/yuin/goldmark/parser"
	"github.com/yuin/goldmark/renderer/html"
	"github.com/yuin/goldmark/text"
	"github.com/yuin/goldmark/util"
	htmlparse "golang.org/x/net/html"

	"tangled.sh/tangled.sh/core/appview/pages/repoinfo"
)

// RendererType defines the type of renderer to use based on context.
// MarkdownTransformer.Transform switches on it to decide which AST nodes
// get rewritten.
type RendererType int

const (
	// RendererTypeRepoMarkdown is for repository documentation markdown files.
	// Both link and image destinations are rewritten for this type.
	RendererTypeRepoMarkdown RendererType = iota
	// RendererTypeDefault is non-repo markdown, like issues/pulls/comments.
	// Only image destinations are rewritten for this type.
	RendererTypeDefault
)

// RenderContext holds the contextual data for rendering markdown.
// It can be initialized empty, and that'll skip any transformations.
44type RenderContext struct { 45 CamoUrl string 46 CamoSecret string 47 repoinfo.RepoInfo 48 IsDev bool 49 RendererType RendererType 50 Sanitizer Sanitizer 51} 52 53type Sanitizer struct { 54 defaultPolicy *bluemonday.Policy 55} 56 57func (rctx *RenderContext) RenderMarkdown(source string) string { 58 md := goldmark.New( 59 goldmark.WithExtensions( 60 extension.GFM, 61 highlighting.NewHighlighting( 62 highlighting.WithFormatOptions( 63 chromahtml.Standalone(false), 64 chromahtml.WithClasses(true), 65 ), 66 highlighting.WithCustomStyle(styles.Get("catppuccin-latte")), 67 ), 68 ), 69 goldmark.WithParserOptions( 70 parser.WithAutoHeadingID(), 71 ), 72 goldmark.WithRendererOptions(html.WithUnsafe()), 73 ) 74 75 if rctx != nil { 76 var transformers []util.PrioritizedValue 77 78 transformers = append(transformers, util.Prioritized(&MarkdownTransformer{rctx: rctx}, 10000)) 79 80 md.Parser().AddOptions( 81 parser.WithASTTransformers(transformers...), 82 ) 83 } 84 85 var buf bytes.Buffer 86 if err := md.Convert([]byte(source), &buf); err != nil { 87 return source 88 } 89 90 var processed strings.Builder 91 if err := postProcess(rctx, strings.NewReader(buf.String()), &processed); err != nil { 92 return source 93 } 94 95 return processed.String() 96} 97 98func postProcess(ctx *RenderContext, input io.Reader, output io.Writer) error { 99 node, err := htmlparse.Parse(io.MultiReader( 100 strings.NewReader("<html><body>"), 101 input, 102 strings.NewReader("</body></html>"), 103 )) 104 if err != nil { 105 return fmt.Errorf("failed to parse html: %w", err) 106 } 107 108 if node.Type == htmlparse.DocumentNode { 109 node = node.FirstChild 110 } 111 112 visitNode(ctx, node) 113 114 newNodes := make([]*htmlparse.Node, 0, 5) 115 116 if node.Data == "html" { 117 node = node.FirstChild 118 for node != nil && node.Data != "body" { 119 node = node.NextSibling 120 } 121 } 122 if node != nil { 123 if node.Data == "body" { 124 child := node.FirstChild 125 for child != nil { 126 newNodes = 
append(newNodes, child) 127 child = child.NextSibling 128 } 129 } else { 130 newNodes = append(newNodes, node) 131 } 132 } 133 134 for _, node := range newNodes { 135 if err := htmlparse.Render(output, node); err != nil { 136 return fmt.Errorf("failed to render processed html: %w", err) 137 } 138 } 139 140 return nil 141} 142 143func visitNode(ctx *RenderContext, node *htmlparse.Node) { 144 switch node.Type { 145 case htmlparse.ElementNode: 146 if node.Data == "img" || node.Data == "source" { 147 for i, attr := range node.Attr { 148 if attr.Key != "src" { 149 continue 150 } 151 152 camoUrl, _ := url.Parse(ctx.CamoUrl) 153 dstUrl, _ := url.Parse(attr.Val) 154 if dstUrl.Host != camoUrl.Host { 155 attr.Val = ctx.imageFromKnotTransformer(attr.Val) 156 attr.Val = ctx.camoImageLinkTransformer(attr.Val) 157 node.Attr[i] = attr 158 } 159 } 160 } 161 162 for n := node.FirstChild; n != nil; n = n.NextSibling { 163 visitNode(ctx, n) 164 } 165 default: 166 } 167} 168 169func (rctx *RenderContext) SanitizeDefault(html string) string { 170 return rctx.Sanitizer.defaultPolicy.Sanitize(html) 171} 172 173func NewSanitizer() Sanitizer { 174 return Sanitizer{ 175 defaultPolicy: defaultPolicy(), 176 } 177} 178func defaultPolicy() *bluemonday.Policy { 179 policy := bluemonday.UGCPolicy() 180 181 // Allow generally safe attributes 182 generalSafeAttrs := []string{ 183 "abbr", "accept", "accept-charset", 184 "accesskey", "action", "align", "alt", 185 "aria-describedby", "aria-hidden", "aria-label", "aria-labelledby", 186 "axis", "border", "cellpadding", "cellspacing", "char", 187 "charoff", "charset", "checked", 188 "clear", "cols", "colspan", "color", 189 "compact", "coords", "datetime", "dir", 190 "disabled", "enctype", "for", "frame", 191 "headers", "height", "hreflang", 192 "hspace", "ismap", "label", "lang", 193 "maxlength", "media", "method", 194 "multiple", "name", "nohref", "noshade", 195 "nowrap", "open", "prompt", "readonly", "rel", "rev", 196 "rows", "rowspan", "rules", 
"scope", 197 "selected", "shape", "size", "span", 198 "start", "summary", "tabindex", "target", 199 "title", "type", "usemap", "valign", "value", 200 "vspace", "width", "itemprop", 201 } 202 203 generalSafeElements := []string{ 204 "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt", 205 "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label", 206 "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary", 207 "details", "caption", "figure", "figcaption", 208 "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr", 209 } 210 211 policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) 212 213 // video 214 policy.AllowAttrs("src", "autoplay", "controls").OnElements("video") 215 216 // checkboxes 217 policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") 218 policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") 219 220 // for code blocks 221 policy.AllowAttrs("class").Matching(regexp.MustCompile(`chroma`)).OnElements("pre") 222 policy.AllowAttrs("class").Matching(regexp.MustCompile(strings.Join(slices.Collect(maps.Values(chroma.StandardTypes)), "|"))).OnElements("span") 223 224 // centering content 225 policy.AllowElements("center") 226 227 policy.AllowAttrs("align", "style", "width", "height").Globally() 228 policy.AllowStyles( 229 "margin", 230 "padding", 231 "text-align", 232 "font-weight", 233 "text-decoration", 234 "padding-left", 235 "padding-right", 236 "padding-top", 237 "padding-bottom", 238 "margin-left", 239 "margin-right", 240 "margin-top", 241 "margin-bottom", 242 ) 243 244 return policy 245} 246 247type MarkdownTransformer struct { 248 rctx *RenderContext 249} 250 251func (a *MarkdownTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { 252 _ = 
ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { 253 if !entering { 254 return ast.WalkContinue, nil 255 } 256 257 switch a.rctx.RendererType { 258 case RendererTypeRepoMarkdown: 259 switch n := n.(type) { 260 case *ast.Link: 261 a.rctx.relativeLinkTransformer(n) 262 case *ast.Image: 263 a.rctx.imageFromKnotAstTransformer(n) 264 a.rctx.camoImageLinkAstTransformer(n) 265 } 266 case RendererTypeDefault: 267 switch n := n.(type) { 268 case *ast.Image: 269 a.rctx.imageFromKnotAstTransformer(n) 270 a.rctx.camoImageLinkAstTransformer(n) 271 } 272 } 273 274 return ast.WalkContinue, nil 275 }) 276} 277 278func (rctx *RenderContext) relativeLinkTransformer(link *ast.Link) { 279 280 dst := string(link.Destination) 281 282 if isAbsoluteUrl(dst) { 283 return 284 } 285 286 actualPath := rctx.actualPath(dst) 287 288 newPath := path.Join("/", rctx.RepoInfo.FullName(), "tree", rctx.RepoInfo.Ref, actualPath) 289 link.Destination = []byte(newPath) 290} 291 292func (rctx *RenderContext) imageFromKnotTransformer(dst string) string { 293 if isAbsoluteUrl(dst) { 294 return dst 295 } 296 297 scheme := "https" 298 if rctx.IsDev { 299 scheme = "http" 300 } 301 302 actualPath := rctx.actualPath(dst) 303 304 parsedURL := &url.URL{ 305 Scheme: scheme, 306 Host: rctx.Knot, 307 Path: path.Join("/", 308 rctx.RepoInfo.OwnerDid, 309 rctx.RepoInfo.Name, 310 "raw", 311 url.PathEscape(rctx.RepoInfo.Ref), 312 actualPath), 313 } 314 newPath := parsedURL.String() 315 return newPath 316} 317 318func (rctx *RenderContext) imageFromKnotAstTransformer(img *ast.Image) { 319 dst := string(img.Destination) 320 img.Destination = []byte(rctx.imageFromKnotTransformer(dst)) 321} 322 323// actualPath decides when to join the file path with the 324// current repository directory (essentially only when the link 325// destination is relative. if it's absolute then we assume the 326// user knows what they're doing.) 
327func (rctx *RenderContext) actualPath(dst string) string { 328 if path.IsAbs(dst) { 329 return dst 330 } 331 332 return path.Join(rctx.CurrentDir, dst) 333} 334 335func isAbsoluteUrl(link string) bool { 336 parsed, err := url.Parse(link) 337 if err != nil { 338 return false 339 } 340 return parsed.IsAbs() 341}