Monorepo for Tangled
1// Package markup is an umbrella package for all markups and their renderers.
2package markup
3
4import (
5 "bytes"
6 "fmt"
7 "io"
8 "io/fs"
9 "net/url"
10 "path"
11 "strings"
12
13 chromahtml "github.com/alecthomas/chroma/v2/formatters/html"
14 "github.com/alecthomas/chroma/v2/styles"
15 "github.com/yuin/goldmark"
16 "github.com/yuin/goldmark-emoji"
17 highlighting "github.com/yuin/goldmark-highlighting/v2"
18 "github.com/yuin/goldmark/ast"
19 "github.com/yuin/goldmark/extension"
20 "github.com/yuin/goldmark/parser"
21 "github.com/yuin/goldmark/renderer/html"
22 "github.com/yuin/goldmark/text"
23 "github.com/yuin/goldmark/util"
24 callout "gitlab.com/staticnoise/goldmark-callout"
25 htmlparse "golang.org/x/net/html"
26
27 "tangled.org/core/api/tangled"
28 textension "tangled.org/core/appview/pages/markup/extension"
29 "tangled.org/core/appview/pages/repoinfo"
30)
31
// RendererType selects which set of markdown transformations is applied,
// depending on where the rendered document is going to be shown.
type RendererType int

const (
	// RendererTypeRepoMarkdown is used for markdown documentation files that
	// live inside a repository. It is the zero value of RendererType.
	RendererTypeRepoMarkdown RendererType = iota
	// RendererTypeDefault is used for markdown outside of a repository tree,
	// such as issues, pulls and comments.
	RendererTypeDefault
)
41
42// RenderContext holds the contextual data for rendering markdown.
43// It can be initialized empty, and that'll skip any transformations.
44type RenderContext struct {
45 CamoUrl string
46 CamoSecret string
47 repoinfo.RepoInfo
48 IsDev bool
49 Hostname string
50 RendererType RendererType
51 Sanitizer Sanitizer
52 Files fs.FS
53}
54
55func NewMarkdown(hostname string) goldmark.Markdown {
56 md := goldmark.New(
57 goldmark.WithExtensions(
58 extension.GFM,
59 highlighting.NewHighlighting(
60 highlighting.WithFormatOptions(
61 chromahtml.Standalone(false),
62 chromahtml.WithClasses(true),
63 ),
64 highlighting.WithCustomStyle(styles.Get("catppuccin-latte")),
65 ),
66 extension.NewFootnote(
67 extension.WithFootnoteIDPrefix([]byte("footnote")),
68 ),
69 callout.CalloutExtention,
70 textension.AtExt,
71 textension.NewTangledLinkExt(hostname),
72 emoji.Emoji,
73 ),
74 goldmark.WithParserOptions(
75 parser.WithAutoHeadingID(),
76 ),
77 goldmark.WithRendererOptions(html.WithUnsafe()),
78 )
79 return md
80}
81
82func (rctx *RenderContext) RenderMarkdown(source string) string {
83 return rctx.RenderMarkdownWith(source, NewMarkdown(rctx.Hostname))
84}
85
86func (rctx *RenderContext) RenderMarkdownWith(source string, md goldmark.Markdown) string {
87 if rctx != nil {
88 var transformers []util.PrioritizedValue
89
90 transformers = append(transformers, util.Prioritized(&MarkdownTransformer{rctx: rctx}, 10000))
91
92 md.Parser().AddOptions(
93 parser.WithASTTransformers(transformers...),
94 )
95 }
96
97 var buf bytes.Buffer
98 if err := md.Convert([]byte(source), &buf); err != nil {
99 return source
100 }
101
102 var processed strings.Builder
103 if err := postProcess(rctx, strings.NewReader(buf.String()), &processed); err != nil {
104 return source
105 }
106
107 return processed.String()
108}
109
110func postProcess(ctx *RenderContext, input io.Reader, output io.Writer) error {
111 node, err := htmlparse.Parse(io.MultiReader(
112 strings.NewReader("<html><body>"),
113 input,
114 strings.NewReader("</body></html>"),
115 ))
116 if err != nil {
117 return fmt.Errorf("failed to parse html: %w", err)
118 }
119
120 if node.Type == htmlparse.DocumentNode {
121 node = node.FirstChild
122 }
123
124 visitNode(ctx, node)
125
126 newNodes := make([]*htmlparse.Node, 0, 5)
127
128 if node.Data == "html" {
129 node = node.FirstChild
130 for node != nil && node.Data != "body" {
131 node = node.NextSibling
132 }
133 }
134 if node != nil {
135 if node.Data == "body" {
136 child := node.FirstChild
137 for child != nil {
138 newNodes = append(newNodes, child)
139 child = child.NextSibling
140 }
141 } else {
142 newNodes = append(newNodes, node)
143 }
144 }
145
146 for _, node := range newNodes {
147 if err := htmlparse.Render(output, node); err != nil {
148 return fmt.Errorf("failed to render processed html: %w", err)
149 }
150 }
151
152 return nil
153}
154
155func visitNode(ctx *RenderContext, node *htmlparse.Node) {
156 switch node.Type {
157 case htmlparse.ElementNode:
158 switch node.Data {
159 case "img", "source":
160 for i, attr := range node.Attr {
161 if attr.Key != "src" {
162 continue
163 }
164
165 camoUrl, _ := url.Parse(ctx.CamoUrl)
166 dstUrl, _ := url.Parse(attr.Val)
167 if dstUrl.Host != camoUrl.Host {
168 attr.Val = ctx.imageFromKnotTransformer(attr.Val)
169 attr.Val = ctx.camoImageLinkTransformer(attr.Val)
170 node.Attr[i] = attr
171 }
172 }
173 }
174
175 for n := node.FirstChild; n != nil; n = n.NextSibling {
176 visitNode(ctx, n)
177 }
178 default:
179 }
180}
181
182func (rctx *RenderContext) SanitizeDefault(html string) string {
183 return rctx.Sanitizer.SanitizeDefault(html)
184}
185
186func (rctx *RenderContext) SanitizeDescription(html string) string {
187 return rctx.Sanitizer.SanitizeDescription(html)
188}
189
190type MarkdownTransformer struct {
191 rctx *RenderContext
192}
193
194func (a *MarkdownTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
195 _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
196 if !entering {
197 return ast.WalkContinue, nil
198 }
199
200 switch a.rctx.RendererType {
201 case RendererTypeRepoMarkdown:
202 switch n := n.(type) {
203 case *ast.Heading:
204 a.rctx.anchorHeadingTransformer(n)
205 case *ast.Link:
206 a.rctx.relativeLinkTransformer(n)
207 case *ast.Image:
208 a.rctx.imageFromKnotAstTransformer(n)
209 a.rctx.camoImageLinkAstTransformer(n)
210 }
211 case RendererTypeDefault:
212 switch n := n.(type) {
213 case *ast.Heading:
214 a.rctx.anchorHeadingTransformer(n)
215 case *ast.Image:
216 a.rctx.imageFromKnotAstTransformer(n)
217 a.rctx.camoImageLinkAstTransformer(n)
218 }
219 }
220
221 return ast.WalkContinue, nil
222 })
223}
224
225func (rctx *RenderContext) relativeLinkTransformer(link *ast.Link) {
226
227 dst := string(link.Destination)
228
229 if isAbsoluteUrl(dst) || isFragment(dst) || isMail(dst) {
230 return
231 }
232
233 actualPath := rctx.actualPath(dst)
234
235 newPath := path.Join("/", rctx.RepoInfo.FullName(), "tree", rctx.RepoInfo.Ref, actualPath)
236 link.Destination = []byte(newPath)
237}
238
239func (rctx *RenderContext) imageFromKnotTransformer(dst string) string {
240 if isAbsoluteUrl(dst) {
241 return dst
242 }
243
244 scheme := "https"
245 if rctx.IsDev {
246 scheme = "http"
247 }
248
249 actualPath := rctx.actualPath(dst)
250
251 repoName := fmt.Sprintf("%s/%s", rctx.RepoInfo.OwnerDid, rctx.RepoInfo.Name)
252
253 query := fmt.Sprintf("repo=%s&ref=%s&path=%s&raw=true",
254 url.QueryEscape(repoName), url.QueryEscape(rctx.RepoInfo.Ref), actualPath)
255
256 parsedURL := &url.URL{
257 Scheme: scheme,
258 Host: rctx.Knot,
259 Path: path.Join("/xrpc", tangled.RepoBlobNSID),
260 RawQuery: query,
261 }
262 newPath := parsedURL.String()
263 return newPath
264}
265
266func (rctx *RenderContext) imageFromKnotAstTransformer(img *ast.Image) {
267 dst := string(img.Destination)
268 img.Destination = []byte(rctx.imageFromKnotTransformer(dst))
269}
270
271func (rctx *RenderContext) anchorHeadingTransformer(h *ast.Heading) {
272 idGeneric, exists := h.AttributeString("id")
273 if !exists {
274 return // no id, nothing to do
275 }
276 id, ok := idGeneric.([]byte)
277 if !ok {
278 return
279 }
280
281 // create anchor link
282 anchor := ast.NewLink()
283 anchor.Destination = fmt.Appendf(nil, "#%s", string(id))
284 anchor.SetAttribute([]byte("class"), []byte("anchor"))
285
286 // create icon text
287 iconText := ast.NewString([]byte("#"))
288 anchor.AppendChild(anchor, iconText)
289
290 // set class on heading
291 h.SetAttribute([]byte("class"), []byte("heading"))
292
293 // append anchor to heading
294 h.AppendChild(h, anchor)
295}
296
297// actualPath decides when to join the file path with the
298// current repository directory (essentially only when the link
299// destination is relative. if it's absolute then we assume the
300// user knows what they're doing.)
301func (rctx *RenderContext) actualPath(dst string) string {
302 if path.IsAbs(dst) {
303 return dst
304 }
305
306 return path.Join(rctx.CurrentDir, dst)
307}
308
// isAbsoluteUrl reports whether link parses as a URL that has a scheme.
// A link that fails to parse is treated as not absolute.
func isAbsoluteUrl(link string) bool {
	if u, err := url.Parse(link); err == nil {
		return u.IsAbs()
	}
	return false
}
316
// isFragment reports whether link is a fragment-only reference, i.e. it
// starts with "#".
func isFragment(link string) bool {
	const marker = "#"
	return strings.HasPrefix(link, marker)
}
320
// isMail reports whether link starts with the literal, lower-case prefix
// "mailto:".
func isMail(link string) bool {
	const scheme = "mailto:"
	return strings.HasPrefix(link, scheme)
}