[mirror] Scalable static site server for Git forges (like GitHub Pages)
1package git_pages
2
3import (
4 "bytes"
5 "compress/gzip"
6 "context"
7 "encoding/json"
8 "errors"
9 "fmt"
10 "io"
11 "maps"
12 "net/http"
13 "net/url"
14 "os"
15 "path"
16 "slices"
17 "strconv"
18 "strings"
19 "time"
20
21 "github.com/klauspost/compress/zstd"
22 "github.com/pquerna/cachecontrol/cacheobject"
23 "github.com/prometheus/client_golang/prometheus"
24 "github.com/prometheus/client_golang/prometheus/promauto"
25 "google.golang.org/protobuf/proto"
26)
27
// notFoundPage is the site-relative path of the custom error page that is
// served (when present in the site manifest) for paths that do not exist.
const notFoundPage = "404.html"

var (
	// serveEncodingCount tracks how the stored blob transform (identity/zstd)
	// maps onto the Content-Encoding negotiated with each client, including
	// negotiation failures.
	serveEncodingCount = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "git_pages_serve_encoding_count",
		Help: "Count of blob transform vs negotiated encoding",
	}, []string{"transform", "negotiated"})

	// siteUpdatesCount counts every site update attempt, labeled by the API
	// that triggered it ("rest" or "webhook" — see observeSiteUpdate callers).
	siteUpdatesCount = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "git_pages_site_updates",
		Help: "Count of site updates in total",
	}, []string{"via"})
	// siteUpdateOkCount counts successful updates by outcome
	// (no-change, created, replaced, deleted).
	siteUpdateOkCount = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "git_pages_site_update_ok",
		Help: "Count of successful site updates",
	}, []string{"outcome"})
	// siteUpdateErrorCount counts failed updates by cause (timeout or other).
	siteUpdateErrorCount = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "git_pages_site_update_error",
		Help: "Count of failed site updates",
	}, []string{"cause"})
)
49
50func observeSiteUpdate(via string, result *UpdateResult) {
51 siteUpdatesCount.With(prometheus.Labels{"via": via}).Inc()
52 switch result.outcome {
53 case UpdateError:
54 siteUpdateErrorCount.With(prometheus.Labels{"cause": "other"}).Inc()
55 case UpdateTimeout:
56 siteUpdateErrorCount.With(prometheus.Labels{"cause": "timeout"}).Inc()
57 case UpdateNoChange:
58 siteUpdateOkCount.With(prometheus.Labels{"outcome": "no-change"}).Inc()
59 case UpdateCreated:
60 siteUpdateOkCount.With(prometheus.Labels{"outcome": "created"}).Inc()
61 case UpdateReplaced:
62 siteUpdateOkCount.With(prometheus.Labels{"outcome": "replaced"}).Inc()
63 case UpdateDeleted:
64 siteUpdateOkCount.With(prometheus.Labels{"outcome": "deleted"}).Inc()
65 }
66}
67
// makeWebRoot builds the storage key under which a site's manifest lives.
// The host is lowercased (host names are case-insensitive); the project name
// is joined as-is.
func makeWebRoot(host string, projectName string) string {
	lowered := strings.ToLower(host)
	return path.Join(lowered, projectName)
}
71
72func getWebRoot(r *http.Request) (string, error) {
73 host, err := GetHost(r)
74 if err != nil {
75 return "", err
76 }
77
78 projectName, err := GetProjectName(r)
79 if err != nil {
80 return "", err
81 }
82
83 return makeWebRoot(host, projectName), nil
84}
85
// writeRedirect emits a redirect response with the given status code and
// Location, plus a tiny human-readable body.
func writeRedirect(w http.ResponseWriter, code int, path string) {
	headers := w.Header()
	headers.Set("Location", path)
	w.WriteHeader(code)
	io.WriteString(w, "see "+path+"\n")
}
91
// The `klauspost/compress/zstd` package recommends reusing a decompressor to avoid repeated
// allocations of internal buffers. Created with a nil reader, it is usable via DecodeAll.
var zstdDecoder, _ = zstd.NewReader(nil)
95
96func getPage(w http.ResponseWriter, r *http.Request) error {
97 var err error
98 var sitePath string
99 var manifest *Manifest
100 var metadata ManifestMetadata
101
102 cacheControl, err := cacheobject.ParseRequestCacheControl(r.Header.Get("Cache-Control"))
103 if err != nil {
104 cacheControl = &cacheobject.RequestCacheDirectives{
105 MaxAge: -1,
106 MaxStale: -1,
107 MinFresh: -1,
108 }
109 }
110
111 bypassCache := cacheControl.NoCache || cacheControl.MaxAge == 0
112
113 host, err := GetHost(r)
114 if err != nil {
115 return err
116 }
117
118 type indexManifestResult struct {
119 manifest *Manifest
120 metadata ManifestMetadata
121 err error
122 }
123 indexManifestCh := make(chan indexManifestResult, 1)
124 go func() {
125 manifest, metadata, err := backend.GetManifest(
126 r.Context(), makeWebRoot(host, ".index"),
127 GetManifestOptions{BypassCache: bypassCache},
128 )
129 indexManifestCh <- (indexManifestResult{manifest, metadata, err})
130 }()
131
132 err = nil
133 sitePath = strings.TrimPrefix(r.URL.Path, "/")
134 if projectName, projectPath, hasProjectSlash := strings.Cut(sitePath, "/"); projectName != "" {
135 if IsValidProjectName(projectName) {
136 var projectManifest *Manifest
137 var projectMetadata ManifestMetadata
138 projectManifest, projectMetadata, err = backend.GetManifest(
139 r.Context(), makeWebRoot(host, projectName),
140 GetManifestOptions{BypassCache: bypassCache},
141 )
142 if err == nil {
143 if !hasProjectSlash {
144 writeRedirect(w, http.StatusFound, r.URL.Path+"/")
145 return nil
146 }
147 sitePath, manifest, metadata = projectPath, projectManifest, projectMetadata
148 }
149 }
150 }
151 if manifest == nil && (err == nil || errors.Is(err, ErrObjectNotFound)) {
152 result := <-indexManifestCh
153 manifest, metadata, err = result.manifest, result.metadata, result.err
154 if manifest == nil && errors.Is(err, ErrObjectNotFound) {
155 if fallback != nil {
156 logc.Printf(r.Context(), "fallback: %s via %s", host, config.Fallback.ProxyTo)
157 fallback.ServeHTTP(w, r)
158 return nil
159 } else {
160 w.WriteHeader(http.StatusNotFound)
161 fmt.Fprintf(w, "site not found\n")
162 return err
163 }
164 }
165 }
166 if err != nil {
167 ObserveError(err) // all storage errors must be reported
168 w.WriteHeader(http.StatusInternalServerError)
169 fmt.Fprintf(w, "internal server error (%s)\n", err)
170 return err
171 }
172
173 if r.Header.Get("Origin") != "" {
174 // allow JavaScript code to access responses (including errors) even across origins
175 w.Header().Set("Access-Control-Allow-Origin", "*")
176 }
177
178 if sitePath == ".git-pages" {
179 // metadata directory name shouldn't be served even if present in site manifest
180 w.WriteHeader(http.StatusNotFound)
181 fmt.Fprintf(w, "not found\n")
182 return nil
183 }
184 if metadataPath, found := strings.CutPrefix(sitePath, ".git-pages/"); found {
185 lastModified := metadata.LastModified.UTC().Format(http.TimeFormat)
186 switch {
187 case metadataPath == "health":
188 w.Header().Add("Last-Modified", lastModified)
189 w.Header().Add("ETag", fmt.Sprintf("\"%s\"", metadata.ETag))
190 w.WriteHeader(http.StatusOK)
191 fmt.Fprintf(w, "ok\n")
192 return nil
193
194 case metadataPath == "manifest.json":
195 // metadata requests require authorization to avoid making pushes from private
196 // repositories enumerable
197 _, err := AuthorizeMetadataRetrieval(r)
198 if err != nil {
199 return err
200 }
201
202 w.Header().Add("Content-Type", "application/json; charset=utf-8")
203 w.Header().Add("Last-Modified", lastModified)
204 w.Header().Add("ETag", fmt.Sprintf("\"%s-manifest\"", metadata.ETag))
205 w.WriteHeader(http.StatusOK)
206 w.Write(ManifestJSON(manifest))
207 return nil
208
209 case metadataPath == "archive.tar":
210 // same as above
211 _, err := AuthorizeMetadataRetrieval(r)
212 if err != nil {
213 return err
214 }
215
216 // we only offer `/.git-pages/archive.tar` and not the `.tar.gz`/`.tar.zst` variants
217 // because HTTP can already request compression using the `Content-Encoding` mechanism
218 acceptedEncodings := ParseAcceptEncodingHeader(r.Header.Get("Accept-Encoding"))
219 w.Header().Add("Vary", "Accept-Encoding")
220 negotiated := acceptedEncodings.Negotiate("zstd", "gzip", "identity")
221 if negotiated != "" {
222 w.Header().Set("Content-Encoding", negotiated)
223 }
224 w.Header().Add("Content-Type", "application/x-tar")
225 w.Header().Add("Last-Modified", lastModified)
226 w.Header().Add("ETag", fmt.Sprintf("\"%s-archive\"", metadata.ETag))
227 w.Header().Add("Transfer-Encoding", "chunked")
228 w.WriteHeader(http.StatusOK)
229 var iow io.Writer
230 switch negotiated {
231 case "", "identity":
232 iow = w
233 case "gzip":
234 iow = gzip.NewWriter(w)
235 case "zstd":
236 iow, _ = zstd.NewWriter(w)
237 }
238 return CollectTar(r.Context(), iow, manifest, metadata)
239
240 default:
241 w.WriteHeader(http.StatusNotFound)
242 fmt.Fprintf(w, "not found\n")
243 return nil
244 }
245 }
246
247 entryPath := sitePath
248 entry := (*Entry)(nil)
249 appliedRedirect := false
250 status := http.StatusOK
251 reader := io.ReadSeeker(nil)
252 mtime := time.Time{}
253 for {
254 entryPath, _ = strings.CutSuffix(entryPath, "/")
255 entryPath, err = ExpandSymlinks(manifest, entryPath)
256 if err != nil {
257 w.WriteHeader(http.StatusInternalServerError)
258 fmt.Fprintln(w, err)
259 return err
260 }
261 entry = manifest.Contents[entryPath]
262 if !appliedRedirect {
263 redirectKind := RedirectAny
264 if entry != nil && entry.GetType() != Type_InvalidEntry {
265 redirectKind = RedirectForce
266 }
267 originalURL := (&url.URL{Host: r.Host}).ResolveReference(r.URL)
268 _, redirectURL, redirectStatus := ApplyRedirectRules(manifest, originalURL, redirectKind)
269 if Is3xxHTTPStatus(redirectStatus) {
270 writeRedirect(w, redirectStatus, redirectURL.String())
271 return nil
272 } else if redirectURL != nil {
273 entryPath = strings.TrimPrefix(redirectURL.Path, "/")
274 status = int(redirectStatus)
275 // Apply user redirects at most once; if something ends in a loop, it should be
276 // the user agent, not the pages server.
277 appliedRedirect = true
278 continue
279 }
280 }
281 if entry == nil || entry.GetType() == Type_InvalidEntry {
282 status = http.StatusNotFound
283 if entryPath != notFoundPage {
284 entryPath = notFoundPage
285 continue
286 } else {
287 reader = bytes.NewReader([]byte("not found\n"))
288 break
289 }
290 } else if entry.GetType() == Type_InlineFile {
291 reader = bytes.NewReader(entry.Data)
292 } else if entry.GetType() == Type_ExternalFile {
293 etag := fmt.Sprintf(`"%s"`, entry.Data)
294 if r.Header.Get("If-None-Match") == etag {
295 w.WriteHeader(http.StatusNotModified)
296 return nil
297 } else {
298 var metadata BlobMetadata
299 reader, metadata, err = backend.GetBlob(r.Context(), string(entry.Data))
300 if err != nil {
301 ObserveError(err) // all storage errors must be reported
302 w.WriteHeader(http.StatusInternalServerError)
303 fmt.Fprintf(w, "internal server error: %s\n", err)
304 return err
305 }
306 mtime = metadata.LastModified
307 w.Header().Set("ETag", etag)
308 }
309 } else if entry.GetType() == Type_Directory {
310 if strings.HasSuffix(r.URL.Path, "/") {
311 entryPath = path.Join(entryPath, "index.html")
312 continue
313 } else {
314 // redirect from `dir` to `dir/`, otherwise when `dir/index.html` is served,
315 // links in it will have the wrong base URL
316 newPath := r.URL.Path + "/"
317 writeRedirect(w, http.StatusFound, newPath)
318 return nil
319 }
320 } else if entry.GetType() == Type_Symlink {
321 return fmt.Errorf("unexpected symlink")
322 }
323 break
324 }
325 if closer, ok := reader.(io.Closer); ok {
326 defer closer.Close()
327 }
328
329 var offeredEncodings []string
330 acceptedEncodings := ParseAcceptEncodingHeader(r.Header.Get("Accept-Encoding"))
331 w.Header().Add("Vary", "Accept-Encoding")
332 negotiatedEncoding := true
333 switch entry.GetTransform() {
334 case Transform_Identity:
335 offeredEncodings = []string{"identity"}
336 switch acceptedEncodings.Negotiate(offeredEncodings...) {
337 case "identity":
338 serveEncodingCount.
339 With(prometheus.Labels{"transform": "identity", "negotiated": "identity"}).
340 Inc()
341 default:
342 negotiatedEncoding = false
343 serveEncodingCount.
344 With(prometheus.Labels{"transform": "identity", "negotiated": "failure"}).
345 Inc()
346 }
347 case Transform_Zstd:
348 offeredEncodings = []string{"zstd", "identity"}
349 if entry.ContentType == nil {
350 // If Content-Type is unset, `http.ServeContent` will try to sniff
351 // the file contents. That won't work if it's compressed.
352 offeredEncodings = []string{"identity"}
353 }
354 switch acceptedEncodings.Negotiate(offeredEncodings...) {
355 case "zstd":
356 // Set Content-Length ourselves since `http.ServeContent` only sets
357 // it if Content-Encoding is unset or if it's a range request.
358 w.Header().Set("Content-Length", strconv.FormatInt(entry.GetCompressedSize(), 10))
359 w.Header().Set("Content-Encoding", "zstd")
360 serveEncodingCount.
361 With(prometheus.Labels{"transform": "zstd", "negotiated": "zstd"}).
362 Inc()
363 case "identity":
364 compressedData, _ := io.ReadAll(reader)
365 decompressedData, err := zstdDecoder.DecodeAll(compressedData, []byte{})
366 if err != nil {
367 w.WriteHeader(http.StatusInternalServerError)
368 fmt.Fprintf(w, "internal server error: %s\n", err)
369 return err
370 }
371 reader = bytes.NewReader(decompressedData)
372 serveEncodingCount.
373 With(prometheus.Labels{"transform": "zstd", "negotiated": "identity"}).
374 Inc()
375 default:
376 negotiatedEncoding = false
377 serveEncodingCount.
378 With(prometheus.Labels{"transform": "zstd", "negotiated": "failure"}).
379 Inc()
380 }
381 default:
382 return fmt.Errorf("unexpected transform")
383 }
384 if !negotiatedEncoding {
385 w.Header().Set("Accept-Encoding", strings.Join(offeredEncodings, ", "))
386 w.WriteHeader(http.StatusNotAcceptable)
387 return fmt.Errorf("no supported content encodings (Accept-Encoding: %s)",
388 r.Header.Get("Accept-Encoding"))
389 }
390
391 if entry != nil && entry.ContentType != nil {
392 w.Header().Set("X-Content-Type-Options", "nosniff")
393 w.Header().Set("Content-Type", *entry.ContentType)
394 }
395
396 customHeaders, err := ApplyHeaderRules(manifest, &url.URL{Path: entryPath})
397 if err != nil {
398 // This is an "internal server error" from an HTTP point of view, but also
399 // either an issue with the site or a misconfiguration from our point of view.
400 // Since it's not a problem with the server we don't observe the error.
401 //
402 // Note that this behavior is different from a site upload with a malformed
403 // `_headers` file (where it is semantically ignored); this is because a broken
404 // upload is something the uploader can notice and fix, but a change in server
405 // configuration is something they are unaware of and won't be notified of.
406 w.WriteHeader(http.StatusInternalServerError)
407 fmt.Fprintf(w, "%s\n", err)
408 return err
409 } else {
410 // If the header has passed all of our stringent, deny-by-default checks, it means
411 // it's good enough to overwrite whatever was our builtin option (if any).
412 maps.Copy(w.Header(), customHeaders)
413 }
414
415 // decide on the HTTP status
416 if status != 200 {
417 w.WriteHeader(status)
418 if reader != nil {
419 io.Copy(w, reader)
420 }
421 } else {
422 if _, hasCacheControl := w.Header()["Cache-Control"]; !hasCacheControl {
423 // consider content fresh for 60 seconds (the same as the freshness interval of
424 // manifests in the S3 backend), and use stale content anyway as long as it's not
425 // older than a hour; while it is cheap to handle If-Modified-Since queries
426 // server-side, on the client `max-age=0, must-revalidate` causes every resource
427 // to block the page load every time
428 w.Header().Set("Cache-Control", "max-age=60, stale-while-revalidate=3600")
429 // see https://web.dev/articles/stale-while-revalidate for details
430 }
431
432 // http.ServeContent handles conditional requests and range requests
433 http.ServeContent(w, r, entryPath, mtime, reader)
434 }
435 return nil
436}
437
// checkDryRun reports whether the request is a dry run. "Dry run" requests are
// used to non-destructively check if the request would have successfully been
// authorized; when one is detected, a confirmation is written to the response.
func checkDryRun(w http.ResponseWriter, r *http.Request) bool {
	if r.Header.Get("Dry-Run") == "" {
		return false
	}
	fmt.Fprintln(w, "dry-run ok")
	return true
}
447
448func putPage(w http.ResponseWriter, r *http.Request) error {
449 var result UpdateResult
450
451 for _, header := range []string{
452 "If-Modified-Since", "If-Unmodified-Since", "If-Match", "If-None-Match",
453 } {
454 if r.Header.Get(header) != "" {
455 http.Error(w, fmt.Sprintf("unsupported precondition %s", header), http.StatusBadRequest)
456 return nil
457 }
458 }
459
460 webRoot, err := getWebRoot(r)
461 if err != nil {
462 return err
463 }
464
465 ctx, cancel := context.WithTimeout(r.Context(), time.Duration(config.Limits.UpdateTimeout))
466 defer cancel()
467
468 contentType := getMediaType(r.Header.Get("Content-Type"))
469 switch contentType {
470 case "application/x-www-form-urlencoded":
471 auth, err := AuthorizeUpdateFromRepository(r)
472 if err != nil {
473 return err
474 }
475
476 // URLs have no length limit, but 64K seems enough for a repository URL
477 requestBody, err := io.ReadAll(http.MaxBytesReader(w, r.Body, 65536))
478 if err != nil {
479 return fmt.Errorf("body read: %w", err)
480 }
481
482 repoURL := string(requestBody)
483 if err := AuthorizeRepository(repoURL, auth); err != nil {
484 return err
485 }
486
487 branch := "pages"
488 if customBranch := r.Header.Get("Branch"); customBranch != "" {
489 branch = customBranch
490 }
491 if err := AuthorizeBranch(branch, auth); err != nil {
492 return err
493 }
494
495 if checkDryRun(w, r) {
496 return nil
497 }
498
499 result = UpdateFromRepository(ctx, webRoot, repoURL, branch)
500
501 default:
502 if auth, err := AuthorizeUpdateFromArchive(r); err != nil {
503 return err
504 } else if auth.forgeUser != nil {
505 GetPrincipal(r.Context()).ForgeUser = auth.forgeUser
506 }
507
508 if checkDryRun(w, r) {
509 return nil
510 }
511
512 // request body contains archive
513 reader := http.MaxBytesReader(w, r.Body, int64(config.Limits.MaxSiteSize.Bytes()))
514 result = UpdateFromArchive(ctx, webRoot, contentType, reader)
515 }
516
517 return reportUpdateResult(w, r, result)
518}
519
520func patchPage(w http.ResponseWriter, r *http.Request) error {
521 for _, header := range []string{
522 "If-Modified-Since", "If-Unmodified-Since", "If-Match", "If-None-Match",
523 } {
524 if r.Header.Get(header) != "" {
525 http.Error(w, fmt.Sprintf("unsupported precondition %s", header), http.StatusBadRequest)
526 return nil
527 }
528 }
529
530 webRoot, err := getWebRoot(r)
531 if err != nil {
532 return err
533 }
534
535 if auth, err := AuthorizeUpdateFromArchive(r); err != nil {
536 return err
537 } else if auth.forgeUser != nil {
538 GetPrincipal(r.Context()).ForgeUser = auth.forgeUser
539 }
540
541 if checkDryRun(w, r) {
542 return nil
543 }
544
545 // Providing atomic compare-and-swap operations might be difficult or impossible depending
546 // on the backend in use and its configuration, but for applications where a mostly-atomic
547 // compare-and-swap operation is good enough (e.g. generating page previews) we don't want
548 // to prevent the use of partial updates.
549 wantAtomicCAS := r.Header.Get("Atomic")
550 hasAtomicCAS := backend.HasAtomicCAS(r.Context())
551 switch {
552 case wantAtomicCAS == "yes" && hasAtomicCAS || wantAtomicCAS == "no":
553 // all good
554 case wantAtomicCAS == "yes":
555 http.Error(w, "atomic partial updates unsupported", http.StatusPreconditionFailed)
556 return nil
557 case wantAtomicCAS == "":
558 http.Error(w, "must provide \"Atomic: yes|no\" header", http.StatusPreconditionRequired)
559 return nil
560 default:
561 http.Error(w, "malformed Atomic: header", http.StatusBadRequest)
562 return nil
563 }
564
565 var parents CreateParentsMode
566 switch r.Header.Get("Create-Parents") {
567 case "", "no":
568 parents = RequireParents
569 case "yes":
570 parents = CreateParents
571 default:
572 http.Error(w, "malformed Create-Parents: header", http.StatusBadRequest)
573 return nil
574 }
575
576 ctx, cancel := context.WithTimeout(r.Context(), time.Duration(config.Limits.UpdateTimeout))
577 defer cancel()
578
579 contentType := getMediaType(r.Header.Get("Content-Type"))
580 reader := http.MaxBytesReader(w, r.Body, int64(config.Limits.MaxSiteSize.Bytes()))
581 result := PartialUpdateFromArchive(ctx, webRoot, contentType, reader, parents)
582 return reportUpdateResult(w, r, result)
583}
584
585func reportUpdateResult(w http.ResponseWriter, r *http.Request, result UpdateResult) error {
586 var unresolvedRefErr UnresolvedRefError
587 if result.outcome == UpdateError && errors.As(result.err, &unresolvedRefErr) {
588 offeredContentTypes := []string{"text/plain", "application/vnd.git-pages.unresolved"}
589 acceptedContentTypes := ParseAcceptHeader(r.Header.Get("Accept"))
590 switch acceptedContentTypes.Negotiate(offeredContentTypes...) {
591 default:
592 w.Header().Set("Accept", strings.Join(offeredContentTypes, ", "))
593 w.WriteHeader(http.StatusNotAcceptable)
594 return fmt.Errorf("no supported content types (Accept: %s)", r.Header.Get("Accept"))
595 case "application/vnd.git-pages.unresolved":
596 w.Header().Set("Content-Type", "application/vnd.git-pages.unresolved")
597 w.WriteHeader(http.StatusUnprocessableEntity)
598 for _, missingRef := range unresolvedRefErr.missing {
599 fmt.Fprintln(w, missingRef)
600 }
601 return nil
602 case "text/plain":
603 // handled below
604 }
605 }
606
607 switch result.outcome {
608 case UpdateError:
609 if errors.Is(result.err, ErrSiteTooLarge) {
610 w.WriteHeader(http.StatusUnprocessableEntity)
611 } else if errors.Is(result.err, ErrManifestTooLarge) {
612 w.WriteHeader(http.StatusUnprocessableEntity)
613 } else if errors.Is(result.err, errArchiveFormat) {
614 w.WriteHeader(http.StatusUnsupportedMediaType)
615 } else if errors.Is(result.err, ErrArchiveTooLarge) {
616 w.WriteHeader(http.StatusRequestEntityTooLarge)
617 } else if errors.Is(result.err, ErrRepositoryTooLarge) {
618 w.WriteHeader(http.StatusUnprocessableEntity)
619 } else if errors.Is(result.err, ErrMalformedPatch) {
620 w.WriteHeader(http.StatusUnprocessableEntity)
621 } else if errors.Is(result.err, ErrPreconditionFailed) {
622 w.WriteHeader(http.StatusPreconditionFailed)
623 } else if errors.Is(result.err, ErrWriteConflict) {
624 w.WriteHeader(http.StatusConflict)
625 } else if errors.Is(result.err, ErrDomainFrozen) {
626 w.WriteHeader(http.StatusForbidden)
627 } else if errors.As(result.err, &unresolvedRefErr) {
628 w.WriteHeader(http.StatusUnprocessableEntity)
629 } else {
630 w.WriteHeader(http.StatusServiceUnavailable)
631 }
632 case UpdateTimeout:
633 w.WriteHeader(http.StatusGatewayTimeout)
634 case UpdateNoChange:
635 w.Header().Add("Update-Result", "no-change")
636 case UpdateCreated:
637 w.Header().Add("Update-Result", "created")
638 case UpdateReplaced:
639 w.Header().Add("Update-Result", "replaced")
640 case UpdateDeleted:
641 w.Header().Add("Update-Result", "deleted")
642 }
643 if result.manifest != nil {
644 if result.manifest.Commit != nil {
645 fmt.Fprintln(w, *result.manifest.Commit)
646 } else {
647 fmt.Fprintln(w, "(archive)")
648 }
649 for _, problem := range GetProblemReport(result.manifest) {
650 fmt.Fprintln(w, problem)
651 }
652 } else if result.err != nil {
653 fmt.Fprintln(w, result.err)
654 } else {
655 fmt.Fprintln(w, "internal error")
656 }
657 observeSiteUpdate("rest", &result)
658 return nil
659}
660
661func deletePage(w http.ResponseWriter, r *http.Request) error {
662 webRoot, err := getWebRoot(r)
663 if err != nil {
664 return err
665 }
666
667 if auth, err := AuthorizeDeletion(r); err != nil {
668 return err
669 } else if auth.forgeUser != nil {
670 GetPrincipal(r.Context()).ForgeUser = auth.forgeUser
671 }
672
673 if checkDryRun(w, r) {
674 return nil
675 }
676
677 if err = backend.DeleteManifest(r.Context(), webRoot, ModifyManifestOptions{}); err != nil {
678 w.WriteHeader(http.StatusInternalServerError)
679 fmt.Fprintln(w, err)
680 } else {
681 w.Header().Add("Update-Result", "deleted")
682 w.WriteHeader(http.StatusOK)
683 }
684 return err
685}
686
687func postPage(w http.ResponseWriter, r *http.Request) error {
688 // The HTTP requests for webhook delivery usually have a short timeout. We start the timer
689 // before doing any time-consuming work so that it's closely aligned to the client's timeout and
690 // we can respond before the webhook delivery is considered failed.
691 requestTimeout := 3 * time.Second
692 requestTimer := time.NewTimer(requestTimeout)
693
694 webRoot, err := getWebRoot(r)
695 if err != nil {
696 return err
697 }
698
699 auth, err := AuthorizeUpdateFromRepository(r)
700 if err != nil {
701 return err
702 }
703
704 eventName := ""
705 for _, header := range []string{
706 "X-Forgejo-Event",
707 "X-GitHub-Event",
708 "X-Gitea-Event",
709 "X-Gogs-Event",
710 } {
711 eventName = r.Header.Get(header)
712 if eventName != "" {
713 break
714 }
715 }
716
717 if eventName == "" {
718 http.Error(w,
719 "expected a Forgejo, GitHub, Gitea, or Gogs webhook request", http.StatusBadRequest)
720 return fmt.Errorf("event expected")
721 }
722
723 if eventName != "push" {
724 http.Error(w, "only push events are allowed", http.StatusBadRequest)
725 return fmt.Errorf("invalid event")
726 }
727
728 if r.Header.Get("Content-Type") != "application/json" {
729 http.Error(w, "only JSON payload is allowed", http.StatusBadRequest)
730 return fmt.Errorf("invalid content type")
731 }
732
733 // Event payloads have no length limit, but events bigger than 16M seem excessive.
734 requestBody, err := io.ReadAll(http.MaxBytesReader(w, r.Body, 16*1048576))
735 if err != nil {
736 return fmt.Errorf("body read: %w", err)
737 }
738
739 var event struct {
740 Ref string `json:"ref"`
741 Repository struct {
742 CloneURL string `json:"clone_url"`
743 } `json:"repository"`
744 }
745 err = json.NewDecoder(bytes.NewReader(requestBody)).Decode(&event)
746 if err != nil {
747 http.Error(w, fmt.Sprintf("invalid request body: %s", err), http.StatusBadRequest)
748 return err
749 }
750
751 if event.Ref != path.Join("refs", "heads", auth.branch) {
752 code := http.StatusUnauthorized
753 if strings.Contains(r.Header.Get("User-Agent"), "GitHub-Hookshot") {
754 // GitHub has no way to restrict branches for a webhook, and responding with 401
755 // for every non-pages branch makes the "Recent Deliveries" tab look awful.
756 code = http.StatusOK
757 }
758 http.Error(w,
759 fmt.Sprintf("ref %s not in allowlist [refs/heads/%v]", event.Ref, auth.branch),
760 code)
761 return nil
762 }
763
764 repoURL := event.Repository.CloneURL
765 if err := AuthorizeRepository(repoURL, auth); err != nil {
766 return err
767 }
768
769 if checkDryRun(w, r) {
770 return nil
771 }
772
773 resultChan := make(chan UpdateResult)
774 go func(ctx context.Context) {
775 ctx, cancel := context.WithTimeout(ctx, time.Duration(config.Limits.UpdateTimeout))
776 defer cancel()
777
778 result := UpdateFromRepository(ctx, webRoot, repoURL, auth.branch)
779 resultChan <- result
780 observeSiteUpdate("webhook", &result)
781 }(context.WithoutCancel(r.Context()))
782
783 var result UpdateResult
784 select {
785 case result = <-resultChan:
786 case <-requestTimer.C:
787 w.WriteHeader(http.StatusAccepted)
788 fmt.Fprintf(w, "updating (taking longer than %s)", requestTimeout)
789 return nil
790 }
791
792 switch result.outcome {
793 case UpdateError:
794 w.WriteHeader(http.StatusServiceUnavailable)
795 fmt.Fprintf(w, "update error: %s\n", result.err)
796 case UpdateTimeout:
797 w.WriteHeader(http.StatusGatewayTimeout)
798 fmt.Fprintln(w, "update timeout")
799 case UpdateNoChange:
800 fmt.Fprintln(w, "unchanged")
801 case UpdateCreated:
802 fmt.Fprintln(w, "created")
803 case UpdateReplaced:
804 fmt.Fprintln(w, "replaced")
805 case UpdateDeleted:
806 fmt.Fprintln(w, "deleted")
807 }
808 if result.manifest != nil {
809 report := GetProblemReport(result.manifest)
810 if len(report) > 0 {
811 fmt.Fprintln(w, "problems:")
812 }
813 for _, problem := range report {
814 fmt.Fprintf(w, "- %s\n", problem)
815 }
816 }
817 return nil
818}
819
820func ServePages(w http.ResponseWriter, r *http.Request) {
821 r = r.WithContext(WithPrincipal(r.Context()))
822 if config.Audit.IncludeIPs != "" {
823 GetPrincipal(r.Context()).IpAddress = proto.String(r.RemoteAddr)
824 }
825 // We want upstream health checks to be done as closely to the normal flow as possible;
826 // any intentional deviation is an opportunity to miss an issue that will affect our
827 // visitors but not our health checks.
828 if r.Header.Get("Health-Check") == "" {
829 var mediaType string
830 switch r.Method {
831 case "HEAD", "GET":
832 mediaType = r.Header.Get("Accept")
833 default:
834 mediaType = r.Header.Get("Content-Type")
835 }
836 logc.Println(r.Context(), "pages:", r.Method, r.Host, r.URL, mediaType)
837 if region := os.Getenv("FLY_REGION"); region != "" {
838 machine_id := os.Getenv("FLY_MACHINE_ID")
839 w.Header().Add("Server", fmt.Sprintf("git-pages (fly.io; %s; %s)", region, machine_id))
840 ObserveData(r.Context(), "server.name", machine_id, "server.region", region)
841 } else if hostname, err := os.Hostname(); err == nil {
842 if region := os.Getenv("PAGES_REGION"); region != "" {
843 w.Header().Add("Server", fmt.Sprintf("git-pages (%s; %s)", region, hostname))
844 ObserveData(r.Context(), "server.name", hostname, "server.region", region)
845 } else {
846 w.Header().Add("Server", fmt.Sprintf("git-pages (%s)", hostname))
847 ObserveData(r.Context(), "server.name", hostname)
848 }
849 }
850 }
851 allowedMethods := []string{"OPTIONS", "HEAD", "GET", "PUT", "PATCH", "DELETE", "POST"}
852 if r.Method == "OPTIONS" || !slices.Contains(allowedMethods, r.Method) {
853 w.Header().Add("Allow", strings.Join(allowedMethods, ", "))
854 }
855 err := error(nil)
856 switch r.Method {
857 // REST API
858 case "OPTIONS":
859 // no preflight options
860 case "HEAD", "GET":
861 err = getPage(w, r)
862 case "PUT":
863 err = putPage(w, r)
864 case "PATCH":
865 err = patchPage(w, r)
866 case "DELETE":
867 err = deletePage(w, r)
868 // webhook API
869 case "POST":
870 err = postPage(w, r)
871 default:
872 http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
873 err = fmt.Errorf("method %s not allowed", r.Method)
874 }
875 if err != nil {
876 var authErr AuthError
877 if errors.As(err, &authErr) {
878 http.Error(w, prettyErrMsg(err), authErr.code)
879 }
880 var tooLargeErr *http.MaxBytesError
881 if errors.As(err, &tooLargeErr) {
882 message := "request body too large"
883 http.Error(w, message, http.StatusRequestEntityTooLarge)
884 err = errors.New(message)
885 }
886 logc.Println(r.Context(), "pages err:", err)
887 }
888}