Bluesky app fork with some witchin' additions 💫

Sitemap Handlers for bsky.app (#9525)

* implement sitemap handlers for users

* ensure compressed payload is passed through

* improved handling

* reverse header order

* add the sitemap to robots.txt

Authored by Jim Calabro and committed by GitHub
f7e61056 1fded4e9

+74
+72
bskyweb/cmd/bskyweb/server.go
··· 9 9 "encoding/json" 10 10 "errors" 11 11 "fmt" 12 + "io" 12 13 "io/fs" 14 + "log/slog" 13 15 "net/http" 14 16 "net/netip" 15 17 "net/url" ··· 40 42 cfg *Config 41 43 42 44 ipccClient http.Client 45 + 46 + // sitemapClient is used for fetching sitemaps from the appview. It has 47 + // DisableCompression set to true so that gzipped responses are passed 48 + // through without being decompressed. 49 + sitemapClient http.Client 43 50 } 44 51 45 52 type Config struct { ··· 114 121 TLSClientConfig: &tls.Config{ 115 122 InsecureSkipVerify: true, 116 123 }, 124 + }, 125 + }, 126 + sitemapClient: http.Client{ 127 + Transport: &http.Transport{ 128 + MaxIdleConns: 100, 129 + MaxIdleConnsPerHost: 10, 130 + IdleConnTimeout: 90 * time.Second, 131 + TLSHandshakeTimeout: 10 * time.Second, 132 + ForceAttemptHTTP2: true, 133 + DisableCompression: true, 117 134 }, 118 135 }, 119 136 } ··· 336 353 337 354 // ipcc 338 355 e.GET("/ipcc", server.WebIpCC) 356 + 357 + // sitemap handlers 358 + e.GET("/sitemap/users.xml.gz", server.handleSitemapUsersIndex) 359 + e.GET("/sitemap/users/*", server.handleSitemapUsersSubpage) 339 360 340 361 if linkHost != "" { 341 362 linkUrl, err := url.Parse(linkHost) ··· 753 774 } 754 775 return c.JSON(200, outResponse) 755 776 } 777 + 778 + func (srv *Server) handleSitemapUsersIndex(c echo.Context) error { 779 + url := fmt.Sprintf("%s/external/sitemap/users.xml.gz", srv.cfg.appviewHost) 780 + return srv.serveSitemapRequest(c, url, "user index") 781 + } 782 + 783 + func (srv *Server) handleSitemapUsersSubpage(c echo.Context) error { 784 + path := c.Param("*") 785 + url := fmt.Sprintf("%s/external/sitemap/users/%s", srv.cfg.appviewHost, path) 786 + return srv.serveSitemapRequest(c, url, "user subpage") 787 + } 788 + 789 + func (srv *Server) serveSitemapRequest(c echo.Context, url, sitemapType string) error { 790 + req, err := http.NewRequest(http.MethodGet, url, nil) 791 + if err != nil { 792 + slog.Error("failed to construct sitemap request", "err", err, 
"type", sitemapType) 793 + return c.String(http.StatusInternalServerError, "Internal Server Error") 794 + } 795 + 796 + resp, err := srv.sitemapClient.Do(req) 797 + if err != nil { 798 + slog.Error("failed to send sitemap request to appview", "err", err, "type", sitemapType) 799 + return c.String(http.StatusInternalServerError, "Internal Server Error") 800 + } 801 + defer resp.Body.Close() 802 + 803 + if resp.StatusCode != http.StatusOK { 804 + buf, err := io.ReadAll(resp.Body) 805 + if err != nil { 806 + slog.Error("failed to read sitemap error response body", "err", err) 807 + } 808 + 809 + slog.Error("invalid sitemap response code", 810 + "err", err, 811 + "type", sitemapType, 812 + "code", resp.StatusCode, 813 + "body", string(buf), 814 + ) 815 + return c.String(http.StatusInternalServerError, "Internal Server Error") 816 + } 817 + 818 + c.Response().Header().Set("Content-Type", "application/xml") 819 + c.Response().Header().Set("Content-Encoding", "gzip") 820 + c.Response().WriteHeader(resp.StatusCode) 821 + 822 + if _, err = io.Copy(c.Response().Writer, resp.Body); err != nil { 823 + slog.Error("failed to copy sitemap response body to client", "err", err, "type", sitemapType) 824 + } 825 + 826 + return nil 827 + }
+2
bskyweb/static/robots.txt
···
7 7 # be ok.
8 8 User-Agent: *
9 9 Allow: /
10 +
11 + Sitemap: https://bsky.app/sitemap/users.xml.gz