···9494 Value: "",
9595 EnvVars: []string{"STATIC_CDN_HOST"},
9696 },
9797+ &cli.BoolFlag{
9898+ Name: "bsky-canonical-instance",
9999+ Usage: "Enable if this is the canonical deployment (bsky.app)",
100100+ Value: false,
101101+ Required: false,
102102+ EnvVars: []string{"BSKY_CANONICAL_INSTANCE"},
103103+ },
104104+ &cli.BoolFlag{
105105+ Name: "robots-disallow-all",
106106+ Usage: "Crawling is allowed by default. Enable this flag to Disallow all",
107107+ Value: false,
108108+ Required: false,
109109+ EnvVars: []string{"ROBOTS_DISALLOW_ALL"},
110110+ },
97111 },
98112 },
99113 }
+19-7
bskyweb/cmd/bskyweb/server.go
···6363 corsOrigins := cctx.StringSlice("cors-allowed-origins")
6464 staticCDNHost := cctx.String("static-cdn-host")
6565 staticCDNHost = strings.TrimSuffix(staticCDNHost, "/")
6666+ canonicalInstance := cctx.Bool("bsky-canonical-instance")
6767+ robotsDisallowAll := cctx.Bool("robots-disallow-all")
66686769 // Echo
6870 e := echo.New()
···204206 return http.FS(fsys)
205207 }())
206208207207- e.GET("/robots.txt", echo.WrapHandler(staticHandler))
208208- e.GET("/ips-v4", echo.WrapHandler(staticHandler))
209209- e.GET("/ips-v6", echo.WrapHandler(staticHandler))
210210- e.GET("/.well-known/*", echo.WrapHandler(staticHandler))
211211- e.GET("/security.txt", func(c echo.Context) error {
212212- return c.Redirect(http.StatusMovedPermanently, "/.well-known/security.txt")
213213- })
209209+ // enable some special endpoints for the "canonical" deployment (bsky.app). not having these enabled should *not* impact regular operation
210210+ if canonicalInstance {
211211+ e.GET("/ips-v4", echo.WrapHandler(staticHandler))
212212+ e.GET("/ips-v6", echo.WrapHandler(staticHandler))
213213+ e.GET("/security.txt", func(c echo.Context) error {
214214+ return c.Redirect(http.StatusMovedPermanently, "/.well-known/security.txt")
215215+ })
216216+ e.GET("/.well-known/*", echo.WrapHandler(staticHandler))
217217+ }
218218+219219+ // default to permissive, but Disallow all if flag set
220220+ if robotsDisallowAll {
221221+ e.File("/robots.txt", "static/robots-disallow-all.txt")
222222+ } else {
223223+ e.GET("/robots.txt", echo.WrapHandler(staticHandler))
224224+ }
225225+214226 e.GET("/iframe/youtube.html", echo.WrapHandler(staticHandler))
215227 e.GET("/static/*", echo.WrapHandler(http.StripPrefix("/static/", staticHandler)), func(next echo.HandlerFunc) echo.HandlerFunc {
216228 return func(c echo.Context) error {
+3
bskyweb/static/robots-disallow-all.txt
···11+# This is a development or self-hosted instance of the bsky web app, and crawling has been disallowed by the operator team.
22+User-Agent: *
33+Disallow: /