An ATproto social media client -- with an independent Appview.

bskyweb: add robots disallow and 'canonical' config flags (#4760)

authored by bnewbold.net and committed by GitHub — 462708e2 36a7a8d8

+36 -7
+14
bskyweb/cmd/bskyweb/main.go
··· 94 94 Value: "", 95 95 EnvVars: []string{"STATIC_CDN_HOST"}, 96 96 }, 97 + &cli.BoolFlag{ 98 + Name: "bsky-canonical-instance", 99 + Usage: "Enable if this is the canonical deployment (bsky.app)", 100 + Value: false, 101 + Required: false, 102 + EnvVars: []string{"BSKY_CANONICAL_INSTANCE"}, 103 + }, 104 + &cli.BoolFlag{ 105 + Name: "robots-disallow-all", 106 + Usage: "Crawling is allowed by default. Enable this flag to Disallow all", 107 + Value: false, 108 + Required: false, 109 + EnvVars: []string{"ROBOTS_DISALLOW_ALL"}, 110 + }, 97 111 }, 98 112 }, 99 113 }
+19 -7
bskyweb/cmd/bskyweb/server.go
··· 63 63 corsOrigins := cctx.StringSlice("cors-allowed-origins") 64 64 staticCDNHost := cctx.String("static-cdn-host") 65 65 staticCDNHost = strings.TrimSuffix(staticCDNHost, "/") 66 + canonicalInstance := cctx.Bool("bsky-canonical-instance") 67 + robotsDisallowAll := cctx.Bool("robots-disallow-all") 66 68 67 69 // Echo 68 70 e := echo.New() ··· 204 206 return http.FS(fsys) 205 207 }()) 206 208 207 - e.GET("/robots.txt", echo.WrapHandler(staticHandler)) 208 - e.GET("/ips-v4", echo.WrapHandler(staticHandler)) 209 - e.GET("/ips-v6", echo.WrapHandler(staticHandler)) 210 - e.GET("/.well-known/*", echo.WrapHandler(staticHandler)) 211 - e.GET("/security.txt", func(c echo.Context) error { 212 - return c.Redirect(http.StatusMovedPermanently, "/.well-known/security.txt") 213 - }) 209 + // enable some special endpoints for the "canonical" deployment (bsky.app). not having these enabled should *not* impact regular operation 210 + if canonicalInstance { 211 + e.GET("/ips-v4", echo.WrapHandler(staticHandler)) 212 + e.GET("/ips-v6", echo.WrapHandler(staticHandler)) 213 + e.GET("/security.txt", func(c echo.Context) error { 214 + return c.Redirect(http.StatusMovedPermanently, "/.well-known/security.txt") 215 + }) 216 + e.GET("/.well-known/*", echo.WrapHandler(staticHandler)) 217 + } 218 + 219 + // default to permissive, but Disallow all if flag set 220 + if robotsDisallowAll { 221 + e.File("/robots.txt", "static/robots-disallow-all.txt") 222 + } else { 223 + e.GET("/robots.txt", echo.WrapHandler(staticHandler)) 224 + } 225 + 214 226 e.GET("/iframe/youtube.html", echo.WrapHandler(staticHandler)) 215 227 e.GET("/static/*", echo.WrapHandler(http.StripPrefix("/static/", staticHandler)), func(next echo.HandlerFunc) echo.HandlerFunc { 216 228 return func(c echo.Context) error {
+3
bskyweb/static/robots-disallow-all.txt
··· 1 + # This is a development or self-hosted instance of the bsky web app, and crawling has been disallowed by the operator team. 2 + User-Agent: * 3 + Disallow: /