go scratch code for atproto

copy lexidex code from unmerged indigo branch

+2029
+39
cmd/lexidex/README.md
··· 1 + 2 + lexidex: experimental atproto Lexicon index 3 + =========================================== 4 + 5 + ⚠️ This is a fun little proof-of-concept ⚠️ 6 + 7 + 8 + ## Run It 9 + 10 + The recommended way to run `lexidex` is behind a `caddy` HTTPS server which does automatic on-demand SSL certificate registration (using Let's Encrypt). 11 + 12 + Build and run `lexidex`: 13 + 14 + go build ./cmd/lexidex 15 + 16 + # will listen on :8500 by default 17 + ./lexidex serve 18 + 19 + Create a `Caddyfile`: 20 + 21 + ``` 22 + { 23 + on_demand_tls { 24 + interval 1h 25 + burst 8 26 + } 27 + } 28 + 29 + :443 { 30 + reverse_proxy localhost:8500 31 + tls YOUREMAIL@example.com { 32 + on_demand 33 + } 34 + } 35 + ``` 36 + 37 + Run `caddy`: 38 + 39 + caddy run
+55
cmd/lexidex/consumer.go
··· 1 + package main 2 + 3 + import ( 4 + "context" 5 + "fmt" 6 + "log/slog" 7 + 8 + "github.com/bluesky-social/indigo/atproto/syntax" 9 + 10 + "github.com/bluesky-social/jetstream/pkg/client" 11 + "github.com/bluesky-social/jetstream/pkg/client/schedulers/sequential" 12 + "github.com/bluesky-social/jetstream/pkg/models" 13 + ) 14 + 15 + func (srv *WebServer) processJetstreamEvent(ctx context.Context, event *models.Event) error { 16 + if event.Commit != nil { 17 + if event.Commit.Collection != "com.atproto.lexicon.schema" { 18 + return nil 19 + } 20 + slog.Info("jetstream event", "did", event.Did, "collection", event.Commit.Collection, "rkey", event.Commit.RKey, "rev", event.Commit.Rev) 21 + nsid, err := syntax.ParseNSID(event.Commit.RKey) 22 + if err != nil { 23 + return fmt.Errorf("invalid NSID in lexicon record: %s", event.Commit.RKey) 24 + } 25 + if err := CrawlLexicon(ctx, srv.db, nsid, "firehose"); err != nil { 26 + slog.Error("failed to crawl lexicon", "nsid", nsid, "reason", "firehose") 27 + } 28 + } 29 + return nil 30 + } 31 + 32 + func (srv *WebServer) RunConsumer() error { 33 + 34 + logger := slog.Default() 35 + 36 + cfg := client.DefaultClientConfig() 37 + cfg.Compress = true 38 + cfg.WebsocketURL = srv.jetstreamHost 39 + cfg.WantedCollections = []string{"com.atproto.lexicon.schema"} 40 + 41 + sched := sequential.NewScheduler("lexidex", logger, srv.processJetstreamEvent) 42 + 43 + jc, err := client.NewClient(cfg, logger, sched) 44 + if err != nil { 45 + return err 46 + } 47 + 48 + var cursor *int64 49 + go func() { 50 + ctx := context.Background() 51 + logger.Info("starting jetstream consumer", "cursor", cursor) 52 + jc.ConnectAndRead(ctx, cursor) 53 + }() 54 + return nil 55 + }
+138
cmd/lexidex/crawl.go
··· 1 + package main 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "errors" 7 + "fmt" 8 + 9 + "github.com/bluesky-social/indigo/api/agnostic" 10 + comatproto "github.com/bluesky-social/indigo/api/atproto" 11 + "github.com/bluesky-social/indigo/atproto/identity" 12 + "github.com/bluesky-social/indigo/atproto/lexicon" 13 + "github.com/bluesky-social/indigo/atproto/syntax" 14 + "github.com/bluesky-social/indigo/xrpc" 15 + 16 + "gorm.io/gorm" 17 + "gorm.io/gorm/clause" 18 + ) 19 + 20 + func CrawlLexicon(ctx context.Context, db *gorm.DB, nsid syntax.NSID, reason string) error { 21 + 22 + // TODO: inject directory 23 + dir := identity.BaseDirectory{} 24 + 25 + domain, err := extractDomain(nsid) 26 + if err != nil { 27 + return fmt.Errorf("extracting domain for NSID: %w", err) 28 + } 29 + group, err := extractGroup(nsid) 30 + if err != nil { 31 + return fmt.Errorf("extracting group for NSID: %w", err) 32 + } 33 + 34 + tx := db.WithContext(ctx) 35 + crawl := &Crawl{ 36 + NSID: nsid, 37 + Reason: reason, 38 + } 39 + 40 + // check that domain isn't blocked 41 + var dom Domain 42 + err = tx.Limit(1).Find(&dom, "domain = ?", domain).Error 43 + if err != nil && errors.Is(err, gorm.ErrRecordNotFound) { 44 + // pass 45 + } else if err != nil { 46 + return err 47 + } 48 + if dom.Disabled { 49 + return fmt.Errorf("not crawling disabled domain: %s", nsid) 50 + } 51 + 52 + // resolve 53 + did, err := dir.ResolveNSID(ctx, nsid) 54 + if err != nil { 55 + crawl.Status = "error-resolve-nsid" 56 + tx.Create(crawl) 57 + return err 58 + } 59 + crawl.DID = did 60 + // TODO: normalize DID? 
61 + ident, err := dir.LookupDID(ctx, did) 62 + if err != nil { 63 + crawl.Status = "error-did" 64 + tx.Create(crawl) 65 + return err 66 + } 67 + // TODO: "proof chain" 68 + xrpcc := &xrpc.Client{ 69 + Host: ident.PDSEndpoint(), 70 + } 71 + resp, err := agnostic.RepoGetRecord(ctx, xrpcc, "", "com.atproto.lexicon.schema", ident.DID.String(), nsid.String()) 72 + if err != nil { 73 + crawl.Status = "error-repo-fetch" 74 + tx.Create(crawl) 75 + return err 76 + } 77 + if nil == resp.Value { 78 + crawl.Status = "empty-record" 79 + tx.Create(crawl) 80 + return fmt.Errorf("empty record in response") 81 + } 82 + cid, err := syntax.ParseCID(*resp.Cid) 83 + if err != nil { 84 + crawl.Status = "bad-record-cid" 85 + tx.Create(crawl) 86 + return err 87 + } 88 + crawl.RecordCID = cid 89 + 90 + // verify schema 91 + var sf lexicon.SchemaFile 92 + if err := json.Unmarshal(*resp.Value, &sf); err != nil { 93 + return fmt.Errorf("fetched Lexicon schema record was invalid: %w", err) 94 + } 95 + // TODO: check that NSID matches record field 96 + cat := lexicon.NewBaseCatalog() 97 + if err := cat.AddSchemaFile(sf); err != nil { 98 + crawl.Status = "bad-schema-check" 99 + tx.Create(crawl) 100 + return fmt.Errorf("lexicon format was invalid: %w", err) 101 + } 102 + 103 + latest, err := comatproto.SyncGetLatestCommit(ctx, xrpcc, did.String()) 104 + if err != nil { 105 + return err 106 + } 107 + crawl.RepoRev = latest.Rev 108 + 109 + if dom.Domain != domain { 110 + dom = Domain{Domain: domain} 111 + tx.Create(&dom) 112 + } 113 + 114 + crawl.Status = "success" 115 + tx.Create(crawl) 116 + 117 + version := &Version{ 118 + RecordCID: cid, 119 + NSID: nsid, 120 + Record: *resp.Value, 121 + } 122 + tx.Clauses(clause.OnConflict{DoNothing: true}).Create(version) 123 + 124 + lex := &Lexicon{ 125 + NSID: nsid, 126 + Domain: domain, 127 + Group: group, 128 + Latest: cid, 129 + } 130 + res := tx.Clauses(clause.OnConflict{ 131 + Columns: []clause.Column{{Name: "nsid"}}, 132 + DoUpdates: 
clause.AssignmentColumns([]string{"latest"}), 133 + }).Create(lex) 134 + if res.Error != nil { 135 + return fmt.Errorf("error saving crawl to database: %w", res.Error) 136 + } 137 + return nil 138 + }
+70
cmd/lexidex/example.lexicon.query.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "example.lexicon.query", 4 + "revision": 1, 5 + "description": "exercises many lexicon features for the query type", 6 + "defs": { 7 + "main": { 8 + "type": "query", 9 + "description": "a query type", 10 + "parameters": { 11 + "type": "params", 12 + "description": "a params type", 13 + "required": ["string"], 14 + "properties": { 15 + "boolean": { 16 + "type": "boolean", 17 + "description": "field of type boolean" 18 + }, 19 + "integer": { 20 + "type": "integer", 21 + "description": "field of type integer" 22 + }, 23 + "string": { 24 + "type": "string", 25 + "description": "field of type string" 26 + }, 27 + "handle": { 28 + "type": "string", 29 + "format": "handle", 30 + "description": "field of type string, format handle" 31 + }, 32 + "unknown": { 33 + "type": "unknown", 34 + "description": "field of type unknown" 35 + }, 36 + "array": { 37 + "type": "array", 38 + "description": "field of type array", 39 + "items": { "type": "integer" } 40 + } 41 + } 42 + }, 43 + "output": { 44 + "description": "output body type", 45 + "encoding": "application/json", 46 + "schema": { 47 + "type": "object", 48 + "properties": { 49 + "a": { 50 + "type": "integer" 51 + }, 52 + "b": { 53 + "type": "integer" 54 + } 55 + } 56 + } 57 + }, 58 + "errors": [ 59 + { 60 + "name": "DemoError", 61 + "description": "demo error value" 62 + }, 63 + { 64 + "name": "AnotherDemoError", 65 + "description": "another demo error value" 66 + } 67 + ] 68 + } 69 + } 70 + }
+234
cmd/lexidex/example.lexicon.record.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "example.lexicon.record", 4 + "revision": 1, 5 + "description": "exercises many lexicon features for the record type", 6 + "defs": { 7 + "main": { 8 + "type": "record", 9 + "key": "literal:demo", 10 + "description": "a record type with many fields", 11 + "record": { 12 + "required": [ "integer" ], 13 + "nullable": [ "nullableString" ], 14 + "properties": { 15 + "null": { 16 + "type": "null", 17 + "description": "field of type null" 18 + }, 19 + "boolean": { 20 + "type": "boolean", 21 + "description": "field of type boolean" 22 + }, 23 + "integer": { 24 + "type": "integer", 25 + "description": "field of type integer" 26 + }, 27 + "string": { 28 + "type": "string", 29 + "description": "field of type string" 30 + }, 31 + "nullableString": { 32 + "type": "string", 33 + "description": "field of type string; value is nullable" 34 + }, 35 + "bytes": { 36 + "type": "bytes", 37 + "description": "field of type bytes" 38 + }, 39 + "cid-link": { 40 + "type": "cid-link", 41 + "description": "field of type cid-link" 42 + }, 43 + "blob": { 44 + "type": "blob", 45 + "description": "field of type blob" 46 + }, 47 + "unknown": { 48 + "type": "unknown", 49 + "description": "field of type unknown" 50 + }, 51 + "array": { 52 + "type": "array", 53 + "description": "field of type array", 54 + "items": { "type": "integer" } 55 + }, 56 + "object": { 57 + "type": "object", 58 + "description": "field of type object", 59 + "properties": { 60 + "a": { "type": "integer" }, 61 + "b": { "type": "integer" } 62 + } 63 + }, 64 + "ref": { 65 + "type": "ref", 66 + "description": "field of type ref", 67 + "ref": "example.lexicon.record#demoToken" 68 + }, 69 + "union": { 70 + "type": "union", 71 + "refs": [ 72 + "example.lexicon.record#demoObject", 73 + "example.lexicon.record#demoObjectTwo" 74 + ] 75 + }, 76 + "formats": { 77 + "type": "ref", 78 + "ref": "example.lexicon.record#stringFormats" 79 + }, 80 + "constInteger": { 81 + "type": "integer", 82 + "const": 42 83 
+ }, 84 + "defaultInteger": { 85 + "type": "integer", 86 + "default": 42 87 + }, 88 + "enumInteger": { 89 + "type": "integer", 90 + "enum": [4, 9, 16, 25] 91 + }, 92 + "rangeInteger": { 93 + "type": "integer", 94 + "minimum": 10, 95 + "maximum": 20 96 + }, 97 + "lenString": { 98 + "type": "string", 99 + "minLength": 10, 100 + "maxLength": 20 101 + }, 102 + "graphemeString": { 103 + "type": "string", 104 + "minGraphemes": 10, 105 + "maxGraphemes": 20 106 + }, 107 + "enumString": { 108 + "type": "string", 109 + "enum": ["fish", "tree", "rock"] 110 + }, 111 + "knownString": { 112 + "type": "string", 113 + "knownValues": ["blue", "green", "red"] 114 + }, 115 + "sizeBytes": { 116 + "type": "bytes", 117 + "minLength": 10, 118 + "maxLength": 20 119 + }, 120 + "lenArray": { 121 + "type": "array", 122 + "items": { "type": "integer" }, 123 + "minLength": 2, 124 + "maxLength": 5 125 + }, 126 + "sizeBlob": { 127 + "type": "blob", 128 + "maxSize": 20 129 + }, 130 + "acceptBlob": { 131 + "type": "blob", 132 + "accept": [ "image/*" ] 133 + }, 134 + "closedUnion": { 135 + "type": "union", 136 + "refs": [ 137 + "example.lexicon.record#demoObject" 138 + ], 139 + "closed": true 140 + } 141 + } 142 + } 143 + }, 144 + "stringFormats": { 145 + "type": "object", 146 + "description": "all the various string format types", 147 + "properties": { 148 + "did": { 149 + "type": "string", 150 + "format": "did", 151 + "description": "a did string" 152 + }, 153 + "handle": { 154 + "type": "string", 155 + "format": "handle", 156 + "description": "a handle string" 157 + }, 158 + "atidentifier": { 159 + "type": "string", 160 + "format": "at-identifier", 161 + "description": "an at-identifier string" 162 + }, 163 + "nsid": { 164 + "type": "string", 165 + "format": "nsid", 166 + "description": "an nsid string" 167 + }, 168 + "aturi": { 169 + "type": "string", 170 + "format": "at-uri", 171 + "description": "an at-uri string" 172 + }, 173 + "cid": { 174 + "type": "string", 175 + "format": "cid", 176 + 
"description": "a cid string (not a cid-link)" 177 + }, 178 + "datetime": { 179 + "type": "string", 180 + "format": "datetime", 181 + "description": "a datetime string" 182 + }, 183 + "language": { 184 + "type": "string", 185 + "format": "language", 186 + "description": "a language string" 187 + }, 188 + "uri": { 189 + "type": "string", 190 + "format": "uri", 191 + "description": "a generic URI field" 192 + }, 193 + "tid": { 194 + "type": "string", 195 + "format": "tid", 196 + "description": "a generic TID field" 197 + }, 198 + "recordkey": { 199 + "type": "string", 200 + "format": "record-key", 201 + "description": "a generic record-key field" 202 + } 203 + } 204 + }, 205 + "demoToken": { 206 + "type": "token", 207 + "description": "an example of what a token looks like" 208 + }, 209 + "demoObject": { 210 + "type": "object", 211 + "description": "smaller object schema for unions", 212 + "properties": { 213 + "a": { 214 + "type": "integer" 215 + }, 216 + "b": { 217 + "type": "integer" 218 + } 219 + } 220 + }, 221 + "demoObjectTwo": { 222 + "type": "object", 223 + "description": "smaller object schema for unions", 224 + "properties": { 225 + "c": { 226 + "type": "integer" 227 + }, 228 + "d": { 229 + "type": "integer" 230 + } 231 + } 232 + } 233 + } 234 + }
+145
cmd/lexidex/load.go
··· 1 + package main 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "errors" 7 + "fmt" 8 + "io" 9 + "io/fs" 10 + "log/slog" 11 + "os" 12 + "path/filepath" 13 + "strings" 14 + 15 + "github.com/bluesky-social/indigo/atproto/data" 16 + "github.com/bluesky-social/indigo/atproto/lexicon" 17 + "github.com/bluesky-social/indigo/atproto/syntax" 18 + 19 + "github.com/ipfs/go-cid" 20 + "github.com/multiformats/go-multihash" 21 + "gorm.io/gorm" 22 + "gorm.io/gorm/clause" 23 + ) 24 + 25 + func loadSchema(ctx context.Context, db *gorm.DB, raw json.RawMessage) error { 26 + 27 + // verify schema 28 + var sf lexicon.SchemaFile 29 + if err := json.Unmarshal(raw, &sf); err != nil { 30 + return fmt.Errorf("fetched Lexicon schema record was invalid: %w", err) 31 + } 32 + cat := lexicon.NewBaseCatalog() 33 + if err := cat.AddSchemaFile(sf); err != nil { 34 + return fmt.Errorf("lexicon format was invalid: %w", err) 35 + } 36 + 37 + nsid, err := syntax.ParseNSID(sf.ID) 38 + if err != nil { 39 + return err 40 + } 41 + domain, err := extractDomain(nsid) 42 + if err != nil { 43 + return fmt.Errorf("extracting domain for NSID: %w", err) 44 + } 45 + group, err := extractGroup(nsid) 46 + if err != nil { 47 + return fmt.Errorf("extracting group for NSID: %w", err) 48 + } 49 + 50 + // compute CID 51 + rec, err := data.UnmarshalJSON(raw) 52 + if err != nil { 53 + return err 54 + } 55 + cbytes, err := data.MarshalCBOR(rec) 56 + if err != nil { 57 + return err 58 + } 59 + c, err := cid.NewPrefixV1(cid.Raw, multihash.SHA2_256).Sum(cbytes) 60 + if err != nil { 61 + return err 62 + } 63 + recordCID := syntax.CID(c.String()) 64 + 65 + tx := db.WithContext(ctx) 66 + crawl := &Crawl{ 67 + NSID: nsid, 68 + Reason: "load-file", 69 + RecordCID: recordCID, 70 + } 71 + 72 + // check that domain isn't blocked 73 + var dom Domain 74 + err = tx.Limit(1).Find(&dom, "domain = ?", domain).Error 75 + if err != nil && errors.Is(err, gorm.ErrRecordNotFound) { 76 + // pass 77 + } else if err != nil { 78 + 
return err 79 + } 80 + if dom.Disabled { 81 + return fmt.Errorf("not crawling disabled domain: %s", nsid) 82 + } 83 + 84 + if dom.Domain != domain { 85 + dom = Domain{Domain: domain} 86 + tx.Create(&dom) 87 + } 88 + 89 + crawl.Status = "success" 90 + tx.Create(crawl) 91 + 92 + version := &Version{ 93 + RecordCID: recordCID, 94 + NSID: nsid, 95 + Record: raw, 96 + } 97 + tx.Clauses(clause.OnConflict{DoNothing: true}).Create(version) 98 + 99 + lex := &Lexicon{ 100 + NSID: nsid, 101 + Domain: domain, 102 + Group: group, 103 + Latest: recordCID, 104 + } 105 + res := tx.Clauses(clause.OnConflict{ 106 + Columns: []clause.Column{{Name: "nsid"}}, 107 + DoUpdates: clause.AssignmentColumns([]string{"latest"}), 108 + }).Create(lex) 109 + if res.Error != nil { 110 + return fmt.Errorf("error saving crawl to database: %w", res.Error) 111 + } 112 + return nil 113 + } 114 + 115 + func LoadDirectory(ctx context.Context, db *gorm.DB, p string) error { 116 + 117 + walkFunc := func(p string, d fs.DirEntry, err error) error { 118 + if err != nil { 119 + return err 120 + } 121 + if d.IsDir() { 122 + return nil 123 + } 124 + if !strings.HasSuffix(p, ".json") { 125 + return nil 126 + } 127 + slog.Info("loading Lexicon schema file", "path", p) 128 + f, err := os.Open(p) 129 + if err != nil { 130 + return err 131 + } 132 + defer func() { _ = f.Close() }() 133 + 134 + b, err := io.ReadAll(f) 135 + if err != nil { 136 + return err 137 + } 138 + var obj json.RawMessage 139 + if err := json.Unmarshal(b, &obj); err != nil { 140 + return fmt.Errorf("Lexicon schema record was invalid: %w", err) 141 + } 142 + return loadSchema(ctx, db, obj) 143 + } 144 + return filepath.WalkDir(p, walkFunc) 145 + }
+135
cmd/lexidex/main.go
··· 1 + package main 2 + 3 + import ( 4 + "fmt" 5 + "log/slog" 6 + "os" 7 + 8 + _ "github.com/joho/godotenv/autoload" 9 + 10 + "github.com/bluesky-social/indigo/atproto/syntax" 11 + 12 + "github.com/carlmjohnson/versioninfo" 13 + "github.com/urfave/cli/v2" 14 + "gorm.io/driver/sqlite" 15 + "gorm.io/gorm" 16 + ) 17 + 18 + var ( 19 + version = versioninfo.Short() 20 + ) 21 + 22 + func main() { 23 + if err := run(os.Args); err != nil { 24 + slog.Error("fatal", "err", err) 25 + os.Exit(-1) 26 + } 27 + } 28 + 29 + func run(args []string) error { 30 + 31 + app := cli.App{ 32 + Name: "lexidex", 33 + Usage: "atproto Lexicon index and schema browser", 34 + Flags: []cli.Flag{ 35 + &cli.StringFlag{ 36 + Name: "sqlite-path", 37 + Usage: "Database file path", 38 + Value: "./lexidex.sqlite", 39 + EnvVars: []string{"LEXIDEX_SQLITE_PATH"}, 40 + }, 41 + &cli.StringFlag{ 42 + Name: "jetstream-host", 43 + Usage: "URL (scheme, host, path) to jetstream host for firehose consumption", 44 + Value: "wss://jetstream2.us-west.bsky.network/subscribe", 45 + EnvVars: []string{"LEXIDEX_JETSTREAM_HOST"}, 46 + }, 47 + }, 48 + } 49 + 50 + app.Commands = []*cli.Command{ 51 + &cli.Command{ 52 + Name: "serve", 53 + Usage: "run the server", 54 + Action: runServe, 55 + Flags: []cli.Flag{ 56 + &cli.StringFlag{ 57 + Name: "bind", 58 + Usage: "Specify the local IP/port to bind to", 59 + Required: false, 60 + Value: ":8500", 61 + EnvVars: []string{"LEXIDEX_BIND"}, 62 + }, 63 + }, 64 + }, 65 + &cli.Command{ 66 + Name: "crawl", 67 + Usage: "crawl a single NSID", 68 + Action: runCrawl, 69 + }, 70 + &cli.Command{ 71 + Name: "load-dir", 72 + Usage: "load lexicons from a local file directory", 73 + Action: runLoadDir, 74 + }, 75 + &cli.Command{ 76 + Name: "version", 77 + Usage: "print version", 78 + Action: func(cctx *cli.Context) error { 79 + fmt.Println(version) 80 + return nil 81 + }, 82 + }, 83 + } 84 + 85 + return app.Run(args) 86 + } 87 + 88 + func runServe(cctx *cli.Context) error { 89 + srv, err := 
NewWebServer(cctx) 90 + if err != nil { 91 + return err 92 + } 93 + RunAllMigrations(srv.db) 94 + 95 + srv.RunWeb() 96 + srv.RunConsumer() 97 + return srv.RunSignalHandler() 98 + } 99 + 100 + func runCrawl(cctx *cli.Context) error { 101 + ctx := cctx.Context 102 + 103 + s := cctx.Args().First() 104 + if s == "" { 105 + return fmt.Errorf("need to provide Lexicon NSID as an argument") 106 + } 107 + nsid, err := syntax.ParseNSID(s) 108 + if err != nil { 109 + return err 110 + } 111 + 112 + db, err := gorm.Open(sqlite.Open(cctx.String("sqlite-path"))) 113 + if err != nil { 114 + return fmt.Errorf("failed to open db: %w", err) 115 + } 116 + 117 + RunAllMigrations(db) 118 + return CrawlLexicon(ctx, db, nsid, "cli") 119 + } 120 + 121 + func runLoadDir(cctx *cli.Context) error { 122 + ctx := cctx.Context 123 + 124 + p := cctx.Args().First() 125 + if p == "" { 126 + return fmt.Errorf("need to provide directory path as an argument") 127 + } 128 + 129 + db, err := gorm.Open(sqlite.Open(cctx.String("sqlite-path"))) 130 + if err != nil { 131 + return fmt.Errorf("failed to open db: %w", err) 132 + } 133 + RunAllMigrations(db) 134 + return LoadDirectory(ctx, db, p) 135 + }
+50
cmd/lexidex/models.go
··· 1 + package main 2 + 3 + import ( 4 + "encoding/json" 5 + "time" 6 + 7 + "github.com/bluesky-social/indigo/atproto/syntax" 8 + 9 + "gorm.io/gorm" 10 + ) 11 + 12 + func RunAllMigrations(db *gorm.DB) { 13 + db.AutoMigrate(&Domain{}) 14 + db.AutoMigrate(&Lexicon{}) 15 + db.AutoMigrate(&Version{}) 16 + db.AutoMigrate(&Crawl{}) 17 + } 18 + 19 + // A domain name with NSIDs grouped below it. Roughly aligns with public-suffix-list, though we might end up with atmosphere-specific overrides. Can hide indexing from the front page, or entirely disable indexing. 20 + type Domain struct { 21 + Domain string `gorm:"primaryKey"` 22 + Hidden bool 23 + Disabled bool 24 + } 25 + 26 + type Lexicon struct { 27 + NSID syntax.NSID `gorm:"primaryKey;column:nsid"` 28 + Domain string 29 + Group string 30 + Latest syntax.CID 31 + } 32 + 33 + type Version struct { 34 + RecordCID syntax.CID `gorm:"primaryKey;column:record_cid"` 35 + NSID syntax.NSID `gorm:"index;column:nsid"` 36 + Record json.RawMessage `gorm:"serializer:json"` 37 + } 38 + 39 + type Crawl struct { 40 + ID uint `gorm:"primaryKey"` 41 + CreatedAt time.Time 42 + NSID syntax.NSID `gorm:"index;column:nsid"` 43 + Reason string 44 + Status string 45 + 46 + DID syntax.DID `gorm:"index;column:did"` 47 + RecordCID syntax.CID `gorm:"index;column:record_cid"` 48 + RepoRev string 49 + Extra map[string]any `gorm:"serializer:json"` 50 + }
+85
cmd/lexidex/renderer.go
··· 1 + package main 2 + 3 + import ( 4 + "bytes" 5 + "embed" 6 + "errors" 7 + "fmt" 8 + "io" 9 + "path/filepath" 10 + 11 + "github.com/flosch/pongo2/v6" 12 + "github.com/labstack/echo/v4" 13 + ) 14 + 15 + //go:embed templates/* 16 + var TemplateFS embed.FS 17 + 18 + type RendererLoader struct { 19 + prefix string 20 + fs *embed.FS 21 + } 22 + 23 + func NewRendererLoader(prefix string, fs *embed.FS) pongo2.TemplateLoader { 24 + return &RendererLoader{ 25 + prefix: prefix, 26 + fs: fs, 27 + } 28 + } 29 + func (l *RendererLoader) Abs(_, name string) string { 30 + // TODO: remove this workaround 31 + // Figure out why this method is being called 32 + // twice on template names resulting in a failure to resolve 33 + // the template name. 34 + if filepath.HasPrefix(name, l.prefix) { 35 + return name 36 + } 37 + return filepath.Join(l.prefix, name) 38 + } 39 + 40 + func (l *RendererLoader) Get(path string) (io.Reader, error) { 41 + b, err := l.fs.ReadFile(path) 42 + if err != nil { 43 + return nil, fmt.Errorf("reading template %q failed: %w", path, err) 44 + } 45 + return bytes.NewReader(b), nil 46 + } 47 + 48 + type Renderer struct { 49 + TemplateSet *pongo2.TemplateSet 50 + Debug bool 51 + } 52 + 53 + func NewRenderer(prefix string, fs *embed.FS, debug bool) *Renderer { 54 + return &Renderer{ 55 + TemplateSet: pongo2.NewSet(prefix, NewRendererLoader(prefix, fs)), 56 + Debug: debug, 57 + } 58 + } 59 + 60 + func (r Renderer) Render(w io.Writer, name string, data interface{}, c echo.Context) error { 61 + var ctx pongo2.Context 62 + 63 + if data != nil { 64 + var ok bool 65 + ctx, ok = data.(pongo2.Context) 66 + if !ok { 67 + return errors.New("no pongo2.Context data was passed") 68 + } 69 + } 70 + 71 + var t *pongo2.Template 72 + var err error 73 + 74 + if r.Debug { 75 + t, err = pongo2.FromFile(name) 76 + } else { 77 + t, err = r.TemplateSet.FromFile(name) 78 + } 79 + 80 + if err != nil { 81 + return err 82 + } 83 + 84 + return t.ExecuteWriter(ctx, w) 85 + }
+268
cmd/lexidex/schema.go
··· 1 + package main 2 + 3 + import ( 4 + "fmt" 5 + "sort" 6 + 7 + "github.com/bluesky-social/indigo/atproto/lexicon" 8 + "github.com/bluesky-social/indigo/atproto/syntax" 9 + ) 10 + 11 + // Re-structured version of `lexicon.SchemaFile` which is much easier to render as documentation. 12 + type Def struct { 13 + NSID syntax.NSID 14 + Type string 15 + Description *string 16 + Name string // def fragment; or field name 17 + 18 + //ViaRef string // if this was included via reference 19 + 20 + // objects, records, API endpoints, etc 21 + Fields []Def 22 + Errors []lexicon.SchemaError 23 + 24 + // API methods: query, procedure 25 + QueryParams []Def 26 + Output *Def 27 + OutputEncoding string 28 + Input *Def 29 + InputEncoding string 30 + 31 + // record 32 + KeyType string 33 + 34 + // unions 35 + // TODO: transclude these in? with some depth limit? 36 + Options []string 37 + Closed bool 38 + 39 + // array 40 + Items *Def 41 + 42 + // fields 43 + Required bool 44 + Nullable bool 45 + 46 + // ref 47 + Ref string 48 + 49 + Min *int 50 + Max *int 51 + 52 + // concrete types 53 + SchemaBoolean *lexicon.SchemaBoolean 54 + SchemaInteger *lexicon.SchemaInteger 55 + SchemaString *lexicon.SchemaString 56 + SchemaBytes *lexicon.SchemaBytes 57 + SchemaBlob *lexicon.SchemaBlob 58 + } 59 + 60 + func sortedKeys(m map[string]lexicon.SchemaDef) []string { 61 + keys := make([]string, len(m)) 62 + i := 0 63 + for k, _ := range m { 64 + keys[i] = k 65 + i++ 66 + } 67 + sort.Strings(keys) 68 + return keys 69 + } 70 + 71 + func ParseSchemaFile(sf *lexicon.SchemaFile, nsid syntax.NSID) ([]Def, error) { 72 + out := make([]Def, len(sf.Defs)) 73 + i := 0 74 + mainIndex := -1 75 + for _, name := range sortedKeys(sf.Defs) { 76 + sd := sf.Defs[name] 77 + if name == "main" { 78 + mainIndex = i 79 + } 80 + doc, err := ParseSchemaDef(sd.Inner, nsid, name) 81 + if err != nil { 82 + return nil, fmt.Errorf("failed to parse %s: %w", name, err) 83 + } 84 + out[i] = *doc 85 + i++ 86 + } 87 + // sort 'main' 
to front (if it exists) 88 + // TODO: have this not mess up other sort order 89 + if mainIndex > 0 { 90 + out[0], out[mainIndex] = out[mainIndex], out[0] 91 + } 92 + return out, nil 93 + } 94 + 95 + func stringInList(list []string, val string) bool { 96 + for _, v := range list { 97 + if v == val { 98 + return true 99 + } 100 + } 101 + return false 102 + } 103 + 104 + func ParseFields(properties map[string]lexicon.SchemaDef, required []string, nullable []string, nsid syntax.NSID) ([]Def, error) { 105 + out := []Def{} 106 + // TODO: sort keys 107 + for _, name := range sortedKeys(properties) { 108 + def, err := ParseSchemaDef(properties[name].Inner, nsid, name) 109 + if err != nil { 110 + return nil, err 111 + } 112 + if stringInList(required, name) { 113 + def.Required = true 114 + } 115 + if stringInList(nullable, name) { 116 + def.Nullable = true 117 + } 118 + out = append(out, *def) 119 + } 120 + return out, nil 121 + } 122 + 123 + func ParseSchemaDef(raw any, nsid syntax.NSID, name string) (*Def, error) { 124 + def := Def{ 125 + NSID: nsid, 126 + Name: name, 127 + } 128 + if raw == nil { 129 + return nil, fmt.Errorf("nil SchemaDef") 130 + } 131 + switch s := raw.(type) { 132 + case lexicon.SchemaRecord: 133 + def.Type = "record" 134 + def.Description = s.Description 135 + def.KeyType = s.Key 136 + fields, err := ParseFields(s.Record.Properties, s.Record.Required, s.Record.Nullable, nsid) 137 + if err != nil { 138 + return nil, err 139 + } 140 + def.Fields = fields 141 + case lexicon.SchemaQuery: 142 + def.Type = "query" 143 + def.Description = s.Description 144 + def.Errors = s.Errors 145 + qp, err := ParseFields(s.Parameters.Properties, s.Parameters.Required, []string{}, nsid) 146 + if err != nil { 147 + return nil, err 148 + } 149 + def.Fields = qp 150 + if s.Output != nil { 151 + def.OutputEncoding = s.Output.Encoding 152 + if s.Output.Schema != nil { 153 + def.Output, err = ParseSchemaDef(s.Output.Schema.Inner, nsid, "") 154 + if err != nil { 155 + return 
nil, err 156 + } 157 + } 158 + } 159 + case lexicon.SchemaProcedure: 160 + def.Type = "procedure" 161 + def.Description = s.Description 162 + def.Errors = s.Errors 163 + qp, err := ParseFields(s.Parameters.Properties, s.Parameters.Required, []string{}, nsid) 164 + if err != nil { 165 + return nil, err 166 + } 167 + def.Fields = qp 168 + if s.Output != nil { 169 + def.OutputEncoding = s.Output.Encoding 170 + if s.Output.Schema != nil { 171 + def.Output, err = ParseSchemaDef(s.Output.Schema.Inner, nsid, "") 172 + if err != nil { 173 + return nil, err 174 + } 175 + } 176 + } 177 + if s.Input != nil { 178 + def.InputEncoding = s.Input.Encoding 179 + if s.Input.Schema != nil { 180 + def.Input, err = ParseSchemaDef(s.Input.Schema.Inner, nsid, "") 181 + if err != nil { 182 + return nil, err 183 + } 184 + } 185 + } 186 + case lexicon.SchemaSubscription: 187 + def.Type = "subscription" 188 + def.Description = s.Description 189 + qp, err := ParseFields(s.Parameters.Properties, s.Parameters.Required, []string{}, nsid) 190 + if err != nil { 191 + return nil, err 192 + } 193 + def.Fields = qp 194 + if s.Message == nil { 195 + return nil, fmt.Errorf("empty subscription message type") 196 + } 197 + u, ok := s.Message.Schema.Inner.(lexicon.SchemaUnion) 198 + if !ok { 199 + return nil, fmt.Errorf("subscription message must be a union") 200 + } 201 + def.Closed = u.Closed != nil && *u.Closed 202 + def.Options = u.Refs 203 + 204 + case lexicon.SchemaBoolean: 205 + def.Type = "boolean" 206 + def.Description = s.Description 207 + def.SchemaBoolean = &s 208 + case lexicon.SchemaInteger: 209 + def.Type = "integer" 210 + def.Description = s.Description 211 + def.SchemaInteger = &s 212 + case lexicon.SchemaString: 213 + def.Type = "string" 214 + def.Description = s.Description 215 + def.SchemaString = &s 216 + case lexicon.SchemaBytes: 217 + def.Type = "bytes" 218 + def.Description = s.Description 219 + def.SchemaBytes = &s 220 + case lexicon.SchemaBlob: 221 + def.Type = "blob" 222 + 
def.Description = s.Description 223 + def.SchemaBlob = &s 224 + case lexicon.SchemaNull: 225 + def.Type = "null" 226 + def.Description = s.Description 227 + case lexicon.SchemaCIDLink: 228 + def.Type = "cid-link" 229 + def.Description = s.Description 230 + case lexicon.SchemaArray: 231 + def.Type = "array" 232 + def.Description = s.Description 233 + def.Min = s.MinLength 234 + def.Max = s.MaxLength 235 + d, err := ParseSchemaDef(s.Items.Inner, nsid, "") 236 + if err != nil { 237 + return nil, err 238 + } 239 + def.Items = d 240 + case lexicon.SchemaObject: 241 + def.Type = "object" 242 + def.Description = s.Description 243 + f, err := ParseFields(s.Properties, s.Required, s.Nullable, nsid) 244 + if err != nil { 245 + return nil, err 246 + } 247 + def.Fields = f 248 + 249 + case lexicon.SchemaToken: 250 + def.Type = "token" 251 + def.Description = s.Description 252 + case lexicon.SchemaRef: 253 + def.Type = "ref" 254 + def.Description = s.Description 255 + def.Ref = s.Ref 256 + case lexicon.SchemaUnion: 257 + def.Type = "union" 258 + def.Description = s.Description 259 + def.Closed = s.Closed != nil && *s.Closed 260 + def.Options = s.Refs 261 + case lexicon.SchemaUnknown: 262 + def.Type = "unknown" 263 + def.Description = s.Description 264 + default: 265 + return nil, fmt.Errorf("unhandled SchemaDef type: %T", s) 266 + } 267 + return &def, nil 268 + }
cmd/lexidex/static/apple-touch-icon.png

This is a binary file and will not be displayed.

cmd/lexidex/static/default-avatar.png

This is a binary file and will not be displayed.

cmd/lexidex/static/favicon-16x16.png

This is a binary file and will not be displayed.

cmd/lexidex/static/favicon-32x32.png

This is a binary file and will not be displayed.

cmd/lexidex/static/favicon.ico

This is a binary file and will not be displayed.

cmd/lexidex/static/favicon.png

This is a binary file and will not be displayed.

+9
cmd/lexidex/static/robots.txt
··· 1 + # Hello Friends! 2 + # If you are considering bulk or automated crawling, you may want to look 3 + # into our protocol (API), including a firehose of updates. See: https://atproto.com/ 4 + 5 + # By default, may crawl anything on this domain. HTTP 429 ("backoff") status 6 + # codes are used for rate-limiting. Up to a handful of concurrent requests should 7 + # be ok. 8 + User-Agent: * 9 + Allow: /
+44
cmd/lexidex/templates/base.html
··· 1 + <!doctype html> 2 + <html lang="en"> 3 + <head> 4 + <meta charset="utf-8"> 5 + <meta name="referrer" content="origin-when-cross-origin"> 6 + <meta name="viewport" content="width=device-width, initial-scale=1"> 7 + <meta name="color-scheme" content="light dark" /> 8 + <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.green.min.css" /> 9 + <style> 10 + :root { 11 + --pico-card-sectioning-background-color: #b7c9b7 !important; 12 + } 13 + html { position: relative; min-height: 100%; height: auto; } 14 + body { margin-bottom: 3em; } 15 + body > nav { background-color: var(--pico-muted-border-color); } 16 + body > footer { position: absolute; bottom: 0px; padding: 2em; background-color: var(--pico-muted-border-color); } 17 + thead th { font-weight: bold; } 18 + main article { margin: 2.5rem 0; } 19 + code { background: none; } 20 + td { padding: 0; } 21 + </style> 22 + <meta name="generator" content="lexidex"> 23 + <title>{% block head_title %}lexidex{% endblock %}</title> 24 + </head> 25 + <body> 26 + <nav class="container-fluid"> 27 + <ul> 28 + <li><a href="/"><strong>lexidex</strong></a></li> 29 + </ul> 30 + <form action="/query" method="get" style="width: 80%;"> 31 + <input type="text" name="q" placeholder="Lookup by NSID..." {% if uri %}value="{{ uri }}"{% endif %} style="margin: 0.5em;"> 32 + </form> 33 + <ul> 34 + <li><a href="https://atproto.com/specs/lexicon">Specs</a></li> 35 + <li><a href="https://github.com/bluesky-social/indigo">Code</a></li> 36 + </ul> 37 + </nav> 38 + 39 + <main class="container"> 40 + {% block main_content %}Base Template{% endblock %} 41 + </main> 42 + 43 + </body> 44 + </html>
+13
cmd/lexidex/templates/domain.html
··· 1 + {% extends "base.html" %} 2 + 3 + {% block main_content %} 4 + <h2 style="font-family: monospace;">{{ domain }} - Domain Index</h2> 5 + 6 + Known Lexicons: 7 + <ul> 8 + {% for lex in lexicons %} 9 + <li><a href="/lexicon/{{ lex.NSID }}">{{ lex.NSID }}</a> 10 + {% endfor %} 11 + </ul> 12 + 13 + {% endblock %}
+14
cmd/lexidex/templates/error.html
··· 1 + {% extends "base.html" %} 2 + 3 + {% block head_title %}Error {{ statusCode }} - lexidex{% endblock %} 4 + 5 + {% block main_content %} 6 + <br> 7 + <center> 8 + <h1 style="font-size: 8em;">{{ statusCode }}</h1> 9 + <h2 style="font-size: 3em;">Error!</h2> 10 + {% if errorMessage %} 11 + <p><code>{{ errorMessage }}</code></p> 12 + {% endif %} 13 + </center> 14 + {% endblock %}
+25
cmd/lexidex/templates/history.html
··· 1 + {% extends "base.html" %} 2 + 3 + {% block main_content %} 4 + <h2 style="font-family: monospace;">{{ lexicon.NSID }} - Crawl History</h2> 5 + 6 + <table> 7 + <thead> 8 + <tr><th>When</th> 9 + <th>Reason</th> 10 + <th>Repo DID</th> 11 + <th>Repo Rev</th> 12 + <th>Record CID</th> 13 + </thead> 14 + <tbody> 15 + {% for crawl in history %} 16 + <tr><td>{{ crawl.CreatedAt|date:"2006-01-02T15:04:05Z" }}</td> 17 + <td>{{ crawl.Reason }}</td> 18 + <td><code>{{ crawl.DID }}</code></td> 19 + <td><code>{{ crawl.RepoRev }}</code></td> 20 + <td style="max-width: 5em; text-overflow: ellipsis; overflow: hidden;"><code>{{ crawl.RecordCID }}</code></td> 21 + {% endfor %} 22 + </tbody> 23 + </table> 24 + 25 + {% endblock %}
+21
cmd/lexidex/templates/home.html
··· 1 + {% extends "base.html" %} 2 + 3 + {% block main_content %} 4 + <p>This is a web index of <a href="https://atproto.com">atproto</a> data and API schemas: <a href="https://atproto.com/specs/lexicon">Lexicons</a>. It is an ✨Experimental✨ project from the protocol team at <a href="https://bsky.social">Bluesky</a>. 5 + 6 + <h3>How It Works</h3> 7 + 8 + <p>Lexicon schemas are publicly published as records in atproto repos. This service listens on the firehose for new lexicons and auto-updates when they are discovered. For more details about publishing Lexicons, see the <a href="https://github.com/bluesky-social/atproto/discussions/3074">Lexicon Resolution RFC</a>. 9 + 10 + <p>You can browse <a href="/recent">Recent Crawl Attempts</a>. 11 + 12 + {% if domains %} 13 + <h3>Known Domains</h3> 14 + <ul> 15 + {% for d in domains %} 16 + <li><a href="/domain/{{ d.Domain }}">{{ d.Domain }}</a> 17 + {% endfor %} 18 + </ul> 19 + {% endif %} 20 + 21 + {% endblock %}
+178
cmd/lexidex/templates/lexicon.html
{% extends "base.html" %}

{% block main_content %}

{# fields_table: renders a table of field definitions (name, type, per-type details). #}
{# Recurses into nested objects and arrays of objects. #}
{% macro fields_table(fields) %}
{% if fields %}
<table>
  <thead>
    <tr><th>Name</th>
        <th>Type</th>
        <th>Details</th>
  </thead>
  <tbody>
  {% for f in fields %}
    <tr><td><code>{{ f.Name }}</code></td>
        <td>{{ f.Type }} {% if f.Required %}<mark>(required)</mark>{% elif f.Nullable %}(nullable){% endif %}</td>
        <td>
        {% if f.Description %}<p>{{ f.Description }}</p>{% endif %}
        {% if f.Type == "string" and f.SchemaString %}
          {% if f.SchemaString.Format %}<p>Syntax Format: <code>{{ f.SchemaString.Format }}</code>{% endif %}
          {% if f.SchemaString.Default %}<p>Default: <code>{{ f.SchemaString.Default }}</code>{% endif %}
          {% if f.SchemaString.Const %}<p>Constant Value: <code>{{ f.SchemaString.Const }}</code>{% endif %}
          {% if f.SchemaString.KnownValues %}<p>Known Values: {% for v in f.SchemaString.KnownValues %}<code>{{ v }}</code>, {% endfor %}{% endif %}
          {% if f.SchemaString.Enum %}<p>Enum Values: {% for v in f.SchemaString.Enum %}<code>{{ v }}</code>, {% endfor %}{% endif %}
          {% if f.SchemaString.MinGraphemes or f.SchemaString.MaxGraphemes %}
            <p>Length in Graphemes:
            {% if f.SchemaString.MinGraphemes %}{{ f.SchemaString.MinGraphemes }} min{% endif %}
            {% if f.SchemaString.MaxGraphemes %}{{ f.SchemaString.MaxGraphemes }} max{% endif %}
          {% endif %}
          {% if f.SchemaString.MinLength or f.SchemaString.MaxLength %}
            <p>Length in bytes (when UTF-8 encoded):
            {% if f.SchemaString.MinLength %}{{ f.SchemaString.MinLength }} min{% endif %}
            {% if f.SchemaString.MaxLength %}{{ f.SchemaString.MaxLength }} max{% endif %}
          {% endif %}
        {% elif f.Type == "boolean" and f.SchemaBoolean %}
          {% if f.SchemaBoolean.Format %}<p>Format: <code>{{ f.SchemaBoolean.Format }}</code>{% endif %}
          {% if f.SchemaBoolean.Default %}<p>Default: <code>{{ f.SchemaBoolean.Default }}</code>{% endif %}
        {% elif f.Type == "integer" and f.SchemaInteger %}
          {% if f.SchemaInteger.Const %}<p>Constant Value: <code>{{ f.SchemaInteger.Const }}</code>{% endif %}
          {% if f.SchemaInteger.Default %}<p>Default: <code>{{ f.SchemaInteger.Default }}</code>{% endif %}
          {% if f.SchemaInteger.Minimum %}<p>Minimum: <code>{{ f.SchemaInteger.Minimum }}</code>{% endif %}
          {% if f.SchemaInteger.Maximum %}<p>Maximum: <code>{{ f.SchemaInteger.Maximum }}</code>{% endif %}
          {% if f.SchemaInteger.Enum %}<p>Enum Values: {% for v in f.SchemaInteger.Enum %}<code>{{ v }}</code>, {% endfor %}{% endif %}
        {% elif f.Type == "bytes" and f.SchemaBytes %}
          {# length limits come from the bytes schema itself, not the string schema #}
          {% if f.SchemaBytes.MinLength or f.SchemaBytes.MaxLength %}
            <p>Length in bytes:
            {% if f.SchemaBytes.MinLength %}{{ f.SchemaBytes.MinLength }} min{% endif %}
            {% if f.SchemaBytes.MaxLength %}{{ f.SchemaBytes.MaxLength }} max{% endif %}
          {% endif %}
        {% elif f.Type == "blob" and f.SchemaBlob %}
          {% if f.SchemaBlob.Accept %}
            <p>Accepted Content Types:
            {% for t in f.SchemaBlob.Accept %}
              <code>{{ t }}</code>
            {% endfor %}
          {% endif %}
          {% if f.SchemaBlob.MaxSize %}
            <p>Max Size (bytes): <code>{{ f.SchemaBlob.MaxSize }}</code>
          {% endif %}
        {% elif f.Type == "ref" %}
          <p>Reference: <code>{{ f.Ref }}</code>
        {% elif f.Type == "array" %}
          {% if f.Min or f.Max %}
            <p>Array Length:
            {% if f.Min %}{{ f.Min }} min{% endif %}
            {% if f.Max %}{{ f.Max }} max{% endif %}
          {% endif %}
          {% if f.Items.Type == "ref" %}
            <p>Elements are of type: <code>{{ f.Items.Ref }}</code>
          {% elif f.Items.Type == "object" %}
            <article>
              <strong>Element Object Schema</strong>
              {{ fields_table(f.Items.Fields) }}
            </article>
          {% elif f.Items.Type == "integer" %}
            <p>Elements are integers
            {# TODO #}
          {% elif f.Items.Type == "cid-link" %}
            <p>Elements are CID Links
          {% else %}<p>UNHANDLED IN ARRAYS: {{ f.Items.Type }}{% endif %}
        {% elif f.Type == "object" %}
          <article>
            {{ fields_table(f.Fields) }}
          </article>
        {% elif f.Type == "union" %}
          {# Closed/Options live on the union definition itself; Items is only set for arrays #}
          {{ union_list(f) }}
        {% elif f.Type == "token" %}
        {% elif f.Type == "null" %}
        {% elif f.Type == "cid-link" %}
        {% elif f.Type == "unknown" %}
          <i>Field can contain arbitrary JSON/CBOR object data</i>
        {% else %}<p>UNHANDLED IN FIELDS: {{ f.Type }}{% endif %}
        </td>
  {% endfor %}
  </tbody>
</table>
{% else %}
<p><i>No fields defined</i>
{% endif %}
{% endmacro %}

{# record_schema: renders the optional record key type followed by the definition's field table. #}
{% macro record_schema(def) %}
{% if def.KeyType %}<p>Record Key Type: <code>{{ def.KeyType }}</code>{% endif %}
<h3>Data Fields</h3>
{{ fields_table(def.Fields) }}
{% endmacro %}

{# api_schema: renders query parameters, request body, response body and errors of an endpoint. #}
{% macro api_schema(def) %}
<h3>Query Parameters</h3>
{{ fields_table(def.Fields) }}
{% if def.Input or def.InputEncoding %}
  <h3>Request Body</h3>
  {% if def.InputEncoding %}<p>Content Type: <code>{{ def.InputEncoding }}</code>{% endif %}
  {% if def.Input %}{{ record_schema(def.Input) }}{% endif %}
{% endif %}
<h3>Response Body</h3>
{% if def.OutputEncoding %}<p>Content Type: <code>{{ def.OutputEncoding }}</code>{% endif %}
{% if def.Output %}{{ record_schema(def.Output) }}{% endif %}
{{ errors_table(def.Errors) }}
{% endmacro %}

{# errors_table: renders the named error types of an endpoint, or a placeholder when there are none. #}
{% macro errors_table(errors) %}
<h3>Errors</h3>
{% if errors %}
<table>
  <thead>
    <tr><th>Name</th>
        <th>Description</th>
  </thead>
  <tbody>
  {% for e in errors %}
    <tr><td><code>{{ e.Name }}</code></td>
        <td>{{ e.Description }}</td>
  {% endfor %}
  </tbody>
</table>
{% else %}
<p><i>No error types defined</i>
{% endif %}
{% endmacro %}

{# union_list: renders whether a union is open or closed, followed by its option references. #}
{% macro union_list(def) %}
<p>{% if def.Closed %}Closed Union:{% else %}Open Union:{% endif %}
<ul>
{% for o in def.Options %}
  <li><code>{{ o }}</code>
{% endfor %}
</ul>
{% endmacro %}

<h2 style="font-family: monospace;">{{ lexicon.NSID }}</h2>
<p>Domain Index: <a href="/domain/{{ lexicon.Domain }}">{{ lexicon.Domain }}</a>
<p><a href="/lexicon/{{ lexicon.NSID }}/history">Crawl History</a>

{# one card per named definition in the schema file #}
{% for def in defs %}
<article>
  <header><code>#{{ def.Name }}</code> - {{ def.Type }}</header>
  {% if def.Description %}<p>{{ def.Description }}{% endif %}
  {% if def.Type == "record" %}{{ record_schema(def) }}
  {% elif def.Type == "query" or def.Type == "procedure" %}{{ api_schema(def) }}
  {% elif def.Type == "subscription" %}
    <h3>Query Parameters</h3>
    {{ fields_table(def.Fields) }}
    <h3>Message Types</h3>
    {{ union_list(def) }}
  {% elif def.Type == "object" %}{{ record_schema(def) }}
  {% elif def.Type == "union" %}{{ union_list(def) }}
  {% elif def.Type == "token" %}
    <p><i>"Tokens" in atproto are simply NSID reference strings.</i>
  {% elif def.Type == "unknown" %}
    <p><i>"Unknown" fields may contain arbitrary JSON/CBOR object as data.</i>
  {% elif def.Type == "ref" %}
    <p>This definition is a pointer to: <code>{{ def.Ref }}</code>
  {% else %}UNHANDLED IN DEFS: {{ def.Type }}{% endif %}
</article>
{% endfor %}

{% endblock %}
+23
cmd/lexidex/templates/recent.html
··· 1 + {% extends "base.html" %} 2 + 3 + {% block main_content %} 4 + <h2>Recent Crawls</h2> 5 + 6 + <table> 7 + <thead> 8 + <tr><th>When</th> 9 + <th>Reason</th> 10 + <th>NSID</th> 11 + <th>Record CID</th> 12 + </thead> 13 + <tbody> 14 + {% for crawl in history %} 15 + <tr><td>{{ crawl.CreatedAt|date:"2006-01-02T15:04:05Z" }}</td> 16 + <td>{{ crawl.Reason }}</td> 17 + <td><code><a href="/lexicon/{{ crawl.NSID }}">{{ crawl.NSID }}</a></code></td> 18 + <td style="max-width: 5em; text-overflow: ellipsis; overflow: hidden;"><code>{{ crawl.RecordCID }}</code></td> 19 + {% endfor %} 20 + </tbody> 21 + </table> 22 + 23 + {% endblock %}
+20
cmd/lexidex/util.go
··· 1 + package main 2 + 3 + import ( 4 + "strings" 5 + 6 + "golang.org/x/net/publicsuffix" 7 + 8 + "github.com/bluesky-social/indigo/atproto/syntax" 9 + ) 10 + 11 + func extractDomain(nsid syntax.NSID) (string, error) { 12 + // TODO: might have additional atproto-specific tweaks here in the future (for stuff which isn't in PSL yet) 13 + return publicsuffix.EffectiveTLDPlusOne(nsid.Authority()) 14 + } 15 + 16 + func extractGroup(nsid syntax.NSID) (string, error) { 17 + parts := strings.Split(string(nsid), ".") 18 + group := strings.ToLower(strings.Join(parts[:len(parts)-1], ".")) 19 + return group, nil 20 + }
+106
cmd/lexidex/web_demo.go
··· 1 + package main 2 + 3 + import ( 4 + _ "embed" 5 + "encoding/json" 6 + "fmt" 7 + "net/http" 8 + 9 + "github.com/bluesky-social/indigo/atproto/lexicon" 10 + "github.com/bluesky-social/indigo/atproto/syntax" 11 + 12 + "github.com/flosch/pongo2/v6" 13 + "github.com/labstack/echo/v4" 14 + ) 15 + 16 + //go:embed example.lexicon.record.json 17 + var exampleRecordJSON []byte 18 + 19 + //go:embed example.lexicon.query.json 20 + var exampleQueryJSON []byte 21 + 22 + // e.GET("/demo/record", srv.WebDemoRecord) 23 + func (srv *WebServer) WebDemoRecord(c echo.Context) error { 24 + info := pongo2.Context{} 25 + 26 + nsid := syntax.NSID("example.lexicon.record") 27 + lex := Lexicon{ 28 + NSID: nsid, 29 + Domain: "lexicon.example", 30 + Group: "example.lexicon", 31 + Latest: "bafyreihbqdb64s3mwyfr2n3pt7dwxxaebbxntz7fchlaafzbtu5ps45i2u", 32 + } 33 + ver := Version{ 34 + RecordCID: "bafyreihbqdb64s3mwyfr2n3pt7dwxxaebbxntz7fchlaafzbtu5ps45i2u", 35 + NSID: nsid, 36 + Record: exampleRecordJSON, 37 + } 38 + crawl := Crawl{ 39 + ID: 123, 40 + // XXX: CreatedAt: "2025-02-04T00:03:07.091Z", 41 + NSID: nsid, 42 + DID: syntax.DID("did:web:lexicon.example"), 43 + RecordCID: "bafyreihbqdb64s3mwyfr2n3pt7dwxxaebbxntz7fchlaafzbtu5ps45i2u", 44 + RepoRev: "3lhcqvfmmqh22", 45 + Reason: "demo", 46 + //Extra 47 + } 48 + 49 + var sf lexicon.SchemaFile 50 + if err := json.Unmarshal(ver.Record, &sf); err != nil { 51 + return fmt.Errorf("Lexicon schema record was invalid: %w", err) 52 + } 53 + defs, err := ParseSchemaFile(&sf, nsid) 54 + if err != nil { 55 + return err 56 + } 57 + 58 + info["lexicon"] = lex 59 + info["version"] = ver 60 + info["crawl"] = crawl 61 + info["defs"] = defs 62 + return c.Render(http.StatusOK, "lexicon.html", info) 63 + } 64 + 65 + // e.GET("/demo/query", srv.WebDemoQuery) 66 + func (srv *WebServer) WebDemoQuery(c echo.Context) error { 67 + info := pongo2.Context{} 68 + 69 + nsid := syntax.NSID("example.lexicon.query") 70 + lex := Lexicon{ 71 + NSID: nsid, 72 + Domain: 
"lexicon.example", 73 + Group: "example.lexicon", 74 + Latest: "bafyreihbqdb64s3mwyfr2n3pt7dwxxaebbxntz7fchlaafzbtu5ps45i2u", 75 + } 76 + ver := Version{ 77 + RecordCID: "bafyreihbqdb64s3mwyfr2n3pt7dwxxaebbxntz7fchlaafzbtu5ps45i2u", 78 + NSID: nsid, 79 + Record: exampleQueryJSON, 80 + } 81 + crawl := Crawl{ 82 + ID: 123, 83 + // XXX: CreatedAt: "2025-02-04T00:03:07.091Z", 84 + NSID: nsid, 85 + DID: syntax.DID("did:web:lexicon.example"), 86 + RecordCID: "bafyreihbqdb64s3mwyfr2n3pt7dwxxaebbxntz7fchlaafzbtu5ps45i2u", 87 + RepoRev: "3lhcqvfmmqh22", 88 + Reason: "demo", 89 + //Extra 90 + } 91 + 92 + var sf lexicon.SchemaFile 93 + if err := json.Unmarshal(ver.Record, &sf); err != nil { 94 + return fmt.Errorf("Lexicon schema record was invalid: %w", err) 95 + } 96 + defs, err := ParseSchemaFile(&sf, nsid) 97 + if err != nil { 98 + return err 99 + } 100 + 101 + info["lexicon"] = lex 102 + info["version"] = ver 103 + info["crawl"] = crawl 104 + info["defs"] = defs 105 + return c.Render(http.StatusOK, "lexicon.html", info) 106 + }
+156
cmd/lexidex/web_handlers.go
··· 1 + package main 2 + 3 + import ( 4 + "encoding/json" 5 + "errors" 6 + "fmt" 7 + "net/http" 8 + 9 + "github.com/bluesky-social/indigo/atproto/lexicon" 10 + "github.com/bluesky-social/indigo/atproto/syntax" 11 + 12 + "github.com/flosch/pongo2/v6" 13 + "github.com/labstack/echo/v4" 14 + "gorm.io/gorm" 15 + ) 16 + 17 + func (srv *WebServer) WebHome(c echo.Context) error { 18 + ctx := c.Request().Context() 19 + info := pongo2.Context{} 20 + 21 + tx := srv.db.WithContext(ctx) 22 + var domains []Domain 23 + if err := tx.Where("hidden IS false AND disabled IS false").Find(&domains).Error; err != nil { 24 + return err 25 + } 26 + 27 + info["domains"] = domains 28 + return c.Render(http.StatusOK, "home.html", info) 29 + } 30 + 31 + // e.GET("/query", srv.WebQuery) 32 + func (srv *WebServer) WebQuery(c echo.Context) error { 33 + 34 + // parse the q query param, redirect based on that 35 + q := c.QueryParam("q") 36 + if q == "" { 37 + return c.Redirect(http.StatusFound, "/") 38 + } 39 + 40 + nsid, err := syntax.ParseNSID(q) 41 + if nil == err { 42 + return c.Redirect(http.StatusFound, fmt.Sprintf("/lexicon/%s", nsid)) 43 + } 44 + return echo.NewHTTPError(400, "failed to parse query") 45 + } 46 + 47 + // e.GET("/domain/:domain", srv.WebDomain) 48 + func (srv *WebServer) WebDomain(c echo.Context) error { 49 + ctx := c.Request().Context() 50 + info := pongo2.Context{} 51 + 52 + domain := c.Param("domain") 53 + _, err := syntax.ParseHandle(c.Param("domain")) 54 + if err != nil { 55 + return echo.NewHTTPError(400, "not a valid domain name") 56 + } 57 + 58 + tx := srv.db.WithContext(ctx) 59 + var lexicons []Lexicon 60 + if err := tx.Where("domain = ?", domain).Find(&lexicons).Error; err != nil { 61 + return err 62 + } 63 + 64 + info["domain"] = domain 65 + info["lexicons"] = lexicons 66 + return c.Render(http.StatusOK, "domain.html", info) 67 + } 68 + 69 + // e.GET("/lexicon/:nsid", srv.WebLexicon) 70 + func (srv *WebServer) WebLexicon(c echo.Context) error { 71 + ctx := 
c.Request().Context() 72 + info := pongo2.Context{} 73 + 74 + nsid, err := syntax.ParseNSID(c.Param("nsid")) 75 + if err != nil { 76 + return echo.NewHTTPError(400, "failed to parse lexicon NSID") 77 + } 78 + 79 + tx := srv.db.WithContext(ctx) 80 + var lex Lexicon 81 + if err := tx.First(&lex, "nsid = ?", nsid).Error; err != nil { 82 + if errors.Is(err, gorm.ErrRecordNotFound) { 83 + return echo.NewHTTPError(404, "lexicon not known") 84 + } else { 85 + return err 86 + } 87 + } 88 + var ver Version 89 + if err := tx.First(&ver, "record_cid = ?", lex.Latest).Error; err != nil { 90 + return err 91 + } 92 + var crawl Crawl 93 + if err := tx.Last(&crawl, "record_cid = ?", lex.Latest).Error; err != nil { 94 + return err 95 + } 96 + 97 + var sf lexicon.SchemaFile 98 + if err := json.Unmarshal(ver.Record, &sf); err != nil { 99 + return fmt.Errorf("Lexicon schema record was invalid: %w", err) 100 + } 101 + defs, err := ParseSchemaFile(&sf, nsid) 102 + if err != nil { 103 + return err 104 + } 105 + 106 + info["lexicon"] = lex 107 + info["version"] = ver 108 + info["crawl"] = crawl 109 + info["defs"] = defs 110 + info["uri"] = nsid 111 + return c.Render(http.StatusOK, "lexicon.html", info) 112 + } 113 + 114 + func (srv *WebServer) WebLexiconHistory(c echo.Context) error { 115 + ctx := c.Request().Context() 116 + info := pongo2.Context{} 117 + 118 + nsid, err := syntax.ParseNSID(c.Param("nsid")) 119 + if err != nil { 120 + return echo.NewHTTPError(400, "failed to parse lexicon NSID") 121 + } 122 + 123 + tx := srv.db.WithContext(ctx) 124 + var lex Lexicon 125 + if err := tx.First(&lex, "nsid = ?", nsid).Error; err != nil { 126 + if errors.Is(err, gorm.ErrRecordNotFound) { 127 + return echo.NewHTTPError(404, "lexicon not known") 128 + } else { 129 + return err 130 + } 131 + } 132 + 133 + var history []Crawl 134 + if err := tx.Where("nsid = ?", nsid).Find(&history).Error; err != nil { 135 + return err 136 + } 137 + 138 + info["lexicon"] = lex 139 + info["history"] = history 140 + 
info["uri"] = nsid 141 + return c.Render(http.StatusOK, "history.html", info) 142 + } 143 + 144 + func (srv *WebServer) WebRecent(c echo.Context) error { 145 + ctx := c.Request().Context() 146 + info := pongo2.Context{} 147 + 148 + tx := srv.db.WithContext(ctx) 149 + var history []Crawl 150 + if err := tx.Order("created_at desc").Limit(20).Find(&history).Error; err != nil { 151 + return err 152 + } 153 + 154 + info["history"] = history 155 + return c.Render(http.StatusOK, "recent.html", info) 156 + }
+201
cmd/lexidex/web_server.go
··· 1 + package main 2 + 3 + import ( 4 + "context" 5 + "embed" 6 + "errors" 7 + "fmt" 8 + "io/fs" 9 + "log/slog" 10 + "net/http" 11 + "os" 12 + "os/signal" 13 + "syscall" 14 + "time" 15 + 16 + "github.com/bluesky-social/indigo/atproto/identity" 17 + 18 + "github.com/flosch/pongo2/v6" 19 + "github.com/labstack/echo/v4" 20 + "github.com/labstack/echo/v4/middleware" 21 + slogecho "github.com/samber/slog-echo" 22 + "github.com/urfave/cli/v2" 23 + "gorm.io/driver/sqlite" 24 + "gorm.io/gorm" 25 + ) 26 + 27 + //go:embed static/* 28 + var StaticFS embed.FS 29 + 30 + type WebServer struct { 31 + echo *echo.Echo 32 + httpd *http.Server 33 + db *gorm.DB 34 + dir identity.Directory 35 + jetstreamHost string 36 + } 37 + 38 + func NewWebServer(cctx *cli.Context) (*WebServer, error) { 39 + debug := cctx.Bool("debug") 40 + httpAddress := cctx.String("bind") 41 + jetstreamHost := cctx.String("jetstream-host") 42 + db, err := gorm.Open(sqlite.Open(cctx.String("sqlite-path"))) 43 + if err != nil { 44 + return nil, fmt.Errorf("failed to open db: %w", err) 45 + } 46 + 47 + e := echo.New() 48 + 49 + // httpd 50 + var ( 51 + httpTimeout = 1 * time.Minute 52 + httpMaxHeaderBytes = 1 * (1024 * 1024) 53 + ) 54 + 55 + srv := &WebServer{ 56 + echo: e, 57 + db: db, 58 + dir: identity.DefaultDirectory(), 59 + jetstreamHost: jetstreamHost, 60 + } 61 + srv.httpd = &http.Server{ 62 + Handler: srv, 63 + Addr: httpAddress, 64 + WriteTimeout: httpTimeout, 65 + ReadTimeout: httpTimeout, 66 + MaxHeaderBytes: httpMaxHeaderBytes, 67 + } 68 + 69 + e.HideBanner = true 70 + e.Use(slogecho.New(slog.Default())) 71 + e.Use(middleware.Recover()) 72 + e.Use(middleware.BodyLimit("64M")) 73 + e.HTTPErrorHandler = srv.errorHandler 74 + e.Renderer = NewRenderer("templates/", &TemplateFS, debug) 75 + e.Use(middleware.SecureWithConfig(middleware.SecureConfig{ 76 + ContentTypeNosniff: "nosniff", 77 + XFrameOptions: "SAMEORIGIN", 78 + HSTSMaxAge: 31536000, // 365 days 79 + // TODO: 80 + // ContentSecurityPolicy 81 + // 
XSSProtection 82 + })) 83 + 84 + // redirect trailing slash to non-trailing slash. 85 + // all of our current endpoints have no trailing slash. 86 + e.Use(middleware.RemoveTrailingSlashWithConfig(middleware.TrailingSlashConfig{ 87 + RedirectCode: http.StatusFound, 88 + })) 89 + 90 + staticHandler := http.FileServer(func() http.FileSystem { 91 + if debug { 92 + return http.FS(os.DirFS("static")) 93 + } 94 + fsys, err := fs.Sub(StaticFS, "static") 95 + if err != nil { 96 + slog.Error("static template error", "err", err) 97 + os.Exit(-1) 98 + } 99 + return http.FS(fsys) 100 + }()) 101 + 102 + e.GET("/static/*", echo.WrapHandler(http.StripPrefix("/static/", staticHandler))) 103 + e.GET("/_health", srv.HandleHealthCheck) 104 + 105 + // basic static routes 106 + e.GET("/robots.txt", echo.WrapHandler(staticHandler)) 107 + e.GET("/favicon.ico", echo.WrapHandler(staticHandler)) 108 + 109 + // actual content 110 + e.GET("/", srv.WebHome) 111 + e.GET("/query", srv.WebQuery) 112 + e.GET("/recent", srv.WebRecent) 113 + e.GET("/domain/:domain", srv.WebDomain) 114 + e.GET("/lexicon/:nsid", srv.WebLexicon) 115 + e.GET("/lexicon/:nsid/history", srv.WebLexiconHistory) 116 + // TODO: e.GET("/lexicon/:nsid/def/:name", srv.WebLexiconDef) 117 + 118 + e.GET("/demo/record", srv.WebDemoRecord) 119 + e.GET("/demo/query", srv.WebDemoQuery) 120 + 121 + return srv, nil 122 + } 123 + 124 + // Starts the server in a goroutine, and returns 125 + func (srv *WebServer) RunWeb() { 126 + // Start the server 127 + slog.Info("starting server", "bind", srv.httpd.Addr) 128 + go func() { 129 + if err := srv.httpd.ListenAndServe(); err != nil { 130 + if !errors.Is(err, http.ErrServerClosed) { 131 + slog.Error("HTTP server shutting down unexpectedly", "err", err) 132 + } 133 + } 134 + }() 135 + } 136 + 137 + // Runs in this thread 138 + func (srv *WebServer) RunSignalHandler() error { 139 + // Wait for a signal to exit. 
140 + slog.Info("registering OS exit signal handler") 141 + quit := make(chan struct{}) 142 + exitSignals := make(chan os.Signal, 1) 143 + signal.Notify(exitSignals, syscall.SIGINT, syscall.SIGTERM) 144 + go func() { 145 + sig := <-exitSignals 146 + slog.Info("received OS exit signal", "signal", sig) 147 + 148 + // Shut down the HTTP server 149 + if err := srv.Shutdown(); err != nil { 150 + slog.Error("HTTP server shutdown error", "err", err) 151 + } 152 + 153 + // Trigger the return that causes an exit. 154 + close(quit) 155 + }() 156 + <-quit 157 + slog.Info("graceful shutdown complete") 158 + return nil 159 + } 160 + 161 + type GenericStatus struct { 162 + Daemon string `json:"daemon"` 163 + Status string `json:"status"` 164 + Message string `json:"msg,omitempty"` 165 + } 166 + 167 + func (srv *WebServer) errorHandler(err error, c echo.Context) { 168 + code := http.StatusInternalServerError 169 + var errorMessage string 170 + if he, ok := err.(*echo.HTTPError); ok { 171 + code = he.Code 172 + errorMessage = fmt.Sprintf("%s", he.Message) 173 + } 174 + if code >= 500 { 175 + slog.Warn("lexidex-http-internal-error", "err", err) 176 + } 177 + data := pongo2.Context{ 178 + "statusCode": code, 179 + "errorMessage": errorMessage, 180 + } 181 + if !c.Response().Committed { 182 + c.Render(code, "error.html", data) 183 + } 184 + } 185 + 186 + func (srv *WebServer) ServeHTTP(rw http.ResponseWriter, req *http.Request) { 187 + srv.echo.ServeHTTP(rw, req) 188 + } 189 + 190 + func (srv *WebServer) Shutdown() error { 191 + slog.Info("shutting down") 192 + 193 + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 194 + defer cancel() 195 + 196 + return srv.httpd.Shutdown(ctx) 197 + } 198 + 199 + func (s *WebServer) HandleHealthCheck(c echo.Context) error { 200 + return c.JSON(200, GenericStatus{Status: "ok", Daemon: "lexidex"}) 201 + }