···1313# MSVC Windows builds of rustc generate these, which store debugging information
1414*.pdb
15151616-# RustRover
1717-# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
1818-# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
1919-# and can be added to the global gitignore or merged into this file. For a more nuclear
2020-# option (not recommended) you can uncomment the following to ignore the entire idea folder.
2121-#.idea/1616+# Project specific files
1717+config.yml
1818+development.db
1919+jetstream_zstd_dictionary
2020+
+39
Cargo.toml
···11+[package]
22+name = "supercell"
33+version = "0.1.0"
44+edition = "2021"
55+66+[profile.release]
77+lto = true
88+strip = true
99+1010+[dependencies]
1111+anyhow = "1.0.88"
1212+async-trait = "0.1.82"
1313+axum-extra = { version = "0.9.4", features = ["query"] }
1414+axum = { version = "0.7.5", features = ["http2", "macros"] }
1515+base64 = "0.22.1"
1616+chrono-tz = "0.10.0"
1717+chrono = { version = "0.4.38", default-features = false, features = ["std", "alloc", "now"] }
1818+ecdsa = { version = "0.16.9", features = ["std", "signing", "verifying"] }
1919+futures-util = { version = "0.3.31", features = ["sink"] }
2020+headers = "0.4.0"
2121+http = "1.1.0"
2222+k256 = { version = "0.13.4", features = ["ecdsa"] }
2323+multibase = "0.9.1"
2424+p256 = { version = "0.13.2", features = ["ecdsa"] }
2525+serde_json_path = "0.7.1"
2626+serde_json = { version = "1.0.132", features = ["alloc"] }
2727+serde = { version = "1.0.214", features = ["alloc", "derive"] }
2828+serde_yaml = "0.9.34"
2929+sqlx-cli = { version = "0.8.2", features = ["sqlite"] }
3030+sqlx = { version = "0.8.2", features = ["chrono", "sqlite"] }
3131+thiserror = "1.0.63"
3232+tokio-util = { version = "0.7.12", features = ["net", "rt", "tracing"] }
3333+tokio = { version = "1.41.0", features = ["bytes", "macros", "net", "rt", "rt-multi-thread", "signal", "sync"] }
3434+tokio-websockets = { version = "0.10.1", features = ["client", "native-tls", "rand", "ring"] }
3535+tower-http = { version = "0.5.2", features = ["cors", "fs", "timeout", "trace", "tracing"] }
3636+tower = { version = "0.5.1", features = ["limit", "timeout", "tokio", "tracing"] }
3737+tracing-subscriber = { version = "0.3.18", features = ["env-filter", "chrono", "json"] }
3838+tracing = { version = "0.1.40", features = ["async-await", "log", "valuable"] }
3939+zstd = "0.13.2"
···11-# supercell11+# supercell
22+33+> A supercell is a thunderstorm characterized by the presence of a mesocyclone, a deep, persistently rotating updraft.
44+55+Supercell is a lightweight and configurable atproto feed generator.
66+77+# Configuration
88+99+The following environment variables are used:
1010+1111+* `HTTP_PORT` - The port to listen on for HTTP requests.
1212+* `EXTERNAL_BASE` - The hostname of the feed generator.
1313+* `DATABASE_URL` - The URL of the database to use.
1414+* `JETSTREAM_HOSTNAME` - The hostname of the JetStream server to consume events from.
1515+* `ZSTD_DICTIONARY` - The path to the ZSTD dictionary to use.
1616+* `CONSUMER_TASK_ENABLE` - Whether or not to enable the consumer tasks.
1717+* `FEEDS` - The path to the feeds configuration file.
1818+* `RUST_LOG` - Logging configuration. Defaults to `supercell=debug,info`
1919+2020+The feed configuration file is a YAML file that contains the feeds to serve and how to match events to the feed. It supports a variable number of matchers with different rules. Matching is done in order and uses json path plus the matcher implementation.
2121+2222+```yaml
2323+feeds:
2424+- uri: "at://did:plc:4acsffvbo4niovge362ptijz/app.bsky.feed.generator/3la5azib4xe2c"
2525+ name: "Smoke Signal Support"
2626+ description: "The Smoke Signal Support feed."
2727+ allow: ["did:plc:cbkjy5n7bk3ax2wplmtjofq2"]
2828+ deny: "at://did:plc:4acsffvbo4niovge362ptijz/app.bsky.feed.post/3la5bsyzj3j23"
2929+ matchers:
3030+ - path: "$.did"
3131+ value: "did:plc:tgudj2fjm77pzkuawquqhsxm"
3232+ type: equal
3333+ - path: "$.commit.record.text"
3434+ values: ["smoke", "signal"]
3535+ type: sequence
3636+ - path: "$.commit.record.facets[*].features[?(@['$type'] == 'app.bsky.richtext.facet#tag')].tag"
3737+ values: ["smoke", "signal"]
3838+ type: sequence
3939+ - path: "$.commit.record.reply.parent.uri"
4040+ value: "at://did:plc:tgudj2fjm77pzkuawquqhsxm/app.bsky.feed.post/"
4141+ type: prefix
4242+ - path: "$.commit.record.reply.root.uri"
4343+ value: "at://did:plc:tgudj2fjm77pzkuawquqhsxm/app.bsky.feed.post/"
4444+ type: prefix
4545+ - path: "$.commit.record.facets[*].features[?(@['$type'] == 'app.bsky.richtext.facet#link')].uri"
4646+ value: "https://smokesignal.events/"
4747+ type: prefix
4848+ - path: "$.commit.record.facets[*].features[?(@['$type'] == 'app.bsky.richtext.facet#mention')].did"
4949+ value: "did:plc:tgudj2fjm77pzkuawquqhsxm"
5050+ type: equal
5151+ - path: "$.commit.record.embed.external.uri"
5252+ value: "https://smokesignal.events/"
5353+ type: prefix
5454+ - path: "$.commit.record.embed.record.uri"
5555+ value: "at://did:plc:tgudj2fjm77pzkuawquqhsxm/"
5656+ type: prefix
5757+```
5858+5959+The `equal` matcher performs an exact string match on matched paths.
6060+6161+The `prefix` matcher performs a prefix string match on matched paths. Given the value "foo bar baz", the following prefixes would match: "foo", "foo ", etc.
6262+6363+The `sequence` matcher performs a sequence string match on matched paths. It matches a list of values in order, which allows flexible ordered matching without needing regex or complex reverse lookups.
6464+6565+Consider the example string "The quick brown fox jumps over the lazy dog". The following sequences would match:
6666+6767+* "the" "quick"
6868+* "brown"
6969+* "brow" "fox" "lazy" "dog"
7070+* "the" "dog"
7171+7272+JSONPath is a query language for JSON. When used with matchers, JSONPath will use all nodes as inputs and each matcher will match against any of the values.
7373+7474+For example, the following json would match the `equal` matcher with both `$.text` and `$.tags.*`:
7575+7676+```json
7777+{
7878+ "text": "foo",
7979+ "tags": ["foo", "bar"]
8080+}
8181+```
8282+8383+The site [https://jsonpath.com/](https://jsonpath.com/) is a great resource for testing JSONPath queries.
8484+8585+See the `config.example.yml` file for additional examples.
8686+8787+# TODO
8888+8989+* use i64, it's fine
9090+* look up keys on startup
9191+* possible scoring function for queries
9292+* add likes
9393+* support deletes
9494+* document how to register a feed
9595+9696+# License
9797+9898+This project is open source under the MIT license.
9999+100100+Copyright (c) 2023 Astrenox Cooperative. All Rights Reserved.
101101+
···11+-- Down migration: drop every table created by the matching init up migration
22+33+DROP TABLE feed_content;
44+DROP TABLE consumer_control;
55+DROP TABLE verification_method_cache; -- also created by the up migration; must be dropped so the up script can be re-applied
66+
+27
migrations/20241103180245_init.up.sql
···11+-- Add up migration script here
22+33+CREATE TABLE feed_content ( -- one row per (feed_id, uri) pair, enforced by the primary key below
44+ feed_id TEXT NOT NULL,
55+ uri TEXT NOT NULL,
66+ indexed_at INTEGER NOT NULL, -- NOTE(review): presumably a numeric timestamp used for ordering; unit not shown here, confirm
77+ indexed_at_more INTEGER NOT NULL, -- NOTE(review): looks like a secondary ordering component after indexed_at; confirm against the writer
88+ cid TEXT NOT NULL,
99+ updated_at DATETIME NOT NULL DEFAULT (datetime('now')), -- filled in with the current time when the insert omits it
1010+ PRIMARY KEY (feed_id, uri)
1111+);
1212+1313+CREATE INDEX feed_content_idx_feed ON feed_content(feed_id, indexed_at DESC, indexed_at_more DESC, cid DESC); -- per-feed index in descending order; presumably backs newest-first feed queries (verify against the query code)
1414+1515+CREATE TABLE consumer_control ( -- one row per source (primary key below)
1616+ source TEXT NOT NULL,
1717+ time_us VARCHAR NOT NULL, -- NOTE(review): presumably the last processed event time in microseconds, stored as text; confirm against the consumer
1818+ updated_at DATETIME NOT NULL DEFAULT (datetime('now')), -- filled in with the current time when the insert omits it
1919+ PRIMARY KEY (source)
2020+);
2121+2222+CREATE TABLE verification_method_cache ( -- one cached multikey per DID (primary key below)
2323+ did TEXT NOT NULL,
2424+ multikey TEXT NOT NULL, -- NOTE(review): presumably the multibase-encoded public key for the DID; confirm against the resolver code
2525+ updated_at DATETIME NOT NULL DEFAULT (datetime('now')), -- filled in with the current time when the insert omits it
2626+ PRIMARY KEY (did)
2727+);