···11+# These are supported funding model platforms
22+33+github: uniphil
44+patreon: # Replace with a single Patreon username
55+open_collective: # Replace with a single Open Collective username
66+ko_fi: # Replace with a single Ko-fi username
77+tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
88+community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
99+liberapay: # Replace with a single Liberapay username
1010+issuehunt: # Replace with a single IssueHunt username
1111+lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
1212+polar: # Replace with a single Polar username
1313+buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
1414+thanks_dev: # Replace with a single thanks.dev username
1515+custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
···3636/// See [websocket_task](crate::websocket_task).
3737#[derive(Error, Debug)]
3838pub enum JetstreamEventError {
3939- #[error("received websocket message that could not be deserialized as JSON: {0}")]
4040- ReceivedMalformedJSON(#[from] serde_json::Error),
4139 #[error("failed to load built-in zstd dictionary for decoding: {0}")]
4240 CompressionDictionaryError(io::Error),
4343- #[error("failed to decode zstd-compressed message: {0}")]
4444- CompressionDecoderError(io::Error),
4545- #[error("all receivers were dropped but the websocket connection failed to close cleanly")]
4646- WebSocketCloseFailure,
4741 #[error("failed to send ping or pong: {0}")]
4842 PingPongError(#[from] tokio_tungstenite::tungstenite::Error),
4343+ #[error("no messages received within ttl")]
4444+ NoMessagesReceived,
4945 #[error("jetstream event receiver closed")]
5046 ReceiverClosedError,
5147}
+11
jetstream/src/events.rs
···142142 let t: SystemTime = self.into();
143143 t.elapsed()
144144 }
145145+ /// Compute the age of the cursor vs the local clock
146146+ ///
147147+ /// Converts the resulting duration into an f64, which can be negative!
148148+ ///
149149+ /// Warning: this exploits the internal implementation detail of jetstream cursors
150150+ pub fn elapsed_micros_f64(&self) -> f64 {
151151+ match self.elapsed() {
152152+ Ok(d) => d.as_micros() as f64,
153153+ Err(e) => -(e.duration().as_micros() as f64),
154154+ }
155155+ }
145156 /// Get the immediate next cursor value
146157 ///
147158 /// This is possible for the implementation of jetstream cursors
+128-8
jetstream/src/lib.rs
···1414 stream::StreamExt,
1515 SinkExt,
1616};
1717+#[cfg(feature = "metrics")]
1818+use metrics::{
1919+ counter,
2020+ describe_counter,
2121+ Unit,
2222+};
1723use tokio::{
1824 net::TcpStream,
1925 sync::mpsc::{
···2127 Receiver,
2228 Sender,
2329 },
3030+ time::timeout,
2431};
2532use tokio_tungstenite::{
2633 connect_async,
···194201 /// can help prevent that if your consumer sometimes pauses, at a cost of higher memory
195202 /// usage while events are buffered.
196203 pub channel_size: usize,
204204+ /// How long since the last jetstream message before we consider the connection dead
205205+ ///
206206+ /// Default: 15s
207207+ pub liveliness_ttl: Duration,
197208}
198209199210impl Default for JetstreamConfig {
···207218 omit_user_agent_jetstream_info: false,
208219 replay_on_reconnect: false,
209220 channel_size: 4096, // a few seconds of firehose buffer
221221+ liveliness_ttl: Duration::from_secs(15),
210222 }
211223 }
212224}
···299311 }
300312}
/// Register a unit and a human-readable description for every counter this crate emits.
///
/// Only compiled when the `metrics` feature is enabled.
#[cfg(feature = "metrics")]
fn describe_metrics() {
    describe_counter!(
        "jetstream_connects",
        Unit::Count,
        "how many times we've tried to connect"
    );
    describe_counter!(
        "jetstream_disconnects",
        Unit::Count,
        "how many times we've been disconnected"
    );
    describe_counter!(
        "jetstream_total_events_received",
        Unit::Count,
        "total number of events received"
    );
    // This counter is incremented by message payload lengths, so its unit is bytes rather
    // than a plain count. For binary (zstd) messages the length added is that of the
    // compressed payload, so the description must not claim "uncompressed".
    describe_counter!(
        "jetstream_total_bytes_received",
        Unit::Bytes,
        "total message payload bytes received (as received, before any decompression), not including websocket overhead"
    );
    describe_counter!(
        "jetstream_total_event_errors",
        Unit::Count,
        "total errors when handling events"
    );
    describe_counter!(
        "jetstream_total_events_sent",
        Unit::Count,
        "total events sent to the consumer"
    );
}
347347+302348impl JetstreamConnector {
303349 /// Create a Jetstream connector with a valid [JetstreamConfig].
304350 ///
305351 /// After creation, you can call [connect] to connect to the provided Jetstream instance.
306352 pub fn new(config: JetstreamConfig) -> Result<Self, ConfigValidationError> {
353353+ #[cfg(feature = "metrics")]
354354+ describe_metrics();
355355+307356 // We validate the configuration here so any issues are caught early.
308357 config.validate()?;
309358 Ok(JetstreamConnector { config })
···337386338387 let (send_channel, receive_channel) = channel(self.config.channel_size);
339388 let replay_on_reconnect = self.config.replay_on_reconnect;
389389+ let liveliness_ttl = self.config.liveliness_ttl;
340390 let build_request = self.config.get_request_builder();
341391342392 tokio::task::spawn(async move {
···359409 }
360410 };
361411412412+ #[cfg(feature = "metrics")]
413413+ if let Some(host) = req.uri().host() {
414414+ let retry = if retry_attempt > 0 { "yes" } else { "no" };
415415+ counter!("jetstream_connects", "host" => host.to_string(), "retry" => retry)
416416+ .increment(1);
417417+ }
418418+362419 let mut last_cursor = connect_cursor;
363420 retry_attempt += 1;
364421 if let Ok((ws_stream, _)) = connect_async(req).await {
365422 let t_connected = Instant::now();
366423 log::info!("jetstream connected. starting websocket task...");
367367- if let Err(e) =
368368- websocket_task(dict, ws_stream, send_channel.clone(), &mut last_cursor)
369369- .await
424424+ if let Err(e) = websocket_task(
425425+ dict,
426426+ ws_stream,
427427+ send_channel.clone(),
428428+ &mut last_cursor,
429429+ liveliness_ttl,
430430+ )
431431+ .await
370432 {
371371- if let JetstreamEventError::ReceiverClosedError = e {
372372- log::error!("Jetstream receiver channel closed. Exiting consumer.");
373373- return;
433433+ match e {
434434+ JetstreamEventError::ReceiverClosedError => {
435435+ #[cfg(feature="metrics")]
436436+ counter!("jetstream_disconnects", "reason" => "channel", "fatal" => "yes").increment(1);
437437+ log::error!("Jetstream receiver channel closed. Exiting consumer.");
438438+ return;
439439+ }
440440+ JetstreamEventError::CompressionDictionaryError(_) => {
441441+ #[cfg(feature="metrics")]
442442+ counter!("jetstream_disconnects", "reason" => "zstd", "fatal" => "no").increment(1);
443443+ }
444444+ JetstreamEventError::NoMessagesReceived => {
445445+ #[cfg(feature="metrics")]
446446+ counter!("jetstream_disconnects", "reason" => "ttl", "fatal" => "no").increment(1);
447447+ }
448448+ JetstreamEventError::PingPongError(_) => {
449449+ #[cfg(feature="metrics")]
450450+ counter!("jetstream_disconnects", "reason" => "pingpong", "fatal" => "no").increment(1);
451451+ }
374452 }
375375- log::error!("Jetstream closed after encountering error: {e:?}");
453453+ log::warn!("Jetstream closed after encountering error: {e:?}");
376454 } else {
455455+ #[cfg(feature = "metrics")]
456456+ counter!("jetstream_disconnects", "reason" => "close", "fatal" => "no")
457457+ .increment(1);
377458 log::warn!("Jetstream connection closed cleanly");
378459 }
379460 if t_connected.elapsed() > Duration::from_secs(success_threshold_s) {
···416497 ws: WebSocketStream<MaybeTlsStream<TcpStream>>,
417498 send_channel: JetstreamSender,
418499 last_cursor: &mut Option<Cursor>,
500500+ liveliness_ttl: Duration,
419501) -> Result<(), JetstreamEventError> {
420502 // TODO: Use the write half to allow the user to change configuration settings on the fly.
421503 let (mut socket_write, mut socket_read) = ws.split();
422504423505 let mut closing_connection = false;
424506 loop {
425425- match socket_read.next().await {
507507+ let next = match timeout(liveliness_ttl, socket_read.next()).await {
508508+ Ok(n) => n,
509509+ Err(_) => {
510510+ log::warn!("jetstream no events for {liveliness_ttl:?}, closing");
511511+ _ = socket_write.close().await;
512512+ return Err(JetstreamEventError::NoMessagesReceived);
513513+ }
514514+ };
515515+ match next {
426516 Some(Ok(message)) => match message {
427517 Message::Text(json) => {
518518+ #[cfg(feature = "metrics")]
519519+ {
520520+ counter!("jetstream_total_events_received", "compressed" => "false")
521521+ .increment(1);
522522+ counter!("jetstream_total_bytes_received", "compressed" => "false")
523523+ .increment(json.len() as u64);
524524+ }
428525 let event: JetstreamEvent = match serde_json::from_str(&json) {
429526 Ok(ev) => ev,
430527 Err(e) => {
528528+ #[cfg(feature = "metrics")]
529529+ counter!("jetstream_total_event_errors", "reason" => "deserialize")
530530+ .increment(1);
431531 log::warn!(
432532 "failed to parse json: {e:?} (from {})",
433533 json.get(..24).unwrap_or(&json)
···439539440540 if let Some(last) = last_cursor {
441541 if event_cursor <= *last {
542542+ #[cfg(feature = "metrics")]
543543+ counter!("jetstream_total_event_errors", "reason" => "old")
544544+ .increment(1);
442545 log::warn!("event cursor {event_cursor:?} was not newer than the last one: {last:?}. dropping event.");
443546 continue;
444547 }
···453556 } else if let Some(last) = last_cursor.as_mut() {
454557 *last = event_cursor;
455558 }
559559+ #[cfg(feature = "metrics")]
560560+ counter!("jetstream_total_events_sent").increment(1);
456561 }
457562 Message::Binary(zstd_json) => {
563563+ #[cfg(feature = "metrics")]
564564+ {
565565+ counter!("jetstream_total_events_received", "compressed" => "true")
566566+ .increment(1);
567567+ counter!("jetstream_total_bytes_received", "compressed" => "true")
568568+ .increment(zstd_json.len() as u64);
569569+ }
458570 let mut cursor = IoCursor::new(zstd_json);
459571 let decoder =
460572 zstd::stream::Decoder::with_prepared_dictionary(&mut cursor, &dictionary)
···463575 let event: JetstreamEvent = match serde_json::from_reader(decoder) {
464576 Ok(ev) => ev,
465577 Err(e) => {
578578+ #[cfg(feature = "metrics")]
579579+ counter!("jetstream_total_event_errors", "reason" => "deserialize")
580580+ .increment(1);
466581 log::warn!("failed to parse json: {e:?}");
467582 continue;
468583 }
···471586472587 if let Some(last) = last_cursor {
473588 if event_cursor <= *last {
589589+ #[cfg(feature = "metrics")]
590590+ counter!("jetstream_total_event_errors", "reason" => "old")
591591+ .increment(1);
474592 log::warn!("event cursor {event_cursor:?} was not newer than the last one: {last:?}. dropping event.");
475593 continue;
476594 }
···485603 } else if let Some(last) = last_cursor.as_mut() {
486604 *last = event_cursor;
487605 }
606606+ #[cfg(feature = "metrics")]
607607+ counter!("jetstream_total_events_sent").increment(1);
488608 }
489609 Message::Ping(vec) => {
490610 log::trace!("Ping recieved, responding");
+35
legacy/old-readme-details.md
···11+[Constellation](./constellation/)
22+--------------------------------------------
33+44+A global atproto backlink index ✨
55+66+- Self hostable: handles the full write throughput of the global atproto firehose on a raspberry pi 4b + single SSD
77+- Storage efficient: less than 2GB/day disk consumption indexing all references in all lexicons and all non-atproto URLs
88+- Handles record deletion, account de/re-activation, and account deletion, ensuring accurate link counts and respecting users data choices
99+- Simple JSON API
1010+1111+All social interactions in atproto tend to be represented by links (or references) between PDS records. This index can answer questions like "how many likes does a bsky post have", "who follows an account", "what are all the comments on a [frontpage](https://frontpage.fyi/) post", and more.
1212+1313+- **status**: works! api is unstable and likely to change, and no known instances have a full network backfill yet.
1414+- source: [./constellation/](./constellation/)
1515+- public instance: [constellation.microcosm.blue](https://constellation.microcosm.blue/)
1717+_note: the public instance currently runs on a little raspberry pi in my house, feel free to use it! it comes only with best-effort uptime, no commitment to not breaking the api for now, and possible rate-limiting. if you want to be nice you can put your project name and bsky username (or email) in your user-agent header for api requests._
1818+1919+2020+App: Spacedust
2121+--------------
2222+2323+A notification subscription service 💫
2424+2525+using the same "link source" concept as [constellation](./constellation/), offer webhook notifications for new references created to records
2626+2727+- **status**: in design
2828+2929+3030+Library: [links](./links/)
3131+------------------------------------
3232+3333+A rust crate (not published on crates.io yet) for optimistically parsing links out of arbitrary atproto PDS records, and potentially canonicalizing them
3434+3535+- **status**: unstable, might remain an internal lib for constellation (and spacedust, soon)
+123
legacy/original-notes.md
···11+---
22+33+44+old notes follow, ignore
55+------------------------
66+77+88+as far as i can tell, atproto lexicons today don't follow much of a convention for referencing across documents: sometimes it's a StrongRef, sometimes it's a DID, sometimes it's a bare at-uri. lexicon authors choose any old link-sounding key name for the key in their document.
99+1010+it's pretty messy so embrace the mess: atproto wants to be part of the web, so this library will also extract URLs and other URIs if you want it to. all the links.
1111+1212+1313+why
1414+---
1515+1616+the atproto firehose that bluesky sprays at you will contain raw _contents_ from peoples' pdses. these are isolated, decontextualized updates. it's very easy to build some kinds of interesting downstream apps off of this feed.
1717+1818+- bluesky posts (firesky, deletions, )
1919+- bluesky post stats (emojis, )
2020+- trending keywords ()
2121+2222+but bringing almost any kind of _context_ into your project requires a big step up in complexity and potentially cost: you're entering "appview" territory. _how many likes does a post have? who follows this account?_
2323+2424+you own your atproto data: it's kept in your personal data repository (PDS) and no one else can write to it. when someone likes your post, they create a "like" record in their _own_ pds, and that like belongs to _them_, not to you/your post.
2525+2626+in the firehose you'll see a `app.bsky.feed.post` record created, with no details about who has liked it. then you'll see separate `app.bsky.feed.like` records show up for each like that comes in on that post, with no context about the post except a random-looking reference to it. storing these in order to do so is up to you!
2727+2828+**so, why**
2929+3030+everything is links, and they're a mess, but they all kinda work the same, so maybe some tooling can bring down that big step in complexity from firehose raw-content apps -> apps requiring any social context.
3131+3232+everything is links:
3333+3434+- likes
3535+- follows
3636+- blocks
3737+- reposts
3838+- quotes
3939+4040+some low-level things you could make from links:
4141+4242+- notification streams (part of ucosm)
4343+- a global reverse index (part of ucosm)
4444+4545+i think that making these low-level services as easy to use as jetstream could open up pathways for building more atproto apps that operate at full scale with interesting features for reasonable effort at low cost to operate.
4646+4747+4848+extracting links
4949+---------------
5050+5151+5252+- low-level: pass a &str of a field value and get a parsed link back
5353+5454+- med-level: pass a &str of record in json form and get a list of parsed links + json paths back. (todo: should also handle dag-cbor prob?)
5555+5656+- high-ish level: pass the json record and maybe apply some pre-loaded rules based on known lexicons to get the best result.
5757+5858+for now, a link is only considered if it matches for the entire value of the record's field -- links embedded in text content are not included. note that urls in bluesky posts _will_ still be extracted, since they are broken out into facets.
5959+6060+6161+resolving / canonicalizing links
6262+--------------------------------
6363+6464+6565+### at-uris
6666+6767+every at-uri has at least two equivalent forms, one with a `DID`, and one with an account handle. the at-uri spec [illustrates this by example](https://atproto.com/specs/at-uri-scheme):
6868+6969+- `at://did:plc:44ybard66vv44zksje25o7dz/app.bsky.feed.post/3jwdwj2ctlk26`
7070+- `at://bnewbold.bsky.team/app.bsky.feed.post/3jwdwj2ctlk26`
7171+7272+some applications, like a reverse link index, may wish to canonicalize at-uris to a single form. the `DID`-form is stable as an account changes its handle and probably the right choice to canonicalize to, but maybe some apps would actually prefer to canonicalize to handles?
7373+7474+hopefully atrium will make it easy to resolve at-uris.
7575+7676+7777+### urls
7878+7979+canonicalizing URLs is more annoying but also a bit more established. lots of details.
8080+8181+- do we have to deal with punycode?
8282+- follow redirects (todo: only permanent ones, or all?)
8383+- check for rel=canonical http header and possibly follow it
8484+- check link rel=canonical meta tag and possibly follow it
8585+- do we need to check site maps??
8686+- do we have to care at all about AMP?
8787+- do we want anything to do with url shorteners??
8888+- how do multilingual sites affect this?
8989+- do we have to care about `script type="application/ld+json"` ???
9090+9191+ugh. is there a crate for this.
9292+9393+9494+### relative uris?
9595+9696+links might be relative, in which case they might need to be made absolute before being useful. is that a concern for this library, or up to the user? (seems like we might not have enough context here to determine the absolute form)
9797+9898+9999+### canonicalizing
100100+101101+there should be a few async functions available to canonicalize already-parsed links.
102102+103103+- what happens if a link can't be resolved?
104104+105105+106106+---
107107+108108+- using `tinyjson` because it's nice -- maybe should switch to serde_json to share deps with atrium?
109109+110110+- would use atrium for parsing at-uris, but it's not in there. there's a did-only version in the non-lib commands.rs. its identifier parser is strict to did + handle, which makes sense, but for our purposes we might want to allow unknown methods too?
111111+112112+ - rsky-syntax has an aturi
113113+ - adenosyne also
114114+ - might come back to these
115115+116116+117117+-------
118118+119119+rocks
120120+121121+```bash
122122+ROCKSDB_LIB_DIR=/nix/store/z2chn0hsik0clridr8mlprx1cngh1g3c-rocksdb-9.7.3/lib/ cargo build
123123+```
···11+[package]
22+name = "pocket"
33+version = "0.1.0"
44+edition = "2024"
55+66+[dependencies]
77+atrium-crypto = "0.1.2"
88+clap = { version = "4.5.41", features = ["derive"] }
99+jwt-compact = { git = "https://github.com/fatfingers23/jwt-compact.git", features = ["es256k"] }
1010+log = "0.4.27"
1111+poem = { version = "3.1.12", features = ["acme", "static-files"] }
1212+poem-openapi = { version = "5.1.16", features = ["scalar"] }
1313+reqwest = { version = "0.12.22", features = ["json"] }
1414+rusqlite = "0.37.0"
1515+serde = { version = "1.0.219", features = ["derive"] }
1616+serde_json = { version = "1.0.141" }
1717+thiserror = "2.0.16"
1818+tokio = { version = "1.47.0", features = ["full"] }
1919+tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
+17
pocket/api-description.md
···11+_A pocket dimension to stash a bit of non-public user data._
22+33+44+# Pocket: user preference storage
55+66+This API leverages atproto service proxying to offer a bit of per-user per-app non-public data storage.
77+Perfect for things like application preferences that might be better left out of the public PDS data.
88+99+The intent is to use oauth scopes to isolate storage on a per-application basis, and to allow easy data migration from a community hosted instance to your own if you end up needing that.
1010+1111+1212+### Current status
1313+1414+> [!important]
1515+> Pocket is currently in a **v0, pre-release state**. There is one production instance and you can use it! Expect short downtimes for restarts as development progresses and occasional data loss until it's stable.
1616+1717+ATProto might end up adding a similar feature to [PDSs](https://atproto.com/guides/glossary#pds-personal-data-server). If/when that happens, you should use it instead of this!
+7
pocket/src/lib.rs
···11+mod server;
22+mod storage;
33+mod token;
44+55+pub use server::serve;
66+pub use storage::Storage;
77+pub use token::TokenVerifier;
+34
pocket/src/main.rs
···11+use clap::Parser;
22+use pocket::{Storage, serve};
33+use std::path::PathBuf;
44+55+/// Slingshot record edge cache
66+#[derive(Parser, Debug, Clone)]
77+#[command(version, about, long_about = None)]
88+struct Args {
99+ /// path to the sqlite db file
1010+ #[arg(long)]
1111+ db: Option<PathBuf>,
1212+ /// just initialize the db and exit
1313+ #[arg(long, action)]
1414+ init_db: bool,
1515+ /// the domain for serving a did doc (unused if running behind reflector)
1616+ #[arg(long)]
1717+ domain: Option<String>,
1818+}
1919+2020+#[tokio::main]
2121+async fn main() {
2222+ tracing_subscriber::fmt::init();
2323+ log::info!("👖 hi");
2424+ let args = Args::parse();
2525+ let domain = args.domain.unwrap_or("bad-example.com".into());
2626+ let db_path = args.db.unwrap_or("prefs.sqlite3".into());
2727+ if args.init_db {
2828+ Storage::init(&db_path).unwrap();
2929+ log::info!("👖 initialized db at {db_path:?}. bye")
3030+ } else {
3131+ let storage = Storage::connect(db_path).unwrap();
3232+ serve(&domain, storage).await
3333+ }
3434+}
···11-microcosm: links
22-================
33-44-this repo contains libraries and apps for working with cross-record references in at-protocol.
55-11+microcosm HTTP APIs + rust crates
22+=================================
33+[](https://bsky.app/profile/microcosm.blue)
44+[](https://discord.gg/tcDfe4PGVB)
55+[](https://github.com/sponsors/uniphil/)
66+[](https://ko-fi.com/bad_example)
6777-App: [Constellation](./constellation/)
88---------------------------------------------
88+Welcome! Documentation is under active development. If you like reading API docs, you'll probably hit the ground running!
991010-A global atproto backlink index ✨
1010+Tutorials, how-to guides, and client SDK libraries are all in the works for gentler on-ramps, but are not quite ready yet. But don't let that stop you! Hop in the [microcosm discord](https://discord.gg/tcDfe4PGVB), or post questions and tag [@bad-example.com](https://bsky.app/profile/bad-example.com) on Bluesky if you get stuck anywhere.
11111212-- Self hostable: handles the full write throughput of the global atproto firehose on a raspberry pi 4b + single SSD
1313-- Storage efficient: less than 2GB/day disk consumption indexing all references in all lexicons and all non-atproto URLs
1414-- Handles record deletion, account de/re-activation, and account deletion, ensuring accurate link counts and respecting users data choices
1515-- Simple JSON API
1212+> [!tip]
1313+> This repository's primary home is moving to tangled: [@microcosm.blue/microcosm-rs](https://tangled.sh/@microcosm.blue/microcosm-rs). It will continue to be mirrored on [github](https://github.com/at-microcosm/microcosm-rs) for the foreseeable future, and it's fine to open issues or pulls in either place!
16141717-All social interactions in atproto tend to be represented by links (or references) between PDS records. This index can answer questions like "how many likes does a bsky post have", "who follows an account", "what are all the comments on a [frontpage](https://frontpage.fyi/) post", and more.
18151919-- **status**: works! api is unstable and likely to change, and no known instances have a full network backfill yet.
2020-- source: [./constellation/](./constellation/)
2121-- public instance: [constellation.microcosm.blue](https://constellation.microcosm.blue/)
2222-2323-_note: the public instance currently runs on a little raspberry pi in my house, feel free to use it! it comes with only with best-effort uptime, no commitment to not breaking the api for now, and possible rate-limiting. if you want to be nice you can put your project name and bsky username (or email) in your user-agent header for api requests._
2424-2525-2626-App: Spacedust
2727---------------
2828-2929-A notification subscription service 💫
3030-3131-using the same "link source" concept as [constellation](./constellation/), offer webhook notifications for new references created to records
3232-3333-- **status**: in design
3434-3535-3636-Library: [links](./links/)
1616+🌌 [Constellation](./constellation/)
3717------------------------------------
38183939-A rust crate (not published on crates.io yet) for optimistically parsing links out of arbitrary atproto PDS records, and potentially canonicalizing them
4040-4141-- **status**: unstable, might remain an internal lib for constellation (and spacedust, soon)
4242-4343-4444-4545----
4646-4747-4848-old notes follow, ignore
4949-------------------------
5050-5151-5252-as far as i can tell, atproto lexicons today don't follow much of a convention for referencing across documents: sometimes it's a StrongRef, sometimes it's a DID, sometimes it's a bare at-uri. lexicon authors choose any old link-sounding key name for the key in their document.
5353-5454-it's pretty messy so embrace the mess: atproto wants to be part of the web, so this library will also extract URLs and other URIs if you want it to. all the links.
5555-5656-5757-why
5858----
5959-6060-the atproto firehose that bluesky sprays at you will contain raw _contents_ from peoples' pdses. these are isolated, decontextualized updates. it's very easy to build some kinds of interesting downstream apps off of this feed.
6161-6262-- bluesky posts (firesky, deletions, )
6363-- blueksy post stats (emojis, )
6464-- trending keywords ()
6565-6666-but bringing almost kind of _context_ into your project requires a big step up in complexity and potentially cost: you're entering "appview" territory. _how many likes does a post have? who follows this account?_
6767-6868-you own your atproto data: it's kept in your personal data repository (PDS) and noone else can write to it. when someone likes your post, they create a "like" record in their _own_ pds, and that like belongs to _them_, not to you/your post.
6969-7070-in the firehose you'll see a `app.bsky.feed.post` record created, with no details about who has liked it. then you'll see separate `app.bsky.feed.like` records show up for each like that comes in on that post, with no context about the post except a random-looking reference to it. storing these in order to do so is up to you!
7171-7272-**so, why**
7373-7474-everything is links, and they're a mess, but they all kinda work the same, so maybe some tooling can bring down that big step in complexity from firehose raw-content apps -> apps requiring any social context.
7575-7676-everything is links:
7777-7878-- likes
7979-- follows
8080-- blocks
8181-- reposts
8282-- quotes
8383-8484-some low-level things you could make from links:
8585-8686-- notification streams (part of ucosm)
8787-- a global reverse index (part of ucosm)
8888-8989-i think that making these low-level services as easy to use as jetstream could open up pathways for building more atproto apps that operate at full scale with interesting features for reasonable effort at low cost to operate.
1919+A global atproto interactions backlink index as a simple JSON API. Works with every lexicon, runs on a raspberry pi, consumes less than 2GiB of disk per day. Handles record deletion, account de/re-activation, and account deletion, ensuring accurate link counts while respecting users' data choices.
90202121+- Source: [./constellation/](./constellation/)
2222+- [Public instance/API docs](https://constellation.microcosm.blue/)
2323+- Status: used in production. APIs will change but backwards compatibility will be maintained as long as needed.
91249292-extracting links
9393----------------
94252626+🎇 [Spacedust](./spacedust/)
2727+----------------------------
95289696-- low-level: pass a &str of a field value and get a parsed link back
2929+A global atproto interactions firehose. Extracts all at-uris, DIDs, and URLs from every lexicon in the firehose, and exposes them over a websocket modelled after [jetstream](https://github.com/bluesky-social/jetstream).
97309898-- med-level: pass a &str of record in json form and get a list of parsed links + json paths back. (todo: should also handle dag-cbor prob?)
3131+- Source: [./spacedust/](./spacedust/)
3232+- [Public instance/API docs](https://spacedust.microcosm.blue/)
3333+- Status: v0: the basics work and the APIs are in place! missing cursor replay, forward link storage, and delete event link hydration.
9934100100-- high-ish level: pass the json record and maybe apply some pre-loaded rules based on known lexicons to get the best result.
3535+### Demos:
10136102102-for now, a link is only considered if it matches for the entire value of the record's field -- links embedded in text content are not included. note that urls in bluesky posts _will_ still be extracted, since they are broken out into facets.
3737+- [Spacedust notifications](https://notifications.microcosm.blue/): web push notifications for _every_ atproto app
3838+- [Zero-Bluesky real-time interaction-updating post embed](https://bsky.bad-example.com/zero-bluesky-realtime-embed/)
1033910440105105-resolving / canonicalizing links
106106---------------------------------
4141+🛰️ [Slingshot](./slingshot)
4242+---------------------------
107434444+A fast, eager, production-grade edge cache for atproto records and identities. Pre-caches all records from the firehose and maintains a longer-term cache of requested records on disk.
10845109109-### at-uris
4646+- Source: [./slingshot/](./slingshot/)
4747+- [Public instance/API docs](https://slingshot.microcosm.blue/)
4848+- Status: v0: most XRPC APIs are working. cache storage is being reworked.
11049111111-every at-uri has at least two equivalent forms, one with a `DID`, and one with an account handle. the at-uri spec [illustrates this by example](https://atproto.com/specs/at-uri-scheme):
11250113113-- `at://did:plc:44ybard66vv44zksje25o7dz/app.bsky.feed.post/3jwdwj2ctlk26`
114114-- `at://bnewbold.bsky.team/app.bsky.feed.post/3jwdwj2ctlk26`
5151+🛸 [UFOs API](./ufos)
5252+---------------------
11553116116-some applications, like a reverse link index, may wish to canonicalize at-uris to a single form. the `DID`-form is stable as an account changes its handle and probably the right choice to canonicalize to, but maybe some apps would actually perfer to canonicalise to handles?
5454+Timeseries stats and sample records for every [collection](https://atproto.com/guides/glossary#collection) ever seen in the atproto firehose. Unique users are counted in hyperloglog sketches enabling arbitrary cardinality aggregation across time buckets and/or NSIDs.
11755118118-hopefully atrium will make it easy to resolve at-uris.
5656+- Source: [./ufos/](./ufos/)
5757+- [Public instance/API docs](https://ufos-api.microcosm.blue/)
5858+- Status: Used in production. It has APIs and they work! Needs improvement on indexing; needs more indexes and some more APIs to the data exposed.
119596060+> [!tip]
6161+> See also: [UFOs atproto explorer](https://ufos.microcosm.blue/) built on UFOs API. ([source](https://github.com/at-microcosm/spacedust-utils))
12062121121-### urls
12263123123-canonicalizing URLs is more annoying but also a bit more established. lots of details.
6464+💫 [Links](./links)
6565+-------------------
12466125125-- do we have to deal with punycode?
126126-- follow redirects (todo: only permanent ones, or all?)
127127-- check for rel=canonical http header and possibly follow it
128128-- check link rel=canonical meta tag and possibly follow it
129129-- do we need to check site maps??
130130-- do we have to care at all about AMP?
131131-- do we want anything to do with url shorteners??
132132-- how do multilingual sites affect this?
133133-- do we have to care about `script type="application/ld+json"` ???
6767+Rust library for parsing and extracting links (at-uris, DIDs, and URLs) from atproto records.
13468135135-ugh. is there a crate for this.
6969+- Source: [./links/](./links/)
7070+- Status: not yet published to crates.io; needs some rework
1367113772138138-### relative uris?
139139-140140-links might be relative, in which case they might need to be made absolute before being useful. is that a concern for this library, or up to the user? (seems like we might not have context here to determine its absolute)
141141-142142-143143-### canonicalizing
144144-145145-there should be a few async functions available to canonicalize already-parsed links.
146146-147147-- what happens if a link can't be resolved?
7373+🛩️ [Jetstream](./jetstream)
7474+---------------------------
148757676+A low-overhead jetstream client with cursor handling and automatic reconnect.
14977150150----
- Source: [./jetstream/](./jetstream/)
7979+- Status: used in multiple apps in production, but not yet published to crates.io; some rework planned
15180152152-- using `tinyjson` because it's nice -- maybe should switch to serde_json to share deps with atrium?
8181+> [!tip]
8282+> See also: [Rocketman](https://github.com/teal-fm/cadet/tree/main/rocketman), another excellent rust jetstream client which shares some lineage and _is_ published on crates.io.
15383154154-- would use atrium for parsing at-uris, but it's not in there. there's a did-only version in the non-lib commands.rs. its identifier parser is strict to did + handle, which makes sense, but for our purposes we might want to allow unknown methods too?
15584156156- - rsky-syntax has an aturi
157157- - adenosyne also
158158- - might come back to these
159858686+🔭 Deprecated: [Who am I](./who-am-i)
8787+-------------------------------------
16088161161--------
8989+An identity bridge for microcosm demos, that kinda worked. Fixing its problems is about equivalent to reinventing a lot of OIDC, so it's being retired.
16290163163-rocks
9191+- Source: [./who-am-i/](./who-am-i/)
9292+- Status: ready for retirement.
16493165165-```bash
166166-ROCKSDB_LIB_DIR=/nix/store/z2chn0hsik0clridr8mlprx1cngh1g3c-rocksdb-9.7.3/lib/ cargo build
167167-```
9494+> [!warning]
9595+> `who-am-i` is still in use for the Spacedust Notifications demo, but that will hopefully be migrated to use atproto oauth directly instead.
+12
reflector/Cargo.toml
···11+[package]
22+name = "reflector"
33+version = "0.1.0"
44+edition = "2024"
55+66+[dependencies]
77+clap = { version = "4.5.47", features = ["derive"] }
88+log = "0.4.28"
99+poem = "3.1.12"
1010+serde = { version = "1.0.219", features = ["derive"] }
1111+tokio = "1.47.1"
1212+tracing-subscriber = { version = "0.3.20", features = ["env-filter"] }
+9
reflector/readme.md
···11+# reflector
22+33+a tiny did:web service server that maps subdomains to a single service endpoint
44+55+receiving requests from multiple subdomains is left as a problem for the reverse proxy to solve, since acme wildcard certificates (ie. letsencrypt) require the most complicated and involved challenge type (DNS).
66+77+caddy [has good support for](https://caddyserver.com/docs/caddyfile/patterns#wildcard-certificates) configuring the wildcard DNS challenge with various DNS providers, and also supports [on-demand](https://caddyserver.com/docs/automatic-https#using-on-demand-tls) provisioning via the simpler methods.
88+99+if you only need a small fixed number of subdomains, you can also use certbot or otherwise individually configure them in your reverse proxy.
+113
reflector/src/main.rs
···11+use clap::Parser;
22+use poem::{
33+ EndpointExt, Response, Route, Server, get, handler,
44+ http::StatusCode,
55+ listener::TcpListener,
66+ middleware::{AddData, Tracing},
77+ web::{Data, Json, Query, TypedHeader, headers::Host},
88+};
99+use serde::{Deserialize, Serialize};
1010+1111+#[handler]
1212+fn hello() -> String {
1313+ "ɹoʇɔǝʅⅎǝɹ".to_string()
1414+}
/// Minimal DID document body served from `/.well-known/did.json`.
///
/// Serialized to JSON via serde; always carries exactly one service entry.
#[derive(Debug, Serialize)]
struct DidDoc {
    /// The document's DID; the `did_doc` handler builds it as
    /// `did:web:<hostname of the request's Host header>`.
    id: String,
    /// The single service entry advertised by this document.
    service: [DidService; 1],
}
/// One service entry of a DID document.
///
/// Field names are serialized in camelCase (so `service_endpoint` is emitted
/// as `serviceEndpoint`). Built from the command-line `Args`.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
struct DidService {
    /// Service ID, taken from `Args::id`.
    id: String,
    /// Service type, taken from `Args::type`.
    r#type: String,
    /// Service endpoint, taken from `Args::service_endpoint`.
    service_endpoint: String,
}
2929+3030+#[handler]
3131+fn did_doc(TypedHeader(host): TypedHeader<Host>, service: Data<&DidService>) -> Json<DidDoc> {
3232+ Json(DidDoc {
3333+ id: format!("did:web:{}", host.hostname()),
3434+ service: [service.clone()],
3535+ })
3636+}
/// Query-string parameters accepted by the `/ask` endpoint.
#[derive(Deserialize)]
struct AskQuery {
    /// The domain name the caller is asking about.
    domain: String,
}
4242+#[handler]
4343+fn ask_caddy(
4444+ Data(parent): Data<&Option<String>>,
4545+ Query(AskQuery { domain }): Query<AskQuery>,
4646+) -> Response {
4747+ if let Some(parent) = parent {
4848+ if let Some(prefix) = domain.strip_suffix(&format!(".{parent}")) {
4949+ if !prefix.contains('.') {
5050+ // no sub-sub-domains allowed
5151+ return Response::builder().body("ok");
5252+ }
5353+ }
5454+ };
5555+ Response::builder()
5656+ .status(StatusCode::FORBIDDEN)
5757+ .body("nope")
5858+}
5959+6060+/// Slingshot record edge cache
6161+#[derive(Parser, Debug, Clone)]
6262+#[command(version, about, long_about = None)]
6363+struct Args {
6464+ /// The DID document service ID to serve
6565+ ///
6666+ /// must start with a '#', like `#bsky_appview'
6767+ #[arg(long)]
6868+ id: String,
6969+ /// Service type
7070+ ///
7171+ /// Not sure exactly what its requirements are. 'BlueskyAppview' for example
7272+ #[arg(long)]
7373+ r#type: String,
7474+ /// The HTTPS endpoint for the service
7575+ #[arg(long)]
7676+ service_endpoint: String,
7777+ /// The parent domain; requests should come from subdomains of this
7878+ #[arg(long)]
7979+ domain: Option<String>,
8080+}
8181+8282+impl From<Args> for DidService {
8383+ fn from(a: Args) -> Self {
8484+ Self {
8585+ id: a.id,
8686+ r#type: a.r#type,
8787+ service_endpoint: a.service_endpoint,
8888+ }
8989+ }
9090+}
9191+9292+#[tokio::main(flavor = "current_thread")]
9393+async fn main() {
9494+ tracing_subscriber::fmt::init();
9595+ log::info!("ɹoʇɔǝʅⅎǝɹ");
9696+9797+ let args = Args::parse();
9898+ let domain = args.domain.clone();
9999+ let service: DidService = args.into();
100100+101101+ Server::new(TcpListener::bind("0.0.0.0:3001"))
102102+ .run(
103103+ Route::new()
104104+ .at("/", get(hello))
105105+ .at("/.well-known/did.json", get(did_doc))
106106+ .at("/ask", get(ask_caddy))
107107+ .with(AddData::new(service))
108108+ .with(AddData::new(domain))
109109+ .with(Tracing),
110110+ )
111111+ .await
112112+ .unwrap()
113113+}
···11+[package]
22+name = "slingshot"
33+version = "0.1.0"
44+edition = "2024"
55+66+[dependencies]
77+atrium-api = { git = "https://github.com/uniphil/atrium.git", branch = "fix/resolve-handle-https-accept-whitespace", default-features = false }
88+atrium-common = { git = "https://github.com/uniphil/atrium.git", branch = "fix/resolve-handle-https-accept-whitespace" }
99+atrium-identity = { git = "https://github.com/uniphil/atrium.git", branch = "fix/resolve-handle-https-accept-whitespace" }
1010+atrium-oauth = { git = "https://github.com/uniphil/atrium.git", branch = "fix/resolve-handle-https-accept-whitespace" }
1111+clap = { version = "4.5.41", features = ["derive"] }
1212+ctrlc = "3.4.7"
1313+foyer = { version = "0.18.0", features = ["serde"] }
1414+hickory-resolver = "0.25.2"
1515+jetstream = { path = "../jetstream", features = ["metrics"] }
1616+links = { path = "../links" }
1717+log = "0.4.27"
1818+metrics = "0.24.2"
1919+metrics-exporter-prometheus = { version = "0.17.1", features = ["http-listener"] }
2020+poem = { version = "3.1.12", features = ["acme", "static-files"] }
2121+poem-openapi = { version = "5.1.16", features = ["scalar"] }
2222+reqwest = { version = "0.12.22", features = ["json"] }
2323+rustls = "0.23.31"
2424+serde = { version = "1.0.219", features = ["derive"] }
2525+serde_json = { version = "1.0.141", features = ["raw_value"] }
2626+thiserror = "2.0.12"
2727+time = { version = "0.3.41", features = ["serde"] }
2828+tokio = { version = "1.47.0", features = ["full"] }
2929+tokio-util = "0.7.15"
3030+tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
3131+url = "2.5.4"
+93
slingshot/api-description.md
···11+_A [gravitational slingshot](https://en.wikipedia.org/wiki/Gravity_assist) makes use of the gravity and relative movements of celestial bodies to accelerate a spacecraft and change its trajectory._
22+33+44+# Slingshot: edge record cache
55+66+Applications in [ATProtocol](https://atproto.com/) store data in users' own [PDS](https://atproto.com/guides/self-hosting) (Personal Data Server), which are distributed across thousands of independently-run servers all over the world. Trying to access this data poses challenges for client applications:
77+88+- A PDS might be far away with long network latency
99+- or may be on an unreliable connection
1010+- or overloaded when you need it, or offline, or…
1111+1212+Large projects like [Bluesky](https://bsky.app/) control their performance and reliability by syncing all app-relevant data from PDSs into first-party databases. But for new apps, building out this additional data infrastructure adds significant effort and complexity up front.
1313+1414+**Slingshot is a fast, eager, production-grade cache of data in the [ATmosphere](https://atproto.com/)**, offering performance and reliability without custom infrastructure.
1515+1616+1717+### Current status
1818+1919+> [!important]
2020+> Slingshot is currently in a **v0, pre-release state**. There is one production instance and you can use it! Expect short downtimes for restarts as development progresses and lower cache hit-rates as the internal storage caches are adjusted and reset.
2121+2222+The core APIs will not change, since they are standard third-party `com.atproto` query APIs from ATProtocol.
2323+2424+2525+## Eager caching
2626+2727+In many cases, Slingshot can cache the data you need *before* first request!
2828+2929+Slingshot subscribes to the global [Firehose](https://atproto.com/specs/sync#firehose) of data updates. It keeps a short-term rolling indexed window of *all* data, and automatically promotes content likely to be requested to its longer-term main cache. _(automatic promotion is still a work in progress)_
3030+3131+When there is a cache miss, Slingshot can often still accelerate record fetching, since it keeps a large cache of resolved identities: it can usually request from the correct PDS without extra lookups.
3232+3333+3434+## Precise invalidation
The firehose includes **update** and **delete** events, which Slingshot uses to ensure stale and deleted data is removed within a very short window. Additionally, identity and account-level events can trigger rapid cleanup of data for deactivated and deleted accounts. _(some of this is still a work in progress)_
3737+3838+3939+## Low-trust
4040+4141+The "AT" in ATProtocol [stands for _Authenticated Transfer_](https://atproto.com/guides/glossary#at-protocol): all data is cryptographically signed, which makes it possible to broadcast data through third parties and trust that it's real _without_ having to directly contact the originating server.
4242+4343+Two core standard query APIs are supported to balance convenience and trust. They both fetch [records](https://atproto.com/guides/glossary#record):
4444+4545+### [`com.atproto.repo.getRecord`](#tag/comatproto-queries/get/xrpc/com.atproto.repo.getRecord)
4646+4747+- convenient `JSON` response format
4848+- cannot be proven authentic
4949+5050+### [`com.atproto.sync.getRecord`](#tag/comatproto-queries/get/xrpc/com.atproto.sync.getRecord)
5151+5252+- [`DAG-CBOR`](https://atproto.com/specs/data-model)-encoded response requires extra libraries to decode, but
5353+- includes a cryptographic proof of authenticity!
5454+5555+_(work on this endpoint is in progress)_
5656+5757+5858+## Service proxying
Clients can proxy atproto queries through their own PDS with [Service Proxying](https://atproto.com/specs/xrpc#service-proxying), and this is supported by Slingshot. The Slingshot instance must be started with the `--domain` argument specified.
6161+6262+Service-proxied requests can specify a Slingshot instance via the `atproto-proxy` header:
6363+6464+```http
6565+GET /xrpc/com.bad-example.identity.resolveMiniDoc?identifier=bad-example.com
6666+Host: <your pds>
6767+atproto-proxy: did:web:<slingshot domain>#slingshot
6868+```
6969+7070+Where `<your pds>` is the user's own PDS host, and `<slingshot domain>` is the domain that the slingshot instance is deployed at (eg. `slingshot.microcosm.blue`). See the [Service Proxying](https://atproto.com/specs/xrpc#service-proxying) docs for more.
7171+7272+> [!tip]
7373+> Service proxying is supported but completely optional. All APIs are directly accessible over the public internet, and GeoDNS helps route users to the closest instance to them for the lowest possible latency. (_note: deploying multiple slingshot instances with GeoDNS is still TODO_)
7474+7575+7676+## Ergonomic APIs
7777+7878+- Slingshot also offers variants of the `getRecord` endpoints that accept a full `at-uri` as a parameter, to save clients from needing to parse and validate all parts of a record location.
7979+8080+- Bi-directionally verifying identity endpoints, so you can directly exchange atproto [`handle`](https://atproto.com/guides/glossary#handle)s for [`DID`](https://atproto.com/guides/glossary#did-decentralized-id)s without extra steps, plus a convenient [Mini-Doc](#tag/slingshot-specific-queries/get/xrpc/com.bad-example.identity.resolveMiniDoc) verified identity summary.
8181+8282+8383+## Part of microcosm
8484+8585+[Microcosm](https://www.microcosm.blue/) is a collection of services and independent community-run infrastructure for ATProtocol.
8686+8787+Slingshot excels when combined with _shallow indexing_ services, which offer fast queries of global data relationships but with only references to the data records. Microcosm has a few!
8888+8989+- [🌌 Constellation](https://constellation.microcosm.blue/), a global backlink index (all social interactions in atproto are links!)
9090+- [🎇 Spacedust](https://spacedust.microcosm.blue/), a firehose of all social interactions
9191+9292+> [!success]
9393+> All microcosm projects are [open source](https://tangled.sh/@bad-example.com/microcosm-links). **You can help sustain Slingshot** and all of microcosm by becoming a [Github sponsor](https://github.com/sponsors/uniphil/) or a [Ko-fi supporter](https://ko-fi.com/bad_example)!
+7
slingshot/readme.md
···11+# slingshot: atproto record edge cache
22+33+local dev running:
44+55+```bash
ulimit -n 4096 && RUST_LOG=info,slingshot=trace cargo run -- --jetstream us-east-1 --cache-dir ./foyer
77+```
···11+use hickory_resolver::{ResolveError, TokioResolver};
22+use std::collections::{HashSet, VecDeque};
33+use std::path::Path;
44+use std::sync::Arc;
55+/// for now we're gonna just keep doing more cache
66+///
77+/// plc.director x foyer, ttl kept with data, refresh deferred to background on fetch
88+///
99+/// things we need:
1010+///
1111+/// 1. handle -> DID resolution: getRecord must accept a handle for `repo` param
1212+/// 2. DID -> PDS resolution: so we know where to getRecord
1313+/// 3. DID -> handle resolution: for bidirectional handle validation and in case we want to offer this
1414+use std::time::Duration;
1515+use tokio::sync::Mutex;
1616+use tokio_util::sync::CancellationToken;
1717+1818+use crate::error::IdentityError;
1919+use atrium_api::{
2020+ did_doc::DidDocument,
2121+ types::string::{Did, Handle},
2222+};
2323+use atrium_common::resolver::Resolver;
2424+use atrium_identity::{
2525+ did::{CommonDidResolver, CommonDidResolverConfig, DEFAULT_PLC_DIRECTORY_URL},
2626+ handle::{AtprotoHandleResolver, AtprotoHandleResolverConfig, DnsTxtResolver},
2727+};
2828+use atrium_oauth::DefaultHttpClient; // it's probably not worth bringing all of atrium_oauth for this but
2929+use foyer::{DirectFsDeviceOptions, Engine, HybridCache, HybridCacheBuilder};
3030+use serde::{Deserialize, Serialize};
3131+use time::UtcDateTime;
/// once we have something resolved, don't re-resolve until after this period
const MIN_TTL: Duration = Duration::from_secs(4 * 3600); // 4 hours; probably should have a max ttl
/// cached not-found results are queued for refresh once they are at least this old
const MIN_NOT_FOUND_TTL: Duration = Duration::from_secs(60);
/// Key for the identity cache and the refresh queue.
///
/// Handle keys are stored with `IdentityData::Did` (or `NotFound`) values;
/// DID keys are stored with `IdentityData::Doc` (or `NotFound`) values.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
enum IdentityKey {
    /// look up by atproto handle
    Handle(Handle),
    /// look up by DID
    Did(Did),
}
/// Cached identity value: `(time the data was fetched, the data itself)`.
///
/// The timestamp is compared against `MIN_TTL` / `MIN_NOT_FOUND_TTL` on read
/// to decide whether a background refresh should be queued.
#[derive(Debug, Serialize, Deserialize)]
struct IdentityVal(UtcDateTime, IdentityData);
/// Payload of a cached identity lookup.
#[derive(Debug, Serialize, Deserialize)]
enum IdentityData {
    /// the resolver reported that the handle or DID could not be found
    NotFound,
    /// result of a handle lookup: the (unverified) DID it resolved to
    Did(Did),
    /// result of a DID lookup: the partial mini doc extracted from its DID document
    Doc(PartialMiniDoc),
}
/// partial representation of a com.bad-example.identity mini atproto doc
///
/// partial because the handle is not verified: it is only what the did doc
/// claims, and has not been resolved back to this did.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PartialMiniDoc {
    /// an atproto handle (**unverified**)
    ///
    /// the first valid atproto handle from the did doc's aka
    pub unverified_handle: Handle,
    /// the did's atproto pds url (TODO: type this?)
    ///
    /// note: atrium *does* actually parse it into a URI, it just doesn't return
    /// that for some reason
    pub pds: String,
    /// for now we're just pulling this straight from the did doc
    ///
    /// would be nice to type and validate it
    ///
    /// this is the publicKeyMultibase from the did doc.
    /// legacy key encoding not supported.
    /// `id`, `type`, and `controller` are checked when extracting the key, but aren't stored.
    pub signing_key: String,
}
7676+7777+impl TryFrom<DidDocument> for PartialMiniDoc {
7878+ type Error = String;
7979+ fn try_from(did_doc: DidDocument) -> Result<Self, Self::Error> {
8080+ // must use the first valid handle
8181+ let mut unverified_handle = None;
8282+ let Some(ref doc_akas) = did_doc.also_known_as else {
8383+ return Err("did doc missing `also_known_as`".to_string());
8484+ };
8585+ for aka in doc_akas {
8686+ let Some(maybe_handle) = aka.strip_prefix("at://") else {
8787+ continue;
8888+ };
8989+ let Ok(valid_handle) = Handle::new(maybe_handle.to_string()) else {
9090+ continue;
9191+ };
9292+ unverified_handle = Some(valid_handle);
9393+ break;
9494+ }
9595+ let Some(unverified_handle) = unverified_handle else {
9696+ return Err("no valid atproto handles in `also_known_as`".to_string());
9797+ };
9898+9999+ // atrium seems to get service endpoint getters
100100+ let Some(pds) = did_doc.get_pds_endpoint() else {
101101+ return Err("no valid pds service found".to_string());
102102+ };
103103+104104+ // TODO can't use atrium's get_signing_key() becuase it fails to check type and controller
105105+ // so if we check those and reject it, we might miss a later valid key in the array
106106+ // (todo is to fix atrium)
107107+ // actually: atrium might be flexible for legacy reps. for now we're rejecting legacy rep.
108108+109109+ // must use the first valid signing key
110110+ let mut signing_key = None;
111111+ let Some(verification_methods) = did_doc.verification_method else {
112112+ return Err("no verification methods found".to_string());
113113+ };
114114+ for method in verification_methods {
115115+ if method.id != format!("{}#atproto", did_doc.id) {
116116+ continue;
117117+ }
118118+ if method.r#type != "Multikey" {
119119+ continue;
120120+ }
121121+ if method.controller != did_doc.id {
122122+ continue;
123123+ }
124124+ let Some(key) = method.public_key_multibase else {
125125+ continue;
126126+ };
127127+ signing_key = Some(key);
128128+ break;
129129+ }
130130+ let Some(signing_key) = signing_key else {
131131+ return Err("no valid atproto signing key found in verification methods".to_string());
132132+ };
133133+134134+ Ok(PartialMiniDoc {
135135+ unverified_handle,
136136+ pds,
137137+ signing_key,
138138+ })
139139+ }
140140+}
/// multi-producer *single-consumer* queue structures (wrap in arc-mutex plz)
///
/// the hashset allows testing for presence of items in the queue.
/// this has absolutely no support for multiple queue consumers.
#[derive(Debug, Default)]
struct RefreshQueue {
    /// keys waiting to be refreshed, in the order they were queued
    queue: VecDeque<IdentityKey>,
    /// the same keys as `queue`, kept for membership checks when queueing
    items: HashSet<IdentityKey>,
}
/// Cached identity resolution: handle -> DID and DID -> partial mini doc.
///
/// Cloneable; the resolvers, refresh queue and refresher lock are behind `Arc`s.
#[derive(Clone)]
pub struct Identity {
    /// resolves handles to DIDs
    handle_resolver: Arc<AtprotoHandleResolver<HickoryDnsTxtResolver, DefaultHttpClient>>,
    /// resolves DIDs to DID documents
    did_resolver: Arc<CommonDidResolver<DefaultHttpClient>>,
    /// foyer hybrid cache of resolved identities, each stored with its fetch time
    cache: HybridCache<IdentityKey, IdentityVal>,
    /// multi-producer *single consumer* queue
    refresh_queue: Arc<Mutex<RefreshQueue>>,
    /// just a lock to ensure only one refresher (queue consumer) is running (to be improved with a better refresher)
    refresher: Arc<Mutex<()>>,
}
162162+163163+impl Identity {
    /// Build the resolvers and open the identity cache.
    ///
    /// `cache_dir` is handed to foyer's `DirectFsDeviceOptions` for the
    /// on-disk part of the cache. A failure while building the cache is
    /// returned as an `IdentityError`.
    ///
    /// # Panics
    ///
    /// Panics if `HickoryDnsTxtResolver::new()` fails (its result is unwrapped).
    pub async fn new(cache_dir: impl AsRef<Path>) -> Result<Self, IdentityError> {
        // one http client, shared by the handle resolver and the did resolver
        let http_client = Arc::new(DefaultHttpClient::default());
        let handle_resolver = AtprotoHandleResolver::new(AtprotoHandleResolverConfig {
            // NOTE(review): this unwrap panics instead of returning an error from `new`
            dns_txt_resolver: HickoryDnsTxtResolver::new().unwrap(),
            http_client: http_client.clone(),
        });
        let did_resolver = CommonDidResolver::new(CommonDidResolverConfig {
            plc_directory_url: DEFAULT_PLC_DIRECTORY_URL.to_string(),
            http_client: http_client.clone(),
        });

        let cache = HybridCacheBuilder::new()
            .with_name("identity")
            .memory(16 * 2_usize.pow(20))
            // NOTE(review): size_of_val only measures the inline size of the key and value
            // types, not heap data they own (string contents) -- confirm that is intended
            .with_weighter(|k, v| std::mem::size_of_val(k) + std::mem::size_of_val(v))
            .storage(Engine::small())
            .with_device_options(
                DirectFsDeviceOptions::new(cache_dir)
                    .with_capacity(2_usize.pow(30)) // TODO: configurable (1GB to have something)
                    .with_file_size(2_usize.pow(20)), // note: this does limit the max cached item size, warning jumbo records
            )
            .build()
            .await?;

        Ok(Self {
            handle_resolver: Arc::new(handle_resolver),
            did_resolver: Arc::new(did_resolver),
            cache,
            // both start out as empty defaults: an empty queue and an unlocked mutex
            refresh_queue: Default::default(),
            refresher: Default::default(),
        })
    }
196196+197197+ /// Resolve (and verify!) an atproto handle to a DID
198198+ ///
199199+ /// The result can be stale
200200+ ///
201201+ /// `None` if the handle can't be found or verification fails
202202+ pub async fn handle_to_did(&self, handle: Handle) -> Result<Option<Did>, IdentityError> {
203203+ let Some(did) = self.handle_to_unverified_did(&handle).await? else {
204204+ return Ok(None);
205205+ };
206206+ let Some(doc) = self.did_to_partial_mini_doc(&did).await? else {
207207+ return Ok(None);
208208+ };
209209+ if doc.unverified_handle != handle {
210210+ return Ok(None);
211211+ }
212212+ Ok(Some(did))
213213+ }
214214+215215+ /// Resolve a DID to a pds url
216216+ ///
217217+ /// This *also* incidentally resolves and verifies the handle, which might
218218+ /// make it slower than expected
219219+ pub async fn did_to_pds(&self, did: Did) -> Result<Option<String>, IdentityError> {
220220+ let Some(mini_doc) = self.did_to_partial_mini_doc(&did).await? else {
221221+ return Ok(None);
222222+ };
223223+ Ok(Some(mini_doc.pds))
224224+ }
    /// Resolve (and cache but **not verify**) a handle to a DID
    ///
    /// Goes through `cache.fetch` with the handle resolver as the loader
    /// (presumably only invoked when the key is not already cached -- confirm
    /// against foyer's `fetch` docs). Both successful resolutions and
    /// `NotFound` are stored with the current time; any other resolver error
    /// is returned as an error instead.
    ///
    /// The cached value is returned even when it is older than its TTL; in
    /// that case a background refresh is queued.
    async fn handle_to_unverified_did(
        &self,
        handle: &Handle,
    ) -> Result<Option<Did>, IdentityError> {
        let key = IdentityKey::Handle(handle.clone());
        let entry = self
            .cache
            .fetch(key.clone(), {
                // owned copies for the `async move` loader
                let handle = handle.clone();
                let resolver = self.handle_resolver.clone();
                || async move {
                    match resolver.resolve(&handle).await {
                        Ok(did) => Ok(IdentityVal(UtcDateTime::now(), IdentityData::Did(did))),
                        // not-found is a cacheable answer, not an error
                        Err(atrium_identity::Error::NotFound) => {
                            Ok(IdentityVal(UtcDateTime::now(), IdentityData::NotFound))
                        }
                        Err(other) => Err(foyer::Error::Other(Box::new({
                            log::debug!("other error resolving handle: {other:?}");
                            IdentityError::ResolutionFailed(other)
                        }))),
                    }
                }
            })
            .await?;

        let now = UtcDateTime::now();
        let IdentityVal(last_fetch, data) = entry.value();
        match data {
            // handle keys must never hold a doc: that's what did keys store
            IdentityData::Doc(_) => {
                log::error!("identity value mixup: got a doc from a handle key (should be a did)");
                Err(IdentityError::IdentityValTypeMixup(handle.to_string()))
            }
            IdentityData::NotFound => {
                // stale not-found: still answer None now, but re-check in the background
                if (now - *last_fetch) >= MIN_NOT_FOUND_TTL {
                    self.queue_refresh(key).await;
                }
                Ok(None)
            }
            IdentityData::Did(did) => {
                // stale hit: still answer with the cached did, but refresh in the background
                if (now - *last_fetch) >= MIN_TTL {
                    self.queue_refresh(key).await;
                }
                Ok(Some(did.clone()))
            }
        }
    }
    /// Fetch (and cache) a partial mini doc from a did
    ///
    /// Goes through `cache.fetch` with the did resolver as the loader
    /// (presumably only invoked when the key is not already cached -- confirm
    /// against foyer's `fetch` docs). A resolved doc is rejected with
    /// `BadDidDoc` if its `id` differs from the requested did or if it can't
    /// be converted into a `PartialMiniDoc`. `NotFound` is cached; other
    /// resolver errors are returned as errors.
    ///
    /// The cached value is returned even when it is older than its TTL; in
    /// that case a background refresh is queued.
    pub async fn did_to_partial_mini_doc(
        &self,
        did: &Did,
    ) -> Result<Option<PartialMiniDoc>, IdentityError> {
        let key = IdentityKey::Did(did.clone());
        let entry = self
            .cache
            .fetch(key.clone(), {
                // owned copies for the `async move` loader
                let did = did.clone();
                let resolver = self.did_resolver.clone();
                || async move {
                    match resolver.resolve(&did).await {
                        Ok(did_doc) => {
                            // TODO: fix in atrium: should verify id is did
                            if did_doc.id != did.to_string() {
                                // NOTE(review): this site uses `foyer::Error::other(..)` while the
                                // others use the `foyer::Error::Other(..)` variant -- confirm
                                return Err(foyer::Error::other(Box::new(
                                    IdentityError::BadDidDoc(
                                        "did doc's id did not match did".to_string(),
                                    ),
                                )));
                            }
                            let mini_doc = did_doc.try_into().map_err(|e| {
                                foyer::Error::Other(Box::new(IdentityError::BadDidDoc(e)))
                            })?;
                            Ok(IdentityVal(UtcDateTime::now(), IdentityData::Doc(mini_doc)))
                        }
                        // not-found is a cacheable answer, not an error
                        Err(atrium_identity::Error::NotFound) => {
                            Ok(IdentityVal(UtcDateTime::now(), IdentityData::NotFound))
                        }
                        Err(other) => Err(foyer::Error::Other(Box::new(
                            IdentityError::ResolutionFailed(other),
                        ))),
                    }
                }
            })
            .await?;

        let now = UtcDateTime::now();
        let IdentityVal(last_fetch, data) = entry.value();
        match data {
            // did keys must never hold a bare did: that's what handle keys store
            IdentityData::Did(_) => {
                log::error!("identity value mixup: got a did from a did key (should be a doc)");
                Err(IdentityError::IdentityValTypeMixup(did.to_string()))
            }
            IdentityData::NotFound => {
                // stale not-found: still answer None now, but re-check in the background
                if (now - *last_fetch) >= MIN_NOT_FOUND_TTL {
                    self.queue_refresh(key).await;
                }
                Ok(None)
            }
            IdentityData::Doc(mini_did) => {
                // stale hit: still answer with the cached doc, but refresh in the background
                if (now - *last_fetch) >= MIN_TTL {
                    self.queue_refresh(key).await;
                }
                Ok(Some(mini_did.clone()))
            }
        }
    }
333333+334334+ /// put a refresh task on the queue
335335+ ///
336336+ /// this can be safely called from multiple concurrent tasks
337337+ async fn queue_refresh(&self, key: IdentityKey) {
338338+ // todo: max queue size
339339+ let mut q = self.refresh_queue.lock().await;
340340+ if !q.items.contains(&key) {
341341+ q.items.insert(key.clone());
342342+ q.queue.push_back(key);
343343+ }
344344+ }
345345+346346+ /// find out what's next in the queue. concurrent consumers are not allowed.
347347+ ///
348348+ /// intent is to leave the item in the queue while refreshing, so that a
349349+ /// producer will not re-add it if it's in progress. there's definitely
350350+ /// better ways to do this, but this is ~simple for as far as a single
351351+ /// consumer can take us.
352352+ ///
353353+ /// we could take it from the queue but leave it in the set and remove from
354354+ /// set later, but splitting them apart feels more bug-prone.
355355+ async fn peek_refresh(&self) -> Option<IdentityKey> {
356356+ let q = self.refresh_queue.lock().await;
357357+ q.queue.front().cloned()
358358+ }
359359+360360+ /// call to clear the latest key from the refresh queue. concurrent consumers not allowed.
361361+ ///
362362+ /// must provide the last peeked refresh queue item as a small safety check
363363+ async fn complete_refresh(&self, key: &IdentityKey) -> Result<(), IdentityError> {
364364+ let mut q = self.refresh_queue.lock().await;
365365+366366+ let Some(queue_key) = q.queue.pop_front() else {
367367+ // gone from queue + since we're in an error condition, make sure it's not stuck in items
368368+ // (not toctou because we have the lock)
369369+ // bolder here than below and removing from items because if the queue is *empty*, then we
370370+ // know it hasn't been re-added since losing sync.
371371+ if q.items.remove(key) {
372372+ log::error!("identity refresh: queue de-sync: not in ");
373373+ } else {
374374+ log::warn!(
375375+ "identity refresh: tried to complete with wrong key. are multiple queue consumers running?"
376376+ );
377377+ }
378378+ return Err(IdentityError::RefreshQueueKeyError("no key in queue"));
379379+ };
380380+381381+ if queue_key != *key {
382382+ // extra weird case here, what's the most defensive behaviour?
383383+ // we have two keys: ours should have been first but isn't. this shouldn't happen, so let's
384384+ // just leave items alone for it. risks unbounded growth but we're in a bad place already.
385385+ // the other key is the one we just popped. we didn't want it, so maybe we should put it
386386+ // back, BUT if we somehow ended up with concurrent consumers, we have bigger problems. take
387387+ // responsibility for taking it instead: remove it from items as well, and just drop it.
388388+ //
389389+ // hope that whoever calls us takes this error seriously.
390390+ if q.items.remove(&queue_key) {
391391+ log::warn!(
392392+ "identity refresh: queue de-sync + dropping a bystander key without refreshing it!"
393393+ );
394394+ } else {
395395+ // you thought things couldn't get weirder? (i mean hopefully they can't)
396396+ log::error!("identity refresh: queue de-sync + bystander key also de-sync!?");
397397+ }
398398+ return Err(IdentityError::RefreshQueueKeyError(
399399+ "wrong key at front of queue",
400400+ ));
401401+ }
402402+403403+ if q.items.remove(key) {
404404+ Ok(())
405405+ } else {
406406+ log::error!("identity refresh: queue de-sync: key not in items");
407407+ Err(IdentityError::RefreshQueueKeyError("key not in items"))
408408+ }
409409+ }
410410+411411+ /// run the refresh queue consumer
412412+ pub async fn run_refresher(&self, shutdown: CancellationToken) -> Result<(), IdentityError> {
413413+ let _guard = self
414414+ .refresher
415415+ .try_lock()
416416+ .expect("there to only be one refresher running");
417417+ loop {
418418+ if shutdown.is_cancelled() {
419419+ log::info!("identity refresher: exiting for shutdown: closing cache...");
420420+ if let Err(e) = self.cache.close().await {
421421+ log::error!("cache close errored: {e}");
422422+ } else {
423423+ log::info!("identity cache closed.")
424424+ }
425425+ return Ok(());
426426+ }
427427+ let Some(task_key) = self.peek_refresh().await else {
428428+ tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
429429+ continue;
430430+ };
431431+ match task_key {
432432+ IdentityKey::Handle(ref handle) => {
433433+ log::trace!("refreshing handle {handle:?}");
434434+ match self.handle_resolver.resolve(handle).await {
435435+ Ok(did) => {
436436+ self.cache.insert(
437437+ task_key.clone(),
438438+ IdentityVal(UtcDateTime::now(), IdentityData::Did(did)),
439439+ );
440440+ }
441441+ Err(atrium_identity::Error::NotFound) => {
442442+ self.cache.insert(
443443+ task_key.clone(),
444444+ IdentityVal(UtcDateTime::now(), IdentityData::NotFound),
445445+ );
446446+ }
447447+ Err(err) => {
448448+ log::warn!(
449449+ "failed to refresh handle: {err:?}. leaving stale (should we eventually do something?)"
450450+ );
451451+ }
452452+ }
453453+ self.complete_refresh(&task_key).await?; // failures are bugs, so break loop
454454+ }
455455+ IdentityKey::Did(ref did) => {
456456+ log::trace!("refreshing did doc: {did:?}");
457457+458458+ match self.did_resolver.resolve(did).await {
459459+ Ok(did_doc) => {
460460+ // TODO: fix in atrium: should verify id is did
461461+ if did_doc.id != did.to_string() {
462462+ log::warn!(
463463+ "refreshed did doc failed: wrong did doc id. dropping refresh."
464464+ );
465465+ continue;
466466+ }
467467+ let mini_doc = match did_doc.try_into() {
468468+ Ok(md) => md,
469469+ Err(e) => {
470470+ log::warn!(
471471+ "converting mini doc failed: {e:?}. dropping refresh."
472472+ );
473473+ continue;
474474+ }
475475+ };
476476+ self.cache.insert(
477477+ task_key.clone(),
478478+ IdentityVal(UtcDateTime::now(), IdentityData::Doc(mini_doc)),
479479+ );
480480+ }
481481+ Err(atrium_identity::Error::NotFound) => {
482482+ self.cache.insert(
483483+ task_key.clone(),
484484+ IdentityVal(UtcDateTime::now(), IdentityData::NotFound),
485485+ );
486486+ }
487487+ Err(err) => {
488488+ log::warn!(
489489+ "failed to refresh did doc: {err:?}. leaving stale (should we eventually do something?)"
490490+ );
491491+ }
492492+ }
493493+494494+ self.complete_refresh(&task_key).await?; // failures are bugs, so break loop
495495+ }
496496+ }
497497+ }
498498+ }
499499+}
500500+501501+pub struct HickoryDnsTxtResolver(TokioResolver);
502502+503503+impl HickoryDnsTxtResolver {
504504+ fn new() -> Result<Self, ResolveError> {
505505+ Ok(Self(TokioResolver::builder_tokio()?.build()))
506506+ }
507507+}
508508+509509+impl DnsTxtResolver for HickoryDnsTxtResolver {
510510+ async fn resolve(
511511+ &self,
512512+ query: &str,
513513+ ) -> core::result::Result<Vec<String>, Box<dyn std::error::Error + Send + Sync>> {
514514+ match self.0.txt_lookup(query).await {
515515+ Ok(r) => {
516516+ metrics::counter!("whoami_resolve_dns_txt", "success" => "true").increment(1);
517517+ Ok(r.iter().map(|r| r.to_string()).collect())
518518+ }
519519+ Err(e) => {
520520+ metrics::counter!("whoami_resolve_dns_txt", "success" => "false").increment(1);
521521+ Err(e.into())
522522+ }
523523+ }
524524+ }
525525+}
+14
slingshot/src/lib.rs
···11+mod consumer;
22+pub mod error;
33+mod firehose_cache;
44+mod healthcheck;
55+mod identity;
66+mod record;
77+mod server;
88+99+pub use consumer::consume;
1010+pub use firehose_cache::firehose_cache;
1111+pub use healthcheck::healthcheck;
1212+pub use identity::Identity;
1313+pub use record::{CachedRecord, ErrorResponseObject, Repo};
1414+pub use server::serve;
+194
slingshot/src/main.rs
···11+// use foyer::HybridCache;
22+// use foyer::{Engine, DirectFsDeviceOptions, HybridCacheBuilder};
33+use metrics_exporter_prometheus::PrometheusBuilder;
44+use slingshot::{
55+ Identity, Repo, consume, error::MainTaskError, firehose_cache, healthcheck, serve,
66+};
77+use std::path::PathBuf;
88+99+use clap::Parser;
1010+use tokio_util::sync::CancellationToken;
1111+1212+/// Slingshot record edge cache
1313+#[derive(Parser, Debug, Clone)]
1414+#[command(version, about, long_about = None)]
1515+struct Args {
1616+ /// Jetstream server to connect to (exclusive with --fixture). Provide either a wss:// URL, or a shorhand value:
1717+ /// 'us-east-1', 'us-east-2', 'us-west-1', or 'us-west-2'
1818+ #[arg(long)]
1919+ jetstream: String,
2020+ /// don't request zstd-compressed jetstream events
2121+ ///
2222+ /// reduces CPU at the expense of more ingress bandwidth
2323+ #[arg(long, action)]
2424+ jetstream_no_zstd: bool,
2525+ /// where to keep disk caches
2626+ #[arg(long)]
2727+ cache_dir: PathBuf,
2828+ /// the domain pointing to this server
2929+ ///
3030+ /// if present:
3131+ /// - a did:web document will be served at /.well-known/did.json
3232+ /// - an HTTPS certs will be automatically configured with Acme/letsencrypt
3333+ /// - TODO: a rate-limiter will be installed
3434+ #[arg(long)]
3535+ domain: Option<String>,
3636+ /// email address for letsencrypt contact
3737+ ///
3838+ /// recommended in production, i guess?
3939+ #[arg(long)]
4040+ acme_contact: Option<String>,
4141+ /// a location to cache acme https certs
4242+ ///
4343+ /// only used if --host is specified. omitting requires re-requesting certs
4444+ /// on every restart, and letsencrypt has rate limits that are easy to hit.
4545+ ///
4646+ /// recommended in production, but mind the file permissions.
4747+ #[arg(long)]
4848+ certs: Option<PathBuf>,
4949+ /// an web address to send healtcheck pings to every ~51s or so
5050+ #[arg(long)]
5151+ healthcheck: Option<String>,
5252+}
5353+5454+#[tokio::main]
5555+async fn main() -> Result<(), String> {
5656+ tracing_subscriber::fmt::init();
5757+5858+ let shutdown = CancellationToken::new();
5959+6060+ let ctrlc_shutdown = shutdown.clone();
6161+ ctrlc::set_handler(move || ctrlc_shutdown.cancel()).expect("failed to set ctrl-c handler");
6262+6363+ let args = Args::parse();
6464+6565+ if let Err(e) = install_metrics_server() {
6666+ log::error!("failed to install metrics server: {e:?}");
6767+ } else {
6868+ log::info!("metrics listening at http://0.0.0.0:8765");
6969+ }
7070+7171+ std::fs::create_dir_all(&args.cache_dir).map_err(|e| {
7272+ format!(
7373+ "failed to ensure cache parent dir: {e:?} (dir: {:?})",
7474+ args.cache_dir
7575+ )
7676+ })?;
7777+ let cache_dir = args.cache_dir.canonicalize().map_err(|e| {
7878+ format!(
7979+ "failed to canonicalize cache_dir: {e:?} (dir: {:?})",
8080+ args.cache_dir
8181+ )
8282+ })?;
8383+ log::info!("cache dir ready at at {cache_dir:?}.");
8484+8585+ log::info!("setting up firehose cache...");
8686+ let cache = firehose_cache(cache_dir.join("./firehose")).await?;
8787+ log::info!("firehose cache ready.");
8888+8989+ let mut tasks: tokio::task::JoinSet<Result<(), MainTaskError>> = tokio::task::JoinSet::new();
9090+9191+ log::info!("starting identity service...");
9292+ let identity = Identity::new(cache_dir.join("./identity"))
9393+ .await
9494+ .map_err(|e| format!("identity setup failed: {e:?}"))?;
9595+ log::info!("identity service ready.");
9696+ let identity_refresher = identity.clone();
9797+ let identity_shutdown = shutdown.clone();
9898+ tasks.spawn(async move {
9999+ identity_refresher.run_refresher(identity_shutdown).await?;
100100+ Ok(())
101101+ });
102102+103103+ let repo = Repo::new(identity.clone());
104104+105105+ let server_shutdown = shutdown.clone();
106106+ let server_cache_handle = cache.clone();
107107+ tasks.spawn(async move {
108108+ serve(
109109+ server_cache_handle,
110110+ identity,
111111+ repo,
112112+ args.domain,
113113+ args.acme_contact,
114114+ args.certs,
115115+ server_shutdown,
116116+ )
117117+ .await?;
118118+ Ok(())
119119+ });
120120+121121+ let consumer_shutdown = shutdown.clone();
122122+ let consumer_cache = cache.clone();
123123+ tasks.spawn(async move {
124124+ consume(
125125+ args.jetstream,
126126+ None,
127127+ args.jetstream_no_zstd,
128128+ consumer_shutdown,
129129+ consumer_cache,
130130+ )
131131+ .await?;
132132+ Ok(())
133133+ });
134134+135135+ if let Some(hc) = args.healthcheck {
136136+ let healthcheck_shutdown = shutdown.clone();
137137+ tasks.spawn(async move {
138138+ healthcheck(hc, healthcheck_shutdown).await?;
139139+ Ok(())
140140+ });
141141+ }
142142+143143+ tokio::select! {
144144+ _ = shutdown.cancelled() => log::warn!("shutdown requested"),
145145+ Some(r) = tasks.join_next() => {
146146+ log::warn!("a task exited, shutting down: {r:?}");
147147+ shutdown.cancel();
148148+ }
149149+ }
150150+151151+ tasks.spawn(async move {
152152+ cache
153153+ .close()
154154+ .await
155155+ .map_err(MainTaskError::FirehoseCacheCloseError)
156156+ });
157157+158158+ tokio::select! {
159159+ _ = async {
160160+ while let Some(completed) = tasks.join_next().await {
161161+ log::info!("shutdown: task completed: {completed:?}");
162162+ }
163163+ } => {},
164164+ _ = tokio::time::sleep(std::time::Duration::from_secs(30)) => {
165165+ log::info!("shutdown: not all tasks completed on time. aborting...");
166166+ tasks.shutdown().await;
167167+ },
168168+ }
169169+170170+ log::info!("bye!");
171171+172172+ Ok(())
173173+}
174174+175175+fn install_metrics_server() -> Result<(), metrics_exporter_prometheus::BuildError> {
176176+ log::info!("installing metrics server...");
177177+ let host = [0, 0, 0, 0];
178178+ let port = 8765;
179179+ PrometheusBuilder::new()
180180+ .set_quantiles(&[0.5, 0.9, 0.99, 1.0])?
181181+ .set_bucket_duration(std::time::Duration::from_secs(300))?
182182+ .set_bucket_count(std::num::NonZero::new(12).unwrap()) // count * duration = 60 mins. stuff doesn't happen that fast here.
183183+ .set_enable_unit_suffix(false) // this seemed buggy for constellation (sometimes wouldn't engage)
184184+ .with_http_listener((host, port))
185185+ .install()?;
186186+ log::info!(
187187+ "metrics server installed! listening on http://{}.{}.{}.{}:{port}",
188188+ host[0],
189189+ host[1],
190190+ host[2],
191191+ host[3]
192192+ );
193193+ Ok(())
194194+}
+155
slingshot/src/record.rs
···11+//! cached record storage
22+33+use crate::{Identity, error::RecordError};
44+use atrium_api::types::string::{Cid, Did, Nsid, RecordKey};
55+use reqwest::{Client, StatusCode};
66+use serde::{Deserialize, Serialize};
77+use serde_json::value::RawValue;
88+use std::str::FromStr;
99+use std::time::Duration;
1010+use url::Url;
1111+1212+#[derive(Debug, Serialize, Deserialize)]
1313+pub struct RawRecord {
1414+ cid: Cid,
1515+ record: String,
1616+}
1717+1818+// TODO: should be able to do typed CID
1919+impl From<(Cid, Box<RawValue>)> for RawRecord {
2020+ fn from((cid, rv): (Cid, Box<RawValue>)) -> Self {
2121+ Self {
2222+ cid,
2323+ record: rv.get().to_string(),
2424+ }
2525+ }
2626+}
2727+2828+/// only for use with stored (validated) values, not general strings
2929+impl From<&RawRecord> for (Cid, Box<RawValue>) {
3030+ fn from(RawRecord { cid, record }: &RawRecord) -> Self {
3131+ (
3232+ cid.clone(),
3333+ RawValue::from_string(record.to_string())
3434+ .expect("stored string from RawValue to be valid"),
3535+ )
3636+ }
3737+}
/// Value type stored in the record cache.
///
/// `Found` carries the stored record; `Deleted` presumably marks a record
/// known to have been deleted — confirm against the consumer that writes it.
3838+3939+#[derive(Debug, Serialize, Deserialize)]
4040+pub enum CachedRecord {
4141+    Found(RawRecord),
4242+    Deleted,
4343+}
4444+4545+//////// upstream record fetching
// Shape of a successful upstream `com.atproto.repo.getRecord` JSON response,
// as deserialized by `Repo::get_record`.
4646+4747+#[derive(Deserialize)]
4848+struct RecordResponseObject {
4949+    #[allow(dead_code)] // expect it to be there but we ignore it
5050+    uri: String,
5151+    /// CID for this exact version of the record
5252+    ///
5353+    /// this is optional in the spec and that's potentially TODO for slingshot
5454+    cid: Option<String>,
5555+    /// the record itself as JSON
5656+    value: Box<RawValue>,
5757+}
/// Error body returned by an upstream host: an error name plus a message.
///
/// `Repo::get_record` parses this from HTTP 400 responses.
5858+5959+#[derive(Debug, Deserialize)]
6060+pub struct ErrorResponseObject {
6161+    pub error: String,
6262+    pub message: String,
6363+}
6464+6565+#[derive(Clone)]
6666+pub struct Repo {
6767+ identity: Identity,
6868+ client: Client,
6969+}
7070+7171+impl Repo {
7272+ pub fn new(identity: Identity) -> Self {
7373+ let client = Client::builder()
7474+ .user_agent(format!(
7575+ "microcosm slingshot v{} (dev: @bad-example.com)",
7676+ env!("CARGO_PKG_VERSION")
7777+ ))
7878+ .no_proxy()
7979+ .timeout(Duration::from_secs(10))
8080+ .build()
8181+ .unwrap();
8282+ Repo { identity, client }
8383+ }
8484+8585+ pub async fn get_record(
8686+ &self,
8787+ did: &Did,
8888+ collection: &Nsid,
8989+ rkey: &RecordKey,
9090+ cid: &Option<Cid>,
9191+ ) -> Result<CachedRecord, RecordError> {
9292+ let Some(pds) = self.identity.did_to_pds(did.clone()).await? else {
9393+ return Err(RecordError::NotFound("could not get pds for DID"));
9494+ };
9595+9696+ // cid gets set to None for a retry, if it's Some and we got NotFound
9797+ let mut cid = cid;
9898+9999+ let res = loop {
100100+ // TODO: throttle outgoing requests by host probably, generally guard against outgoing requests
101101+ let mut params = vec![
102102+ ("repo", did.to_string()),
103103+ ("collection", collection.to_string()),
104104+ ("rkey", rkey.to_string()),
105105+ ];
106106+ if let Some(cid) = cid {
107107+ params.push(("cid", cid.as_ref().to_string()));
108108+ }
109109+ let mut url = Url::parse_with_params(&pds, ¶ms)?;
110110+ url.set_path("/xrpc/com.atproto.repo.getRecord");
111111+112112+ let res = self
113113+ .client
114114+ .get(url.clone())
115115+ .send()
116116+ .await
117117+ .map_err(RecordError::SendError)?;
118118+119119+ if res.status() == StatusCode::BAD_REQUEST {
120120+ // 1. if we're not able to parse json, it's not something we can handle
121121+ let err = res
122122+ .json::<ErrorResponseObject>()
123123+ .await
124124+ .map_err(RecordError::UpstreamBadBadNotGoodRequest)?;
125125+ // 2. if we are, is it a NotFound? and if so, did we try with a CID?
126126+ // if so, retry with no CID (api handler will reject for mismatch but
127127+ // with a nice error + warm cache)
128128+ if err.error == "NotFound" && cid.is_some() {
129129+ cid = &None;
130130+ continue;
131131+ } else {
132132+ return Err(RecordError::UpstreamBadRequest(err));
133133+ }
134134+ }
135135+ break res;
136136+ };
137137+138138+ let data = res
139139+ .error_for_status()
140140+ .map_err(RecordError::StatusError)? // TODO atproto error handling (think about handling not found)
141141+ .json::<RecordResponseObject>()
142142+ .await
143143+ .map_err(RecordError::ParseJsonError)?; // todo...
144144+145145+ let Some(cid) = data.cid else {
146146+ return Err(RecordError::MissingUpstreamCid);
147147+ };
148148+ let cid = Cid::from_str(&cid).map_err(|e| RecordError::BadUpstreamCid(e.to_string()))?;
149149+150150+ Ok(CachedRecord::Found(RawRecord {
151151+ cid,
152152+ record: data.value.to_string(),
153153+ }))
154154+ }
155155+}
+778
slingshot/src/server.rs
···11+use crate::{
22+ CachedRecord, ErrorResponseObject, Identity, Repo,
33+ error::{RecordError, ServerError},
44+};
55+use atrium_api::types::string::{Cid, Did, Handle, Nsid, RecordKey};
66+use foyer::HybridCache;
77+use links::at_uri::parse_at_uri as normalize_at_uri;
88+use serde::Serialize;
99+use std::path::PathBuf;
1010+use std::str::FromStr;
1111+use std::sync::Arc;
1212+use tokio_util::sync::CancellationToken;
1313+1414+use poem::{
1515+ Endpoint, EndpointExt, Route, Server,
1616+ endpoint::{StaticFileEndpoint, make_sync},
1717+ http::Method,
1818+ listener::{
1919+ Listener, TcpListener,
2020+ acme::{AutoCert, LETS_ENCRYPT_PRODUCTION},
2121+ },
2222+ middleware::{CatchPanic, Cors, Tracing},
2323+};
2424+use poem_openapi::{
2525+ ApiResponse, ContactObject, ExternalDocumentObject, Object, OpenApi, OpenApiService, Tags,
2626+ param::Query, payload::Json, types::Example,
2727+};
/// Example handle shown in the generated API docs.
fn example_handle() -> String {
    String::from("bad-example.com")
}
/// Example DID shown in the generated API docs.
fn example_did() -> String {
    String::from("did:plc:hdhoaan3xa3jiuq4fg4mefid")
}
/// Example collection NSID shown in the generated API docs.
fn example_collection() -> String {
    String::from("app.bsky.feed.like")
}
/// Example record key shown in the generated API docs.
fn example_rkey() -> String {
    String::from("3lv4ouczo2b2a")
}
/// Example at-uri assembled from the example DID, collection and rkey.
fn example_uri() -> String {
    let (did, collection, rkey) = (example_did(), example_collection(), example_rkey());
    format!("at://{}/{}/{}", did, collection, rkey)
}
/// Example PDS URL shown in the generated API docs.
fn example_pds() -> String {
    String::from("https://porcini.us-east.host.bsky.network")
}
/// Example signing key shown in the generated API docs.
fn example_signing_key() -> String {
    String::from("zQ3shpq1g134o7HGDb86CtQFxnHqzx5pZWknrVX2Waum3fF6j")
}
5555+5656+#[derive(Object)]
5757+#[oai(example = true)]
5858+struct XrpcErrorResponseObject {
5959+ /// Should correspond an error `name` in the lexicon errors array
6060+ error: String,
6161+ /// Human-readable description and possibly additonal context
6262+ message: String,
6363+}
6464+impl Example for XrpcErrorResponseObject {
6565+ fn example() -> Self {
6666+ Self {
6767+ error: "RecordNotFound".to_string(),
6868+ message: "This record was deleted".to_string(),
6969+ }
7070+ }
7171+}
7272+type XrpcError = Json<XrpcErrorResponseObject>;
7373+fn xrpc_error(error: impl AsRef<str>, message: impl AsRef<str>) -> XrpcError {
7474+ Json(XrpcErrorResponseObject {
7575+ error: error.as_ref().to_string(),
7676+ message: message.as_ref().to_string(),
7777+ })
7878+}
7979+8080+fn bad_request_handler_get_record(err: poem::Error) -> GetRecordResponse {
8181+ GetRecordResponse::BadRequest(Json(XrpcErrorResponseObject {
8282+ error: "InvalidRequest".to_string(),
8383+ message: format!("Bad request, here's some info that maybe should not be exposed: {err}"),
8484+ }))
8585+}
8686+8787+fn bad_request_handler_resolve_mini(err: poem::Error) -> ResolveMiniIDResponse {
8888+ ResolveMiniIDResponse::BadRequest(Json(XrpcErrorResponseObject {
8989+ error: "InvalidRequest".to_string(),
9090+ message: format!("Bad request, here's some info that maybe should not be exposed: {err}"),
9191+ }))
9292+}
9393+9494+fn bad_request_handler_resolve_handle(err: poem::Error) -> JustDidResponse {
9595+ JustDidResponse::BadRequest(Json(XrpcErrorResponseObject {
9696+ error: "InvalidRequest".to_string(),
9797+ message: format!("Bad request, here's some info that maybe should not be exposed: {err}"),
9898+ }))
9999+}
100100+101101+#[derive(Object)]
102102+#[oai(example = true)]
103103+struct FoundRecordResponseObject {
104104+ /// at-uri for this record
105105+ uri: String,
106106+ /// CID for this exact version of the record
107107+ ///
108108+ /// Slingshot will always return the CID, despite it not being a required
109109+ /// response property in the official lexicon.
110110+ ///
111111+ /// TODO: probably actually let it be optional, idk are some pds's weirdly
112112+ /// not returning it?
113113+ cid: Option<String>,
114114+ /// the record itself as JSON
115115+ value: serde_json::Value,
116116+}
117117+impl Example for FoundRecordResponseObject {
118118+ fn example() -> Self {
119119+ Self {
120120+ uri: example_uri(),
121121+ cid: Some("bafyreialv3mzvvxaoyrfrwoer3xmabbmdchvrbyhayd7bga47qjbycy74e".to_string()),
122122+ value: serde_json::json!({
123123+ "$type": "app.bsky.feed.like",
124124+ "createdAt": "2025-07-29T18:02:02.327Z",
125125+ "subject": {
126126+ "cid": "bafyreia2gy6eyk5qfetgahvshpq35vtbwy6negpy3gnuulcdi723mi7vxy",
127127+ "uri": "at://did:plc:vwzwgnygau7ed7b7wt5ux7y2/app.bsky.feed.post/3lv4lkb4vgs2k"
128128+ }
129129+ }),
130130+ }
131131+ }
132132+}
// Response type returned by the `get_record` and `get_uri_record` endpoints.
// Each variant carries the HTTP status given in its `oai` attribute; the `///`
// docs are part of the generated API documentation.
133133+134134+#[derive(ApiResponse)]
135135+#[oai(bad_request_handler = "bad_request_handler_get_record")]
136136+enum GetRecordResponse {
137137+    /// Record found
138138+    #[oai(status = 200)]
139139+    Ok(Json<FoundRecordResponseObject>),
140140+    /// Bad request or no record to return
141141+    ///
142142+    /// The only error name in the repo.getRecord lexicon is `RecordNotFound`,
143143+    /// but the [canonical api docs](https://docs.bsky.app/docs/api/com-atproto-repo-get-record)
144144+    /// also list `InvalidRequest`, `ExpiredToken`, and `InvalidToken`. Of
145145+    /// these, slingshot will only generate `RecordNotFound` or `InvalidRequest`,
146146+    /// but may return any proxied error code from the upstream repo.
147147+    #[oai(status = 400)]
148148+    BadRequest(XrpcError),
149149+    /// Server errors
150150+    #[oai(status = 500)]
151151+    ServerError(XrpcError),
152152+}
153153+154154+#[derive(Object)]
155155+#[oai(example = true)]
156156+struct MiniDocResponseObject {
157157+ /// DID, bi-directionally verified if a handle was provided in the query.
158158+ did: String,
159159+ /// The validated handle of the account or `handle.invalid` if the handle
160160+ /// did not bi-directionally match the DID document.
161161+ handle: String,
162162+ /// The identity's PDS URL
163163+ pds: String,
164164+ /// The atproto signing key publicKeyMultibase
165165+ ///
166166+ /// Legacy key encoding not supported. the key is returned directly; `id`,
167167+ /// `type`, and `controller` are omitted.
168168+ signing_key: String,
169169+}
170170+impl Example for MiniDocResponseObject {
171171+ fn example() -> Self {
172172+ Self {
173173+ did: example_did(),
174174+ handle: example_handle(),
175175+ pds: example_pds(),
176176+ signing_key: example_signing_key(),
177177+ }
178178+ }
179179+}
// Response type returned by the `resolve_mini_id` endpoint. There is no 500
// variant here: that handler reports its resolution failures as `BadRequest`.
180180+181181+#[derive(ApiResponse)]
182182+#[oai(bad_request_handler = "bad_request_handler_resolve_mini")]
183183+enum ResolveMiniIDResponse {
184184+    /// Identity resolved
185185+    #[oai(status = 200)]
186186+    Ok(Json<MiniDocResponseObject>),
187187+    /// Bad request or identity not resolved
188188+    #[oai(status = 400)]
189189+    BadRequest(XrpcError),
190190+}
191191+192192+#[derive(Object)]
193193+#[oai(example = true)]
194194+struct FoundDidResponseObject {
195195+ /// the DID, bi-directionally verified if using Slingshot
196196+ did: String,
197197+}
198198+impl Example for FoundDidResponseObject {
199199+ fn example() -> Self {
200200+ Self { did: example_did() }
201201+ }
202202+}
// Response type returned by the `resolve_handle` endpoint. Each variant carries
// the HTTP status given in its `oai` attribute; the `///` docs are part of the
// generated API documentation.
203203+204204+#[derive(ApiResponse)]
205205+#[oai(bad_request_handler = "bad_request_handler_resolve_handle")]
206206+enum JustDidResponse {
207207+    /// Resolution succeeded
208208+    #[oai(status = 200)]
209209+    Ok(Json<FoundDidResponseObject>),
210210+    /// Bad request, failed to resolve, or failed to verify
211211+    ///
212212+    /// `error` will be one of `InvalidRequest`, `HandleNotFound`.
213213+    #[oai(status = 400)]
214214+    BadRequest(XrpcError),
215215+    /// Something went wrong trying to complete the request
216216+    #[oai(status = 500)]
217217+    ServerError(XrpcError),
218218+}
/// State shared by the XRPC endpoint handlers: the record cache (string keys
/// to `CachedRecord`), the identity service, and the shared upstream `Repo`.
219219+220220+struct Xrpc {
221221+    cache: HybridCache<String, CachedRecord>,
222222+    identity: Identity,
223223+    repo: Arc<Repo>,
224224+}
225225+226226+#[derive(Tags)]
227227+enum ApiTags {
228228+ /// Core ATProtocol-compatible APIs.
229229+ ///
230230+ /// > [!tip]
231231+ /// > Upstream documentation is available at
232232+ /// > https://docs.bsky.app/docs/category/http-reference
233233+ ///
234234+ /// These queries are usually executed directly against the PDS containing
235235+ /// the data being requested. Slingshot offers a caching view of the same
236236+ /// contents with better expected performance and reliability.
237237+ #[oai(rename = "com.atproto.* queries")]
238238+ ComAtproto,
239239+ /// Additional and improved APIs.
240240+ ///
241241+ /// These APIs offer small tweaks to the core ATProtocol APIs, with more
242242+ /// more convenient [request parameters](#tag/slingshot-specific-queries/GET/xrpc/com.bad-example.repo.getUriRecord)
243243+ /// or [response formats](#tag/slingshot-specific-queries/GET/xrpc/com.bad-example.identity.resolveMiniDoc).
244244+ ///
245245+ /// > [!important]
246246+ /// > At the moment, these are namespaced under the `com.bad-example.*` NSID
247247+ /// > prefix, but as they stabilize they may be migrated to an org namespace
248248+ /// > like `blue.microcosm.*`. Support for asliasing to `com.bad-example.*`
249249+ /// > will be maintained as long as it's in use.
250250+ #[oai(rename = "slingshot-specific queries")]
251251+ Custom,
252252+}
253253+254254+#[OpenApi]
255255+impl Xrpc {
256256+    /// com.atproto.repo.getRecord
257257+    ///
258258+    /// Get a single record from a repository. Does not require auth.
259259+    ///
260260+    /// > [!tip]
261261+    /// > See also the [canonical `com.atproto` XRPC documentation](https://docs.bsky.app/docs/api/com-atproto-repo-get-record)
262262+    /// > that this endpoint aims to be compatible with.
263263+    #[oai(
264264+        path = "/com.atproto.repo.getRecord",
265265+        method = "get",
266266+        tag = "ApiTags::ComAtproto"
267267+    )]
268268+    async fn get_record(
269269+        &self,
270270+        /// The DID or handle of the repo
271271+        #[oai(example = "example_did")]
272272+        Query(repo): Query<String>,
273273+        /// The NSID of the record collection
274274+        #[oai(example = "example_collection")]
275275+        Query(collection): Query<String>,
276276+        /// The Record key
277277+        #[oai(example = "example_rkey")]
278278+        Query(rkey): Query<String>,
279279+        /// Optional: the CID of the version of the record.
280280+        ///
281281+        /// If not specified, then return the most recent version.
282282+        ///
283283+        /// If a stale `CID` is specified and a newer version of the record
284284+        /// exists, Slingshot returns a `NotFound` error. That is: Slingshot
285285+        /// only retains the most recent version of a record.
286286+        Query(cid): Query<Option<String>>,
287287+    ) -> GetRecordResponse {
        // thin wrapper: the raw string params are handed to the implementation
        // shared with `get_uri_record`
288288+        self.get_record_impl(repo, collection, rkey, cid).await
289289+    }
290290+291291+ /// com.bad-example.repo.getUriRecord
292292+ ///
293293+ /// Ergonomic complement to [`com.atproto.repo.getRecord`](https://docs.bsky.app/docs/api/com-atproto-repo-get-record)
294294+ /// which accepts an `at-uri` instead of individual repo/collection/rkey params
295295+ #[oai(
296296+ path = "/com.bad-example.repo.getUriRecord",
297297+ method = "get",
298298+ tag = "ApiTags::Custom"
299299+ )]
300300+ async fn get_uri_record(
301301+ &self,
302302+ /// The at-uri of the record
303303+ ///
304304+ /// The identifier can be a DID or an atproto handle, and the collection
305305+ /// and rkey segments must be present.
306306+ #[oai(example = "example_uri")]
307307+ Query(at_uri): Query<String>,
308308+ /// Optional: the CID of the version of the record.
309309+ ///
310310+ /// If not specified, then return the most recent version.
311311+ ///
312312+ /// > [!tip]
313313+ /// > If specified and a newer version of the record exists, returns 404 not
314314+ /// > found. That is: slingshot only retains the most recent version of a
315315+ /// > record.
316316+ Query(cid): Query<Option<String>>,
317317+ ) -> GetRecordResponse {
318318+ let bad_at_uri = || {
319319+ GetRecordResponse::BadRequest(xrpc_error(
320320+ "InvalidRequest",
321321+ "at-uri does not appear to be valid",
322322+ ))
323323+ };
324324+325325+ let Some(normalized) = normalize_at_uri(&at_uri) else {
326326+ return bad_at_uri();
327327+ };
328328+329329+ // TODO: move this to links
330330+ let Some(rest) = normalized.strip_prefix("at://") else {
331331+ return bad_at_uri();
332332+ };
333333+ let Some((repo, rest)) = rest.split_once('/') else {
334334+ return bad_at_uri();
335335+ };
336336+ let Some((collection, rest)) = rest.split_once('/') else {
337337+ return bad_at_uri();
338338+ };
339339+ let rkey = if let Some((rkey, _rest)) = rest.split_once('?') {
340340+ rkey
341341+ } else {
342342+ rest
343343+ };
344344+345345+ self.get_record_impl(
346346+ repo.to_string(),
347347+ collection.to_string(),
348348+ rkey.to_string(),
349349+ cid,
350350+ )
351351+ .await
352352+ }
353353+354354+ /// com.atproto.identity.resolveHandle
355355+ ///
356356+ /// Resolves an atproto [`handle`](https://atproto.com/guides/glossary#handle)
357357+ /// (hostname) to a [`DID`](https://atproto.com/guides/glossary#did-decentralized-id).
358358+ ///
359359+ /// > [!tip]
360360+ /// > Compatibility note: Slingshot will **always bi-directionally verify
361361+ /// > against the DID document**, which is optional according to the
362362+ /// > authoritative lexicon.
363363+ ///
364364+ /// > [!tip]
365365+ /// > See the [canonical `com.atproto` XRPC documentation](https://docs.bsky.app/docs/api/com-atproto-identity-resolve-handle)
366366+ /// > that this endpoint aims to be compatible with.
367367+ #[oai(
368368+ path = "/com.atproto.identity.resolveHandle",
369369+ method = "get",
370370+ tag = "ApiTags::ComAtproto"
371371+ )]
372372+ async fn resolve_handle(
373373+ &self,
374374+ /// The handle to resolve.
375375+ #[oai(example = "example_handle")]
376376+ Query(handle): Query<String>,
377377+ ) -> JustDidResponse {
378378+ let Ok(handle) = Handle::new(handle) else {
379379+ return JustDidResponse::BadRequest(xrpc_error("InvalidRequest", "not a valid handle"));
380380+ };
381381+382382+ let Ok(alleged_did) = self.identity.handle_to_did(handle.clone()).await else {
383383+ return JustDidResponse::ServerError(xrpc_error("Failed", "Could not resolve handle"));
384384+ };
385385+386386+ let Some(alleged_did) = alleged_did else {
387387+ return JustDidResponse::BadRequest(xrpc_error(
388388+ "HandleNotFound",
389389+ "Could not resolve handle to a DID",
390390+ ));
391391+ };
392392+393393+ let Ok(partial_doc) = self.identity.did_to_partial_mini_doc(&alleged_did).await else {
394394+ return JustDidResponse::ServerError(xrpc_error("Failed", "Could not fetch DID doc"));
395395+ };
396396+397397+ let Some(partial_doc) = partial_doc else {
398398+ return JustDidResponse::BadRequest(xrpc_error(
399399+ "HandleNotFound",
400400+ "Resolved handle but could not find DID doc for the DID",
401401+ ));
402402+ };
403403+404404+ if partial_doc.unverified_handle != handle {
405405+ return JustDidResponse::BadRequest(xrpc_error(
406406+ "HandleNotFound",
407407+ "Resolved handle failed bi-directional validation",
408408+ ));
409409+ }
410410+411411+ JustDidResponse::Ok(Json(FoundDidResponseObject {
412412+ did: alleged_did.to_string(),
413413+ }))
414414+ }
415415+416416+ /// com.bad-example.identity.resolveMiniDoc
417417+ ///
418418+ /// Like [com.atproto.identity.resolveIdentity](https://docs.bsky.app/docs/api/com-atproto-identity-resolve-identity)
419419+ /// but instead of the full `didDoc` it returns an atproto-relevant subset.
420420+ #[oai(
421421+ path = "/com.bad-example.identity.resolveMiniDoc",
422422+ method = "get",
423423+ tag = "ApiTags::Custom"
424424+ )]
425425+ async fn resolve_mini_id(
426426+ &self,
427427+ /// Handle or DID to resolve
428428+ #[oai(example = "example_handle")]
429429+ Query(identifier): Query<String>,
430430+ ) -> ResolveMiniIDResponse {
431431+ let invalid = |reason: &'static str| {
432432+ ResolveMiniIDResponse::BadRequest(xrpc_error("InvalidRequest", reason))
433433+ };
434434+435435+ let mut unverified_handle = None;
436436+ let did = match Did::new(identifier.clone()) {
437437+ Ok(did) => did,
438438+ Err(_) => {
439439+ let Ok(alleged_handle) = Handle::new(identifier) else {
440440+ return invalid("identifier was not a valid DID or handle");
441441+ };
442442+443443+ match self.identity.handle_to_did(alleged_handle.clone()).await {
444444+ Ok(res) => {
445445+ if let Some(did) = res {
446446+ // we did it joe
447447+ unverified_handle = Some(alleged_handle);
448448+ did
449449+ } else {
450450+ return invalid("Could not resolve handle identifier to a DID");
451451+ }
452452+ }
453453+ Err(e) => {
454454+ log::debug!("failed to resolve handle: {e}");
455455+ // TODO: ServerError not BadRequest
456456+ return invalid("errored while trying to resolve handle to DID");
457457+ }
458458+ }
459459+ }
460460+ };
461461+ let Ok(partial_doc) = self.identity.did_to_partial_mini_doc(&did).await else {
462462+ return invalid("failed to get DID doc");
463463+ };
464464+ let Some(partial_doc) = partial_doc else {
465465+ return invalid("failed to find DID doc");
466466+ };
467467+468468+ // ok so here's where we're at:
469469+ // ✅ we have a DID
470470+ // ✅ we have a partial doc
471471+ // 🔶 if we have a handle, it's from the `identifier` (user-input)
472472+ // -> then we just need to compare to the partial doc to confirm
473473+ // -> else we need to resolve the DID doc's to a handle and check
474474+ let handle = if let Some(h) = unverified_handle {
475475+ if h == partial_doc.unverified_handle {
476476+ h.to_string()
477477+ } else {
478478+ "handle.invalid".to_string()
479479+ }
480480+ } else {
481481+ let Ok(handle_did) = self
482482+ .identity
483483+ .handle_to_did(partial_doc.unverified_handle.clone())
484484+ .await
485485+ else {
486486+ return invalid("failed to get did doc's handle");
487487+ };
488488+ let Some(handle_did) = handle_did else {
489489+ return invalid("failed to resolve did doc's handle");
490490+ };
491491+ if handle_did == did {
492492+ partial_doc.unverified_handle.to_string()
493493+ } else {
494494+ "handle.invalid".to_string()
495495+ }
496496+ };
497497+498498+ ResolveMiniIDResponse::Ok(Json(MiniDocResponseObject {
499499+ did: did.to_string(),
500500+ handle,
501501+ pds: partial_doc.pds,
502502+ signing_key: partial_doc.signing_key,
503503+ }))
504504+ }
505505+506506+ async fn get_record_impl(
507507+ &self,
508508+ repo: String,
509509+ collection: String,
510510+ rkey: String,
511511+ cid: Option<String>,
512512+ ) -> GetRecordResponse {
513513+ let did = match Did::new(repo.clone()) {
514514+ Ok(did) => did,
515515+ Err(_) => {
516516+ let Ok(handle) = Handle::new(repo) else {
517517+ return GetRecordResponse::BadRequest(xrpc_error(
518518+ "InvalidRequest",
519519+ "repo was not a valid DID or handle",
520520+ ));
521521+ };
522522+ match self.identity.handle_to_did(handle).await {
523523+ Ok(res) => {
524524+ if let Some(did) = res {
525525+ did
526526+ } else {
527527+ return GetRecordResponse::BadRequest(xrpc_error(
528528+ "InvalidRequest",
529529+ "Could not resolve handle repo to a DID",
530530+ ));
531531+ }
532532+ }
533533+ Err(e) => {
534534+ log::debug!("handle resolution failed: {e}");
535535+ return GetRecordResponse::ServerError(xrpc_error(
536536+ "ResolutionFailed",
537537+ "errored while trying to resolve handle to DID",
538538+ ));
539539+ }
540540+ }
541541+ }
542542+ };
543543+544544+ let Ok(collection) = Nsid::new(collection) else {
545545+ return GetRecordResponse::BadRequest(xrpc_error(
546546+ "InvalidRequest",
547547+ "invalid NSID for collection",
548548+ ));
549549+ };
550550+551551+ let Ok(rkey) = RecordKey::new(rkey) else {
552552+ return GetRecordResponse::BadRequest(xrpc_error("InvalidRequest", "invalid rkey"));
553553+ };
554554+555555+ let cid: Option<Cid> = if let Some(cid) = cid {
556556+ let Ok(cid) = Cid::from_str(&cid) else {
557557+ return GetRecordResponse::BadRequest(xrpc_error("InvalidRequest", "invalid CID"));
558558+ };
559559+ Some(cid)
560560+ } else {
561561+ None
562562+ };
563563+564564+ let at_uri = format!("at://{}/{}/{}", &*did, &*collection, &*rkey);
565565+566566+ let fr = self
567567+ .cache
568568+ .fetch(at_uri.clone(), {
569569+ let cid = cid.clone();
570570+ let repo_api = self.repo.clone();
571571+ || async move {
572572+ repo_api
573573+ .get_record(&did, &collection, &rkey, &cid)
574574+ .await
575575+ .map_err(|e| foyer::Error::Other(Box::new(e)))
576576+ }
577577+ })
578578+ .await;
579579+580580+ let entry = match fr {
581581+ Ok(e) => e,
582582+ Err(foyer::Error::Other(e)) => {
583583+ let record_error = match e.downcast::<RecordError>() {
584584+ Ok(e) => e,
585585+ Err(e) => {
586586+ log::error!("error (foyer other) getting cache entry, {e:?}");
587587+ return GetRecordResponse::ServerError(xrpc_error(
588588+ "ServerError",
589589+ "sorry, something went wrong",
590590+ ));
591591+ }
592592+ };
593593+ let RecordError::UpstreamBadRequest(ErrorResponseObject { error, message }) =
594594+ *record_error
595595+ else {
596596+ log::error!("RecordError getting cache entry, {record_error:?}");
597597+ return GetRecordResponse::ServerError(xrpc_error(
598598+ "ServerError",
599599+ "sorry, something went wrong",
600600+ ));
601601+ };
602602+603603+ // all of the noise around here is so that we can ultimately reach this:
604604+ // upstream BadRequest extracted from the foyer result which we can proxy back
605605+ return GetRecordResponse::BadRequest(xrpc_error(
606606+ error,
607607+ format!("Upstream bad request: {message}"),
608608+ ));
609609+ }
610610+ Err(e) => {
611611+ log::error!("error (foyer) getting cache entry, {e:?}");
612612+ return GetRecordResponse::ServerError(xrpc_error(
613613+ "ServerError",
614614+ "sorry, something went wrong",
615615+ ));
616616+ }
617617+ };
618618+619619+ match *entry {
620620+ CachedRecord::Found(ref raw) => {
621621+ let (found_cid, raw_value) = raw.into();
622622+ if cid.clone().map(|c| c != found_cid).unwrap_or(false) {
623623+ return GetRecordResponse::BadRequest(Json(XrpcErrorResponseObject {
624624+ error: "RecordNotFound".to_string(),
625625+ message: "A record was found but its CID did not match that requested"
626626+ .to_string(),
627627+ }));
628628+ }
629629+ // TODO: thank u stellz: https://gist.github.com/stella3d/51e679e55b264adff89d00a1e58d0272
630630+ let value =
631631+ serde_json::from_str(raw_value.get()).expect("RawValue to be valid json");
632632+ GetRecordResponse::Ok(Json(FoundRecordResponseObject {
633633+ uri: at_uri,
634634+ cid: Some(found_cid.as_ref().to_string()),
635635+ value,
636636+ }))
637637+ }
638638+ CachedRecord::Deleted => GetRecordResponse::BadRequest(Json(XrpcErrorResponseObject {
639639+ error: "RecordNotFound".to_string(),
640640+ message: "This record was deleted".to_string(),
641641+ })),
642642+ }
643643+ }
644644+645645+ // TODO
646646+ // #[oai(path = "/com.atproto.identity.resolveHandle", method = "get")]
647647+ // #[oai(path = "/com.atproto.identity.resolveDid", method = "get")]
648648+ // but these are both not specified to do bidirectional validation, which is what we want to offer
649649+ // com.atproto.identity.resolveIdentity seems right, but requires returning the full did-doc
650650+ // would be nice if there were two queries:
651651+ // did -> verified handle + pds url
652652+ // handle -> verified did + pds url
653653+ //
654654+ // we could do horrible things and implement resolveIdentity with only a stripped-down fake did doc
655655+ // but this will *definitely* cause problems because eg. we're not currently storing pubkeys and
656656+ // those are a little bit important
657657+}
658658+659659+#[derive(Debug, Clone, Serialize)]
660660+#[serde(rename_all = "camelCase")]
661661+struct AppViewService {
662662+ id: String,
663663+ r#type: String,
664664+ service_endpoint: String,
665665+}
666666+#[derive(Debug, Clone, Serialize)]
667667+struct AppViewDoc {
668668+ id: String,
669669+ service: [AppViewService; 1],
670670+}
671671+/// Serve a did document for did:web for this to be an xrpc appview
672672+///
673673+/// No slingshot endpoints currently require auth, so it's not necessary to do
674674+/// service proxying, however clients may wish to:
675675+///
676676+/// - PDS proxying offers a level of client IP anonymity from slingshot
677677+/// - slingshot *may* implement more generous per-user rate-limits for proxied requests in the future
678678+fn get_did_doc(domain: &str) -> impl Endpoint + use<> {
679679+ let doc = poem::web::Json(AppViewDoc {
680680+ id: format!("did:web:{domain}"),
681681+ service: [AppViewService {
682682+ id: "#slingshot".to_string(),
683683+ r#type: "SlingshotRecordProxy".to_string(),
684684+ service_endpoint: format!("https://{domain}"),
685685+ }],
686686+ });
687687+ make_sync(move |_| doc.clone())
688688+}
689689+690690+pub async fn serve(
691691+ cache: HybridCache<String, CachedRecord>,
692692+ identity: Identity,
693693+ repo: Repo,
694694+ domain: Option<String>,
695695+ acme_contact: Option<String>,
696696+ certs: Option<PathBuf>,
697697+ shutdown: CancellationToken,
698698+) -> Result<(), ServerError> {
699699+ let repo = Arc::new(repo);
700700+ let api_service = OpenApiService::new(
701701+ Xrpc {
702702+ cache,
703703+ identity,
704704+ repo,
705705+ },
706706+ "Slingshot",
707707+ env!("CARGO_PKG_VERSION"),
708708+ )
709709+ .server(if let Some(ref h) = domain {
710710+ format!("https://{h}")
711711+ } else {
712712+ "http://localhost:3000".to_string()
713713+ })
714714+ .url_prefix("/xrpc")
715715+ .contact(
716716+ ContactObject::new()
717717+ .name("@microcosm.blue")
718718+ .url("https://bsky.app/profile/microcosm.blue"),
719719+ )
720720+ .description(include_str!("../api-description.md"))
721721+ .external_document(ExternalDocumentObject::new(
722722+ "https://microcosm.blue/slingshot",
723723+ ));
724724+725725+ let mut app = Route::new()
726726+ .at("/", StaticFileEndpoint::new("./static/index.html"))
727727+ .nest("/openapi", api_service.spec_endpoint())
728728+ .nest("/xrpc/", api_service);
729729+730730+ if let Some(domain) = domain {
731731+ rustls::crypto::aws_lc_rs::default_provider()
732732+ .install_default()
733733+ .expect("alskfjalksdjf");
734734+735735+ app = app.at("/.well-known/did.json", get_did_doc(&domain));
736736+737737+ let mut auto_cert = AutoCert::builder()
738738+ .directory_url(LETS_ENCRYPT_PRODUCTION)
739739+ .domain(&domain);
740740+ if let Some(contact) = acme_contact {
741741+ auto_cert = auto_cert.contact(contact);
742742+ }
743743+ if let Some(certs) = certs {
744744+ auto_cert = auto_cert.cache_path(certs);
745745+ }
746746+ let auto_cert = auto_cert.build().map_err(ServerError::AcmeBuildError)?;
747747+748748+ run(
749749+ TcpListener::bind("0.0.0.0:443").acme(auto_cert),
750750+ app,
751751+ shutdown,
752752+ )
753753+ .await
754754+ } else {
755755+ run(TcpListener::bind("127.0.0.1:3000"), app, shutdown).await
756756+ }
757757+}
758758+759759+async fn run<L>(listener: L, app: Route, shutdown: CancellationToken) -> Result<(), ServerError>
760760+where
761761+ L: Listener + 'static,
762762+{
763763+ let app = app
764764+ .with(
765765+ Cors::new()
766766+ .allow_origin_regex("*")
767767+ .allow_methods([Method::GET])
768768+ .allow_credentials(false),
769769+ )
770770+ .with(CatchPanic::new())
771771+ .with(Tracing);
772772+ Server::new(listener)
773773+ .name("slingshot")
774774+ .run_with_graceful_shutdown(app, shutdown.cancelled(), None)
775775+ .await
776776+ .map_err(ServerError::ServerExited)
777777+ .inspect(|()| log::info!("server ended. goodbye."))
778778+}
···11+pub mod consumer;
22+pub mod delay;
33+pub mod error;
44+pub mod removable_delay_queue;
55+pub mod server;
66+pub mod subscriber;
77+88+use jetstream::events::CommitEvent;
99+use links::CollectedLink;
1010+use serde::{Deserialize, Serialize};
1111+use server::MultiSubscribeQuery;
1212+use tokio_tungstenite::tungstenite::Message;

/// The properties of a link that are carried next to its serialized message
/// (see `ClientMessage`).
#[derive(Debug)]
pub struct FilterableProperties {
    /// The link target exactly as found: a DID, an at-uri, or a URL
    pub subject: String,
    /// The user/identity DID of the target.
    ///
    /// Set for bare DIDs and for DIDs extracted from at-uris; `None` when the
    /// target is any URL.
    pub subject_did: Option<String>,
    /// Link source: the collection NSID and the record property path, joined
    /// with `:`.
    pub source: String,
}
2626+2727+/// A serialized message with filterable properties attached
2828+#[derive(Debug)]
2929+pub struct ClientMessage {
3030+ pub message: Message, // always Message::Text
3131+ pub properties: FilterableProperties,
3232+}
3333+3434+impl ClientMessage {
3535+ pub fn new_link(
3636+ link: CollectedLink,
3737+ at_uri: &str,
3838+ commit: &CommitEvent,
3939+ ) -> Result<Self, serde_json::Error> {
4040+ let subject_did = link.target.did();
4141+4242+ let subject = link.target.into_string();
4343+4444+ let undotted = link.path.strip_prefix('.').unwrap_or_else(|| {
4545+ eprintln!("link path did not have expected '.' prefix: {}", link.path);
4646+ ""
4747+ });
4848+ let source = format!("{}:{undotted}", &*commit.collection);
4949+5050+ let client_link_event = ClientLinkEvent {
5151+ operation: "create",
5252+ source: source.clone(),
5353+ source_record: at_uri.to_string(),
5454+ source_rev: commit.rev.to_string(),
5555+ subject: subject.clone(),
5656+ };
5757+5858+ let client_event = ClientEvent {
5959+ kind: "link",
6060+ origin: "live", // TODO: indicate when we're locally replaying jetstream on reconnect?? maybe not.
6161+ link: client_link_event,
6262+ };
6363+6464+ let client_event_json = serde_json::to_string(&client_event)?;
6565+6666+ let message = Message::Text(client_event_json.into());
6767+6868+ let properties = FilterableProperties {
6969+ subject,
7070+ subject_did,
7171+ source,
7272+ };
7373+7474+ Ok(ClientMessage {
7575+ message,
7676+ properties,
7777+ })
7878+ }
7979+}
8080+8181+#[derive(Debug, Serialize)]
8282+#[serde(rename_all = "snake_case")]
8383+pub struct ClientEvent {
8484+ kind: &'static str, // "link"
8585+ origin: &'static str, // "live", "replay", "backfill"
8686+ link: ClientLinkEvent,
8787+}
8888+8989+#[derive(Debug, Serialize)]
9090+struct ClientLinkEvent {
9191+ operation: &'static str, // "create", "delete" (prob no update, though maybe for rev?)
9292+ source: String,
9393+ source_record: String,
9494+ source_rev: String,
9595+ subject: String,
9696+ // TODO: include the record too? would save clients a level of hydration
9797+ // ^^ no, not for now. until we backfill + support broader deletes at *least*.
9898+}
9999+100100+#[derive(Debug, Deserialize)]
101101+#[serde(tag = "type", content = "payload", rename_all = "snake_case")]
102102+pub enum SubscriberSourcedMessage {
103103+ OptionsUpdate(MultiSubscribeQuery),
104104+}
+144
spacedust/src/main.rs
···11+use spacedust::consumer;
22+use spacedust::delay;
33+use spacedust::error::MainTaskError;
44+use spacedust::removable_delay_queue::removable_delay_queue;
55+use spacedust::server;
66+77+use clap::Parser;
88+use metrics_exporter_prometheus::PrometheusBuilder;
99+use std::time::Duration;
1010+use tokio::sync::broadcast;
1111+use tokio_util::sync::CancellationToken;
1212+1313+/// Aggregate links in the at-mosphere
1414+#[derive(Parser, Debug, Clone)]
1515+#[command(version, about, long_about = None)]
1616+struct Args {
1717+ /// Jetstream server to connect to (exclusive with --fixture). Provide either a wss:// URL, or a shorhand value:
1818+ /// 'us-east-1', 'us-east-2', 'us-west-1', or 'us-west-2'
1919+ #[arg(long)]
2020+ jetstream: String,
2121+ /// don't request zstd-compressed jetstream events
2222+ ///
2323+ /// reduces CPU at the expense of more ingress bandwidth
2424+ #[arg(long, action)]
2525+ jetstream_no_zstd: bool,
2626+}
2727+2828+#[tokio::main]
2929+async fn main() -> Result<(), String> {
3030+ env_logger::init();
3131+3232+ // tokio broadcast keeps a single main output queue for all subscribers.
3333+ // each subscriber clones off a copy of an individual value for each recv.
3434+ // since there's no large per-client buffer, we can make this one kind of
3535+ // big and accommodate more slow/bursty clients.
3636+ //
3737+ // in fact, we *could* even keep lagging clients alive, inserting lag-
3838+ // indicating messages to their output.... but for now we'll drop them to
3939+ // avoid accumulating zombies.
4040+ //
4141+ // events on the channel are individual links as they are discovered. a link
4242+ // contains a source and a target. the target is an at-uri, so it's up to
4343+ // ~1KB max; source is a collection + link path, which can be more but in
4444+ // practice the whole link rarely approaches 1KB total.
4545+ //
4646+ // TODO: determine if a pathological case could blow this up (eg 1MB link
4747+ // paths + slow subscriber -> 16GiB queue)
4848+ let (b, _) = broadcast::channel(16_384);
4949+ let consumer_sender = b.clone();
5050+ let (d, _) = broadcast::channel(16_384);
5151+ let consumer_delayed_sender = d.clone();
5252+5353+ let delay = Duration::from_secs(21);
5454+ let (delay_queue_sender, delay_queue_receiver) = removable_delay_queue(delay);
5555+5656+ let shutdown = CancellationToken::new();
5757+5858+ let ctrlc_shutdown = shutdown.clone();
5959+ ctrlc::set_handler(move || ctrlc_shutdown.cancel()).expect("failed to set ctrl-c handler");
6060+6161+ let args = Args::parse();
6262+6363+ if let Err(e) = install_metrics_server() {
6464+ log::error!("failed to install metrics server: {e:?}");
6565+ };
6666+6767+ let mut tasks: tokio::task::JoinSet<Result<(), MainTaskError>> = tokio::task::JoinSet::new();
6868+6969+ let server_shutdown = shutdown.clone();
7070+ tasks.spawn(async move {
7171+ server::serve(b, d, server_shutdown).await?;
7272+ Ok(())
7373+ });
7474+7575+ let consumer_shutdown = shutdown.clone();
7676+ tasks.spawn(async move {
7777+ consumer::consume(
7878+ consumer_sender,
7979+ delay_queue_sender,
8080+ args.jetstream,
8181+ None,
8282+ args.jetstream_no_zstd,
8383+ consumer_shutdown,
8484+ )
8585+ .await?;
8686+ Ok(())
8787+ });
8888+8989+ let delay_shutdown = shutdown.clone();
9090+ tasks.spawn(async move {
9191+ delay::to_broadcast(
9292+ delay_queue_receiver,
9393+ consumer_delayed_sender,
9494+ delay_shutdown,
9595+ )
9696+ .await?;
9797+ Ok(())
9898+ });
9999+100100+ tokio::select! {
101101+ _ = shutdown.cancelled() => log::warn!("shutdown requested"),
102102+ Some(r) = tasks.join_next() => {
103103+ log::warn!("a task exited, shutting down: {r:?}");
104104+ shutdown.cancel();
105105+ }
106106+ }
107107+108108+ tokio::select! {
109109+ _ = async {
110110+ while let Some(completed) = tasks.join_next().await {
111111+ log::info!("shutdown: task completed: {completed:?}");
112112+ }
113113+ } => {},
114114+ _ = tokio::time::sleep(std::time::Duration::from_secs(3)) => {
115115+ log::info!("shutdown: not all tasks completed on time. aborting...");
116116+ tasks.shutdown().await;
117117+ },
118118+ }
119119+120120+ log::info!("bye!");
121121+122122+ Ok(())
123123+}
124124+125125+fn install_metrics_server() -> Result<(), metrics_exporter_prometheus::BuildError> {
126126+ log::info!("installing metrics server...");
127127+ let host = [0, 0, 0, 0];
128128+ let port = 8765;
129129+ PrometheusBuilder::new()
130130+ .set_quantiles(&[0.5, 0.9, 0.99, 1.0])?
131131+ .set_bucket_duration(std::time::Duration::from_secs(300))?
132132+ .set_bucket_count(std::num::NonZero::new(12).unwrap()) // count * duration = 60 mins. stuff doesn't happen that fast here.
133133+ .set_enable_unit_suffix(false) // this seemed buggy for constellation (sometimes wouldn't engage)
134134+ .with_http_listener((host, port))
135135+ .install()?;
136136+ log::info!(
137137+ "metrics server installed! listening on http://{}.{}.{}.{}:{port}",
138138+ host[0],
139139+ host[1],
140140+ host[2],
141141+ host[3]
142142+ );
143143+ Ok(())
144144+}
+125
spacedust/src/removable_delay_queue.rs
···11+use std::collections::{BTreeMap, VecDeque};
22+use std::ops::RangeBounds;
33+use std::sync::Arc;
44+use std::time::{Duration, Instant};
55+use thiserror::Error;
66+use tokio::sync::Mutex;
77+88+#[derive(Debug, Error)]
99+pub enum EnqueueError<T> {
1010+ #[error("queue ouput dropped")]
1111+ OutputDropped(T),
1212+}

/// Bounds required of a queue key: ordered (it lives in a `BTreeMap`) and
/// cloneable (each key is stored in both the deque and the map).
pub trait Key: Eq + Ord + Clone {}
/// Every type that satisfies the bounds is a `Key`.
impl<T: Eq + Ord + Clone> Key for T {}
1616+1717+#[derive(Debug)]
1818+struct Queue<K: Key, T> {
1919+ queue: VecDeque<(Instant, K)>,
2020+ items: BTreeMap<K, T>,
2121+}
2222+2323+pub struct Input<K: Key, T> {
2424+ q: Arc<Mutex<Queue<K, T>>>,
2525+}
2626+2727+impl<K: Key, T> Input<K, T> {
2828+ /// if a key is already present, its previous item will be overwritten and
2929+ /// its delay time will be reset for the new item.
3030+ ///
3131+ /// errors if the remover has been dropped
3232+ pub async fn enqueue(&self, key: K, item: T) -> Result<(), EnqueueError<T>> {
3333+ if Arc::strong_count(&self.q) == 1 {
3434+ return Err(EnqueueError::OutputDropped(item));
3535+ }
3636+ // TODO: try to push out an old element first
3737+ // for now we just hope there's a listener
3838+ let now = Instant::now();
3939+ let mut q = self.q.lock().await;
4040+ q.queue.push_back((now, key.clone()));
4141+ q.items.insert(key, item);
4242+ Ok(())
4343+ }
4444+ /// remove an item from the queue, by key
4545+ ///
4646+ /// the item itself is removed, but the key will remain in the queue -- it
4747+ /// will simply be skipped over when a new output item is requested. this
4848+ /// keeps the removal cheap (=btreemap remove), for a bit of space overhead
4949+ pub async fn remove_range(&self, range: impl RangeBounds<K>) {
5050+ let n = {
5151+ let mut q = self.q.lock().await;
5252+ let keys = q
5353+ .items
5454+ .range(range)
5555+ .map(|(k, _)| k)
5656+ .cloned()
5757+ .collect::<Vec<_>>();
5858+ for k in &keys {
5959+ q.items.remove(k);
6060+ }
6161+ keys.len()
6262+ };
6363+ if n == 0 {
6464+ metrics::counter!("delay_queue_remove_not_found").increment(1);
6565+ } else {
6666+ metrics::counter!("delay_queue_remove_total_records").increment(1);
6767+ metrics::counter!("delay_queue_remove_total_links").increment(n as u64);
6868+ }
6969+ }
7070+}
7171+7272+pub struct Output<K: Key, T> {
7373+ delay: Duration,
7474+ q: Arc<Mutex<Queue<K, T>>>,
7575+}
7676+7777+impl<K: Key, T> Output<K, T> {
7878+ pub async fn next(&self) -> Option<T> {
7979+ let get = || async {
8080+ let mut q = self.q.lock().await;
8181+ metrics::gauge!("delay_queue_queue_len").set(q.queue.len() as f64);
8282+ metrics::gauge!("delay_queue_queue_capacity").set(q.queue.capacity() as f64);
8383+ while let Some((t, k)) = q.queue.pop_front() {
8484+ // skip over queued keys that were removed from items
8585+ if let Some(item) = q.items.remove(&k) {
8686+ return Some((t, item));
8787+ }
8888+ }
8989+ None
9090+ };
9191+ loop {
9292+ if let Some((t, item)) = get().await {
9393+ let now = Instant::now();
9494+ let expected_release = t + self.delay;
9595+ if expected_release.saturating_duration_since(now) > Duration::from_millis(1) {
9696+ tokio::time::sleep_until(expected_release.into()).await;
9797+ metrics::counter!("delay_queue_emit_total", "early" => "yes").increment(1);
9898+ metrics::histogram!("delay_queue_emit_overshoot").record(0);
9999+ } else {
100100+ let overshoot = now.saturating_duration_since(expected_release);
101101+ metrics::counter!("delay_queue_emit_total", "early" => "no").increment(1);
102102+ metrics::histogram!("delay_queue_emit_overshoot")
103103+ .record(overshoot.as_secs_f64());
104104+ }
105105+ return Some(item);
106106+ } else if Arc::strong_count(&self.q) == 1 {
107107+ return None;
108108+ }
109109+ // the queue is *empty*, so we need to wait at least as long as the current delay
110110+ tokio::time::sleep(self.delay).await;
111111+ metrics::counter!("delay_queue_entirely_empty_total").increment(1);
112112+ }
113113+ }
114114+}
115115+116116+pub fn removable_delay_queue<K: Key, T>(delay: Duration) -> (Input<K, T>, Output<K, T>) {
117117+ let q: Arc<Mutex<Queue<K, T>>> = Arc::new(Mutex::new(Queue {
118118+ queue: VecDeque::new(),
119119+ items: BTreeMap::new(),
120120+ }));
121121+122122+ let input = Input::<K, T> { q: q.clone() };
123123+ let output = Output::<K, T> { q, delay };
124124+ (input, output)
125125+}
+339
spacedust/src/server.rs
···11+use crate::ClientMessage;
22+use crate::error::ServerError;
33+use crate::subscriber::Subscriber;
44+use dropshot::{
55+ ApiDescription, ApiEndpointBodyContentType, Body, ConfigDropshot, ConfigLogging,
66+ ConfigLoggingLevel, ExtractorMetadata, HttpError, HttpResponse, Query, RequestContext,
77+ ServerBuilder, ServerContext, SharedExtractor, WebsocketConnection, channel, endpoint,
88+};
99+use http::{
1010+ Response, StatusCode,
1111+ header::{ORIGIN, USER_AGENT},
1212+};
1313+use metrics::{counter, histogram};
1414+use std::sync::Arc;
1515+1616+use async_trait::async_trait;
1717+use schemars::JsonSchema;
1818+use serde::{Deserialize, Serialize};
1919+use std::collections::HashSet;
2020+use tokio::sync::broadcast;
2121+use tokio::time::Instant;
2222+use tokio_tungstenite::tungstenite::protocol::{Role, WebSocketConfig};
2323+use tokio_util::sync::CancellationToken;
2424+2525+const INDEX_HTML: &str = include_str!("../static/index.html");
2626+const FAVICON: &[u8] = include_bytes!("../static/favicon.ico");
2727+2828+pub async fn serve(
2929+ b: broadcast::Sender<Arc<ClientMessage>>,
3030+ d: broadcast::Sender<Arc<ClientMessage>>,
3131+ shutdown: CancellationToken,
3232+) -> Result<(), ServerError> {
3333+ let config_logging = ConfigLogging::StderrTerminal {
3434+ level: ConfigLoggingLevel::Info,
3535+ };
3636+3737+ let log = config_logging
3838+ .to_logger("example-basic")
3939+ .map_err(ServerError::ConfigLogError)?;
4040+4141+ let mut api = ApiDescription::new();
4242+ api.register(index).unwrap();
4343+ api.register(favicon).unwrap();
4444+ api.register(openapi).unwrap();
4545+ api.register(subscribe).unwrap();
4646+4747+ // TODO: put spec in a once cell / lazy lock thing?
4848+ let spec = Arc::new(
4949+ api.openapi(
5050+ "Spacedust",
5151+ env!("CARGO_PKG_VERSION")
5252+ .parse()
5353+ .inspect_err(|e| {
5454+ eprintln!("failed to parse cargo package version for openapi: {e:?}")
5555+ })
5656+ .unwrap_or(semver::Version::new(0, 0, 1)),
5757+ )
5858+ .description("A configurable ATProto notifications firehose.")
5959+ .contact_name("part of @microcosm.blue")
6060+ .contact_url("https://microcosm.blue")
6161+ .json()
6262+ .map_err(ServerError::OpenApiJsonFail)?,
6363+ );
6464+6565+ let sub_shutdown = shutdown.clone();
6666+ let ctx = Context {
6767+ spec,
6868+ b,
6969+ d,
7070+ shutdown: sub_shutdown,
7171+ };
7272+7373+ let server = ServerBuilder::new(api, ctx, log)
7474+ .config(ConfigDropshot {
7575+ bind_address: "0.0.0.0:9998".parse().unwrap(),
7676+ ..Default::default()
7777+ })
7878+ .start()?;
7979+8080+ tokio::select! {
8181+ s = server.wait_for_shutdown() => {
8282+ s.map_err(ServerError::ServerExited)?;
8383+ log::info!("server shut down normally.");
8484+ },
8585+ _ = shutdown.cancelled() => {
8686+ log::info!("shutting down: closing server");
8787+ server.close().await.map_err(ServerError::BadClose)?;
8888+ },
8989+ }
9090+ Ok(())
9191+}
9292+9393+#[derive(Debug, Clone)]
9494+struct Context {
9595+ pub spec: Arc<serde_json::Value>,
9696+ pub b: broadcast::Sender<Arc<ClientMessage>>,
9797+ pub d: broadcast::Sender<Arc<ClientMessage>>,
9898+ pub shutdown: CancellationToken,
9999+}
100100+101101+async fn instrument_handler<T, H, R>(ctx: &RequestContext<T>, handler: H) -> Result<R, HttpError>
102102+where
103103+ R: HttpResponse,
104104+ H: Future<Output = Result<R, HttpError>>,
105105+ T: ServerContext,
106106+{
107107+ let start = Instant::now();
108108+ let result = handler.await;
109109+ let latency = start.elapsed();
110110+ let status_code = match &result {
111111+ Ok(response) => response.status_code(),
112112+ Err(e) => e.status_code.as_status(),
113113+ }
114114+ .as_str() // just the number (.to_string()'s Display does eg `200 OK`)
115115+ .to_string();
116116+ let endpoint = ctx.endpoint.operation_id.clone();
117117+ let headers = ctx.request.headers();
118118+ let origin = headers
119119+ .get(ORIGIN)
120120+ .and_then(|v| v.to_str().ok())
121121+ .unwrap_or("")
122122+ .to_string();
123123+ let ua = headers
124124+ .get(USER_AGENT)
125125+ .and_then(|v| v.to_str().ok())
126126+ .map(|ua| {
127127+ if ua.starts_with("Mozilla/5.0 ") {
128128+ "browser"
129129+ } else {
130130+ ua
131131+ }
132132+ })
133133+ .unwrap_or("")
134134+ .to_string();
135135+ counter!("server_requests_total",
136136+ "endpoint" => endpoint.clone(),
137137+ "origin" => origin,
138138+ "ua" => ua,
139139+ "status_code" => status_code,
140140+ )
141141+ .increment(1);
142142+ histogram!("server_handler_latency", "endpoint" => endpoint).record(latency.as_micros() as f64);
143143+ result
144144+}
145145+146146+use dropshot::{HttpResponseHeaders, HttpResponseOk};
147147+148148+pub type OkCorsResponse<T> = Result<HttpResponseHeaders<HttpResponseOk<T>>, HttpError>;
149149+150150+/// Helper for constructing Ok responses: return OkCors(T).into()
151151+/// (not happy with this yet)
152152+pub struct OkCors<T: Serialize + JsonSchema + Send + Sync>(pub T);
153153+154154+impl<T> From<OkCors<T>> for OkCorsResponse<T>
155155+where
156156+ T: Serialize + JsonSchema + Send + Sync,
157157+{
158158+ fn from(ok: OkCors<T>) -> OkCorsResponse<T> {
159159+ let mut res = HttpResponseHeaders::new_unnamed(HttpResponseOk(ok.0));
160160+ res.headers_mut()
161161+ .insert("access-control-allow-origin", "*".parse().unwrap());
162162+ Ok(res)
163163+ }
164164+}
165165+166166+// TODO: cors for HttpError
167167+168168+/// Serve index page as html
169169+#[endpoint {
170170+ method = GET,
171171+ path = "/",
172172+ /*
173173+ * not useful to have this in openapi
174174+ */
175175+ unpublished = true,
176176+}]
177177+async fn index(ctx: RequestContext<Context>) -> Result<Response<Body>, HttpError> {
178178+ instrument_handler(&ctx, async {
179179+ Ok(Response::builder()
180180+ .status(StatusCode::OK)
181181+ .header(http::header::CONTENT_TYPE, "text/html")
182182+ .body(INDEX_HTML.into())?)
183183+ })
184184+ .await
185185+}
186186+187187+/// Serve index page as html
188188+#[endpoint {
189189+ method = GET,
190190+ path = "/favicon.ico",
191191+ /*
192192+ * not useful to have this in openapi
193193+ */
194194+ unpublished = true,
195195+}]
196196+async fn favicon(ctx: RequestContext<Context>) -> Result<Response<Body>, HttpError> {
197197+ instrument_handler(&ctx, async {
198198+ Ok(Response::builder()
199199+ .status(StatusCode::OK)
200200+ .header(http::header::CONTENT_TYPE, "image/x-icon")
201201+ .body(FAVICON.to_vec().into())?)
202202+ })
203203+ .await
204204+}
205205+206206+/// Meta: get the openapi spec for this api
207207+#[endpoint {
208208+ method = GET,
209209+ path = "/openapi",
210210+ /*
211211+ * not useful to have this in openapi
212212+ */
213213+ unpublished = true,
214214+}]
215215+async fn openapi(ctx: RequestContext<Context>) -> OkCorsResponse<serde_json::Value> {
216216+ instrument_handler(&ctx, async {
217217+ let spec = (*ctx.context().spec).clone();
218218+ OkCors(spec).into()
219219+ })
220220+ .await
221221+}
222222+223223+/// The real type that gets deserialized
224224+#[derive(Debug, Deserialize, JsonSchema)]
225225+#[serde(rename_all = "camelCase")]
226226+pub struct MultiSubscribeQuery {
227227+ #[serde(default)]
228228+ pub wanted_subjects: HashSet<String>,
229229+ #[serde(default)]
230230+ pub wanted_subject_dids: HashSet<String>,
231231+ #[serde(default)]
232232+ pub wanted_sources: HashSet<String>,
233233+}
234234+/// The fake corresponding type for docs that dropshot won't freak out about a
235235+/// vec for
236236+#[derive(Deserialize, JsonSchema)]
237237+#[allow(dead_code)]
238238+#[serde(rename_all = "camelCase")]
239239+struct MultiSubscribeQueryForDocs {
240240+ /// One or more at-uris to receive links about
241241+ ///
242242+ /// The at-uri must be url-encoded
243243+ ///
244244+ /// Pass this parameter multiple times to specify multiple collections, like
245245+ /// `wantedSubjects=[...]&wantedSubjects=[...]`
246246+ pub wanted_subjects: String,
247247+ /// One or more DIDs to receive links about
248248+ ///
249249+ /// Pass this parameter multiple times to specify multiple collections
250250+ pub wanted_subject_dids: String,
251251+ /// One or more link sources to receive links about
252252+ ///
253253+ /// TODO: docs about link sources
254254+ ///
255255+ /// eg, a bluesky like's link source: `app.bsky.feed.like:subject.uri`
256256+ ///
257257+ /// Pass this parameter multiple times to specify multiple sources
258258+ pub wanted_sources: String,
259259+}
260260+261261+// The `SharedExtractor` implementation for Query<QueryType> describes how to
262262+// construct an instance of `Query<QueryType>` from an HTTP request: namely, by
263263+// parsing the query string to an instance of `QueryType`.
264264+#[async_trait]
265265+impl SharedExtractor for MultiSubscribeQuery {
266266+ async fn from_request<Context: ServerContext>(
267267+ ctx: &RequestContext<Context>,
268268+ ) -> Result<MultiSubscribeQuery, HttpError> {
269269+ let raw_query = ctx.request.uri().query().unwrap_or("");
270270+ let q = serde_qs::from_str(raw_query).map_err(|e| {
271271+ HttpError::for_bad_request(None, format!("unable to parse query string: {e}"))
272272+ })?;
273273+ Ok(q)
274274+ }
275275+276276+ fn metadata(body_content_type: ApiEndpointBodyContentType) -> ExtractorMetadata {
277277+ // HACK: query type switcheroo: passing MultiSubscribeQuery to
278278+ // `metadata` would "helpfully" panic because dropshot believes we can
279279+ // only have scalar types in a query.
280280+ //
281281+ // so instead we have a fake second type whose only job is to look the
282282+ // same as MultiSubscribeQuery exept that it has `String` instead of
283283+ // `Vec<String>`, which dropshot will accept, and generate ~close-enough
284284+ // docs for.
285285+ <Query<MultiSubscribeQueryForDocs> as SharedExtractor>::metadata(body_content_type)
286286+ }
287287+}
288288+289289+#[derive(Deserialize, JsonSchema)]
290290+#[serde(rename_all = "camelCase")]
291291+struct ScalarSubscribeQuery {
292292+ /// Bypass the 21-sec delay buffer
293293+ ///
294294+ /// By default, spacedust holds all firehose links for 21 seconds before
295295+ /// emitting them, to prevent quickly- undone interactions from generating
296296+ /// notifications.
297297+ ///
298298+ /// Setting `instant` to true bypasses this buffer, allowing faster (and
299299+ /// noisier) notification delivery.
300300+ ///
301301+ /// Typically [a little less than 1%](https://bsky.app/profile/bad-example.com/post/3ls32wctsrs2l)
302302+ /// of links links get deleted within 21s of being created.
303303+ #[serde(default)]
304304+ pub instant: bool,
305305+}
306306+307307+#[channel {
308308+ protocol = WEBSOCKETS,
309309+ path = "/subscribe",
310310+}]
311311+async fn subscribe(
312312+ reqctx: RequestContext<Context>,
313313+ query: MultiSubscribeQuery,
314314+ scalar_query: Query<ScalarSubscribeQuery>,
315315+ upgraded: WebsocketConnection,
316316+) -> dropshot::WebsocketChannelResult {
317317+ let ws = tokio_tungstenite::WebSocketStream::from_raw_socket(
318318+ upgraded.into_inner(),
319319+ Role::Server,
320320+ Some(WebSocketConfig::default().max_message_size(
321321+ Some(10 * 2_usize.pow(20)), // 10MiB, matching jetstream
322322+ )),
323323+ )
324324+ .await;
325325+326326+ let Context { b, d, shutdown, .. } = reqctx.context();
327327+ let sub_token = shutdown.child_token();
328328+329329+ let q = scalar_query.into_inner();
330330+ let subscription = if q.instant { b } else { d }.subscribe();
331331+ log::info!("starting subscriber with broadcast: instant={}", q.instant);
332332+333333+ Subscriber::new(query, sub_token)
334334+ .start(ws, subscription)
335335+ .await
336336+ .map_err(|e| format!("boo: {e:?}"))?;
337337+338338+ Ok(())
339339+}
+164
spacedust/src/subscriber.rs
···11+use crate::error::SubscriberUpdateError;
22+use crate::server::MultiSubscribeQuery;
33+use crate::{ClientMessage, FilterableProperties, SubscriberSourcedMessage};
44+use dropshot::WebsocketConnectionRaw;
55+use futures::SinkExt;
66+use futures::StreamExt;
77+use std::error::Error;
88+use std::sync::Arc;
99+use std::time::Duration;
1010+use tokio::sync::broadcast::{self, error::RecvError};
1111+use tokio::time::interval;
1212+use tokio_tungstenite::{WebSocketStream, tungstenite::Message};
1313+use tokio_util::sync::CancellationToken;
1414+1515+const PING_PERIOD: Duration = Duration::from_secs(30);
1616+1717+pub struct Subscriber {
1818+ query: MultiSubscribeQuery,
1919+ shutdown: CancellationToken,
2020+}
2121+2222+impl Subscriber {
2323+ pub fn new(query: MultiSubscribeQuery, shutdown: CancellationToken) -> Self {
2424+ Self { query, shutdown }
2525+ }
2626+2727+ pub async fn start(
2828+ mut self,
2929+ ws: WebSocketStream<WebsocketConnectionRaw>,
3030+ mut receiver: broadcast::Receiver<Arc<ClientMessage>>,
3131+ ) -> Result<(), Box<dyn Error>> {
3232+ let mut ping_state = None;
3333+ let (mut ws_sender, mut ws_receiver) = ws.split();
3434+ let mut ping_interval = interval(PING_PERIOD);
3535+ let _guard = self.shutdown.clone().drop_guard();
3636+3737+ // TODO: do we need to timeout ws sends??
3838+3939+ metrics::counter!("subscribers_connected_total").increment(1);
4040+ metrics::gauge!("subscribers_connected").increment(1);
4141+4242+ loop {
4343+ tokio::select! {
4444+ l = receiver.recv() => match l {
4545+ Ok(link) => if self.filter(&link.properties) {
4646+ if let Err(e) = ws_sender.send(link.message.clone()).await {
4747+ log::warn!("failed to send link, dropping subscriber: {e:?}");
4848+ break;
4949+ }
5050+ },
5151+ Err(RecvError::Closed) => self.shutdown.cancel(),
5252+ Err(RecvError::Lagged(n)) => {
5353+ log::warn!("dropping lagging subscriber (missed {n} messages already)");
5454+ self.shutdown.cancel();
5555+ }
5656+ },
5757+ cm = ws_receiver.next() => match cm {
5858+ Some(Ok(Message::Ping(state))) => {
5959+ if let Err(e) = ws_sender.send(Message::Pong(state)).await {
6060+ log::error!("failed to reply pong to subscriber: {e:?}");
6161+ break;
6262+ }
6363+ }
6464+ Some(Ok(Message::Pong(state))) => {
6565+ if let Some(expected_state) = ping_state {
6666+ if *state == expected_state {
6767+ ping_state = None; // good
6868+ } else {
6969+ log::error!("subscriber returned a pong with the wrong state, dropping");
7070+ self.shutdown.cancel();
7171+ }
7272+ } else {
7373+ log::error!("subscriber sent a pong when none was expected");
7474+ self.shutdown.cancel();
7575+ }
7676+ }
7777+ Some(Ok(Message::Text(raw))) => {
7878+ if let Err(e) = self.query.update_from_raw(&raw) {
7979+ log::error!("subscriber options could not be updated, dropping: {e:?}");
8080+ // TODO: send client an explanation
8181+ self.shutdown.cancel();
8282+ }
8383+ log::trace!("subscriber updated with opts: {:?}", self.query);
8484+ },
8585+ Some(Ok(m)) => log::trace!("subscriber sent an unexpected message: {m:?}"),
8686+ Some(Err(e)) => {
8787+ log::error!("failed to receive subscriber message: {e:?}");
8888+ break;
8989+ }
9090+ None => {
9191+ log::trace!("end of subscriber messages. bye!");
9292+ break;
9393+ }
9494+ },
9595+ _ = ping_interval.tick() => {
9696+ if ping_state.is_some() {
9797+ log::warn!("did not recieve pong within {PING_PERIOD:?}, dropping subscriber");
9898+ self.shutdown.cancel();
9999+ } else {
100100+ let new_state: [u8; 8] = rand::random();
101101+ let ping = new_state.to_vec().into();
102102+ ping_state = Some(new_state);
103103+ if let Err(e) = ws_sender.send(Message::Ping(ping)).await {
104104+ log::error!("failed to send ping to subscriber, dropping: {e:?}");
105105+ self.shutdown.cancel();
106106+ }
107107+ }
108108+ }
109109+ _ = self.shutdown.cancelled() => {
110110+ log::info!("subscriber shutdown requested, bye!");
111111+ if let Err(e) = ws_sender.close().await {
112112+ log::warn!("failed to close subscriber: {e:?}");
113113+ }
114114+ break;
115115+ },
116116+ }
117117+ }
118118+ log::trace!("end of subscriber. bye!");
119119+ metrics::gauge!("subscribers_connected").decrement(1);
120120+ Ok(())
121121+ }
122122+123123+ fn filter(&self, properties: &FilterableProperties) -> bool {
124124+ let query = &self.query;
125125+126126+ // subject + subject DIDs are logical OR
127127+ if !(query.wanted_subjects.is_empty() && query.wanted_subject_dids.is_empty()
128128+ || query.wanted_subjects.contains(&properties.subject)
129129+ || properties
130130+ .subject_did
131131+ .as_ref()
132132+ .map(|did| query.wanted_subject_dids.contains(did))
133133+ .unwrap_or(false))
134134+ {
135135+ // wowwww ^^ fix that
136136+ return false;
137137+ }
138138+139139+ // subjects together with sources are logical AND
140140+ if !(query.wanted_sources.is_empty() || query.wanted_sources.contains(&properties.source)) {
141141+ return false;
142142+ }
143143+144144+ true
145145+ }
146146+}
147147+148148+impl MultiSubscribeQuery {
149149+ pub fn update_from_raw(&mut self, s: &str) -> Result<(), SubscriberUpdateError> {
150150+ let SubscriberSourcedMessage::OptionsUpdate(opts) =
151151+ serde_json::from_str(s).map_err(SubscriberUpdateError::FailedToParseMessage)?;
152152+ if opts.wanted_sources.len() > 1_000 {
153153+ return Err(SubscriberUpdateError::TooManySourcesWanted);
154154+ }
155155+ if opts.wanted_subject_dids.len() > 10_000 {
156156+ return Err(SubscriberUpdateError::TooManyDidsWanted);
157157+ }
158158+ if opts.wanted_subjects.len() > 50_000 {
159159+ return Err(SubscriberUpdateError::TooManySubjectsWanted);
160160+ }
161161+ *self = opts;
162162+ Ok(())
163163+ }
164164+}
#!/usr/bin/env bash

# Start python's built-in http server on a fixed port for local development.
port=8888

echo "note: you might need to access via http://127.0.0.1:${port} (not localhost) for the iframe to get its cookies"
python3 -m http.server "${port}"
+66
who-am-i/readme.md
···11+# who am i
22+33+a little auth service for microcosm demos
44+55+**you probably SHOULD NOT USE THIS in any serious environment**
66+77+for now the deployment is restricted to microcosm -- expanding it for wider use likely requires solving a number of challenges that oauth exists for.
88+99+1010+## a little auth service
1111+1212+- you drop an iframe and a short few lines of JS on your web page, and get a nice-ish atproto login prompt.
1313+- if the user has ever authorized this service before (and within some expiration), they will be presented with an in-frame one-click option to proceed.
1414+- otherwise they get bounced over to the normal atproto oauth flow (in a popup or new tab)
1515+- you get a callback containing
1616+ - a verified DID and handle
1717+ - a JWT containing the same that can be verified by public key
1818+- **no write permissions** or any atproto permissions at all, just a verified identity
1919+2020+**you probably SHOULD NOT USE THIS in any serious environment**
2121+2222+2323+### problems
2424+2525+- clickjacking: if this were allowed on arbitrary domains, malicious sites could trick users into proving their atproto identity.
2626+- all the other problems oauth exists to solve: it's a little tricky to hook around the oauth flow so there are probably some annoying attacks.
2727+- auth in front of auth: it's just a bit awkward to run an auth service that acts as an intermediary for a more-real auth behind it, but that's worse, less secure, and doesn't conform to any standards.
2828+2929+so, **you probably SHOULD NOT USE THIS in any serious environment**
3030+3131+3232+## why
sometimes you want to make a thing that people can use with an atproto identity, and you might not want to let them put in anyone else's identity. apps that operate on public data like skircle, cred.blue, and the microcosm spacedust notifications demo don't require any special permission to operate for any user, and that's sometimes fine, but sometimes creepy/stalker-y/etc.
3535+3636+to avoid building a small torment nexus for a microcosm demo (while also not wanting to get deep into oauth or operate a demo-specific auth backend), i made this little service to just get a verified identity.
3737+3838+note: **you probably SHOULD NOT USE THIS in any serious environment**
3939+4040+---
4141+4242+since the requirements (read-only, just verifying identity) seem modest, i was hoping that a fairly simple implementation could be Good Enough, but in the time that i was willing to spend on it, the simple version without major obvious weaknesses i was hoping for didn't emerge.
it's still nice to have an explicit opt-in on a per-demo basis for microcosm so it will be used for that. it's allow-listed for the microcosm domain however (so not deployed on any adversarial hosting pages), so it's simultaneously overkill and restrictive.
4545+4646+i will get back to oauth eventually and hopefully roll out a microcosm service to make it easy for clients (and demos), but there are a few more things in the pipeline to get to first.
4747+4848+4949+### todo
5050+5151+provide a pubkey-signed JWT of the identity (just the DID as `sub` probably). (**you probably SHOULD NOT USE THIS in any serious environment**)
5252+5353+5454+## building
5555+5656+for raspi 1 model b:
5757+5858+atrium-oauth uses reqwest with default tls config that requires openssl which `cross` doesn't have a good time getting the os deps for.
fortunately, simply *enabling* a different tls feature for reqwest actually stops the default problematic one from causing problems, so we have a `reqwest` direct dependency with a feature enabled, even though it's never imported into actual code.
6161+6262+it builds with
6363+6464+```bash
6565+cross build --release --target arm-unknown-linux-gnueabihf
6666+```
+89
who-am-i/src/expiring_task_map.rs
···11+use dashmap::DashMap;
22+use rand::{Rng, distr::Alphanumeric};
33+use std::sync::Arc;
44+use std::time::Duration;
55+use tokio::task::{JoinHandle, spawn};
66+use tokio::time::sleep;
77+use tokio_util::sync::{CancellationToken, DropGuard};
88+99+pub struct ExpiringTaskMap<T>(TaskMap<T>);
1010+1111+/// need to manually implement clone because T is allowed to not be clone
1212+impl<T> Clone for ExpiringTaskMap<T> {
1313+ fn clone(&self) -> Self {
1414+ Self(self.0.clone())
1515+ }
1616+}
1717+1818+impl<T: Send + 'static> ExpiringTaskMap<T> {
1919+ pub fn new(expiration: Duration) -> Self {
2020+ let map = TaskMap {
2121+ map: Arc::new(DashMap::new()),
2222+ expiration,
2323+ };
2424+ Self(map)
2525+ }
2626+2727+ pub fn dispatch<F>(&self, task: F, cancel: CancellationToken) -> String
2828+ where
2929+ F: Future<Output = T> + Send + 'static,
3030+ {
3131+ let TaskMap {
3232+ ref map,
3333+ expiration,
3434+ } = self.0;
3535+ let task_key: String = rand::rng()
3636+ .sample_iter(&Alphanumeric)
3737+ .take(24)
3838+ .map(char::from)
3939+ .collect();
4040+4141+ // spawn a tokio task and put the join handle in the map for later retrieval
4242+ map.insert(task_key.clone(), (cancel.clone().drop_guard(), spawn(task)));
4343+4444+ // spawn a second task to clean up the map in case it doesn't get claimed
4545+ let k = task_key.clone();
4646+ let map = map.clone();
4747+ spawn(async move {
4848+ if cancel
4949+ .run_until_cancelled(sleep(expiration))
5050+ .await
5151+ .is_some()
5252+ // the (sleep) task completed first
5353+ {
5454+ map.remove(&k);
5555+ cancel.cancel();
5656+ metrics::counter!("whoami_task_map_completions", "result" => "expired")
5757+ .increment(1);
5858+ }
5959+ });
6060+6161+ task_key
6262+ }
6363+6464+ pub fn take(&self, key: &str) -> Option<JoinHandle<T>> {
6565+ if let Some((_key, (_guard, handle))) = self.0.map.remove(key) {
6666+ // when the _guard drops, it cancels the token for us
6767+ metrics::counter!("whoami_task_map_completions", "result" => "retrieved").increment(1);
6868+ Some(handle)
6969+ } else {
7070+ metrics::counter!("whoami_task_map_gones").increment(1);
7171+ None
7272+ }
7373+ }
7474+}
7575+7676+struct TaskMap<T> {
7777+ map: Arc<DashMap<String, (DropGuard, JoinHandle<T>)>>,
7878+ expiration: Duration,
7979+}
8080+8181+/// need to manually implement clone because T is allowed to not be clone
8282+impl<T> Clone for TaskMap<T> {
8383+ fn clone(&self) -> Self {
8484+ Self {
8585+ map: self.map.clone(),
8686+ expiration: self.expiration,
8787+ }
8888+ }
8989+}
+93
who-am-i/src/jwt.rs
···11+use elliptic_curve::SecretKey;
22+use jose_jwk::{Class, Jwk, Key, Parameters};
33+use jsonwebtoken::{Algorithm, EncodingKey, Header, encode, errors::Error as JWTError};
44+use pkcs8::DecodePrivateKey;
55+use serde::Serialize;
66+use std::fs;
77+use std::io::Error as IOError;
88+use std::path::Path;
99+use std::string::FromUtf8Error;
1010+use std::time::{Duration, SystemTime, UNIX_EPOCH};
1111+use thiserror::Error;
1212+1313+#[derive(Debug, Error)]
1414+pub enum TokensSetupError {
1515+ #[error("failed to read private key")]
1616+ ReadPrivateKey(IOError),
1717+ #[error("failed to retrieve private key: {0}")]
1818+ PrivateKey(JWTError),
1919+ #[error("failed to read private key")]
2020+ ReadJwks(IOError),
2121+ #[error("failed to retrieve jwks: {0}")]
2222+ DecodeJwks(FromUtf8Error),
2323+}
2424+2525+#[derive(Debug, Error)]
2626+pub enum TokenMintingError {
2727+ #[error("failed to mint: {0}")]
2828+ EncodingError(#[from] JWTError),
2929+}
3030+3131+pub struct Tokens {
3232+ encoding_key: EncodingKey,
3333+ jwk: Jwk,
3434+}
3535+3636+impl Tokens {
3737+ pub fn from_files(priv_f: impl AsRef<Path>) -> Result<Self, TokensSetupError> {
3838+ let private_key_data: Vec<u8> =
3939+ fs::read(priv_f).map_err(TokensSetupError::ReadPrivateKey)?;
4040+ let encoding_key =
4141+ EncodingKey::from_ec_pem(&private_key_data).map_err(TokensSetupError::PrivateKey)?;
4242+4343+ let jwk_key_string = String::from_utf8(private_key_data).unwrap();
4444+ let mut jwk = SecretKey::<p256::NistP256>::from_pkcs8_pem(&jwk_key_string)
4545+ .map(|secret_key| Jwk {
4646+ key: Key::from(&secret_key.into()),
4747+ prm: Parameters {
4848+ kid: Some("who-am-i-00".to_string()),
4949+ cls: Some(Class::Signing),
5050+ ..Default::default()
5151+ },
5252+ })
5353+ .expect("to get private key");
5454+5555+ // CRITICAL: this is what turns the private jwk into a public one: the
5656+ // `d` parameter is the secret for an EC key; a pubkey just has no `d`.
5757+ //
5858+ // this feels baaaadd but hey we're just copying atrium
5959+ // https://github.com/atrium-rs/atrium/blob/b48810f84d83d037ee89b79b8566df9e0f2a6dae/atrium-oauth/src/keyset.rs#L41
6060+ let Key::Ec(ref mut ec) = jwk.key else {
6161+ unimplemented!()
6262+ };
6363+ ec.d = None; // CRITICAL
6464+6565+ Ok(Self { encoding_key, jwk })
6666+ }
6767+6868+ pub fn mint(&self, t: impl ToString) -> Result<String, TokenMintingError> {
6969+ let sub = t.to_string();
7070+7171+ let dt_now = SystemTime::now()
7272+ .duration_since(UNIX_EPOCH)
7373+ .expect("unix epoch is in the past");
7474+ let dt_exp = dt_now + Duration::from_secs(30 * 86_400);
7575+ let exp = dt_exp.as_secs();
7676+7777+ let mut header = Header::new(Algorithm::ES256);
7878+ header.kid = Some("who-am-i-00".to_string());
7979+ // todo: consider setting jku?
8080+8181+ Ok(encode(&header, &Claims { sub, exp }, &self.encoding_key)?)
8282+ }
8383+8484+ pub fn jwk(&self) -> Jwk {
8585+ self.jwk.clone()
8686+ }
8787+}
8888+8989+#[derive(Debug, Serialize)]
9090+struct Claims {
9191+ sub: String,
9292+ exp: u64,
9393+}
+9
who-am-i/src/lib.rs
···11+mod expiring_task_map;
22+mod jwt;
33+mod oauth;
44+mod server;
55+66+pub use expiring_task_map::ExpiringTaskMap;
77+pub use jwt::Tokens;
88+pub use oauth::{OAuth, OAuthCallbackParams, OAuthCompleteError, ResolveHandleError};
99+pub use server::serve;
+120
who-am-i/src/main.rs
···11+use clap::{ArgAction, Parser};
22+use metrics_exporter_prometheus::{BuildError as PromBuildError, PrometheusBuilder};
33+use std::path::PathBuf;
44+use tokio_util::sync::CancellationToken;
55+use who_am_i::{Tokens, serve};
66+77+/// Aggregate links in the at-mosphere
88+#[derive(Parser, Debug, Clone)]
99+#[command(version, about, long_about = None)]
1010+struct Args {
1111+ /// secret key from which the cookie-signing key is derived
1212+ ///
1313+ /// must have at least 512 bits (64 bytes) of randomness
1414+ ///
1515+ /// eg: `cat /dev/urandom | head -c 64 | base64`
1616+ #[arg(long, env)]
1717+ app_secret: String,
1818+ /// path to at-oauth private key (PEM pk8 format)
1919+ ///
2020+ /// generate with:
2121+ ///
2222+ /// openssl ecparam -genkey -noout -name prime256v1 \
2323+ /// | openssl pkcs8 -topk8 -nocrypt -out <PATH-TO-PRIV-KEY>.pem
2424+ #[arg(long, env)]
2525+ oauth_private_key: Option<PathBuf>,
2626+ /// path to jwt private key (PEM pk8 format)
2727+ ///
2828+ /// generate with:
2929+ ///
3030+ /// openssl ecparam -genkey -noout -name prime256v1 \
3131+ /// | openssl pkcs8 -topk8 -nocrypt -out <PATH-TO-PRIV-KEY>.pem
3232+ #[arg(long)]
3333+ jwt_private_key: PathBuf,
3434+ /// this server's client-reachable base url, for oauth redirect + jwt check
3535+ ///
3636+ /// required unless running in localhost mode with --dev
3737+ #[arg(long, env)]
3838+ base_url: Option<String>,
3939+ /// host:port to bind to on startup
4040+ #[arg(long, env, default_value = "127.0.0.1:9997")]
4141+ bind: String,
4242+ /// Enable dev mode
4343+ ///
4444+ /// enables automatic template reloading, uses localhost oauth config, etc
4545+ #[arg(long, action)]
4646+ dev: bool,
4747+ /// Hosts who are allowed to one-click auth
4848+ ///
4949+ /// Pass this argument multiple times to allow multiple hosts
5050+ #[arg(long = "allow_host", short = 'a', action = ArgAction::Append)]
5151+ allowed_hosts: Vec<String>,
5252+}
5353+5454+#[tokio::main(flavor = "current_thread")]
5555+async fn main() {
5656+ let shutdown = CancellationToken::new();
5757+5858+ let ctrlc_shutdown = shutdown.clone();
5959+ ctrlc::set_handler(move || ctrlc_shutdown.cancel()).expect("failed to set ctrl-c handler");
6060+6161+ let args = Args::parse();
6262+6363+ // let bind = args.bind.to_socket_addrs().expect("--bind must be ToSocketAddrs");
6464+6565+ let base = args.base_url.unwrap_or_else(|| {
6666+ if args.dev {
6767+ format!("http://{}", args.bind)
6868+ } else {
6969+ panic!("not in --dev mode so --base-url is required")
7070+ }
7171+ });
7272+7373+ if !args.dev && args.oauth_private_key.is_none() {
7474+ panic!("--at-oauth-key is required except in --dev");
7575+ } else if args.dev && args.oauth_private_key.is_some() {
7676+ eprintln!("warn: --at-oauth-key is ignored in dev (localhost config)");
7777+ }
7878+7979+ if args.allowed_hosts.is_empty() {
8080+ panic!("at least one --allowed-host host must be set");
8181+ }
8282+8383+ println!("starting with allowed_hosts hosts:");
8484+ for host in &args.allowed_hosts {
8585+ println!(" - {host}");
8686+ }
8787+8888+ let tokens = Tokens::from_files(args.jwt_private_key).unwrap();
8989+9090+ if let Err(e) = install_metrics_server() {
9191+ eprintln!("failed to install metrics server: {e:?}");
9292+ };
9393+9494+ serve(
9595+ shutdown,
9696+ args.app_secret,
9797+ args.oauth_private_key,
9898+ tokens,
9999+ base,
100100+ args.bind,
101101+ args.allowed_hosts,
102102+ args.dev,
103103+ )
104104+ .await;
105105+}
106106+107107+fn install_metrics_server() -> Result<(), PromBuildError> {
108108+ println!("installing metrics server...");
109109+ let host = [0, 0, 0, 0];
110110+ let port = 8765;
111111+ PrometheusBuilder::new()
112112+ .set_enable_unit_suffix(false)
113113+ .with_http_listener((host, port))
114114+ .install()?;
115115+ println!(
116116+ "metrics server installed! listening on http://{}.{}.{}.{}:{port}",
117117+ host[0], host[1], host[2], host[3]
118118+ );
119119+ Ok(())
120120+}
<!doctype html>
<meta charset="utf-8" />
<title>great job!</title>

<h1>oauth success!</h1>
<p>this window should automatically close itself (probably a bug if it hasn't)</p>

<script>
// TODO: tie this back to its source...........

// publish the outcome under the "who-am-i" localStorage key, then close
const outcome = {
  result: "success",
  did: {{{json did}}},
  token: {{{json token}}},
  fetch_key: {{{json fetch_key}}},
};
localStorage.setItem("who-am-i", JSON.stringify(outcome));
// TODO: probably also wait for a reply from the frame and show an error if not
window.close();
</script>
1919+</script>