···11+use hickory_resolver::{ResolveError, TokioResolver};
22+use std::collections::{HashSet, VecDeque};
33+use std::path::Path;
44+use std::sync::Arc;
55+/// for now we're gonna just keep doing more cache
66+///
77+/// plc.director x foyer, ttl kept with data, refresh deferred to background on fetch
88+///
99+/// things we need:
1010+///
1111+/// 1. handle -> DID resolution: getRecord must accept a handle for `repo` param
1212+/// 2. DID -> PDS resolution: so we know where to getRecord
1313+/// 3. DID -> handle resolution: for bidirectional handle validation and in case we want to offer this
1414+use std::time::Duration;
1515+use tokio::sync::Mutex;
1616+1717+use crate::error::IdentityError;
1818+use atrium_api::{
1919+ did_doc::DidDocument,
2020+ types::string::{Did, Handle},
2121+};
2222+use atrium_common::resolver::Resolver;
2323+use atrium_identity::{
2424+ did::{CommonDidResolver, CommonDidResolverConfig, DEFAULT_PLC_DIRECTORY_URL},
2525+ handle::{AtprotoHandleResolver, AtprotoHandleResolverConfig, DnsTxtResolver},
2626+};
2727+use atrium_oauth::DefaultHttpClient; // it's probably not worth bringing all of atrium_oauth for this but
2828+use foyer::{DirectFsDeviceOptions, Engine, HybridCache, HybridCacheBuilder};
2929+use serde::{Deserialize, Serialize};
3030+use time::UtcDateTime;
/// once we have something resolved, don't re-resolve until after this period
const MIN_TTL: Duration = Duration::from_secs(4 * 3600); // probably should have a max ttl
/// negative (not-found) results are eligible for refresh much sooner than positive ones
const MIN_NOT_FOUND_TTL: Duration = Duration::from_secs(60);
/// cache key: which direction of identity resolution a cached entry is for
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
enum IdentityKey {
    /// key for handle -> DID lookups
    Handle(Handle),
    /// key for DID -> mini-doc lookups
    Did(Did),
}
/// cached value: the time it was fetched (for ttl checks) plus the resolved data
#[derive(Debug, Serialize, Deserialize)]
struct IdentityVal(UtcDateTime, IdentityData);
/// the payload of a cached resolution
#[derive(Debug, Serialize, Deserialize)]
enum IdentityData {
    /// negative result — cached too, so misses don't hammer upstream (see MIN_NOT_FOUND_TTL)
    NotFound,
    /// a handle resolved to this DID (not yet verified bidirectionally)
    Did(Did),
    /// a DID resolved to this partial mini doc
    Doc(PartialMiniDoc),
}
/// partial representation of a com.bad-example.identity mini atproto doc
///
/// partial because the handle is not verified
#[derive(Debug, Clone, Serialize, Deserialize)]
struct PartialMiniDoc {
    /// an atproto handle (**unverified**)
    ///
    /// the first valid atproto handle from the did doc's aka
    unverified_handle: Handle,
    /// the did's atproto pds url (TODO: type this?)
    ///
    /// note: atrium *does* actually parse it into a URI, it just doesn't return
    /// that for some reason
    pds: String,
    /// for now we're just pulling this straight from the did doc
    ///
    /// would be nice to type and validate it
    ///
    /// this is the publicKeyMultibase from the did doc.
    /// legacy key encoding not supported.
    /// `id`, `type`, and `controller` must be checked, but aren't stored.
    signing_key: String,
}
7575+7676+impl TryFrom<DidDocument> for PartialMiniDoc {
7777+ type Error = String;
7878+ fn try_from(did_doc: DidDocument) -> Result<Self, Self::Error> {
7979+ // must use the first valid handle
8080+ let mut unverified_handle = None;
8181+ let Some(ref doc_akas) = did_doc.also_known_as else {
8282+ return Err("did doc missing `also_known_as`".to_string());
8383+ };
8484+ for aka in doc_akas {
8585+ let Some(maybe_handle) = aka.strip_prefix("at://") else {
8686+ continue;
8787+ };
8888+ let Ok(valid_handle) = Handle::new(maybe_handle.to_string()) else {
8989+ continue;
9090+ };
9191+ unverified_handle = Some(valid_handle);
9292+ break;
9393+ }
9494+ let Some(unverified_handle) = unverified_handle else {
9595+ return Err("no valid atproto handles in `also_known_as`".to_string());
9696+ };
9797+9898+ // atrium seems to get service endpoint getters
9999+ let Some(pds) = did_doc.get_pds_endpoint() else {
100100+ return Err("no valid pds service found".to_string());
101101+ };
102102+103103+ // TODO can't use atrium's get_signing_key() becuase it fails to check type and controller
104104+ // so if we check those and reject it, we might miss a later valid key in the array
105105+ // (todo is to fix atrium)
106106+ // actually: atrium might be flexible for legacy reps. for now we're rejecting legacy rep.
107107+108108+ // must use the first valid signing key
109109+ let mut signing_key = None;
110110+ let Some(verification_methods) = did_doc.verification_method else {
111111+ return Err("no verification methods found".to_string());
112112+ };
113113+ for method in verification_methods {
114114+ if method.id != format!("{}#atproto", did_doc.id) {
115115+ continue;
116116+ }
117117+ if method.r#type != "Multikey" {
118118+ continue;
119119+ }
120120+ if method.controller != did_doc.id {
121121+ continue;
122122+ }
123123+ let Some(key) = method.public_key_multibase else {
124124+ continue;
125125+ };
126126+ signing_key = Some(key);
127127+ break;
128128+ }
129129+ let Some(signing_key) = signing_key else {
130130+ return Err("no valid atproto signing key found in verification methods".to_string());
131131+ };
132132+133133+ Ok(PartialMiniDoc {
134134+ unverified_handle,
135135+ pds,
136136+ signing_key,
137137+ })
138138+ }
139139+}
/// multi-producer *single-consumer* queue structures (wrap in arc-mutex plz)
///
/// the hashset allows testing for presence of items in the queue.
/// this has absolutely no support for multiple queue consumers.
#[derive(Debug, Default)]
struct RefreshQueue {
    /// FIFO of keys awaiting background refresh
    queue: VecDeque<IdentityKey>,
    /// mirror of `queue` contents, for O(1) membership tests
    items: HashSet<IdentityKey>,
}
/// cached atproto identity resolution (handle <-> DID, DID -> pds)
///
/// cheap to clone: every field is a shared handle.
#[derive(Clone)]
pub struct Identity {
    handle_resolver: Arc<AtprotoHandleResolver<HickoryDnsTxtResolver, DefaultHttpClient>>,
    did_resolver: Arc<CommonDidResolver<DefaultHttpClient>>,
    /// hybrid memory/disk cache; values carry their fetch time for ttl checks
    cache: HybridCache<IdentityKey, IdentityVal>,
    /// multi-producer *single consumer* queue
    refresh_queue: Arc<Mutex<RefreshQueue>>,
    /// just a lock to ensure only one refresher (queue consumer) is running (to be improved with a better refresher)
    refresher: Arc<Mutex<()>>,
}
161161+162162+impl Identity {
    /// Build an `Identity`: both resolvers plus a hybrid (memory + disk) cache
    /// rooted at `cache_dir`.
    pub async fn new(cache_dir: impl AsRef<Path>) -> Result<Self, IdentityError> {
        // one http client shared by both resolvers
        let http_client = Arc::new(DefaultHttpClient::default());
        let handle_resolver = AtprotoHandleResolver::new(AtprotoHandleResolverConfig {
            // NOTE(review): unwrap panics if the dns resolver can't be built —
            // consider propagating as an IdentityError instead
            dns_txt_resolver: HickoryDnsTxtResolver::new().unwrap(),
            http_client: http_client.clone(),
        });
        let did_resolver = CommonDidResolver::new(CommonDidResolverConfig {
            plc_directory_url: DEFAULT_PLC_DIRECTORY_URL.to_string(),
            http_client: http_client.clone(),
        });

        // 16 MiB in-memory tier, weighted by shallow size of key + value.
        // NOTE(review): size_of_val measures only the inline struct size — the
        // heap contents of the Strings inside PartialMiniDoc are not counted.
        // confirm that's intended.
        let cache = HybridCacheBuilder::new()
            .with_name("identity")
            .memory(16 * 2_usize.pow(20))
            .with_weighter(|k, v| std::mem::size_of_val(k) + std::mem::size_of_val(v))
            .storage(Engine::large())
            .with_device_options(DirectFsDeviceOptions::new(cache_dir))
            .build()
            .await?;

        Ok(Self {
            handle_resolver: Arc::new(handle_resolver),
            did_resolver: Arc::new(did_resolver),
            cache,
            refresh_queue: Default::default(),
            refresher: Default::default(),
        })
    }
191191+192192+ /// Resolve (and verify!) an atproto handle to a DID
193193+ ///
194194+ /// The result can be stale
195195+ ///
196196+ /// `None` if the handle can't be found or verification fails
197197+ pub async fn handle_to_did(&self, handle: Handle) -> Result<Option<Did>, IdentityError> {
198198+ let Some(did) = self.handle_to_unverified_did(&handle).await? else {
199199+ return Ok(None);
200200+ };
201201+ let Some(doc) = self.did_to_partial_mini_doc(&did).await? else {
202202+ return Ok(None);
203203+ };
204204+ if doc.unverified_handle != handle {
205205+ return Ok(None);
206206+ }
207207+ Ok(Some(did))
208208+ }
209209+210210+ /// Resolve (and verify!) a DID to a pds url
211211+ ///
212212+ /// This *also* incidentally resolves and verifies the handle, which might
213213+ /// make it slower than expected
214214+ pub async fn did_to_pds(&self, did: Did) -> Result<Option<String>, IdentityError> {
215215+ let Some(mini_doc) = self.did_to_partial_mini_doc(&did).await? else {
216216+ return Ok(None);
217217+ };
218218+ Ok(Some(mini_doc.pds))
219219+ }
    /// Resolve (and cache but **not verify**) a handle to a DID
    ///
    /// Serves from the hybrid cache when possible; a cache miss resolves via
    /// the handle resolver and stores the result (including NotFound, as
    /// negative caching). Stale hits are served immediately and queued for
    /// background refresh.
    async fn handle_to_unverified_did(
        &self,
        handle: &Handle,
    ) -> Result<Option<Did>, IdentityError> {
        let key = IdentityKey::Handle(handle.clone());
        let entry = self
            .cache
            .fetch(key.clone(), {
                // clones move into the fetch closure (it only runs on a miss)
                let handle = handle.clone();
                let resolver = self.handle_resolver.clone();
                || async move {
                    match resolver.resolve(&handle).await {
                        Ok(did) => Ok(IdentityVal(UtcDateTime::now(), IdentityData::Did(did))),
                        Err(atrium_identity::Error::NotFound) => {
                            // cache the miss too, so we don't immediately re-resolve
                            Ok(IdentityVal(UtcDateTime::now(), IdentityData::NotFound))
                        }
                        Err(other) => Err(foyer::Error::Other(Box::new(
                            IdentityError::ResolutionFailed(other),
                        ))),
                    }
                }
            })
            .await?;

        // ttl check: serve whatever we have *now*, refresh in the background if stale
        let now = UtcDateTime::now();
        let IdentityVal(last_fetch, data) = entry.value();
        match data {
            IdentityData::Doc(_) => {
                // a handle key must never map to a Doc value (docs belong to did keys)
                log::error!("identity value mixup: got a doc from a handle key (should be a did)");
                Err(IdentityError::IdentityValTypeMixup(handle.to_string()))
            }
            IdentityData::NotFound => {
                // stale negative result: still answer None, but queue a re-check
                if (now - *last_fetch) >= MIN_NOT_FOUND_TTL {
                    self.queue_refresh(key).await;
                }
                Ok(None)
            }
            IdentityData::Did(did) => {
                // stale positive result: serve it, refresh in the background
                if (now - *last_fetch) >= MIN_TTL {
                    self.queue_refresh(key).await;
                }
                Ok(Some(did.clone()))
            }
        }
    }
267267+268268+ /// Fetch (and cache) a partial mini doc from a did
269269+ async fn did_to_partial_mini_doc(
270270+ &self,
271271+ did: &Did,
272272+ ) -> Result<Option<PartialMiniDoc>, IdentityError> {
273273+ let key = IdentityKey::Did(did.clone());
274274+ let entry = self
275275+ .cache
276276+ .fetch(key.clone(), {
277277+ let did = did.clone();
278278+ let resolver = self.did_resolver.clone();
279279+ || async move {
280280+ match resolver.resolve(&did).await {
281281+ Ok(did_doc) => {
282282+ // TODO: fix in atrium: should verify id is did
283283+ if did_doc.id != did.to_string() {
284284+ return Err(foyer::Error::other(Box::new(
285285+ IdentityError::BadDidDoc(
286286+ "did doc's id did not match did".to_string(),
287287+ ),
288288+ )));
289289+ }
290290+ let mini_doc = did_doc.try_into().map_err(|e| {
291291+ foyer::Error::Other(Box::new(IdentityError::BadDidDoc(e)))
292292+ })?;
293293+ Ok(IdentityVal(UtcDateTime::now(), IdentityData::Doc(mini_doc)))
294294+ }
295295+ Err(atrium_identity::Error::NotFound) => {
296296+ Ok(IdentityVal(UtcDateTime::now(), IdentityData::NotFound))
297297+ }
298298+ Err(other) => Err(foyer::Error::Other(Box::new(
299299+ IdentityError::ResolutionFailed(other),
300300+ ))),
301301+ }
302302+ }
303303+ })
304304+ .await?;
305305+306306+ let now = UtcDateTime::now();
307307+ let IdentityVal(last_fetch, data) = entry.value();
308308+ match data {
309309+ IdentityData::Did(_) => {
310310+ log::error!("identity value mixup: got a did from a did key (should be a doc)");
311311+ Err(IdentityError::IdentityValTypeMixup(did.to_string()))
312312+ }
313313+ IdentityData::NotFound => {
314314+ if (now - *last_fetch) >= MIN_NOT_FOUND_TTL {
315315+ self.queue_refresh(key).await;
316316+ }
317317+ Ok(None)
318318+ }
319319+ IdentityData::Doc(mini_did) => {
320320+ if (now - *last_fetch) >= MIN_TTL {
321321+ self.queue_refresh(key).await;
322322+ }
323323+ Ok(Some(mini_did.clone()))
324324+ }
325325+ }
326326+ }
327327+328328+ /// put a refresh task on the queue
329329+ ///
330330+ /// this can be safely called from multiple concurrent tasks
331331+ async fn queue_refresh(&self, key: IdentityKey) {
332332+ // todo: max queue size
333333+ let mut q = self.refresh_queue.lock().await;
334334+ if !q.items.contains(&key) {
335335+ q.items.insert(key.clone());
336336+ q.queue.push_back(key);
337337+ }
338338+ }
339339+340340+ /// find out what's next in the queue. concurrent consumers are not allowed.
341341+ ///
342342+ /// intent is to leave the item in the queue while refreshing, so that a
343343+ /// producer will not re-add it if it's in progress. there's definitely
344344+ /// better ways to do this, but this is ~simple for as far as a single
345345+ /// consumer can take us.
346346+ ///
347347+ /// we could take it from the queue but leave it in the set and remove from
348348+ /// set later, but splitting them apart feels more bug-prone.
349349+ async fn peek_refresh(&self) -> Option<IdentityKey> {
350350+ let q = self.refresh_queue.lock().await;
351351+ q.queue.front().cloned()
352352+ }
353353+354354+ /// call to clear the latest key from the refresh queue. concurrent consumers not allowed.
355355+ ///
356356+ /// must provide the last peeked refresh queue item as a small safety check
357357+ async fn complete_refresh(&self, key: &IdentityKey) -> Result<(), IdentityError> {
358358+ let mut q = self.refresh_queue.lock().await;
359359+360360+ let Some(queue_key) = q.queue.pop_front() else {
361361+ // gone from queue + since we're in an error condition, make sure it's not stuck in items
362362+ // (not toctou because we have the lock)
363363+ // bolder here than below and removing from items because if the queue is *empty*, then we
364364+ // know it hasn't been re-added since losing sync.
365365+ if q.items.remove(key) {
366366+ log::error!("identity refresh: queue de-sync: not in ");
367367+ } else {
368368+ log::warn!(
369369+ "identity refresh: tried to complete with wrong key. are multiple queue consumers running?"
370370+ );
371371+ }
372372+ return Err(IdentityError::RefreshQueueKeyError("no key in queue"));
373373+ };
374374+375375+ if queue_key != *key {
376376+ // extra weird case here, what's the most defensive behaviour?
377377+ // we have two keys: ours should have been first but isn't. this shouldn't happen, so let's
378378+ // just leave items alone for it. risks unbounded growth but we're in a bad place already.
379379+ // the other key is the one we just popped. we didn't want it, so maybe we should put it
380380+ // back, BUT if we somehow ended up with concurrent consumers, we have bigger problems. take
381381+ // responsibility for taking it instead: remove it from items as well, and just drop it.
382382+ //
383383+ // hope that whoever calls us takes this error seriously.
384384+ if q.items.remove(&queue_key) {
385385+ log::warn!(
386386+ "identity refresh: queue de-sync + dropping a bystander key without refreshing it!"
387387+ );
388388+ } else {
389389+ // you thought things couldn't get weirder? (i mean hopefully they can't)
390390+ log::error!("identity refresh: queue de-sync + bystander key also de-sync!?");
391391+ }
392392+ return Err(IdentityError::RefreshQueueKeyError(
393393+ "wrong key at front of queue",
394394+ ));
395395+ }
396396+397397+ if q.items.remove(key) {
398398+ Ok(())
399399+ } else {
400400+ log::error!("identity refresh: queue de-sync: key not in items");
401401+ Err(IdentityError::RefreshQueueKeyError("key not in items"))
402402+ }
403403+ }
404404+405405+ /// run the refresh queue consumer
406406+ pub async fn run_refresher(&self) -> Result<(), IdentityError> {
407407+ let _guard = self
408408+ .refresher
409409+ .try_lock()
410410+ .expect("there to only be one refresher running");
411411+ loop {
412412+ let Some(task_key) = self.peek_refresh().await else {
413413+ tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
414414+ continue;
415415+ };
416416+ match task_key {
417417+ IdentityKey::Handle(ref handle) => {
418418+ log::trace!("refreshing handle {handle:?}");
419419+ match self.handle_resolver.resolve(handle).await {
420420+ Ok(did) => {
421421+ self.cache.insert(
422422+ task_key.clone(),
423423+ IdentityVal(UtcDateTime::now(), IdentityData::Did(did)),
424424+ );
425425+ }
426426+ Err(atrium_identity::Error::NotFound) => {
427427+ self.cache.insert(
428428+ task_key.clone(),
429429+ IdentityVal(UtcDateTime::now(), IdentityData::NotFound),
430430+ );
431431+ }
432432+ Err(err) => {
433433+ log::warn!(
434434+ "failed to refresh handle: {err:?}. leaving stale (should we eventually do something?)"
435435+ );
436436+ }
437437+ }
438438+ self.complete_refresh(&task_key).await?; // failures are bugs, so break loop
439439+ }
440440+ IdentityKey::Did(ref did) => {
441441+ log::trace!("refreshing did doc: {did:?}");
442442+443443+ match self.did_resolver.resolve(did).await {
444444+ Ok(did_doc) => {
445445+ // TODO: fix in atrium: should verify id is did
446446+ if did_doc.id != did.to_string() {
447447+ log::warn!(
448448+ "refreshed did doc failed: wrong did doc id. dropping refresh."
449449+ );
450450+ continue;
451451+ }
452452+ let mini_doc = match did_doc.try_into() {
453453+ Ok(md) => md,
454454+ Err(e) => {
455455+ log::warn!(
456456+ "converting mini doc failed: {e:?}. dropping refresh."
457457+ );
458458+ continue;
459459+ }
460460+ };
461461+ self.cache.insert(
462462+ task_key.clone(),
463463+ IdentityVal(UtcDateTime::now(), IdentityData::Doc(mini_doc)),
464464+ );
465465+ }
466466+ Err(atrium_identity::Error::NotFound) => {
467467+ self.cache.insert(
468468+ task_key.clone(),
469469+ IdentityVal(UtcDateTime::now(), IdentityData::NotFound),
470470+ );
471471+ }
472472+ Err(err) => {
473473+ log::warn!(
474474+ "failed to refresh did doc: {err:?}. leaving stale (should we eventually do something?)"
475475+ );
476476+ }
477477+ }
478478+479479+ self.complete_refresh(&task_key).await?; // failures are bugs, so break loop
480480+ }
481481+ }
482482+ }
483483+ }
484484+}
/// DNS TXT resolver backed by hickory, used for atproto handle resolution
pub struct HickoryDnsTxtResolver(TokioResolver);

impl HickoryDnsTxtResolver {
    /// build the tokio resolver
    /// (assumes builder_tokio picks up system dns configuration — TODO confirm)
    fn new() -> Result<Self, ResolveError> {
        Ok(Self(TokioResolver::builder_tokio()?.build()))
    }
}
493493+494494+impl DnsTxtResolver for HickoryDnsTxtResolver {
495495+ async fn resolve(
496496+ &self,
497497+ query: &str,
498498+ ) -> core::result::Result<Vec<String>, Box<dyn std::error::Error + Send + Sync>> {
499499+ match self.0.txt_lookup(query).await {
500500+ Ok(r) => {
501501+ metrics::counter!("whoami_resolve_dns_txt", "success" => "true").increment(1);
502502+ Ok(r.iter().map(|r| r.to_string()).collect())
503503+ }
504504+ Err(e) => {
505505+ metrics::counter!("whoami_resolve_dns_txt", "success" => "false").increment(1);
506506+ Err(e.into())
507507+ }
508508+ }
509509+ }
510510+}
+3-1
slingshot/src/lib.rs
···11mod consumer;
22pub mod error;
33mod firehose_cache;
44+mod identity;
45mod record;
56mod server;
6778pub use consumer::consume;
89pub use firehose_cache::firehose_cache;
99-pub use record::CachedRecord;
1010+pub use identity::Identity;
1111+pub use record::{CachedRecord, Repo};
1012pub use server::serve;
+34-3
slingshot/src/main.rs
···11// use foyer::HybridCache;
22// use foyer::{Engine, DirectFsDeviceOptions, HybridCacheBuilder};
33use metrics_exporter_prometheus::PrometheusBuilder;
44-use slingshot::{consume, error::MainTaskError, firehose_cache, serve};
44+use slingshot::{Identity, Repo, consume, error::MainTaskError, firehose_cache, serve};
55+use std::path::PathBuf;
5667use clap::Parser;
78use tokio_util::sync::CancellationToken;
···1920 /// reduces CPU at the expense of more ingress bandwidth
2021 #[arg(long, action)]
2122 jetstream_no_zstd: bool,
2323+ /// where to keep disk caches
2424+ #[arg(long)]
2525+ cache_dir: PathBuf,
2226}
23272428#[tokio::main]
···3842 log::info!("metrics listening at http://0.0.0.0:8765");
3943 }
40444545+ std::fs::create_dir_all(&args.cache_dir).map_err(|e| {
4646+ format!(
4747+ "failed to ensure cache parent dir: {e:?} (dir: {:?})",
4848+ args.cache_dir
4949+ )
5050+ })?;
5151+ let cache_dir = args.cache_dir.canonicalize().map_err(|e| {
5252+ format!(
5353+ "failed to canonicalize cache_dir: {e:?} (dir: {:?})",
5454+ args.cache_dir
5555+ )
5656+ })?;
5757+ log::info!("cache dir ready at at {cache_dir:?}.");
5858+4159 log::info!("setting up firehose cache...");
4242- let cache = firehose_cache("./foyer").await?;
6060+ let cache = firehose_cache(cache_dir.join("./firehose")).await?;
4361 log::info!("firehose cache ready.");
44624563 let mut tasks: tokio::task::JoinSet<Result<(), MainTaskError>> = tokio::task::JoinSet::new();
46646565+ log::info!("starting identity service...");
6666+ let identity = Identity::new(cache_dir.join("./identity"))
6767+ .await
6868+ .map_err(|e| format!("identity setup failed: {e:?}"))?;
6969+ log::info!("identity service ready.");
7070+ let identity_refresher = identity.clone();
7171+ tasks.spawn(async move {
7272+ identity_refresher.run_refresher().await?;
7373+ Ok(())
7474+ });
7575+7676+ let repo = Repo::new(identity);
7777+4778 let server_shutdown = shutdown.clone();
4879 let server_cache_handle = cache.clone();
4980 tasks.spawn(async move {
5050- serve(server_cache_handle, server_shutdown).await?;
8181+ serve(server_cache_handle, repo, server_shutdown).await?;
5182 Ok(())
5283 });
5384
+100-1
slingshot/src/record.rs
···11-use jetstream::exports::Cid;
11+//! cached record storage
22+33+use crate::{Identity, error::RecordError};
44+use atrium_api::types::string::{Cid, Did, Handle};
55+use reqwest::Client;
26use serde::{Deserialize, Serialize};
37use serde_json::value::RawValue;
88+use std::str::FromStr;
99+use std::time::Duration;
1010+use url::Url;
411512#[derive(Debug, Serialize, Deserialize)]
613pub struct RawRecord {
···3441 Found(RawRecord),
3542 Deleted,
3643}
4444+4545+//////// upstream record fetching
/// the subset of a com.atproto.repo.getRecord response body we deserialize
#[derive(Deserialize)]
struct RecordResponseObject {
    #[allow(dead_code)] // expect it to be there but we ignore it
    uri: String,
    /// CID for this exact version of the record
    ///
    /// this is optional in the spec and that's potentially TODO for slingshot
    cid: Option<String>,
    /// the record itself as JSON
    value: Box<RawValue>,
}
/// fetches records from their upstream PDS, resolving identity as needed
///
/// cheap to clone: Identity is a handle and reqwest's Client is internally shared
#[derive(Clone)]
pub struct Repo {
    identity: Identity,
    client: Client,
}
impl Repo {
    /// build a `Repo` with a dedicated reqwest client (versioned user-agent,
    /// no proxy, 10s timeout)
    ///
    /// NOTE(review): `.unwrap()` on client construction — configuration is
    /// static so a failure here would be a bug, but consider `expect` with a reason
    pub fn new(identity: Identity) -> Self {
        let client = Client::builder()
            .user_agent(format!(
                "microcosm slingshot v{} (dev: @bad-example.com)",
                env!("CARGO_PKG_VERSION")
            ))
            .no_proxy()
            .timeout(Duration::from_secs(10))
            .build()
            .unwrap();
        Repo { identity, client }
    }

    /// fetch a record from its upstream PDS
    ///
    /// `did_or_handle` is tried as a DID first, then as a handle (resolved and
    /// bidirectionally verified via the identity service). `cid`, if given, is
    /// forwarded upstream to request that exact version.
    pub async fn get_record(
        &self,
        did_or_handle: String,
        collection: String,
        rkey: String,
        cid: Option<String>,
    ) -> Result<CachedRecord, RecordError> {
        let did = match Did::new(did_or_handle.clone()) {
            Ok(did) => did,
            Err(_) => {
                // not a did: treat it as a handle and resolve (+ verify) it
                let handle = Handle::new(did_or_handle).map_err(|_| RecordError::BadRepo)?;
                let Some(did) = self.identity.handle_to_did(handle).await? else {
                    return Err(RecordError::NotFound("could not resolve and verify handle"));
                };
                did
            }
        };
        let Some(pds) = self.identity.did_to_pds(did.clone()).await? else {
            return Err(RecordError::NotFound("could not get pds for DID"));
        };

        // TODO: throttle by host probably, generally guard against outgoing requests

        let mut params = vec![
            ("repo", did.to_string()),
            ("collection", collection),
            ("rkey", rkey),
        ];
        if let Some(cid) = cid {
            params.push(("cid", cid));
        }
        // NOTE(review): set_path replaces any path the pds url already had —
        // assumes the pds url is a bare origin. confirm that's always true.
        let mut url = Url::parse_with_params(&pds, &params)?;
        url.set_path("/xrpc/com.atproto.repo.getRecord");

        let res = self
            .client
            .get(url)
            .send()
            .await
            .map_err(RecordError::SendError)?
            .error_for_status()
            .map_err(RecordError::StatusError)? // TODO atproto error handling (think about handling not found)
            .json::<RecordResponseObject>()
            .await
            .map_err(RecordError::ParseJsonError)?; // todo...

        // the cid is spec-optional upstream, but we require it for our cache
        let Some(cid) = res.cid else {
            return Err(RecordError::MissingUpstreamCid);
        };
        let cid = Cid::from_str(&cid).map_err(|e| RecordError::BadUpstreamCid(e.to_string()))?;

        Ok(CachedRecord::Found(RawRecord {
            cid,
            record: res.value.to_string(),
        }))
    }
}
+38-11
slingshot/src/server.rs
···11-use crate::{CachedRecord, error::ServerError};
11+use crate::{CachedRecord, Repo, error::ServerError};
22use foyer::HybridCache;
33+use std::sync::Arc;
34use tokio_util::sync::CancellationToken;
4556use poem::{Route, Server, listener::TcpListener};
···94959596struct Xrpc {
9697 cache: HybridCache<String, CachedRecord>,
9898+ repo: Arc<Repo>,
9799}
9810099101#[OpenApi]
···112114 ///
113115 /// NOTE: handles should be accepted here but this is still TODO in slingshot
114116 #[oai(example = "example_did")]
115115- repo: Query<String>,
117117+ Query(repo): Query<String>,
116118 /// The NSID of the record collection
117119 #[oai(example = "example_collection")]
118118- collection: Query<String>,
120120+ Query(collection): Query<String>,
119121 /// The Record key
120122 #[oai(example = "example_rkey")]
121121- rkey: Query<String>,
123123+ Query(rkey): Query<String>,
122124 /// Optional: the CID of the version of the record.
123125 ///
124126 /// If not specified, then return the most recent version.
···126128 /// If specified and a newer version of the record exists, returns 404 not
127129 /// found. That is: slingshot only retains the most recent version of a
128130 /// record.
129129- cid: Query<Option<String>>,
131131+ Query(cid): Query<Option<String>>,
130132 ) -> GetRecordResponse {
131133 // TODO: yeah yeah
132132- let at_uri = format!("at://{}/{}/{}", &*repo, &*collection, &*rkey);
134134+ let at_uri = format!("at://{repo}/{collection}/{rkey}");
133135134136 let entry = self
135137 .cache
136136- .fetch(at_uri.clone(), || async move { todo!() })
138138+ .fetch(at_uri.clone(), {
139139+ let cid = cid.clone();
140140+ let repo_api = self.repo.clone();
141141+ || async move {
142142+ repo_api
143143+ .get_record(repo, collection, rkey, cid)
144144+ .await
145145+ .map_err(|e| foyer::Error::Other(Box::new(e)))
146146+ }
147147+ })
137148 .await
138138- .unwrap();
149149+ .unwrap(); // todo
139150140151 // TODO: actual 404
141152···165176 })),
166177 }
167178 }
179179+180180+ // TODO
181181+ // #[oai(path = "/com.atproto.identity.resolveHandle", method = "get")]
182182+ // #[oai(path = "/com.atproto.identity.resolveDid", method = "get")]
183183+ // but these are both not specified to do bidirectional validation, which is what we want to offer
184184+ // com.atproto.identity.resolveIdentity seems right, but requires returning the full did-doc
185185+ // would be nice if there were two queries:
186186+ // did -> verified handle + pds url
187187+ // handle -> verified did + pds url
188188+ //
189189+ // we could do horrible things and implement resolveIdentity with only a stripped-down fake did doc
190190+ // but this will *definitely* cause problems because eg. we're not currently storing pubkeys and
191191+ // those are a little bit important
168192}
169193170194pub async fn serve(
171195 cache: HybridCache<String, CachedRecord>,
196196+ repo: Repo,
172197 _shutdown: CancellationToken,
173198) -> Result<(), ServerError> {
174174- let api_service = OpenApiService::new(Xrpc { cache }, "Slingshot", env!("CARGO_PKG_VERSION"))
175175- .server("http://localhost:3000")
176176- .url_prefix("/xrpc");
199199+ let repo = Arc::new(repo);
200200+ let api_service =
201201+ OpenApiService::new(Xrpc { cache, repo }, "Slingshot", env!("CARGO_PKG_VERSION"))
202202+ .server("http://localhost:3000")
203203+ .url_prefix("/xrpc");
177204178205 let app = Route::new()
179206 .nest("/", api_service.scalar())