···44use crate::index_html::INDEX_HTML;
55use crate::storage::StoreReader;
66use crate::store_types::{HourTruncatedCursor, WeekTruncatedCursor};
77-use crate::{ConsumerInfo, Cursor, JustCount, Nsid, NsidCount, OrderCollectionsBy, UFOsRecord};
77+use crate::{
88+ ConsumerInfo, Cursor, JustCount, Nsid, NsidCount, NsidPrefix, OrderCollectionsBy, PrefixChild,
99+ UFOsRecord,
1010+};
811use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
912use chrono::{DateTime, Utc};
1013use collections_query::MultiCollectionQuery;
···379382 .into()
380383}
381384385385+#[derive(Debug, Serialize, JsonSchema)]
386386+struct PrefixResponse {
387387+ /// Note that total may not include counts beyond the current page (TODO)
388388+ total: JustCount,
389389+ children: Vec<PrefixChild>,
390390+ /// Include in a follow-up request to get the next page of results, if more are available
391391+ cursor: Option<String>,
392392+}
393393+#[derive(Debug, Deserialize, JsonSchema)]
394394+struct PrefixQuery {
395395+ ///
396396+ /// The final segment of a collection NSID is the `name`, and everything before it is called its `group`. eg:
397397+ ///
398398+ /// - `app.bsky.feed.post` and `app.bsky.feed.like` are both in the _lexicon group_ "`app.bsky.feed`".
399399+ ///
400400+ prefix: String,
401401+ /// The maximum number of collections to return in one request.
402402+ ///
403403+ /// The number of items actually returned may be less than the limit. If paginating, this does **not** indicate that no
404404+ /// more items are available! Check if the `cursor` in the response is `null` to determine the end of items.
405405+ ///
406406+ /// Default: `100` normally, `32` if `order` is specified.
407407+ #[schemars(range(min = 1, max = 200))]
408408+ limit: Option<usize>,
409409+ /// Get a paginated response with more collections.
410410+ ///
411411+ /// Always omit the cursor for the first request. If more collections than the limit are available, the response will contain a non-null `cursor` to include with the next request.
412412+ ///
413413+ /// `cursor` is mutually exclusive with `order`.
414414+ cursor: Option<String>,
415415+ /// Limit collections and statistics to those seen after this UTC datetime
416416+ ///
417417+ /// Default: all-time
418418+ since: Option<DateTime<Utc>>,
419419+ /// Limit collections and statistics to those seen before this UTC datetime
420420+ ///
421421+ /// Default: now
422422+ until: Option<DateTime<Utc>>,
423423+ /// Get a limited, sorted list
424424+ ///
425425+ /// Mutually exclusive with `cursor` -- sorted results cannot be paged.
426426+ order: Option<CollectionsQueryOrder>,
427427+}
428428+/// Prefix-filter collections list
429429+///
430430+/// This endpoint enumerates all collection NSIDs for a lexicon group.
431431+///
432432+/// ## To fetch a full list:
433433+///
434434+/// Omit the `order` parameter and page through the results using the `cursor`. There have been a lot of collections seen in the ATmosphere, well over 400 at time of writing, so you *will* need to make a series of paginaged requests with `cursor`s to get them all.
435435+///
436436+/// The set of collections across multiple requests is not guaranteed to be a perfectly consistent snapshot:
437437+///
438438+/// - all collection NSIDs observed before the first request will be included in the results
439439+///
440440+/// - *new* NSIDs observed in the firehose *while paging* might be included or excluded from the final set
441441+///
442442+/// - no duplicate NSIDs will occur in the combined results
443443+///
444444+/// In practice this is close enough for most use-cases to not worry about.
445445+///
446446+/// ## To fetch the top collection NSIDs:
447447+///
448448+/// Specify the `order` parameter (must be either `records-created` or `did-estimate`). Note that ordered results cannot be paged.
449449+///
450450+/// All statistics are bucketed hourly, so the most granular effecitve time boundary for `since` and `until` is one hour.
451451+#[endpoint {
452452+ method = GET,
453453+ path = "/prefix"
454454+}]
455455+async fn get_prefix(
456456+ ctx: RequestContext<Context>,
457457+ query: Query<PrefixQuery>,
458458+) -> OkCorsResponse<PrefixResponse> {
459459+ let Context { storage, .. } = ctx.context();
460460+ let q = query.into_inner();
461461+462462+ let prefix = NsidPrefix::new(&q.prefix).map_err(|e| {
463463+ HttpError::for_bad_request(
464464+ None,
465465+ format!("{:?} was not a valid NSID prefix: {e:?}", q.prefix),
466466+ )
467467+ })?;
468468+469469+ if q.cursor.is_some() && q.order.is_some() {
470470+ let msg = "`cursor` is mutually exclusive with `order`. ordered results cannot be paged.";
471471+ return Err(HttpError::for_bad_request(None, msg.to_string()));
472472+ }
473473+474474+ let order = if let Some(ref o) = q.order {
475475+ o.into()
476476+ } else {
477477+ let cursor = q
478478+ .cursor
479479+ .and_then(|c| if c.is_empty() { None } else { Some(c) })
480480+ .map(|c| URL_SAFE_NO_PAD.decode(&c))
481481+ .transpose()
482482+ .map_err(|e| HttpError::for_bad_request(None, format!("invalid cursor: {e:?}")))?;
483483+ OrderCollectionsBy::Lexi { cursor }
484484+ };
485485+486486+ let limit = match (q.limit, q.order) {
487487+ (Some(limit), _) => limit,
488488+ (None, Some(_)) => 32,
489489+ (None, None) => 100,
490490+ };
491491+492492+ if !(1..=200).contains(&limit) {
493493+ let msg = format!("limit not in 1..=200: {}", limit);
494494+ return Err(HttpError::for_bad_request(None, msg));
495495+ }
496496+497497+ let since = q.since.map(dt_to_cursor).transpose()?;
498498+ let until = q.until.map(dt_to_cursor).transpose()?;
499499+500500+ let (total, children, next_cursor) = storage
501501+ .get_prefix(prefix, limit, order, since, until)
502502+ .await
503503+ .map_err(|e| HttpError::for_internal_error(format!("oh shoot: {e:?}")))?;
504504+505505+ let next_cursor = next_cursor.map(|c| URL_SAFE_NO_PAD.encode(c));
506506+507507+ OkCors(PrefixResponse {
508508+ total,
509509+ children,
510510+ cursor: next_cursor,
511511+ })
512512+ .into()
513513+}
514514+382515#[derive(Debug, Deserialize, JsonSchema)]
383516struct CollectionTimeseriesQuery {
384517 collection: String, // JsonSchema not implemented for Nsid :(
···471604 api.register(get_records_by_collections).unwrap();
472605 api.register(get_collection_stats).unwrap();
473606 api.register(get_collections).unwrap();
607607+ api.register(get_prefix).unwrap();
474608 api.register(get_timeseries).unwrap();
475609476610 let context = Context {