···2929```
30303131There's also a [`justfile`](https://just.systems/) for Makefile-esque commands to be run inside of the devShell, and you can generally `cargo ...` or `just ...` whatever just fine if you don't want to use Nix and have the prerequisites installed.
3232+3333+3434+3535+### String types
3636+Something of a note to self. Developing a pattern with the string types (may macro-ify at some point). Each needs:
3737+- new(): constructing from a string slice with the right lifetime that borrows
3838+- new_owned(): constructing from an impl AsRef<str>, taking ownership
3939+- new_static(): construction from a &'static str, using SmolStr's/CowStr's new_static() constructor to not allocate
4040+- raw(): same as new() but panics instead of erroring
4141+- unchecked(): same as new() but doesn't validate. marked unsafe.
4242+- as_str(): does what it says on the tin
4343+#### Traits:
4444+- Serialize + Deserialize (custom impl for latter, sometimes for former)
4545+- FromStr
4646+- Display
4747+- Debug, PartialEq, Eq, Hash, Clone
4848+- From<T> for String, CowStr, SmolStr
4949+- From<String>, From<CowStr>, From<SmolStr>, or TryFrom instead when construction is likely enough to fail in practice that a panicking From would make panics common
5050+- AsRef<str>
5151+- Deref with Target = str (usually)
5252+5353+Use `#[repr(transparent)]` as much as possible. Main exception is at-uri type and components.
5454+Use SmolStr directly as the inner type if most or all of the instances will be under 24 bytes; this saves lifetime headaches.
5555+Use CowStr for longer strings, to allow for borrowing from the input.
5656+5757+TODO: impl IntoStatic trait to take ownership of string types
+1-1
crates/jacquard-common/Cargo.toml
···88[dependencies]
99chrono = "0.4.42"
1010cid = { version = "0.11.1", features = ["serde", "std"] }
1111-compact_str = "0.9.0"
1211miette = "7.6.0"
1312multibase = "0.9.1"
1413multihash = "0.19.3"
···1615serde = { version = "1.0.227", features = ["derive"] }
1716serde_html_form = "0.2.8"
1817serde_json = "1.0.145"
1818+smol_str = { version = "0.3.2", features = ["serde"] }
1919thiserror = "2.0.16"
+13-9
crates/jacquard-common/src/cowstr.rs
···11-use compact_str::CompactString;
21use serde::{Deserialize, Serialize};
22+use smol_str::SmolStr;
33use std::{
44 borrow::Cow,
55 fmt,
···1010use crate::IntoStatic;
11111212/// Shamelessly copied from https://github.com/bearcove/merde
1313-/// A copy-on-write string type that uses [`CompactString`] for
1313+/// A copy-on-write immutable string type that uses [`SmolStr`] for
1414/// the "owned" variant.
1515///
1616/// The standard [`Cow`] type cannot be used, since
1717-/// `<str as ToOwned>::Owned` is `String`, and not `CompactString`.
1717+/// `<str as ToOwned>::Owned` is `String`, and not `SmolStr`.
1818#[derive(Clone)]
1919pub enum CowStr<'s> {
2020 Borrowed(&'s str),
2121- Owned(CompactString),
2121+ Owned(SmolStr),
2222}
23232424impl CowStr<'static> {
2626 /// if the string is too long to be stored inline in a `SmolStr`
2727 /// (more than 23 bytes).
2828 pub fn copy_from_str(s: &str) -> Self {
2929- Self::Owned(CompactString::from(s))
2929+ Self::Owned(SmolStr::from(s))
3030+ }
3131+3232+ pub fn new_static(s: &'static str) -> Self {
3333+ Self::Owned(SmolStr::new_static(s))
3034 }
3135}
3236···38423943 #[inline]
4044 pub fn from_utf8_owned(s: Vec<u8>) -> Result<Self, std::str::Utf8Error> {
4141- Ok(Self::Owned(CompactString::from_utf8(s)?))
4545+ Ok(Self::Owned(SmolStr::new(std::str::from_utf8(&s)?)))
4246 }
43474448 #[inline]
4549 pub fn from_utf8_lossy(s: &'s [u8]) -> Self {
4646- Self::Owned(CompactString::from_utf8_lossy(s))
5050+ Self::Owned(String::from_utf8_lossy(&s).into())
4751 }
48524953 /// # Safety
···5155 /// This function is unsafe because it does not check that the bytes are valid UTF-8.
5256 #[inline]
5357 pub unsafe fn from_utf8_unchecked(s: &'s [u8]) -> Self {
5454- unsafe { Self::Owned(CompactString::from_utf8_unchecked(s)) }
5858+ unsafe { Self::Owned(SmolStr::new(std::str::from_utf8_unchecked(s))) }
5559 }
5660}
5761···133137 fn from(s: CowStr<'_>) -> Self {
134138 match s {
135139 CowStr::Borrowed(s) => s.into(),
136136- CowStr::Owned(s) => s.into(),
140140+ CowStr::Owned(s) => String::from(s).into_boxed_str(),
137141 }
138142 }
139143}
+8
crates/jacquard-common/src/types.rs
···11pub mod aturi;
22pub mod blob;
33pub mod cid;
44+pub mod collection;
45pub mod datetime;
56pub mod did;
67pub mod handle;
···89pub mod integer;
910pub mod link;
1011pub mod nsid;
1212+pub mod recordkey;
1113pub mod tid;
1414+1515+/// Trait for a constant string literal type
1616+pub trait Literal: Clone + Copy + PartialEq + Eq + Send + Sync + 'static {
1717+ /// The string literal
1818+ const LITERAL: &'static str;
1919+}
+200-73
crates/jacquard-common/src/types/aturi.rs
···11+use crate::CowStr;
22+use crate::types::ident::AtIdentifier;
33+use crate::types::nsid::Nsid;
44+use crate::types::recordkey::{RecordKey, Rkey};
55+use regex::Regex;
66+use serde::Serializer;
77+use serde::{Deserialize, Deserializer, Serialize, de::Error};
88+use smol_str::ToSmolStr;
19use std::fmt;
210use std::sync::LazyLock;
311use std::{ops::Deref, str::FromStr};
41255-use compact_str::ToCompactString;
66-use serde::{Deserialize, Deserializer, Serialize, de::Error};
1313+/// at:// URI type
1414+///
1515+/// based on the regex here: https://github.com/bluesky-social/atproto/blob/main/packages/syntax/src/aturi_validation.ts
1616+///
1717+/// Doesn't support the query segment, but then neither does the Typescript SDK
1818+///
1919+/// TODO: support IntoStatic on string types. For composites like this where all borrow from (present) input,
2020+/// perhaps use some careful unsafe to launder the lifetimes.
2121+#[derive(Clone, PartialEq, Eq, Hash, Debug)]
2222+pub struct AtUri<'u> {
2323+ uri: CowStr<'u>,
2424+ pub authority: AtIdentifier<'u>,
2525+ pub path: Option<UriPath<'u>>,
2626+ pub fragment: Option<CowStr<'u>>,
2727+}
72888-use crate::{CowStr, IntoStatic};
99-use regex::Regex;
2929+/// at:// URI path component (current subset)
3030+#[derive(Clone, PartialEq, Eq, Hash, Debug)]
3131+pub struct UriPath<'u> {
3232+ pub collection: Nsid<'u>,
3333+ pub rkey: Option<RecordKey<Rkey<'u>>>,
3434+}
10351111-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Hash)]
1212-#[serde(transparent)]
1313-pub struct AtUri<'a>(CowStr<'a>);
3636+pub type UriPathBuf = UriPath<'static>;
14371515-pub static AT_URI_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^$").unwrap());
3838+pub static ATURI_REGEX: LazyLock<Regex> = LazyLock::new(|| { // compiled once, on first use
3939+ Regex::new(r##"^at://(?<authority>[a-zA-Z0-9._:%-]+)(/(?<collection>[a-zA-Z0-9-.]+)(/(?<rkey>[a-zA-Z0-9._~:@!$&%')(*+,;=-]+))?)?(#(?<fragment>/[a-zA-Z0-9._~:@!$&%')(*+,;=\-\[\]/\\]*))?$"##).unwrap() // `-`, `[`, `]` and `\` are escaped in the fragment class so they stay literals instead of forming the range `=-[` and closing the class early
4040+});
16411717-impl<'a> AtUri<'a> {
4242+impl<'u> AtUri<'u> {
1843 /// Fallible constructor, validates, borrows from input
1919- pub fn new(uri: &'a str) -> Result<Self, &'static str> {
2020- if uri.len() > 2048 {
2121- Err("AT_URI too long")
2222- } else if !AT_URI_REGEX.is_match(uri) {
2323- Err("Invalid AT_URI")
4444+ pub fn new(uri: &'u str) -> Result<Self, &'static str> {
4545+ if let Some(parts) = ATURI_REGEX.captures(uri) {
4646+ if let Some(authority) = parts.name("authority") {
4747+ let authority = AtIdentifier::new(authority.as_str())?;
4848+ let path = if let Some(collection) = parts.name("collection") {
4949+ let collection = Nsid::new(collection.as_str())?;
5050+ let rkey = if let Some(rkey) = parts.name("rkey") {
5151+ let rkey = RecordKey::from(Rkey::new(rkey.as_str())?);
5252+ Some(rkey)
5353+ } else {
5454+ None
5555+ };
5656+ Some(UriPath { collection, rkey })
5757+ } else {
5858+ None
5959+ };
6060+ let fragment = parts.name("fragment").map(|fragment| {
6161+ let fragment = CowStr::Borrowed(fragment.as_str());
6262+ fragment
6363+ });
6464+ Ok(AtUri {
6565+ uri: CowStr::Borrowed(uri),
6666+ authority,
6767+ path,
6868+ fragment,
6969+ })
7070+ } else {
7171+ Err("at:// URI missing authority")
7272+ }
2473 } else {
2525- Ok(Self(CowStr::Borrowed(uri)))
7474+ Err("Invalid at:// URI via regex")
2675 }
2776 }
28772929- /// Fallible constructor from an existing CowStr, clones and takes
3030- pub fn from_cowstr(uri: CowStr<'a>) -> Result<AtUri<'a>, &'static str> {
3131- if uri.len() > 2048 {
3232- Err("AT_URI too long")
3333- } else if !AT_URI_REGEX.is_match(&uri) {
3434- Err("Invalid AT_URI")
7878+ pub fn new_owned(uri: impl AsRef<str>) -> Result<Self, &'static str> {
7979+ let uri = uri.as_ref();
8080+ if let Some(parts) = ATURI_REGEX.captures(uri) {
8181+ if let Some(authority) = parts.name("authority") {
8282+ let authority = AtIdentifier::new_owned(authority.as_str())?;
8383+ let path = if let Some(collection) = parts.name("collection") {
8484+ let collection = Nsid::new_owned(collection.as_str())?;
8585+ let rkey = if let Some(rkey) = parts.name("rkey") {
8686+ let rkey = RecordKey::from(Rkey::new_owned(rkey.as_str())?);
8787+ Some(rkey)
8888+ } else {
8989+ None
9090+ };
9191+ Some(UriPath { collection, rkey })
9292+ } else {
9393+ None
9494+ };
9595+ let fragment = parts.name("fragment").map(|fragment| {
9696+ let fragment = CowStr::Owned(fragment.as_str().to_smolstr());
9797+ fragment
9898+ });
9999+ Ok(AtUri {
100100+ uri: CowStr::Owned(uri.to_smolstr()),
101101+ authority,
102102+ path,
103103+ fragment,
104104+ })
105105+ } else {
106106+ Err("at:// URI missing authority")
107107+ }
35108 } else {
3636- Ok(Self(uri.into_static()))
109109+ Err("Invalid at:// URI via regex")
37110 }
38111 }
391124040- /// Infallible constructor for when you *know* the string slice is a valid at:// uri.
4141- /// Will panic on invalid URIs. If you're manually decoding atproto records
4242- /// or API values you know are valid (rather than using serde), this is the one to use.
4343- /// The From<String> and From<CowStr> impls use the same logic.
4444- pub fn raw(uri: &'a str) -> Self {
4545- if uri.len() > 2048 {
4646- panic!("AT_URI too long")
4747- } else if !AT_URI_REGEX.is_match(uri) {
4848- panic!("Invalid AT_URI")
113113+ pub fn new_static(uri: &'static str) -> Result<AtUri<'static>, &'static str> {
114114+ let uri = uri.as_ref();
115115+ if let Some(parts) = ATURI_REGEX.captures(uri) {
116116+ if let Some(authority) = parts.name("authority") {
117117+ let authority = AtIdentifier::new_static(authority.as_str())?;
118118+ let path = if let Some(collection) = parts.name("collection") {
119119+ let collection = Nsid::new_static(collection.as_str())?;
120120+ let rkey = if let Some(rkey) = parts.name("rkey") {
121121+ let rkey = RecordKey::from(Rkey::new_static(rkey.as_str())?);
122122+ Some(rkey)
123123+ } else {
124124+ None
125125+ };
126126+ Some(UriPath { collection, rkey })
127127+ } else {
128128+ None
129129+ };
130130+ let fragment = parts.name("fragment").map(|fragment| {
131131+ let fragment = CowStr::new_static(fragment.as_str());
132132+ fragment
133133+ });
134134+ Ok(AtUri {
135135+ uri: CowStr::new_static(uri),
136136+ authority,
137137+ path,
138138+ fragment,
139139+ })
140140+ } else {
141141+ Err("at:// URI missing authority")
142142+ }
49143 } else {
5050- Self(CowStr::Borrowed(uri))
144144+ Err("Invalid at:// URI via regex")
51145 }
52146 }
531475454- /// Infallible constructor for when you *know* the string is a valid AT_URI.
5555- /// Marked unsafe because responsibility for upholding the invariant is on the developer.
5656- pub unsafe fn unchecked(uri: &'a str) -> Self {
5757- Self(CowStr::Borrowed(uri))
148148+ pub unsafe fn unchecked(uri: &'u str) -> Self {
149149+ if let Some(parts) = ATURI_REGEX.captures(uri) {
150150+ if let Some(authority) = parts.name("authority") {
151151+ let authority = unsafe { AtIdentifier::unchecked(authority.as_str()) };
152152+ let path = if let Some(collection) = parts.name("collection") {
153153+ let collection = unsafe { Nsid::unchecked(collection.as_str()) };
154154+ let rkey = if let Some(rkey) = parts.name("rkey") {
155155+ let rkey = RecordKey::from(unsafe { Rkey::unchecked(rkey.as_str()) });
156156+ Some(rkey)
157157+ } else {
158158+ None
159159+ };
160160+ Some(UriPath { collection, rkey })
161161+ } else {
162162+ None
163163+ };
164164+ let fragment = parts.name("fragment").map(|fragment| {
165165+ let fragment = CowStr::Borrowed(fragment.as_str());
166166+ fragment
167167+ });
168168+ AtUri {
169169+ uri: CowStr::Borrowed(uri),
170170+ authority,
171171+ path,
172172+ fragment,
173173+ }
174174+ } else {
175175+ Self {
176176+ uri: CowStr::Borrowed(uri),
177177+ authority: unsafe { AtIdentifier::unchecked(uri) },
178178+ path: None,
179179+ fragment: None,
180180+ }
181181+ }
182182+ } else {
183183+ Self {
184184+ uri: CowStr::Borrowed(uri),
185185+ authority: unsafe { AtIdentifier::unchecked(uri) },
186186+ path: None,
187187+ fragment: None,
188188+ }
189189+ }
58190 }
5919160192 pub fn as_str(&self) -> &str {
61193 {
6262- let this = &self.0;
194194+ let this = &self.uri;
63195 this
64196 }
65197 }
···69201 type Err = &'static str;
7020271203 /// Has to take ownership due to the lifetime constraints of the FromStr trait.
7272- /// Prefer `AtUri::new()` or `AtUri::raw` if you want to borrow.
204204+ /// Prefer `AtUri::new()` if you want to borrow from the input.
73205 fn from_str(s: &str) -> Result<Self, Self::Err> {
7474- Self::from_cowstr(CowStr::Owned(s.to_compact_string()))
206206+ Self::new_owned(s)
75207 }
76208}
772097878-impl<'ae> Deserialize<'ae> for AtUri<'ae> {
210210+impl<'de> Deserialize<'de> for AtUri<'de> {
79211 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
80212 where
8181- D: Deserializer<'ae>,
213213+ D: Deserializer<'de>,
82214 {
83215 let value = Deserialize::deserialize(deserializer)?;
84216 Self::new(value).map_err(D::Error::custom)
85217 }
86218}
872198888-impl fmt::Display for AtUri<'_> {
8989- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
9090- f.write_str(&self.0)
220220+impl Serialize for AtUri<'_> {
221221+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
222222+ where
223223+ S: Serializer,
224224+ {
225225+ serializer.serialize_str(&self.uri)
91226 }
92227}
932289494-impl<'a> From<AtUri<'a>> for String {
9595- fn from(value: AtUri<'a>) -> Self {
9696- value.0.to_string()
229229+impl fmt::Display for AtUri<'_> {
230230+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
231231+ f.write_str(&self.uri)
97232 }
98233}
99234100100-impl<'s> From<&'s AtUri<'_>> for &'s str {
101101- fn from(value: &'s AtUri<'_>) -> Self {
102102- value.0.as_ref()
235235+impl<'d> From<AtUri<'d>> for String {
236236+ fn from(value: AtUri<'d>) -> Self {
237237+ value.uri.to_string()
103238 }
104239}
105240106106-impl<'a> From<AtUri<'a>> for CowStr<'a> {
107107- fn from(value: AtUri<'a>) -> Self {
108108- value.0
241241+impl<'d> From<AtUri<'d>> for CowStr<'d> {
242242+ fn from(value: AtUri<'d>) -> Self {
243243+ value.uri
109244 }
110245}
111246112112-impl From<String> for AtUri<'static> {
113113- fn from(value: String) -> Self {
114114- if value.len() > 2048 {
115115- panic!("AT_URI too long")
116116- } else if !AT_URI_REGEX.is_match(&value) {
117117- panic!("Invalid AT_URI")
118118- } else {
119119- Self(CowStr::Owned(value.to_compact_string()))
120120- }
247247+impl TryFrom<String> for AtUri<'static> {
248248+ type Error = &'static str;
249249+250250+ fn try_from(value: String) -> Result<Self, Self::Error> {
251251+ Self::new_owned(&value)
121252 }
122253}
123254124124-impl<'a> From<CowStr<'a>> for AtUri<'a> {
125125- fn from(value: CowStr<'a>) -> Self {
126126- if value.len() > 2048 {
127127- panic!("AT_URI too long")
128128- } else if !AT_URI_REGEX.is_match(&value) {
129129- panic!("Invalid AT_URI")
130130- } else {
131131- Self(value)
132132- }
255255+impl<'d> TryFrom<CowStr<'d>> for AtUri<'d> {
256256+ type Error = &'static str;
257257+ /// TODO: rewrite to avoid taking ownership/cloning
258258+ fn try_from(value: CowStr<'d>) -> Result<Self, Self::Error> {
259259+ Self::new_owned(value)
133260 }
134261}
135262136263impl AsRef<str> for AtUri<'_> {
137264 fn as_ref(&self) -> &str {
138138- self.as_str()
265265+ &self.uri.as_ref()
139266 }
140267}
141268···143270 type Target = str;
144271145272 fn deref(&self) -> &Self::Target {
146146- self.as_str()
273273+ self.uri.as_ref()
147274 }
148275}
···11-use std::{convert::Infallible, fmt, marker::PhantomData, ops::Deref, str::FromStr};
22-33-use compact_str::ToCompactString;
44-use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Visitor};
55-11+use crate::{CowStr, IntoStatic};
62pub use cid::Cid as IpldCid;
77-88-use crate::CowStr;
33+use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Visitor};
44+use smol_str::ToSmolStr;
55+use std::{convert::Infallible, fmt, marker::PhantomData, ops::Deref, str::FromStr};
96107/// raw
118pub const ATP_CID_CODEC: u64 = 0x55;
···4744 let s = CowStr::Owned(
4845 cid.to_string_of_base(ATP_CID_BASE)
4946 .unwrap_or_default()
5050- .to_compact_string(),
4747+ .to_smolstr(),
5148 );
5249 Self::Ipld { cid, s }
5350 }
···89869087 /// Has to take ownership due to the lifetime constraints of the FromStr trait.
9188 fn from_str(s: &str) -> Result<Self, Self::Err> {
9292- Ok(Cid::Str(CowStr::Owned(s.to_compact_string())))
8989+ Ok(Cid::Str(CowStr::Owned(s.to_smolstr())))
9090+ }
9191+}
9292+9393+impl IntoStatic for Cid<'_> {
9494+ type Output = Cid<'static>;
9595+9696+ fn into_static(self) -> Self::Output {
9797+ match self {
9898+ Cid::Ipld { cid, s } => Cid::Ipld {
9999+ cid,
100100+ s: s.into_static(),
101101+ },
102102+ Cid::Str(cow_str) => Cid::Str(cow_str.into_static()),
103103+ }
93104 }
94105}
95106···164175165176impl From<String> for Cid<'_> {
166177 fn from(value: String) -> Self {
167167- Cid::Str(CowStr::Owned(value.to_compact_string()))
178178+ Cid::Str(CowStr::Owned(value.to_smolstr()))
168179 }
169180}
170181
+52
crates/jacquard-common/src/types/collection.rs
···11+use core::fmt;
22+33+use serde::{Serialize, de};
44+55+use crate::types::{
66+ aturi::UriPath,
77+ nsid::Nsid,
88+ recordkey::{RecordKey, RecordKeyType, Rkey},
99+};
1010+1111+/// Trait for a collection of records that can be stored in a repository.
1212+///
1313+/// The records all have the same Lexicon schema.
1414+pub trait Collection: fmt::Debug {
1515+ /// The NSID for the Lexicon that defines the schema of records in this collection.
1616+ const NSID: &'static str;
1717+1818+ /// This collection's record type.
1919+ type Record: fmt::Debug + de::DeserializeOwned + Serialize;
2020+2121+ /// Returns the [`Nsid`] for the Lexicon that defines the schema of records in this
2222+ /// collection.
2323+ ///
2424+ /// This is a convenience method that parses [`Self::NSID`].
2525+ ///
2626+ /// # Panics
2727+ ///
2828+ /// Panics if [`Self::NSID`] is not a valid NSID.
2929+ ///
3030+ /// [`Nsid`]: crate::types::nsid::Nsid
3131+ fn nsid() -> crate::types::nsid::Nsid<'static> {
3232+ Nsid::new_static(Self::NSID).expect("should be valid NSID")
3333+ }
3434+3535+ /// Returns the repo path for a record in this collection with the given record key.
3636+ ///
3737+ /// Per the [Repo Data Structure v3] specification:
3838+ /// > Repo paths currently have a fixed structure of `<collection>/<record-key>`. This
3939+ /// > means a valid, normalized [`Nsid`], followed by a `/`, followed by a valid
4040+ /// > [`RecordKey`].
4141+ ///
4242+ /// [Repo Data Structure v3]: https://atproto.com/specs/repository#repo-data-structure-v3
4343+ /// [`Nsid`]: crate::types::nsid::Nsid
4444+ fn repo_path<'u, T: RecordKeyType>(
4545+ rkey: &'u crate::types::recordkey::RecordKey<T>,
4646+ ) -> UriPath<'u> {
4747+ UriPath {
4848+ collection: Self::nsid(),
4949+ rkey: Some(RecordKey::from(Rkey::raw(rkey.as_ref()))),
5050+ }
5151+ }
5252+}