···1+use bytes::Bytes;
2+use miette::SourceSpan;
3+use serde::{Deserialize, Deserializer, Serialize, Serializer};
4+use smol_str::{SmolStr, ToSmolStr};
5+use std::{collections::BTreeMap, str::FromStr, sync::Arc};
6+7+use crate::IntoStatic;
8+pub use crate::{
9+ CowStr,
10+ types::{
11+ aturi::AtUri,
12+ cid::Cid,
13+ datetime::Datetime,
14+ did::Did,
15+ handle::Handle,
16+ ident::AtIdentifier,
17+ language::Language,
18+ nsid::Nsid,
19+ recordkey::{RecordKey, Rkey},
20+ tid::Tid,
21+ uri::Uri,
22+ },
23+};
24+25+/// ATProto string value
26+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
27+pub enum AtprotoStr<'s> {
28+ Datetime(Datetime),
29+ Language(Language),
30+ Tid(Tid),
31+ Nsid(Nsid<'s>),
32+ Did(Did<'s>),
33+ Handle(Handle<'s>),
34+ AtIdentifier(AtIdentifier<'s>),
35+ AtUri(AtUri<'s>),
36+ Uri(Uri<'s>),
37+ Cid(Cid<'s>),
38+ RecordKey(RecordKey<Rkey<'s>>),
39+ String(CowStr<'s>),
40+}
41+42+impl<'s> AtprotoStr<'s> {
43+ /// Borrowing constructor for bare atproto string values
44+ /// This is fairly exhaustive and potentially **slow**, prefer using anything
45+ /// that narrows down the search field quicker.
46+ ///
47+ /// Note: We don't construct record keys from bare strings in this because
48+ /// the type is too permissive and too many things would be classified as rkeys.
49+ ///
50+ /// Value object deserialization checks against the field names for common
51+ /// names (uri, cid, did, handle, createdAt, indexedAt, etc.) to improve
52+ /// performance of the happy path.
53+ pub fn new(string: &'s str) -> Self {
54+ // TODO: do some quick prefix checks like in Uri to drop through faster
55+ if let Ok(datetime) = Datetime::from_str(string) {
56+ Self::Datetime(datetime)
57+ } else if let Ok(lang) = Language::new(string) {
58+ Self::Language(lang)
59+ } else if let Ok(tid) = Tid::from_str(string) {
60+ Self::Tid(tid)
61+ } else if let Ok(did) = Did::new(string) {
62+ Self::Did(did)
63+ } else if let Ok(handle) = Handle::new(string) {
64+ Self::Handle(handle)
65+ } else if let Ok(atid) = AtIdentifier::new(string) {
66+ Self::AtIdentifier(atid)
67+ } else if let Ok(nsid) = Nsid::new(string) {
68+ Self::Nsid(nsid)
69+ } else if let Ok(aturi) = AtUri::new(string) {
70+ Self::AtUri(aturi)
71+ } else if let Ok(uri) = Uri::new(string) {
72+ Self::Uri(uri)
73+ } else if let Ok(cid) = Cid::new(string.as_bytes()) {
74+ Self::Cid(cid)
75+ } else {
76+ // We don't construct record keys from bare strings because the type is too permissive
77+ Self::String(CowStr::Borrowed(string))
78+ }
79+ }
80+81+ pub fn as_str(&self) -> &str {
82+ match self {
83+ Self::Datetime(datetime) => datetime.as_str(),
84+ Self::Language(lang) => lang.as_str(),
85+ Self::Handle(handle) => handle.as_str(),
86+ Self::AtIdentifier(atid) => atid.as_str(),
87+ Self::Nsid(nsid) => nsid.as_str(),
88+ Self::AtUri(aturi) => aturi.as_str(),
89+ Self::Uri(uri) => uri.as_str(),
90+ Self::Cid(cid) => cid.as_str(),
91+ Self::Tid(tid) => tid.as_str(),
92+ Self::Did(did) => did.as_str(),
93+ Self::RecordKey(rkey) => rkey.as_ref(),
94+ Self::String(string) => string.as_ref(),
95+ }
96+ }
97+}
98+99+impl AtprotoStr<'static> {
100+ /// Owned constructor for bare atproto string values
101+ /// This is fairly exhaustive and potentially **slow**, prefer using anything
102+ /// that narrows down the search field quicker.
103+ ///
104+ /// Note: We don't construct record keys from bare strings in this because
105+ /// the type is too permissive and too many things would be classified as rkeys.
106+ ///
107+ /// Value object deserialization checks against the field names for common
108+ /// names (uri, cid, did, handle, createdAt, indexedAt, etc.) to improve
109+ /// performance of the happy path.
110+ pub fn new_owned(string: impl AsRef<str>) -> AtprotoStr<'static> {
111+ let string = string.as_ref();
112+ // TODO: do some quick prefix checks like in Uri to drop through faster
113+ if let Ok(datetime) = Datetime::from_str(string) {
114+ Self::Datetime(datetime)
115+ } else if let Ok(lang) = Language::new(string) {
116+ Self::Language(lang)
117+ } else if let Ok(tid) = Tid::from_str(string) {
118+ Self::Tid(tid)
119+ } else if let Ok(did) = Did::new_owned(string) {
120+ Self::Did(did)
121+ } else if let Ok(handle) = Handle::new_owned(string) {
122+ Self::Handle(handle)
123+ } else if let Ok(atid) = AtIdentifier::new_owned(string) {
124+ Self::AtIdentifier(atid)
125+ } else if let Ok(nsid) = Nsid::new_owned(string) {
126+ Self::Nsid(nsid)
127+ } else if let Ok(aturi) = AtUri::new_owned(string) {
128+ Self::AtUri(aturi)
129+ } else if let Ok(uri) = Uri::new_owned(string) {
130+ Self::Uri(uri)
131+ } else if let Ok(cid) = Cid::new_owned(string.as_bytes()) {
132+ Self::Cid(cid)
133+ } else {
134+ // We don't construct record keys from bare strings because the type is too permissive
135+ Self::String(CowStr::Owned(string.to_smolstr()))
136+ }
137+ }
138+}
139+140+impl<'s> AsRef<str> for AtprotoStr<'s> {
141+ fn as_ref(&self) -> &str {
142+ match self {
143+ Self::Datetime(datetime) => datetime.as_str(),
144+ Self::Language(lang) => lang.as_ref(),
145+ Self::Tid(tid) => tid.as_ref(),
146+ Self::Did(did) => did.as_ref(),
147+ Self::Handle(handle) => handle.as_ref(),
148+ Self::AtIdentifier(atid) => atid.as_ref(),
149+ Self::Nsid(nsid) => nsid.as_ref(),
150+ Self::AtUri(aturi) => aturi.as_ref(),
151+ Self::Uri(uri) => uri.as_str(),
152+ Self::Cid(cid) => cid.as_ref(),
153+ Self::RecordKey(rkey) => rkey.as_ref(),
154+ Self::String(string) => string.as_ref(),
155+ }
156+ }
157+}
158+159+impl Serialize for AtprotoStr<'_> {
160+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
161+ where
162+ S: Serializer,
163+ {
164+ serializer.serialize_str(self.as_ref())
165+ }
166+}
167+168+impl<'de> Deserialize<'de> for AtprotoStr<'de> {
169+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
170+ where
171+ D: Deserializer<'de>,
172+ {
173+ let value = Deserialize::deserialize(deserializer)?;
174+ Ok(Self::new(value))
175+ }
176+}
177+178+impl IntoStatic for AtprotoStr<'_> {
179+ type Output = AtprotoStr<'static>;
180+181+ fn into_static(self) -> Self::Output {
182+ match self {
183+ AtprotoStr::Datetime(datetime) => AtprotoStr::Datetime(datetime),
184+ AtprotoStr::Language(language) => AtprotoStr::Language(language),
185+ AtprotoStr::Tid(tid) => AtprotoStr::Tid(tid),
186+ AtprotoStr::Nsid(nsid) => AtprotoStr::Nsid(nsid.into_static()),
187+ AtprotoStr::Did(did) => AtprotoStr::Did(did.into_static()),
188+ AtprotoStr::Handle(handle) => AtprotoStr::Handle(handle.into_static()),
189+ AtprotoStr::AtIdentifier(ident) => AtprotoStr::AtIdentifier(ident.into_static()),
190+ AtprotoStr::AtUri(at_uri) => {
191+ AtprotoStr::AtUri(AtUri::new_owned(at_uri.as_str()).unwrap())
192+ }
193+ AtprotoStr::Uri(uri) => AtprotoStr::Uri(uri.into_static()),
194+ AtprotoStr::Cid(cid) => AtprotoStr::Cid(cid.into_static()),
195+ AtprotoStr::RecordKey(record_key) => AtprotoStr::RecordKey(record_key.into_static()),
196+ AtprotoStr::String(cow_str) => AtprotoStr::String(cow_str.into_static()),
197+ }
198+ }
199+}
200+201+/// Parsing Error for atproto string types which don't have third-party specs
202+/// (e.g. datetime, CIDs, language tags).
203+///
204+/// `spec` refers to the final url path segment on atproto.com/specs,
205+/// detailing the specification for the type
206+/// `source` is the source string, or part of it
207+/// `kind` is the type of parsing error: `[StrParseKind]`
208+#[derive(Debug, thiserror::Error, miette::Diagnostic)]
209+#[error("error in `{source}`: {kind}")]
210+#[diagnostic(
211+ url("https://atproto.com/specs/{spec}"),
212+ help("if something doesn't match the spec, contact the crate author")
213+)]
214+pub struct AtStrError {
215+ pub spec: SmolStr,
216+ #[source_code]
217+ source: String,
218+ #[source]
219+ #[diagnostic_source]
220+ pub kind: StrParseKind,
221+}
222+223+impl AtStrError {
224+ pub fn new(spec: &'static str, source: String, kind: StrParseKind) -> Self {
225+ Self {
226+ spec: SmolStr::new_static(spec),
227+ source,
228+ kind,
229+ }
230+ }
231+232+ pub fn wrap(spec: &'static str, source: String, error: AtStrError) -> Self {
233+ if let Some(span) = match &error.kind {
234+ StrParseKind::Disallowed { problem, .. } => problem,
235+ StrParseKind::MissingComponent { span, .. } => span,
236+ _ => &None,
237+ } {
238+ Self {
239+ spec: SmolStr::new_static(spec),
240+ source,
241+ kind: StrParseKind::Wrap {
242+ span: Some(*span),
243+ err: Arc::new(error),
244+ },
245+ }
246+ } else {
247+ let span = source
248+ .find(&error.source)
249+ .map(|start| (start, error.source.len()).into());
250+ Self {
251+ spec: SmolStr::new_static(spec),
252+ source,
253+ kind: StrParseKind::Wrap {
254+ span,
255+ err: Arc::new(error),
256+ },
257+ }
258+ }
259+ }
260+261+ /// something on the provided disallowed list was found in the source string
262+ /// does a substring search for any of the offending strings to get the span
263+ pub fn disallowed(spec: &'static str, source: &str, disallowed: &[&str]) -> Self {
264+ for item in disallowed {
265+ if let Some(loc) = source.find(item) {
266+ return Self {
267+ spec: SmolStr::new_static(spec),
268+ source: source.to_string(),
269+ kind: StrParseKind::Disallowed {
270+ problem: Some(SourceSpan::new(loc.into(), item.len())),
271+ message: smol_str::format_smolstr!("`{item}`"),
272+ },
273+ };
274+ }
275+ }
276+ Self {
277+ spec: SmolStr::new_static(spec),
278+ source: source.to_string(),
279+ kind: StrParseKind::Disallowed {
280+ problem: None,
281+ message: SmolStr::new_static(""),
282+ },
283+ }
284+ }
285+286+ pub fn too_long(spec: &'static str, source: &str, max: usize, actual: usize) -> Self {
287+ Self {
288+ spec: SmolStr::new_static(spec),
289+ source: source.to_string(),
290+ kind: StrParseKind::TooLong { max, actual },
291+ }
292+ }
293+294+ pub fn too_short(spec: &'static str, source: &str, min: usize, actual: usize) -> Self {
295+ Self {
296+ spec: SmolStr::new_static(spec),
297+ source: source.to_string(),
298+ kind: StrParseKind::TooShort { min, actual },
299+ }
300+ }
301+302+ /// missing component, with what was expected to be found
303+ pub fn missing(spec: &'static str, source: &str, expected: &str) -> Self {
304+ if let Some(loc) = source.find(expected) {
305+ return Self {
306+ spec: SmolStr::new_static(spec),
307+ source: source.to_string(),
308+ kind: StrParseKind::MissingComponent {
309+ span: Some(SourceSpan::new(loc.into(), expected.len())),
310+ message: smol_str::format_smolstr!("`{expected}` found incorrectly here"),
311+ },
312+ };
313+ }
314+ Self {
315+ spec: SmolStr::new_static(spec),
316+ source: source.to_string(),
317+ kind: StrParseKind::MissingComponent {
318+ span: None,
319+ message: SmolStr::new(expected),
320+ },
321+ }
322+ }
323+324+ /// missing component, with the span where it was expected to be founf
325+ pub fn missing_from(
326+ spec: &'static str,
327+ source: &str,
328+ expected: &str,
329+ span: (usize, usize),
330+ ) -> Self {
331+ Self {
332+ spec: SmolStr::new_static(spec),
333+ source: source.to_string(),
334+ kind: StrParseKind::MissingComponent {
335+ span: Some(span.into()),
336+ message: SmolStr::new(expected),
337+ },
338+ }
339+ }
340+341+ pub fn regex(spec: &'static str, source: &str, message: SmolStr) -> Self {
342+ Self {
343+ spec: SmolStr::new_static(spec),
344+ source: source.to_string(),
345+ kind: StrParseKind::RegexFail {
346+ span: None,
347+ message,
348+ },
349+ }
350+ }
351+}
352+353+#[derive(Debug, thiserror::Error, miette::Diagnostic)]
354+pub enum StrParseKind {
355+ #[error("regex failure - {message}")]
356+ #[diagnostic(code(jacquard::types::string::regex_fail))]
357+ RegexFail {
358+ #[label]
359+ span: Option<SourceSpan>,
360+ #[help]
361+ message: SmolStr,
362+ },
363+ #[error("string too long (allowed: {max}, actual: {actual})")]
364+ #[diagnostic(code(jacquard::types::string::wrong_length))]
365+ TooLong { max: usize, actual: usize },
366+367+ #[error("string too short (allowed: {min}, actual: {actual})")]
368+ #[diagnostic(code(jacquard::types::string::wrong_length))]
369+ TooShort { min: usize, actual: usize },
370+ #[error("disallowed - {message}")]
371+ #[diagnostic(code(jacquard::types::string::disallowed))]
372+ Disallowed {
373+ #[label]
374+ problem: Option<SourceSpan>,
375+ #[help]
376+ message: SmolStr,
377+ },
378+ #[error("missing - {message}")]
379+ #[diagnostic(code(jacquard::atstr::missing_component))]
380+ MissingComponent {
381+ #[label]
382+ span: Option<SourceSpan>,
383+ #[help]
384+ message: SmolStr,
385+ },
386+ #[error("{err:?}")]
387+ #[diagnostic(code(jacquard::atstr::inner))]
388+ Wrap {
389+ #[label]
390+ span: Option<SourceSpan>,
391+ #[source]
392+ err: Arc<AtStrError>,
393+ },
394+}
+20-4
crates/jacquard-common/src/types/tid.rs
···67use crate::CowStr;
8use crate::types::integer::LimitedU32;
09use regex::Regex;
1011fn s32_encode(mut i: u64) -> SmolStr {
···4041impl Tid {
42 /// Parses a `TID` from the given string.
43- pub fn new(tid: impl AsRef<str>) -> Result<Self, &'static str> {
44 let tid = tid.as_ref();
45 if tid.len() != 13 {
46- Err("TID must be 13 characters")
0000000000047 } else if !TID_REGEX.is_match(&tid.as_ref()) {
48- Err("Invalid TID")
000049 } else {
50 Ok(Self(SmolStr::new_inline(&tid)))
51 }
···117}
118119impl FromStr for Tid {
120- type Err = &'static str;
121122 /// Has to take ownership due to the lifetime constraints of the FromStr trait.
123 /// Prefer `Did::new()` or `Did::raw` if you want to borrow.
···67use crate::CowStr;
8use crate::types::integer::LimitedU32;
9+use crate::types::string::{AtStrError, StrParseKind};
10use regex::Regex;
1112fn s32_encode(mut i: u64) -> SmolStr {
···4142impl Tid {
43 /// Parses a `TID` from the given string.
44+ pub fn new(tid: impl AsRef<str>) -> Result<Self, AtStrError> {
45 let tid = tid.as_ref();
46 if tid.len() != 13 {
47+ let kind = if tid.len() > 13 {
48+ StrParseKind::TooLong {
49+ max: 13,
50+ actual: tid.len(),
51+ }
52+ } else {
53+ StrParseKind::TooShort {
54+ min: 13,
55+ actual: tid.len(),
56+ }
57+ };
58+ Err(AtStrError::new("tid", tid.to_string(), kind))
59 } else if !TID_REGEX.is_match(&tid.as_ref()) {
60+ let kind = StrParseKind::RegexFail {
61+ span: None,
62+ message: SmolStr::new_static("didn't match schema"),
63+ };
64+ Err(AtStrError::new("tid", tid.to_string(), kind))
65 } else {
66 Ok(Self(SmolStr::new_inline(&tid)))
67 }
···133}
134135impl FromStr for Tid {
136+ type Err = AtStrError;
137138 /// Has to take ownership due to the lifetime constraints of the FromStr trait.
139 /// Prefer `Did::new()` or `Did::raw` if you want to borrow.