···17 if !s.is_ascii() {
18 return None;
19 }
20- // // A-Za-z0-9 . - _ ~
21- // if !s.chars().all(|c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '-' | '_' | '~')) {
22- // return None
23- // }
2425 // Maximum overall length is 8 kilobytes (which may be shortened in the future)
26 if s.len() > (8 * 2_usize.pow(10)) {
···5960 // The URI scheme is `at`, and an authority part preceded with double slashes is always
61 // required, so the URI always starts at://
62- // -> the spec doesn't explicitly say, but it seems like uri schemes are case-insensitive
63 let (proto, rest) = s.split_at_checked(5)?;
64 if !proto.eq_ignore_ascii_case("at://") {
65 return None;
···225 (
226 "at://bad-example.com/a/../b",
227 Some("at://bad-example.com/b"),
228- "paths have traversals resolved (oof)",
229 ),
230 (
231 "at://bad-example.com/../",
···17 if !s.is_ascii() {
18 return None;
19 }
00002021 // Maximum overall length is 8 kilobytes (which may be shortened in the future)
22 if s.len() > (8 * 2_usize.pow(10)) {
···5556 // The URI scheme is `at`, and an authority part preceded with double slashes is always
57 // required, so the URI always starts at://
58+ // -> the spec doesn't explicitly say, but uri schemes can be case-insensitive?
59 let (proto, rest) = s.split_at_checked(5)?;
60 if !proto.eq_ignore_ascii_case("at://") {
61 return None;
···221 (
222 "at://bad-example.com/a/../b",
223 Some("at://bad-example.com/b"),
224+ "paths have traversals resolved (oof)", // reminder to self: we are normalizing, not sanitizing
225 ),
226 (
227 "at://bad-example.com/../",
/// Parses a DID as used in atproto, returning it unchanged when it is acceptable.
///
/// See <https://atproto.com/specs/did#at-protocol-did-identifier-syntax>.
///
/// This parser is intentionally lax: it should accept all valid DIDs, and may
/// accept some invalid DIDs. In particular, percent-encoding inside the
/// identifier is not validated here (a `%` is accepted anywhere in the
/// identifier), because doing so properly requires knowing the encoding
/// context.
///
/// Returns `None` when any of the checks below fails; otherwise returns
/// `Some` with an owned copy of the input. No normalization is applied.
pub fn parse_did(s: &str) -> Option<String> {
    // DID identifiers do not generally have a maximum length restriction, but in
    // the context of atproto there is an initial hard limit of 2 KB. The spec
    // lists this rule late, but it is the cheapest check so it goes first.
    const MAX_DID_LEN: usize = 2 * 1024;
    if s.len() > MAX_DID_LEN {
        return None;
    }

    // The entire URI is made up of a subset of ASCII, containing letters (A-Z, a-z),
    // digits (0-9), period, underscore, colon, percent sign, or hyphen (._:%-).
    // This also rejects query (`?`) and fragment (`#`) parts, which are not
    // allowed in atproto, since neither character is in the allowed set.
    let allowed = |b: u8| b.is_ascii_alphanumeric() || matches!(b, b'.' | b'_' | b':' | b'%' | b'-');
    if !s.bytes().all(allowed) {
        return None;
    }

    // The URI is case-sensitive and starts with lowercase `did:`.
    let unprefixed = s.strip_prefix("did:")?;

    // The method segment is one or more lowercase letters (a-z), followed by `:`.
    // `all` is vacuously true on an empty string, so emptiness is checked explicitly.
    let (method, identifier) = unprefixed.split_once(':')?;
    if method.is_empty() || !method.bytes().all(|b| b.is_ascii_lowercase()) {
        return None;
    }

    // It's not actually written in the spec, but by example in the spec the
    // identifier cannot be empty.
    if identifier.is_empty() {
        return None;
    }

    // The URI (and thus the remaining identifier) may not end in ':'.
    if identifier.ends_with(':') {
        return None;
    }

    // The only normalization we might want would be percent-decoding, but that
    // is left to the URI decoder.
    Some(s.to_string())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven cases: each entry is (input, expected result, description
    /// printed on failure).
    #[test]
    fn test_did_parse() {
        for (case, expected, detail) in [
            ("", None, "empty str"),
            (" ", None, "whitespace str"),
            ("z", None, "not a did"),
            ("did:plc", None, "no identifier separator colon"),
            ("did:plc:", None, "missing identifier"),
            (
                "did:web:bad-example.com",
                Some("did:web:bad-example.com"),
                "web did",
            ),
            (
                "did:plc:hdhoaan3xa3jiuq4fg4mefid",
                Some("did:plc:hdhoaan3xa3jiuq4fg4mefid"),
                "plc did",
            ),
            (
                "DID:plc:hdhoaan3xa3jiuq4fg4mefid",
                None,
                "'did:' prefix must be lowercase",
            ),
            (
                "did:ok:z",
                Some("did:ok:z"),
                "unknown did methods are allowed",
            ),
            ("did:BAD:z", None, "non-lowercase methods are not allowed"),
            ("did:bad:z$z", None, "invalid chars are not allowed"),
            (
                "did:ok:z:z",
                Some("did:ok:z:z"),
                "colons are allowed in identifier",
            ),
            ("did:bad:z:", None, "colons are not allowed at the end"),
            ("did:bad:z?q=y", None, "queries are not allowed in atproto"),
            ("did:bad:z#a", None, "anchors are not allowed in atproto"),
        ] {
            assert_eq!(parse_did(case), expected.map(str::to_string), "{detail}");
        }
    }

    /// atproto examples that must be accepted.
    #[test]
    fn test_doc_examples_atproto() {
        // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
        for case in ["did:plc:z72i7hdynmk6r22z27h6tvur", "did:web:blueskyweb.xyz"] {
            assert!(parse_did(case).is_some(), "should pass: {case}");
        }
    }

    /// Syntactically valid examples that must be accepted.
    #[test]
    fn test_doc_examples_lexicon() {
        // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
        for case in [
            "did:method:val:two",
            "did:m:v",
            "did:method::::val",
            "did:method:-:_:.",
            "did:key:zQ3shZc2QzApp2oymGvQbzP8eKheVshBHbU4ZYjeXqwSKEn6N",
        ] {
            assert!(parse_did(case).is_some(), "should pass: {case}");
        }
    }

    /// Invalid examples that must be rejected.
    #[test]
    fn test_doc_examples_invalid() {
        // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
        for case in [
            "did:METHOD:val",
            "did:m123:val",
            "DID:method:val",
            "did:method:",
            "did:method:val/two",
            "did:method:val?two",
            "did:method:val#two",
        ] {
            assert!(parse_did(case).is_none(), "should fail: {case}");
        }
    }
}
+12-8
src/lib.rs
···1use fluent_uri::Uri;
23pub mod at_uri;
045#[derive(Debug, PartialEq)]
6pub enum Link {
7 AtUri(String),
8 Uri(String),
9-}
10-11-// normalizing is a bit opinionated but ehhh
12-pub fn parse_at_uri(s: &str) -> Option<String> {
13- at_uri::parse_at_uri(s)
14}
1516// normalizing is a bit opinionated but eh
···19}
2021pub fn parse_any(s: &str) -> Option<Link> {
22- parse_at_uri(s)
23- .map(Link::AtUri)
24- .or_else(|| parse_uri(s).map(Link::Uri))
0025}
2627#[cfg(test)]
···60 "at://did:plc:44ybard66vv44zksje25o7dz/app.bsky.feed.post/3jwdwj2ctlk26".into()
61 )),
62 );
0000063 }
64}
···1use fluent_uri::Uri;
23pub mod at_uri;
4+pub mod did;
/// A successfully parsed link, tagged by which parser accepted it.
///
/// Each variant carries the `String` returned by the corresponding parser.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Link {
    /// Accepted by `at_uri::parse_at_uri`.
    AtUri(String),
    /// Accepted by `parse_uri`.
    Uri(String),
    /// Accepted by `did::parse_did`.
    Did(String),
}
1213// normalizing is a bit opinionated but eh
···16}
1718pub fn parse_any(s: &str) -> Option<Link> {
19+ at_uri::parse_at_uri(s).map(Link::AtUri).or_else(|| {
20+ did::parse_did(s)
21+ .map(Link::Did)
22+ .or_else(|| parse_uri(s).map(Link::Uri))
23+ })
24}
2526#[cfg(test)]
···59 "at://did:plc:44ybard66vv44zksje25o7dz/app.bsky.feed.post/3jwdwj2ctlk26".into()
60 )),
61 );
62+63+ assert_eq!(
64+ parse_any("did:plc:44ybard66vv44zksje25o7dz"),
65+ Some(Link::Did("did:plc:44ybard66vv44zksje25o7dz".into()))
66+ )
67 }
68}