this repo has no description
1use serde_json::Value;
2use thiserror::Error;
3#[derive(Debug, Error)]
4pub enum ValidationError {
5 #[error("No $type provided")]
6 MissingType,
7 #[error("Invalid $type: expected {expected}, got {actual}")]
8 TypeMismatch { expected: String, actual: String },
9 #[error("Missing required field: {0}")]
10 MissingField(String),
11 #[error("Invalid field value at {path}: {message}")]
12 InvalidField { path: String, message: String },
13 #[error("Invalid datetime format at {path}: must be RFC-3339/ISO-8601")]
14 InvalidDatetime { path: String },
15 #[error("Invalid record: {0}")]
16 InvalidRecord(String),
17 #[error("Unknown record type: {0}")]
18 UnknownType(String),
19}
/// Outcome category attached to a record after validation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValidationStatus {
    /// The record type is known and every check passed.
    Valid,
    /// The record type has no built-in validator, so nothing type-specific was checked.
    Unknown,
    /// The record is known to be invalid.
    Invalid,
}
/// Validates JSON records against the built-in rules for known record types.
///
/// Derives `Debug` and `Clone` so the validator can be logged and shared
/// between call sites like any other small configuration value.
#[derive(Debug, Clone)]
pub struct RecordValidator {
    /// When `true`, record types without a built-in validator are rejected
    /// instead of being reported as `ValidationStatus::Unknown`.
    require_lexicon: bool,
}
29impl Default for RecordValidator {
30 fn default() -> Self {
31 Self::new()
32 }
33}
34impl RecordValidator {
35 pub fn new() -> Self {
36 Self {
37 require_lexicon: false,
38 }
39 }
40 pub fn require_lexicon(mut self, require: bool) -> Self {
41 self.require_lexicon = require;
42 self
43 }
44 pub fn validate(
45 &self,
46 record: &Value,
47 collection: &str,
48 ) -> Result<ValidationStatus, ValidationError> {
49 let obj = record
50 .as_object()
51 .ok_or_else(|| ValidationError::InvalidRecord("Record must be an object".to_string()))?;
52 let record_type = obj
53 .get("$type")
54 .and_then(|v| v.as_str())
55 .ok_or(ValidationError::MissingType)?;
56 if record_type != collection {
57 return Err(ValidationError::TypeMismatch {
58 expected: collection.to_string(),
59 actual: record_type.to_string(),
60 });
61 }
62 if let Some(created_at) = obj.get("createdAt").and_then(|v| v.as_str()) {
63 validate_datetime(created_at, "createdAt")?;
64 }
65 match record_type {
66 "app.bsky.feed.post" => self.validate_post(obj)?,
67 "app.bsky.actor.profile" => self.validate_profile(obj)?,
68 "app.bsky.feed.like" => self.validate_like(obj)?,
69 "app.bsky.feed.repost" => self.validate_repost(obj)?,
70 "app.bsky.graph.follow" => self.validate_follow(obj)?,
71 "app.bsky.graph.block" => self.validate_block(obj)?,
72 "app.bsky.graph.list" => self.validate_list(obj)?,
73 "app.bsky.graph.listitem" => self.validate_list_item(obj)?,
74 "app.bsky.feed.generator" => self.validate_feed_generator(obj)?,
75 "app.bsky.feed.threadgate" => self.validate_threadgate(obj)?,
76 "app.bsky.labeler.service" => self.validate_labeler_service(obj)?,
77 _ => {
78 if self.require_lexicon {
79 return Err(ValidationError::UnknownType(record_type.to_string()));
80 }
81 return Ok(ValidationStatus::Unknown);
82 }
83 }
84 Ok(ValidationStatus::Valid)
85 }
86 fn validate_post(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
87 if !obj.contains_key("text") {
88 return Err(ValidationError::MissingField("text".to_string()));
89 }
90 if !obj.contains_key("createdAt") {
91 return Err(ValidationError::MissingField("createdAt".to_string()));
92 }
93 if let Some(text) = obj.get("text").and_then(|v| v.as_str()) {
94 let grapheme_count = text.chars().count();
95 if grapheme_count > 3000 {
96 return Err(ValidationError::InvalidField {
97 path: "text".to_string(),
98 message: format!("Text exceeds maximum length of 3000 characters (got {})", grapheme_count),
99 });
100 }
101 }
102 if let Some(langs) = obj.get("langs").and_then(|v| v.as_array()) {
103 if langs.len() > 3 {
104 return Err(ValidationError::InvalidField {
105 path: "langs".to_string(),
106 message: "Maximum 3 languages allowed".to_string(),
107 });
108 }
109 }
110 if let Some(tags) = obj.get("tags").and_then(|v| v.as_array()) {
111 if tags.len() > 8 {
112 return Err(ValidationError::InvalidField {
113 path: "tags".to_string(),
114 message: "Maximum 8 tags allowed".to_string(),
115 });
116 }
117 for (i, tag) in tags.iter().enumerate() {
118 if let Some(tag_str) = tag.as_str() {
119 if tag_str.len() > 640 {
120 return Err(ValidationError::InvalidField {
121 path: format!("tags/{}", i),
122 message: "Tag exceeds maximum length of 640 bytes".to_string(),
123 });
124 }
125 }
126 }
127 }
128 Ok(())
129 }
130 fn validate_profile(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
131 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) {
132 let grapheme_count = display_name.chars().count();
133 if grapheme_count > 640 {
134 return Err(ValidationError::InvalidField {
135 path: "displayName".to_string(),
136 message: format!("Display name exceeds maximum length of 640 characters (got {})", grapheme_count),
137 });
138 }
139 }
140 if let Some(description) = obj.get("description").and_then(|v| v.as_str()) {
141 let grapheme_count = description.chars().count();
142 if grapheme_count > 2560 {
143 return Err(ValidationError::InvalidField {
144 path: "description".to_string(),
145 message: format!("Description exceeds maximum length of 2560 characters (got {})", grapheme_count),
146 });
147 }
148 }
149 Ok(())
150 }
151 fn validate_like(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
152 if !obj.contains_key("subject") {
153 return Err(ValidationError::MissingField("subject".to_string()));
154 }
155 if !obj.contains_key("createdAt") {
156 return Err(ValidationError::MissingField("createdAt".to_string()));
157 }
158 self.validate_strong_ref(obj.get("subject"), "subject")?;
159 Ok(())
160 }
161 fn validate_repost(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
162 if !obj.contains_key("subject") {
163 return Err(ValidationError::MissingField("subject".to_string()));
164 }
165 if !obj.contains_key("createdAt") {
166 return Err(ValidationError::MissingField("createdAt".to_string()));
167 }
168 self.validate_strong_ref(obj.get("subject"), "subject")?;
169 Ok(())
170 }
171 fn validate_follow(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
172 if !obj.contains_key("subject") {
173 return Err(ValidationError::MissingField("subject".to_string()));
174 }
175 if !obj.contains_key("createdAt") {
176 return Err(ValidationError::MissingField("createdAt".to_string()));
177 }
178 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str()) {
179 if !subject.starts_with("did:") {
180 return Err(ValidationError::InvalidField {
181 path: "subject".to_string(),
182 message: "Subject must be a DID".to_string(),
183 });
184 }
185 }
186 Ok(())
187 }
188 fn validate_block(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
189 if !obj.contains_key("subject") {
190 return Err(ValidationError::MissingField("subject".to_string()));
191 }
192 if !obj.contains_key("createdAt") {
193 return Err(ValidationError::MissingField("createdAt".to_string()));
194 }
195 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str()) {
196 if !subject.starts_with("did:") {
197 return Err(ValidationError::InvalidField {
198 path: "subject".to_string(),
199 message: "Subject must be a DID".to_string(),
200 });
201 }
202 }
203 Ok(())
204 }
205 fn validate_list(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
206 if !obj.contains_key("name") {
207 return Err(ValidationError::MissingField("name".to_string()));
208 }
209 if !obj.contains_key("purpose") {
210 return Err(ValidationError::MissingField("purpose".to_string()));
211 }
212 if !obj.contains_key("createdAt") {
213 return Err(ValidationError::MissingField("createdAt".to_string()));
214 }
215 if let Some(name) = obj.get("name").and_then(|v| v.as_str()) {
216 if name.is_empty() || name.len() > 64 {
217 return Err(ValidationError::InvalidField {
218 path: "name".to_string(),
219 message: "Name must be 1-64 characters".to_string(),
220 });
221 }
222 }
223 Ok(())
224 }
225 fn validate_list_item(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
226 if !obj.contains_key("subject") {
227 return Err(ValidationError::MissingField("subject".to_string()));
228 }
229 if !obj.contains_key("list") {
230 return Err(ValidationError::MissingField("list".to_string()));
231 }
232 if !obj.contains_key("createdAt") {
233 return Err(ValidationError::MissingField("createdAt".to_string()));
234 }
235 Ok(())
236 }
237 fn validate_feed_generator(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
238 if !obj.contains_key("did") {
239 return Err(ValidationError::MissingField("did".to_string()));
240 }
241 if !obj.contains_key("displayName") {
242 return Err(ValidationError::MissingField("displayName".to_string()));
243 }
244 if !obj.contains_key("createdAt") {
245 return Err(ValidationError::MissingField("createdAt".to_string()));
246 }
247 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) {
248 if display_name.is_empty() || display_name.len() > 240 {
249 return Err(ValidationError::InvalidField {
250 path: "displayName".to_string(),
251 message: "displayName must be 1-240 characters".to_string(),
252 });
253 }
254 }
255 Ok(())
256 }
257 fn validate_threadgate(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
258 if !obj.contains_key("post") {
259 return Err(ValidationError::MissingField("post".to_string()));
260 }
261 if !obj.contains_key("createdAt") {
262 return Err(ValidationError::MissingField("createdAt".to_string()));
263 }
264 Ok(())
265 }
266 fn validate_labeler_service(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
267 if !obj.contains_key("policies") {
268 return Err(ValidationError::MissingField("policies".to_string()));
269 }
270 if !obj.contains_key("createdAt") {
271 return Err(ValidationError::MissingField("createdAt".to_string()));
272 }
273 Ok(())
274 }
275 fn validate_strong_ref(&self, value: Option<&Value>, path: &str) -> Result<(), ValidationError> {
276 let obj = value
277 .and_then(|v| v.as_object())
278 .ok_or_else(|| ValidationError::InvalidField {
279 path: path.to_string(),
280 message: "Must be a strong reference object".to_string(),
281 })?;
282 if !obj.contains_key("uri") {
283 return Err(ValidationError::MissingField(format!("{}/uri", path)));
284 }
285 if !obj.contains_key("cid") {
286 return Err(ValidationError::MissingField(format!("{}/cid", path)));
287 }
288 if let Some(uri) = obj.get("uri").and_then(|v| v.as_str()) {
289 if !uri.starts_with("at://") {
290 return Err(ValidationError::InvalidField {
291 path: format!("{}/uri", path),
292 message: "URI must be an at:// URI".to_string(),
293 });
294 }
295 }
296 Ok(())
297 }
298}
299fn validate_datetime(value: &str, path: &str) -> Result<(), ValidationError> {
300 if chrono::DateTime::parse_from_rfc3339(value).is_err() {
301 return Err(ValidationError::InvalidDatetime {
302 path: path.to_string(),
303 });
304 }
305 Ok(())
306}
307pub fn validate_record_key(rkey: &str) -> Result<(), ValidationError> {
308 if rkey.is_empty() {
309 return Err(ValidationError::InvalidRecord("Record key cannot be empty".to_string()));
310 }
311 if rkey.len() > 512 {
312 return Err(ValidationError::InvalidRecord("Record key exceeds maximum length of 512".to_string()));
313 }
314 if rkey == "." || rkey == ".." {
315 return Err(ValidationError::InvalidRecord("Record key cannot be '.' or '..'".to_string()));
316 }
317 let valid_chars = rkey.chars().all(|c| {
318 c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' || c == '~'
319 });
320 if !valid_chars {
321 return Err(ValidationError::InvalidRecord(
322 "Record key contains invalid characters (must be alphanumeric, '.', '-', '_', or '~')".to_string()
323 ));
324 }
325 Ok(())
326}
327pub fn validate_collection_nsid(collection: &str) -> Result<(), ValidationError> {
328 if collection.is_empty() {
329 return Err(ValidationError::InvalidRecord("Collection NSID cannot be empty".to_string()));
330 }
331 let parts: Vec<&str> = collection.split('.').collect();
332 if parts.len() < 3 {
333 return Err(ValidationError::InvalidRecord(
334 "Collection NSID must have at least 3 segments".to_string()
335 ));
336 }
337 for part in &parts {
338 if part.is_empty() {
339 return Err(ValidationError::InvalidRecord(
340 "Collection NSID segments cannot be empty".to_string()
341 ));
342 }
343 if !part.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') {
344 return Err(ValidationError::InvalidRecord(
345 "Collection NSID segments must be alphanumeric or hyphens".to_string()
346 ));
347 }
348 }
349 Ok(())
350}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Collection used by the post-related tests.
    const POST: &str = "app.bsky.feed.post";
    /// A collection with no built-in validator.
    const CUSTOM: &str = "com.example.custom";

    /// A minimal well-formed post is reported as valid.
    #[test]
    fn test_validate_post() {
        let record = json!({
            "$type": "app.bsky.feed.post",
            "text": "Hello, world!",
            "createdAt": "2024-01-01T00:00:00.000Z"
        });
        let outcome = RecordValidator::new().validate(&record, POST);
        assert!(matches!(outcome, Ok(ValidationStatus::Valid)));
    }

    /// A post without `text` is rejected.
    #[test]
    fn test_validate_post_missing_text() {
        let record = json!({
            "$type": "app.bsky.feed.post",
            "createdAt": "2024-01-01T00:00:00.000Z"
        });
        let outcome = RecordValidator::new().validate(&record, POST);
        assert!(outcome.is_err());
    }

    /// A record whose `$type` differs from the collection is a type mismatch.
    #[test]
    fn test_validate_type_mismatch() {
        let like = json!({
            "$type": "app.bsky.feed.like",
            "subject": {"uri": "at://did:plc:test/app.bsky.feed.post/123", "cid": "bafyrei..."},
            "createdAt": "2024-01-01T00:00:00.000Z"
        });
        let outcome = RecordValidator::new().validate(&like, POST);
        assert!(matches!(outcome, Err(ValidationError::TypeMismatch { .. })));
    }

    /// In the default (lenient) mode an unrecognised type is `Unknown`, not an error.
    #[test]
    fn test_validate_unknown_type() {
        let record = json!({
            "$type": "com.example.custom",
            "data": "test"
        });
        let outcome = RecordValidator::new().validate(&record, CUSTOM);
        assert!(matches!(outcome, Ok(ValidationStatus::Unknown)));
    }

    /// With `require_lexicon(true)` an unrecognised type is rejected.
    #[test]
    fn test_validate_unknown_type_strict() {
        let strict = RecordValidator::new().require_lexicon(true);
        let record = json!({
            "$type": "com.example.custom",
            "data": "test"
        });
        let outcome = strict.validate(&record, CUSTOM);
        assert!(matches!(outcome, Err(ValidationError::UnknownType(_))));
    }

    /// Record keys: accepted and rejected samples.
    #[test]
    fn test_validate_record_key() {
        for accepted in &["valid-key_123", "3k2n5j2"] {
            assert!(validate_record_key(accepted).is_ok());
        }
        for rejected in &[".", "..", "", "invalid/key"] {
            assert!(validate_record_key(rejected).is_err());
        }
    }

    /// Collection NSIDs: accepted and rejected samples.
    #[test]
    fn test_validate_collection_nsid() {
        for accepted in &["app.bsky.feed.post", "com.atproto.repo.record"] {
            assert!(validate_collection_nsid(accepted).is_ok());
        }
        for rejected in &["invalid", "a.b", ""] {
            assert!(validate_collection_nsid(rejected).is_err());
        }
    }
}