this repo has no description
1use serde_json::Value;
2use thiserror::Error;
3
4#[derive(Debug, Error)]
5pub enum ValidationError {
6 #[error("No $type provided")]
7 MissingType,
8 #[error("Invalid $type: expected {expected}, got {actual}")]
9 TypeMismatch { expected: String, actual: String },
10 #[error("Missing required field: {0}")]
11 MissingField(String),
12 #[error("Invalid field value at {path}: {message}")]
13 InvalidField { path: String, message: String },
14 #[error("Invalid datetime format at {path}: must be RFC-3339/ISO-8601")]
15 InvalidDatetime { path: String },
16 #[error("Invalid record: {0}")]
17 InvalidRecord(String),
18 #[error("Unknown record type: {0}")]
19 UnknownType(String),
20}
21
/// Outcome category for a record that was inspected by the validator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValidationStatus {
    /// The record type is recognised and every check passed.
    Valid,
    /// The record type is not recognised, so no type-specific checks were run.
    Unknown,
    /// The record failed validation.
    Invalid,
}
28
/// Validates JSON records against a fixed set of known record types.
///
/// `Debug` and `Clone` are derived so the validator can be logged and shared
/// between components without callers having to wrap it.
#[derive(Debug, Clone)]
pub struct RecordValidator {
    /// When `true`, record types without built-in rules are rejected instead of
    /// being reported as unknown.
    require_lexicon: bool,
}
32
33impl Default for RecordValidator {
34 fn default() -> Self {
35 Self::new()
36 }
37}
38
39impl RecordValidator {
40 pub fn new() -> Self {
41 Self {
42 require_lexicon: false,
43 }
44 }
45
46 pub fn require_lexicon(mut self, require: bool) -> Self {
47 self.require_lexicon = require;
48 self
49 }
50
51 pub fn validate(
52 &self,
53 record: &Value,
54 collection: &str,
55 ) -> Result<ValidationStatus, ValidationError> {
56 let obj = record.as_object().ok_or_else(|| {
57 ValidationError::InvalidRecord("Record must be an object".to_string())
58 })?;
59 let record_type = obj
60 .get("$type")
61 .and_then(|v| v.as_str())
62 .ok_or(ValidationError::MissingType)?;
63 if record_type != collection {
64 return Err(ValidationError::TypeMismatch {
65 expected: collection.to_string(),
66 actual: record_type.to_string(),
67 });
68 }
69 if let Some(created_at) = obj.get("createdAt").and_then(|v| v.as_str()) {
70 validate_datetime(created_at, "createdAt")?;
71 }
72 match record_type {
73 "app.bsky.feed.post" => self.validate_post(obj)?,
74 "app.bsky.actor.profile" => self.validate_profile(obj)?,
75 "app.bsky.feed.like" => self.validate_like(obj)?,
76 "app.bsky.feed.repost" => self.validate_repost(obj)?,
77 "app.bsky.graph.follow" => self.validate_follow(obj)?,
78 "app.bsky.graph.block" => self.validate_block(obj)?,
79 "app.bsky.graph.list" => self.validate_list(obj)?,
80 "app.bsky.graph.listitem" => self.validate_list_item(obj)?,
81 "app.bsky.feed.generator" => self.validate_feed_generator(obj)?,
82 "app.bsky.feed.threadgate" => self.validate_threadgate(obj)?,
83 "app.bsky.labeler.service" => self.validate_labeler_service(obj)?,
84 _ => {
85 if self.require_lexicon {
86 return Err(ValidationError::UnknownType(record_type.to_string()));
87 }
88 return Ok(ValidationStatus::Unknown);
89 }
90 }
91 Ok(ValidationStatus::Valid)
92 }
93
94 fn validate_post(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
95 if !obj.contains_key("text") {
96 return Err(ValidationError::MissingField("text".to_string()));
97 }
98 if !obj.contains_key("createdAt") {
99 return Err(ValidationError::MissingField("createdAt".to_string()));
100 }
101 if let Some(text) = obj.get("text").and_then(|v| v.as_str()) {
102 let grapheme_count = text.chars().count();
103 if grapheme_count > 3000 {
104 return Err(ValidationError::InvalidField {
105 path: "text".to_string(),
106 message: format!(
107 "Text exceeds maximum length of 3000 characters (got {})",
108 grapheme_count
109 ),
110 });
111 }
112 }
113 if let Some(langs) = obj.get("langs").and_then(|v| v.as_array())
114 && langs.len() > 3
115 {
116 return Err(ValidationError::InvalidField {
117 path: "langs".to_string(),
118 message: "Maximum 3 languages allowed".to_string(),
119 });
120 }
121 if let Some(tags) = obj.get("tags").and_then(|v| v.as_array()) {
122 if tags.len() > 8 {
123 return Err(ValidationError::InvalidField {
124 path: "tags".to_string(),
125 message: "Maximum 8 tags allowed".to_string(),
126 });
127 }
128 for (i, tag) in tags.iter().enumerate() {
129 if let Some(tag_str) = tag.as_str()
130 && tag_str.len() > 640
131 {
132 return Err(ValidationError::InvalidField {
133 path: format!("tags/{}", i),
134 message: "Tag exceeds maximum length of 640 bytes".to_string(),
135 });
136 }
137 }
138 }
139 Ok(())
140 }
141
142 fn validate_profile(
143 &self,
144 obj: &serde_json::Map<String, Value>,
145 ) -> Result<(), ValidationError> {
146 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) {
147 let grapheme_count = display_name.chars().count();
148 if grapheme_count > 640 {
149 return Err(ValidationError::InvalidField {
150 path: "displayName".to_string(),
151 message: format!(
152 "Display name exceeds maximum length of 640 characters (got {})",
153 grapheme_count
154 ),
155 });
156 }
157 }
158 if let Some(description) = obj.get("description").and_then(|v| v.as_str()) {
159 let grapheme_count = description.chars().count();
160 if grapheme_count > 2560 {
161 return Err(ValidationError::InvalidField {
162 path: "description".to_string(),
163 message: format!(
164 "Description exceeds maximum length of 2560 characters (got {})",
165 grapheme_count
166 ),
167 });
168 }
169 }
170 Ok(())
171 }
172
173 fn validate_like(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
174 if !obj.contains_key("subject") {
175 return Err(ValidationError::MissingField("subject".to_string()));
176 }
177 if !obj.contains_key("createdAt") {
178 return Err(ValidationError::MissingField("createdAt".to_string()));
179 }
180 self.validate_strong_ref(obj.get("subject"), "subject")?;
181 Ok(())
182 }
183
184 fn validate_repost(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
185 if !obj.contains_key("subject") {
186 return Err(ValidationError::MissingField("subject".to_string()));
187 }
188 if !obj.contains_key("createdAt") {
189 return Err(ValidationError::MissingField("createdAt".to_string()));
190 }
191 self.validate_strong_ref(obj.get("subject"), "subject")?;
192 Ok(())
193 }
194
195 fn validate_follow(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
196 if !obj.contains_key("subject") {
197 return Err(ValidationError::MissingField("subject".to_string()));
198 }
199 if !obj.contains_key("createdAt") {
200 return Err(ValidationError::MissingField("createdAt".to_string()));
201 }
202 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str())
203 && !subject.starts_with("did:")
204 {
205 return Err(ValidationError::InvalidField {
206 path: "subject".to_string(),
207 message: "Subject must be a DID".to_string(),
208 });
209 }
210 Ok(())
211 }
212
213 fn validate_block(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
214 if !obj.contains_key("subject") {
215 return Err(ValidationError::MissingField("subject".to_string()));
216 }
217 if !obj.contains_key("createdAt") {
218 return Err(ValidationError::MissingField("createdAt".to_string()));
219 }
220 if let Some(subject) = obj.get("subject").and_then(|v| v.as_str())
221 && !subject.starts_with("did:")
222 {
223 return Err(ValidationError::InvalidField {
224 path: "subject".to_string(),
225 message: "Subject must be a DID".to_string(),
226 });
227 }
228 Ok(())
229 }
230
231 fn validate_list(&self, obj: &serde_json::Map<String, Value>) -> Result<(), ValidationError> {
232 if !obj.contains_key("name") {
233 return Err(ValidationError::MissingField("name".to_string()));
234 }
235 if !obj.contains_key("purpose") {
236 return Err(ValidationError::MissingField("purpose".to_string()));
237 }
238 if !obj.contains_key("createdAt") {
239 return Err(ValidationError::MissingField("createdAt".to_string()));
240 }
241 if let Some(name) = obj.get("name").and_then(|v| v.as_str())
242 && (name.is_empty() || name.len() > 64)
243 {
244 return Err(ValidationError::InvalidField {
245 path: "name".to_string(),
246 message: "Name must be 1-64 characters".to_string(),
247 });
248 }
249 Ok(())
250 }
251
252 fn validate_list_item(
253 &self,
254 obj: &serde_json::Map<String, Value>,
255 ) -> Result<(), ValidationError> {
256 if !obj.contains_key("subject") {
257 return Err(ValidationError::MissingField("subject".to_string()));
258 }
259 if !obj.contains_key("list") {
260 return Err(ValidationError::MissingField("list".to_string()));
261 }
262 if !obj.contains_key("createdAt") {
263 return Err(ValidationError::MissingField("createdAt".to_string()));
264 }
265 Ok(())
266 }
267
268 fn validate_feed_generator(
269 &self,
270 obj: &serde_json::Map<String, Value>,
271 ) -> Result<(), ValidationError> {
272 if !obj.contains_key("did") {
273 return Err(ValidationError::MissingField("did".to_string()));
274 }
275 if !obj.contains_key("displayName") {
276 return Err(ValidationError::MissingField("displayName".to_string()));
277 }
278 if !obj.contains_key("createdAt") {
279 return Err(ValidationError::MissingField("createdAt".to_string()));
280 }
281 if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str())
282 && (display_name.is_empty() || display_name.len() > 240)
283 {
284 return Err(ValidationError::InvalidField {
285 path: "displayName".to_string(),
286 message: "displayName must be 1-240 characters".to_string(),
287 });
288 }
289 Ok(())
290 }
291
292 fn validate_threadgate(
293 &self,
294 obj: &serde_json::Map<String, Value>,
295 ) -> Result<(), ValidationError> {
296 if !obj.contains_key("post") {
297 return Err(ValidationError::MissingField("post".to_string()));
298 }
299 if !obj.contains_key("createdAt") {
300 return Err(ValidationError::MissingField("createdAt".to_string()));
301 }
302 Ok(())
303 }
304
305 fn validate_labeler_service(
306 &self,
307 obj: &serde_json::Map<String, Value>,
308 ) -> Result<(), ValidationError> {
309 if !obj.contains_key("policies") {
310 return Err(ValidationError::MissingField("policies".to_string()));
311 }
312 if !obj.contains_key("createdAt") {
313 return Err(ValidationError::MissingField("createdAt".to_string()));
314 }
315 Ok(())
316 }
317
318 fn validate_strong_ref(
319 &self,
320 value: Option<&Value>,
321 path: &str,
322 ) -> Result<(), ValidationError> {
323 let obj =
324 value
325 .and_then(|v| v.as_object())
326 .ok_or_else(|| ValidationError::InvalidField {
327 path: path.to_string(),
328 message: "Must be a strong reference object".to_string(),
329 })?;
330 if !obj.contains_key("uri") {
331 return Err(ValidationError::MissingField(format!("{}/uri", path)));
332 }
333 if !obj.contains_key("cid") {
334 return Err(ValidationError::MissingField(format!("{}/cid", path)));
335 }
336 if let Some(uri) = obj.get("uri").and_then(|v| v.as_str())
337 && !uri.starts_with("at://")
338 {
339 return Err(ValidationError::InvalidField {
340 path: format!("{}/uri", path),
341 message: "URI must be an at:// URI".to_string(),
342 });
343 }
344 Ok(())
345 }
346}
347
348fn validate_datetime(value: &str, path: &str) -> Result<(), ValidationError> {
349 if chrono::DateTime::parse_from_rfc3339(value).is_err() {
350 return Err(ValidationError::InvalidDatetime {
351 path: path.to_string(),
352 });
353 }
354 Ok(())
355}
356
357pub fn validate_record_key(rkey: &str) -> Result<(), ValidationError> {
358 if rkey.is_empty() {
359 return Err(ValidationError::InvalidRecord(
360 "Record key cannot be empty".to_string(),
361 ));
362 }
363 if rkey.len() > 512 {
364 return Err(ValidationError::InvalidRecord(
365 "Record key exceeds maximum length of 512".to_string(),
366 ));
367 }
368 if rkey == "." || rkey == ".." {
369 return Err(ValidationError::InvalidRecord(
370 "Record key cannot be '.' or '..'".to_string(),
371 ));
372 }
373 let valid_chars = rkey
374 .chars()
375 .all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' || c == '~');
376 if !valid_chars {
377 return Err(ValidationError::InvalidRecord(
378 "Record key contains invalid characters (must be alphanumeric, '.', '-', '_', or '~')"
379 .to_string(),
380 ));
381 }
382 Ok(())
383}
384
385pub fn validate_collection_nsid(collection: &str) -> Result<(), ValidationError> {
386 if collection.is_empty() {
387 return Err(ValidationError::InvalidRecord(
388 "Collection NSID cannot be empty".to_string(),
389 ));
390 }
391 let parts: Vec<&str> = collection.split('.').collect();
392 if parts.len() < 3 {
393 return Err(ValidationError::InvalidRecord(
394 "Collection NSID must have at least 3 segments".to_string(),
395 ));
396 }
397 for part in &parts {
398 if part.is_empty() {
399 return Err(ValidationError::InvalidRecord(
400 "Collection NSID segments cannot be empty".to_string(),
401 ));
402 }
403 if !part.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') {
404 return Err(ValidationError::InvalidRecord(
405 "Collection NSID segments must be alphanumeric or hyphens".to_string(),
406 ));
407 }
408 }
409 Ok(())
410}
411
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    const POST: &str = "app.bsky.feed.post";
    const CUSTOM: &str = "com.example.custom";
    const TIMESTAMP: &str = "2024-01-01T00:00:00.000Z";

    /// A post with text and a timestamp is reported as valid.
    #[test]
    fn test_validate_post() {
        let record = json!({
            "$type": POST,
            "text": "Hello, world!",
            "createdAt": TIMESTAMP
        });
        let status = RecordValidator::new().validate(&record, POST).unwrap();
        assert_eq!(status, ValidationStatus::Valid);
    }

    /// A post without `text` is rejected.
    #[test]
    fn test_validate_post_missing_text() {
        let record = json!({
            "$type": POST,
            "createdAt": TIMESTAMP
        });
        let outcome = RecordValidator::new().validate(&record, POST);
        assert!(outcome.is_err());
    }

    /// A record whose `$type` differs from the target collection is rejected.
    #[test]
    fn test_validate_type_mismatch() {
        let like = json!({
            "$type": "app.bsky.feed.like",
            "subject": {"uri": "at://did:plc:test/app.bsky.feed.post/123", "cid": "bafyrei..."},
            "createdAt": TIMESTAMP
        });
        let outcome = RecordValidator::new().validate(&like, POST);
        assert!(matches!(outcome, Err(ValidationError::TypeMismatch { .. })));
    }

    /// By default an unrecognised record type is reported as unknown.
    #[test]
    fn test_validate_unknown_type() {
        let record = json!({
            "$type": CUSTOM,
            "data": "test"
        });
        let status = RecordValidator::new().validate(&record, CUSTOM).unwrap();
        assert_eq!(status, ValidationStatus::Unknown);
    }

    /// With strict lexicon checking an unrecognised record type is an error.
    #[test]
    fn test_validate_unknown_type_strict() {
        let strict = RecordValidator::new().require_lexicon(true);
        let record = json!({
            "$type": CUSTOM,
            "data": "test"
        });
        let outcome = strict.validate(&record, CUSTOM);
        assert!(matches!(outcome, Err(ValidationError::UnknownType(_))));
    }

    /// Record keys: accepted and rejected samples.
    #[test]
    fn test_validate_record_key() {
        for accepted in ["valid-key_123", "3k2n5j2"] {
            assert!(
                validate_record_key(accepted).is_ok(),
                "expected {:?} to be accepted",
                accepted
            );
        }
        for rejected in [".", "..", "", "invalid/key"] {
            assert!(
                validate_record_key(rejected).is_err(),
                "expected {:?} to be rejected",
                rejected
            );
        }
    }

    /// Collection NSIDs: accepted and rejected samples.
    #[test]
    fn test_validate_collection_nsid() {
        for accepted in ["app.bsky.feed.post", "com.atproto.repo.record"] {
            assert!(
                validate_collection_nsid(accepted).is_ok(),
                "expected {:?} to be accepted",
                accepted
            );
        }
        for rejected in ["invalid", "a.b", ""] {
            assert!(
                validate_collection_nsid(rejected).is_err(),
                "expected {:?} to be rejected",
                rejected
            );
        }
    }
}
501}