tangled
alpha
login
or
join now
tranquil.farm
/
tranquil-pds
156
fork
atom
Our Personal Data Server from scratch!
tranquil.farm
oauth
atproto
pds
rust
postgresql
objectstorage
fun
156
fork
atom
overview
issues
22
pulls
2
pipelines
No slur handles allowed
lewis.moe
2 months ago
e90308ba
8cb82abc
+634
-31
13 changed files
expand all
collapse all
unified
split
Cargo.lock
Cargo.toml
frontend
src
lib
api.ts
src
api
identity
account.rs
did.rs
repo
record
batch.rs
validation.rs
write.rs
validation.rs
lib.rs
moderation
mod.rs
validation
mod.rs
tests
banned_words.rs
+1
Cargo.lock
···
6305
"p384",
6306
"rand 0.8.5",
6307
"redis",
0
6308
"reqwest",
6309
"serde",
6310
"serde_bytes",
···
6305
"p384",
6306
"rand 0.8.5",
6307
"redis",
6308
+
"regex",
6309
"reqwest",
6310
"serde",
6311
"serde_bytes",
+1
Cargo.toml
···
30
multibase = "0.9.1"
31
multihash = "0.19.3"
32
rand = "0.8.5"
0
33
reqwest = { version = "0.12.28", features = ["json"] }
34
serde = { version = "1.0.228", features = ["derive"] }
35
serde_bytes = "0.11.14"
···
30
multibase = "0.9.1"
31
multihash = "0.19.3"
32
rand = "0.8.5"
33
+
regex = "1"
34
reqwest = { version = "0.12.28", features = ["json"] }
35
serde = { version = "1.0.228", features = ["derive"] }
36
serde_bytes = "0.11.14"
+1
-1
frontend/src/lib/api.ts
···
175
});
176
const data = await response.json();
177
if (!response.ok) {
178
-
throw new ApiError(data.error, data.message, response.status);
179
}
180
return data;
181
},
···
175
});
176
const data = await response.json();
177
if (!response.ok) {
178
+
throw new ApiError(response.status, data.error, data.message);
179
}
180
return data;
181
},
+9
-1
src/api/identity/account.rs
···
194
.into_response();
195
}
196
}
197
-
input.handle.to_lowercase()
0
0
0
0
0
0
0
0
198
};
199
let email: Option<String> = input
200
.email
···
194
.into_response();
195
}
196
}
197
+
let handle_lower = input.handle.to_lowercase();
198
+
if crate::moderation::has_explicit_slur(&handle_lower) {
199
+
return (
200
+
StatusCode::BAD_REQUEST,
201
+
Json(json!({"error": "InvalidHandle", "message": "Inappropriate language in handle"})),
202
+
)
203
+
.into_response();
204
+
}
205
+
handle_lower
206
};
207
let email: Option<String> = input
208
.email
+7
src/api/identity/did.rs
···
582
)
583
.into_response();
584
}
0
0
0
0
0
0
0
585
let hostname = std::env::var("PDS_HOSTNAME").unwrap_or_else(|_| "localhost".to_string());
586
let suffix = format!(".{}", hostname);
587
let is_service_domain = crate::handle::is_service_domain_handle(new_handle, &hostname);
···
582
)
583
.into_response();
584
}
585
+
if crate::moderation::has_explicit_slur(new_handle) {
586
+
return (
587
+
StatusCode::BAD_REQUEST,
588
+
Json(json!({"error": "InvalidHandle", "message": "Inappropriate language in handle"})),
589
+
)
590
+
.into_response();
591
+
}
592
let hostname = std::env::var("PDS_HOSTNAME").unwrap_or_else(|_| "localhost".to_string());
593
let suffix = format!(".{}", hostname);
594
let is_service_domain = crate::handle::is_service_domain_handle(new_handle, &hostname);
+5
-3
src/api/repo/record/batch.rs
···
1
-
use super::validation::validate_record;
2
use super::write::has_verified_comms_channel;
3
use crate::api::repo::record::utils::{CommitParams, RecordOp, commit_and_log, extract_blob_cids};
4
use crate::delegation::{self, DelegationActionType};
···
304
value,
305
} => {
306
if input.validate.unwrap_or(true)
307
-
&& let Err(err_response) = validate_record(value, collection)
0
308
{
309
return *err_response;
310
}
···
357
value,
358
} => {
359
if input.validate.unwrap_or(true)
360
-
&& let Err(err_response) = validate_record(value, collection)
0
361
{
362
return *err_response;
363
}
···
1
+
use super::validation::validate_record_with_rkey;
2
use super::write::has_verified_comms_channel;
3
use crate::api::repo::record::utils::{CommitParams, RecordOp, commit_and_log, extract_blob_cids};
4
use crate::delegation::{self, DelegationActionType};
···
304
value,
305
} => {
306
if input.validate.unwrap_or(true)
307
+
&& let Err(err_response) =
308
+
validate_record_with_rkey(value, collection, rkey.as_deref())
309
{
310
return *err_response;
311
}
···
358
value,
359
} => {
360
if input.validate.unwrap_or(true)
361
+
&& let Err(err_response) =
362
+
validate_record_with_rkey(value, collection, Some(rkey))
363
{
364
return *err_response;
365
}
+13
-1
src/api/repo/record/validation.rs
···
7
use serde_json::json;
8
9
pub fn validate_record(record: &serde_json::Value, collection: &str) -> Result<(), Box<Response>> {
0
0
0
0
0
0
0
0
10
let validator = RecordValidator::new();
11
-
match validator.validate(record, collection) {
12
Ok(_) => Ok(()),
13
Err(ValidationError::MissingType) => Err(Box::new((
14
StatusCode::BAD_REQUEST,
···
29
Err(ValidationError::InvalidDatetime { path }) => Err(Box::new((
30
StatusCode::BAD_REQUEST,
31
Json(json!({"error": "InvalidRecord", "message": format!("Invalid datetime format at '{}'", path)})),
0
0
0
0
32
).into_response())),
33
Err(e) => Err(Box::new((
34
StatusCode::BAD_REQUEST,
···
7
use serde_json::json;
8
9
pub fn validate_record(record: &serde_json::Value, collection: &str) -> Result<(), Box<Response>> {
10
+
validate_record_with_rkey(record, collection, None)
11
+
}
12
+
13
+
pub fn validate_record_with_rkey(
14
+
record: &serde_json::Value,
15
+
collection: &str,
16
+
rkey: Option<&str>,
17
+
) -> Result<(), Box<Response>> {
18
let validator = RecordValidator::new();
19
+
match validator.validate_with_rkey(record, collection, rkey) {
20
Ok(_) => Ok(()),
21
Err(ValidationError::MissingType) => Err(Box::new((
22
StatusCode::BAD_REQUEST,
···
37
Err(ValidationError::InvalidDatetime { path }) => Err(Box::new((
38
StatusCode::BAD_REQUEST,
39
Json(json!({"error": "InvalidRecord", "message": format!("Invalid datetime format at '{}'", path)})),
40
+
).into_response())),
41
+
Err(ValidationError::BannedContent { path }) => Err(Box::new((
42
+
StatusCode::BAD_REQUEST,
43
+
Json(json!({"error": "InvalidRecord", "message": format!("Unacceptable slur in record at '{}'", path)})),
44
).into_response())),
45
Err(e) => Err(Box::new((
46
StatusCode::BAD_REQUEST,
+5
-3
src/api/repo/record/write.rs
···
1
-
use super::validation::validate_record;
2
use crate::api::repo::record::utils::{CommitParams, RecordOp, commit_and_log, extract_blob_cids};
3
use crate::delegation::{self, DelegationActionType};
4
use crate::repo::tracking::TrackingBlockStore;
···
257
}
258
};
259
if input.validate.unwrap_or(true)
260
-
&& let Err(err_response) = validate_record(&input.record, &input.collection)
0
261
{
262
return *err_response;
263
}
···
480
};
481
let key = format!("{}/{}", collection_nsid, input.rkey);
482
if input.validate.unwrap_or(true)
483
-
&& let Err(err_response) = validate_record(&input.record, &input.collection)
0
484
{
485
return *err_response;
486
}
···
1
+
use super::validation::validate_record_with_rkey;
2
use crate::api::repo::record::utils::{CommitParams, RecordOp, commit_and_log, extract_blob_cids};
3
use crate::delegation::{self, DelegationActionType};
4
use crate::repo::tracking::TrackingBlockStore;
···
257
}
258
};
259
if input.validate.unwrap_or(true)
260
+
&& let Err(err_response) =
261
+
validate_record_with_rkey(&input.record, &input.collection, input.rkey.as_deref())
262
{
263
return *err_response;
264
}
···
481
};
482
let key = format!("{}/{}", collection_nsid, input.rkey);
483
if input.validate.unwrap_or(true)
484
+
&& let Err(err_response) =
485
+
validate_record_with_rkey(&input.record, &input.collection, Some(&input.rkey))
486
{
487
return *err_response;
488
}
+6
src/api/validation.rs
···
16
StartsWithInvalidChar,
17
EndsWithInvalidChar,
18
ContainsSpaces,
0
19
}
20
21
impl std::fmt::Display for HandleValidationError {
···
41
}
42
Self::EndsWithInvalidChar => write!(f, "Handle cannot end with a hyphen or underscore"),
43
Self::ContainsSpaces => write!(f, "Handle cannot contain spaces"),
0
44
}
45
}
46
}
···
80
if !c.is_ascii_alphanumeric() && c != '-' && c != '_' {
81
return Err(HandleValidationError::InvalidCharacters);
82
}
0
0
0
0
83
}
84
85
Ok(handle.to_lowercase())
···
16
StartsWithInvalidChar,
17
EndsWithInvalidChar,
18
ContainsSpaces,
19
+
BannedWord,
20
}
21
22
impl std::fmt::Display for HandleValidationError {
···
42
}
43
Self::EndsWithInvalidChar => write!(f, "Handle cannot end with a hyphen or underscore"),
44
Self::ContainsSpaces => write!(f, "Handle cannot contain spaces"),
45
+
Self::BannedWord => write!(f, "Inappropriate language in handle"),
46
}
47
}
48
}
···
82
if !c.is_ascii_alphanumeric() && c != '-' && c != '_' {
83
return Err(HandleValidationError::InvalidCharacters);
84
}
85
+
}
86
+
87
+
if crate::moderation::has_explicit_slur(handle) {
88
+
return Err(HandleValidationError::BannedWord);
89
}
90
91
Ok(handle.to_lowercase())
+1
src/lib.rs
···
10
pub mod handle;
11
pub mod image;
12
pub mod metrics;
0
13
pub mod oauth;
14
pub mod plc;
15
pub mod rate_limit;
···
10
pub mod handle;
11
pub mod image;
12
pub mod metrics;
13
+
pub mod moderation;
14
pub mod oauth;
15
pub mod plc;
16
pub mod rate_limit;
+262
src/moderation/mod.rs
···
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
···
1
+
/*
2
+
* CONTENT WARNING
3
+
*
4
+
* This file contains explicit slurs and hateful language. We're sorry you have to see them.
5
+
*
6
+
* These words exist here for one reason: to ensure our moderation system correctly blocks them.
7
+
* We can't verify the filter catches the n-word without testing against the actual word.
8
+
* Euphemisms wouldn't prove the protection works.
9
+
*
10
+
* If reading this file has caused you distress, please know:
11
+
* - you are valued and welcome in this community
12
+
* - these words do not reflect the views of this project or its contributors
13
+
* - we maintain this code precisely because we believe everyone deserves an experience on the web that is free from this kinda language
14
+
*/
15
+
16
+
use regex::Regex;
17
+
use std::sync::OnceLock;
18
+
19
+
static SLUR_REGEXES: OnceLock<Vec<Regex>> = OnceLock::new();
20
+
static EXTRA_BANNED_WORDS: OnceLock<Vec<String>> = OnceLock::new();
21
+
22
+
fn get_slur_regexes() -> &'static Vec<Regex> {
23
+
SLUR_REGEXES.get_or_init(|| {
24
+
vec![
25
+
Regex::new(r"\b[cĆćĈĉČčĊċÇçḈḉȻȼꞒꞓꟄꞔƇƈɕ][hĤĥȞȟḦḧḢḣḨḩḤḥḪḫH̱ẖĦħⱧⱨꞪɦꞕΗНн][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
26
+
Regex::new(r"\b[cĆćĈĉČčĊċÇçḈḉȻȼꞒꞓꟄꞔƇƈɕ][ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOo0]{2}[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
27
+
Regex::new(r"\b[fḞḟƑƒꞘꞙᵮᶂ][aÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa@4][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGg]{1,2}([ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOo0e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEeiÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][tŤťṪṫŢţṬṭȚțṰṱṮṯŦŧȾⱦƬƭƮʈT̈ẗᵵƫȶ]{1,2}([rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe])?)?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
28
+
Regex::new(r"\b[kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLlyÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ][kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]([rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe])?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]*\b").unwrap(),
29
+
Regex::new(r"\b[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLloÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOoІіa4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGgqꝖꝗꝘꝙɋʠ]{2}(l[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]t|[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEeaÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ]?|n[ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOo0][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGgqꝖꝗꝘꝙɋʠ]|[a4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa]?)?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
30
+
Regex::new(r"[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLloÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOoІіa4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGgqꝖꝗꝘꝙɋʠ]{2}(l[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]t|[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ])[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?").unwrap(),
31
+
Regex::new(r"\b[tŤťṪṫŢţṬṭȚțṰṱṮṯŦŧȾⱦƬƭƮʈT̈ẗᵵƫȶ][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][aÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa4]+[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn]{1,2}([iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]|[yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ])[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
32
+
]
33
+
})
34
+
}
35
+
36
+
fn get_extra_banned_words() -> &'static Vec<String> {
37
+
EXTRA_BANNED_WORDS.get_or_init(|| {
38
+
std::env::var("PDS_BANNED_WORDS")
39
+
.unwrap_or_default()
40
+
.split(',')
41
+
.map(|s| s.trim().to_lowercase())
42
+
.filter(|s| !s.is_empty())
43
+
.collect()
44
+
})
45
+
}
46
+
47
+
fn strip_trailing_digits(s: &str) -> &str {
48
+
s.trim_end_matches(|c: char| c.is_ascii_digit())
49
+
}
50
+
51
+
fn normalize_leetspeak(s: &str) -> String {
52
+
s.chars()
53
+
.map(|c| match c {
54
+
'4' | '@' => 'a',
55
+
'3' => 'e',
56
+
'1' | '!' | '|' => 'i',
57
+
'0' => 'o',
58
+
'5' | '$' => 's',
59
+
'7' => 't',
60
+
'8' => 'b',
61
+
'9' => 'g',
62
+
_ => c,
63
+
})
64
+
.collect()
65
+
}
66
+
67
+
pub fn has_explicit_slur(text: &str) -> bool {
68
+
has_explicit_slur_with_extra_words(text, get_extra_banned_words())
69
+
}
70
+
71
+
fn has_explicit_slur_with_extra_words(text: &str, extra_words: &[String]) -> bool {
72
+
let text_lower = text.to_lowercase();
73
+
let normalized = text_lower.replace('.', "").replace('-', "").replace('_', "");
74
+
let stripped = strip_trailing_digits(&text_lower);
75
+
let normalized_stripped = strip_trailing_digits(&normalized);
76
+
77
+
let regexes = get_slur_regexes();
78
+
if regexes.iter().any(|r| {
79
+
r.is_match(&text_lower)
80
+
|| r.is_match(&normalized)
81
+
|| r.is_match(stripped)
82
+
|| r.is_match(normalized_stripped)
83
+
}) {
84
+
return true;
85
+
}
86
+
87
+
if !extra_words.is_empty() {
88
+
let leet_normalized = normalize_leetspeak(&normalized);
89
+
let leet_stripped = normalize_leetspeak(strip_trailing_digits(&leet_normalized));
90
+
if extra_words.iter().any(|w| {
91
+
text_lower.contains(w)
92
+
|| normalized.contains(w)
93
+
|| stripped.contains(w)
94
+
|| normalized_stripped.contains(w)
95
+
|| leet_normalized.contains(w)
96
+
|| leet_stripped.contains(w)
97
+
}) {
98
+
return true;
99
+
}
100
+
}
101
+
false
102
+
}
103
+
104
+
#[cfg(test)]
105
+
mod tests {
106
+
use super::*;
107
+
108
+
#[test]
109
+
fn test_chink_pattern() {
110
+
assert!(has_explicit_slur("chink"));
111
+
assert!(has_explicit_slur("chinks"));
112
+
assert!(has_explicit_slur("CHINK"));
113
+
assert!(has_explicit_slur("Chinks"));
114
+
}
115
+
116
+
#[test]
117
+
fn test_coon_pattern() {
118
+
assert!(has_explicit_slur("coon"));
119
+
assert!(has_explicit_slur("coons"));
120
+
assert!(has_explicit_slur("COON"));
121
+
}
122
+
123
+
#[test]
124
+
fn test_fag_pattern() {
125
+
assert!(has_explicit_slur("fag"));
126
+
assert!(has_explicit_slur("fags"));
127
+
assert!(has_explicit_slur("faggot"));
128
+
assert!(has_explicit_slur("faggots"));
129
+
assert!(has_explicit_slur("faggotry"));
130
+
}
131
+
132
+
#[test]
133
+
fn test_kike_pattern() {
134
+
assert!(has_explicit_slur("kike"));
135
+
assert!(has_explicit_slur("kikes"));
136
+
assert!(has_explicit_slur("KIKE"));
137
+
assert!(has_explicit_slur("kikery"));
138
+
}
139
+
140
+
#[test]
141
+
fn test_nigger_pattern() {
142
+
assert!(has_explicit_slur("nigger"));
143
+
assert!(has_explicit_slur("niggers"));
144
+
assert!(has_explicit_slur("NIGGER"));
145
+
assert!(has_explicit_slur("nigga"));
146
+
assert!(has_explicit_slur("niggas"));
147
+
}
148
+
149
+
#[test]
150
+
fn test_tranny_pattern() {
151
+
assert!(has_explicit_slur("tranny"));
152
+
assert!(has_explicit_slur("trannies"));
153
+
assert!(has_explicit_slur("TRANNY"));
154
+
}
155
+
156
+
#[test]
157
+
fn test_normalization_bypass() {
158
+
assert!(has_explicit_slur("n.i.g.g.e.r"));
159
+
assert!(has_explicit_slur("n-i-g-g-e-r"));
160
+
assert!(has_explicit_slur("n_i_g_g_e_r"));
161
+
assert!(has_explicit_slur("f.a.g"));
162
+
assert!(has_explicit_slur("f-a-g"));
163
+
assert!(has_explicit_slur("c.h.i.n.k"));
164
+
assert!(has_explicit_slur("k_i_k_e"));
165
+
}
166
+
167
+
#[test]
168
+
fn test_trailing_digits_bypass() {
169
+
assert!(has_explicit_slur("faggot123"));
170
+
assert!(has_explicit_slur("nigger69"));
171
+
assert!(has_explicit_slur("chink420"));
172
+
assert!(has_explicit_slur("fag1"));
173
+
assert!(has_explicit_slur("kike2024"));
174
+
assert!(has_explicit_slur("n_i_g_g_e_r123"));
175
+
}
176
+
177
+
#[test]
178
+
fn test_embedded_in_sentence() {
179
+
assert!(has_explicit_slur("you are a faggot"));
180
+
assert!(has_explicit_slur("stupid nigger"));
181
+
assert!(has_explicit_slur("go away chink"));
182
+
}
183
+
184
+
#[test]
185
+
fn test_safe_words_not_matched() {
186
+
assert!(!has_explicit_slur("hello"));
187
+
assert!(!has_explicit_slur("world"));
188
+
assert!(!has_explicit_slur("bluesky"));
189
+
assert!(!has_explicit_slur("tranquil"));
190
+
assert!(!has_explicit_slur("programmer"));
191
+
assert!(!has_explicit_slur("trigger"));
192
+
assert!(!has_explicit_slur("bigger"));
193
+
assert!(!has_explicit_slur("digger"));
194
+
assert!(!has_explicit_slur("figure"));
195
+
assert!(!has_explicit_slur("configure"));
196
+
}
197
+
198
+
#[test]
199
+
fn test_similar_but_safe_words() {
200
+
assert!(!has_explicit_slur("niggardly"));
201
+
assert!(!has_explicit_slur("raccoon"));
202
+
}
203
+
204
+
#[test]
205
+
fn test_empty_and_whitespace() {
206
+
assert!(!has_explicit_slur(""));
207
+
assert!(!has_explicit_slur(" "));
208
+
assert!(!has_explicit_slur("\t\n"));
209
+
}
210
+
211
+
#[test]
212
+
fn test_case_insensitive() {
213
+
assert!(has_explicit_slur("NIGGER"));
214
+
assert!(has_explicit_slur("Nigger"));
215
+
assert!(has_explicit_slur("NiGgEr"));
216
+
assert!(has_explicit_slur("FAGGOT"));
217
+
assert!(has_explicit_slur("Faggot"));
218
+
}
219
+
220
+
#[test]
221
+
fn test_leetspeak_bypass() {
222
+
assert!(has_explicit_slur("f4ggot"));
223
+
assert!(has_explicit_slur("f4gg0t"));
224
+
assert!(has_explicit_slur("n1gger"));
225
+
assert!(has_explicit_slur("n1gg3r"));
226
+
assert!(has_explicit_slur("k1ke"));
227
+
assert!(has_explicit_slur("ch1nk"));
228
+
assert!(has_explicit_slur("tr4nny"));
229
+
}
230
+
231
+
#[test]
232
+
fn test_normalize_leetspeak() {
233
+
assert_eq!(normalize_leetspeak("h3llo"), "hello");
234
+
assert_eq!(normalize_leetspeak("w0rld"), "world");
235
+
assert_eq!(normalize_leetspeak("t3$t"), "test");
236
+
assert_eq!(normalize_leetspeak("b4dw0rd"), "badword");
237
+
assert_eq!(normalize_leetspeak("l33t5p34k"), "leetspeak");
238
+
assert_eq!(normalize_leetspeak("@ss"), "ass");
239
+
assert_eq!(normalize_leetspeak("sh!t"), "shit");
240
+
assert_eq!(normalize_leetspeak("normal"), "normal");
241
+
}
242
+
243
+
#[test]
244
+
fn test_extra_banned_words() {
245
+
let extra = vec!["badword".to_string(), "offensive".to_string()];
246
+
247
+
assert!(has_explicit_slur_with_extra_words("badword", &extra));
248
+
assert!(has_explicit_slur_with_extra_words("BADWORD", &extra));
249
+
assert!(has_explicit_slur_with_extra_words("b.a.d.w.o.r.d", &extra));
250
+
assert!(has_explicit_slur_with_extra_words("b-a-d-w-o-r-d", &extra));
251
+
assert!(has_explicit_slur_with_extra_words("b_a_d_w_o_r_d", &extra));
252
+
assert!(has_explicit_slur_with_extra_words("badword123", &extra));
253
+
assert!(has_explicit_slur_with_extra_words("b4dw0rd", &extra));
254
+
assert!(has_explicit_slur_with_extra_words("b4dw0rd789", &extra));
255
+
assert!(has_explicit_slur_with_extra_words("b.4.d.w.0.r.d", &extra));
256
+
assert!(has_explicit_slur_with_extra_words("this contains badword here", &extra));
257
+
assert!(has_explicit_slur_with_extra_words("0ff3n$1v3", &extra));
258
+
259
+
assert!(!has_explicit_slur_with_extra_words("goodword", &extra));
260
+
assert!(!has_explicit_slur_with_extra_words("hello world", &extra));
261
+
}
262
+
}
+127
-22
src/validation/mod.rs
···
17
InvalidRecord(String),
18
#[error("Unknown record type: {0}")]
19
UnknownType(String),
0
0
20
}
21
22
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
···
53
record: &Value,
54
collection: &str,
55
) -> Result<ValidationStatus, ValidationError> {
0
0
0
0
0
0
0
0
0
56
let obj = record.as_object().ok_or_else(|| {
57
ValidationError::InvalidRecord("Record must be an object".to_string())
58
})?;
···
78
"app.bsky.graph.block" => self.validate_block(obj)?,
79
"app.bsky.graph.list" => self.validate_list(obj)?,
80
"app.bsky.graph.listitem" => self.validate_list_item(obj)?,
81
-
"app.bsky.feed.generator" => self.validate_feed_generator(obj)?,
82
"app.bsky.feed.threadgate" => self.validate_threadgate(obj)?,
83
"app.bsky.labeler.service" => self.validate_labeler_service(obj)?,
0
84
_ => {
85
if self.require_lexicon {
86
return Err(ValidationError::UnknownType(record_type.to_string()));
···
126
});
127
}
128
for (i, tag) in tags.iter().enumerate() {
129
-
if let Some(tag_str) = tag.as_str()
130
-
&& tag_str.len() > 640
131
-
{
132
-
return Err(ValidationError::InvalidField {
133
-
path: format!("tags/{}", i),
134
-
message: "Tag exceeds maximum length of 640 bytes".to_string(),
135
-
});
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
136
}
137
}
138
}
···
154
),
155
});
156
}
0
0
0
0
0
157
}
158
if let Some(description) = obj.get("description").and_then(|v| v.as_str()) {
159
let grapheme_count = description.chars().count();
···
164
"Description exceeds maximum length of 2560 characters (got {})",
165
grapheme_count
166
),
0
0
0
0
0
167
});
168
}
169
}
···
238
if !obj.contains_key("createdAt") {
239
return Err(ValidationError::MissingField("createdAt".to_string()));
240
}
241
-
if let Some(name) = obj.get("name").and_then(|v| v.as_str())
242
-
&& (name.is_empty() || name.len() > 64)
243
-
{
244
-
return Err(ValidationError::InvalidField {
245
-
path: "name".to_string(),
246
-
message: "Name must be 1-64 characters".to_string(),
247
-
});
0
0
0
0
0
248
}
249
Ok(())
250
}
···
268
fn validate_feed_generator(
269
&self,
270
obj: &serde_json::Map<String, Value>,
0
271
) -> Result<(), ValidationError> {
272
if !obj.contains_key("did") {
273
return Err(ValidationError::MissingField("did".to_string()));
···
278
if !obj.contains_key("createdAt") {
279
return Err(ValidationError::MissingField("createdAt".to_string()));
280
}
281
-
if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str())
282
-
&& (display_name.is_empty() || display_name.len() > 240)
283
-
{
284
-
return Err(ValidationError::InvalidField {
285
-
path: "displayName".to_string(),
286
-
message: "displayName must be 1-240 characters".to_string(),
287
-
});
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
288
}
289
Ok(())
290
}
···
17
InvalidRecord(String),
18
#[error("Unknown record type: {0}")]
19
UnknownType(String),
20
+
#[error("Unacceptable slur in record at {path}")]
21
+
BannedContent { path: String },
22
}
23
24
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
···
55
record: &Value,
56
collection: &str,
57
) -> Result<ValidationStatus, ValidationError> {
58
+
self.validate_with_rkey(record, collection, None)
59
+
}
60
+
61
+
pub fn validate_with_rkey(
62
+
&self,
63
+
record: &Value,
64
+
collection: &str,
65
+
rkey: Option<&str>,
66
+
) -> Result<ValidationStatus, ValidationError> {
67
let obj = record.as_object().ok_or_else(|| {
68
ValidationError::InvalidRecord("Record must be an object".to_string())
69
})?;
···
89
"app.bsky.graph.block" => self.validate_block(obj)?,
90
"app.bsky.graph.list" => self.validate_list(obj)?,
91
"app.bsky.graph.listitem" => self.validate_list_item(obj)?,
92
+
"app.bsky.feed.generator" => self.validate_feed_generator(obj, rkey)?,
93
"app.bsky.feed.threadgate" => self.validate_threadgate(obj)?,
94
"app.bsky.labeler.service" => self.validate_labeler_service(obj)?,
95
+
"app.bsky.graph.starterpack" => self.validate_starterpack(obj)?,
96
_ => {
97
if self.require_lexicon {
98
return Err(ValidationError::UnknownType(record_type.to_string()));
···
138
});
139
}
140
for (i, tag) in tags.iter().enumerate() {
141
+
if let Some(tag_str) = tag.as_str() {
142
+
if tag_str.len() > 640 {
143
+
return Err(ValidationError::InvalidField {
144
+
path: format!("tags/{}", i),
145
+
message: "Tag exceeds maximum length of 640 bytes".to_string(),
146
+
});
147
+
}
148
+
if crate::moderation::has_explicit_slur(tag_str) {
149
+
return Err(ValidationError::BannedContent {
150
+
path: format!("tags/{}", i),
151
+
});
152
+
}
153
+
}
154
+
}
155
+
}
156
+
if let Some(facets) = obj.get("facets").and_then(|v| v.as_array()) {
157
+
for (i, facet) in facets.iter().enumerate() {
158
+
if let Some(features) = facet.get("features").and_then(|v| v.as_array()) {
159
+
for (j, feature) in features.iter().enumerate() {
160
+
let is_tag = feature
161
+
.get("$type")
162
+
.and_then(|v| v.as_str())
163
+
.is_some_and(|t| t == "app.bsky.richtext.facet#tag");
164
+
if is_tag {
165
+
if let Some(tag) = feature.get("tag").and_then(|v| v.as_str()) {
166
+
if crate::moderation::has_explicit_slur(tag) {
167
+
return Err(ValidationError::BannedContent {
168
+
path: format!("facets/{}/features/{}/tag", i, j),
169
+
});
170
+
}
171
+
}
172
+
}
173
+
}
174
}
175
}
176
}
···
192
),
193
});
194
}
195
+
if crate::moderation::has_explicit_slur(display_name) {
196
+
return Err(ValidationError::BannedContent {
197
+
path: "displayName".to_string(),
198
+
});
199
+
}
200
}
201
if let Some(description) = obj.get("description").and_then(|v| v.as_str()) {
202
let grapheme_count = description.chars().count();
···
207
"Description exceeds maximum length of 2560 characters (got {})",
208
grapheme_count
209
),
210
+
});
211
+
}
212
+
if crate::moderation::has_explicit_slur(description) {
213
+
return Err(ValidationError::BannedContent {
214
+
path: "description".to_string(),
215
});
216
}
217
}
···
286
if !obj.contains_key("createdAt") {
287
return Err(ValidationError::MissingField("createdAt".to_string()));
288
}
289
+
if let Some(name) = obj.get("name").and_then(|v| v.as_str()) {
290
+
if name.is_empty() || name.len() > 64 {
291
+
return Err(ValidationError::InvalidField {
292
+
path: "name".to_string(),
293
+
message: "Name must be 1-64 characters".to_string(),
294
+
});
295
+
}
296
+
if crate::moderation::has_explicit_slur(name) {
297
+
return Err(ValidationError::BannedContent {
298
+
path: "name".to_string(),
299
+
});
300
+
}
301
}
302
Ok(())
303
}
···
321
fn validate_feed_generator(
322
&self,
323
obj: &serde_json::Map<String, Value>,
324
+
rkey: Option<&str>,
325
) -> Result<(), ValidationError> {
326
if !obj.contains_key("did") {
327
return Err(ValidationError::MissingField("did".to_string()));
···
332
if !obj.contains_key("createdAt") {
333
return Err(ValidationError::MissingField("createdAt".to_string()));
334
}
335
+
if let Some(rkey) = rkey {
336
+
if crate::moderation::has_explicit_slur(rkey) {
337
+
return Err(ValidationError::BannedContent {
338
+
path: "rkey".to_string(),
339
+
});
340
+
}
341
+
}
342
+
if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) {
343
+
if display_name.is_empty() || display_name.len() > 240 {
344
+
return Err(ValidationError::InvalidField {
345
+
path: "displayName".to_string(),
346
+
message: "displayName must be 1-240 characters".to_string(),
347
+
});
348
+
}
349
+
if crate::moderation::has_explicit_slur(display_name) {
350
+
return Err(ValidationError::BannedContent {
351
+
path: "displayName".to_string(),
352
+
});
353
+
}
354
+
}
355
+
Ok(())
356
+
}
357
+
358
+
fn validate_starterpack(
359
+
&self,
360
+
obj: &serde_json::Map<String, Value>,
361
+
) -> Result<(), ValidationError> {
362
+
if !obj.contains_key("name") {
363
+
return Err(ValidationError::MissingField("name".to_string()));
364
+
}
365
+
if !obj.contains_key("createdAt") {
366
+
return Err(ValidationError::MissingField("createdAt".to_string()));
367
+
}
368
+
if let Some(name) = obj.get("name").and_then(|v| v.as_str()) {
369
+
if name.is_empty() || name.len() > 500 {
370
+
return Err(ValidationError::InvalidField {
371
+
path: "name".to_string(),
372
+
message: "name must be 1-500 characters".to_string(),
373
+
});
374
+
}
375
+
if crate::moderation::has_explicit_slur(name) {
376
+
return Err(ValidationError::BannedContent {
377
+
path: "name".to_string(),
378
+
});
379
+
}
380
+
}
381
+
if let Some(description) = obj.get("description").and_then(|v| v.as_str()) {
382
+
if description.len() > 3000 {
383
+
return Err(ValidationError::InvalidField {
384
+
path: "description".to_string(),
385
+
message: "description must be at most 3000 characters".to_string(),
386
+
});
387
+
}
388
+
if crate::moderation::has_explicit_slur(description) {
389
+
return Err(ValidationError::BannedContent {
390
+
path: "description".to_string(),
391
+
});
392
+
}
393
}
394
Ok(())
395
}
+196
tests/banned_words.rs
···
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
···
1
+
/*
2
+
* CONTENT WARNING
3
+
*
4
+
* This file contains explicit slurs and hateful language. We're sorry you have to see them.
5
+
*
6
+
* These words exist here for one reason: to ensure our moderation system correctly blocks them.
7
+
* We can't verify the filter catches the n-word without testing against the actual word.
8
+
* Euphemisms wouldn't prove the protection works.
9
+
*
10
+
* If reading this file has caused you distress, please know:
11
+
* - you are valued and welcome in this community
12
+
* - these words do not reflect the views of this project or its contributors
13
+
* - we maintain this code precisely because we believe everyone deserves an experience on the web that is free from this kinda language
14
+
*/
15
+
16
+
mod common;
17
+
mod helpers;
18
+
use common::*;
19
+
use helpers::*;
20
+
use reqwest::StatusCode;
21
+
use serde_json::json;
22
+
23
+
#[tokio::test]
24
+
async fn test_handle_with_slur_rejected() {
25
+
let client = client();
26
+
let timestamp = chrono::Utc::now().timestamp_millis();
27
+
let offensive_handle = format!("nigger{}", timestamp);
28
+
29
+
let create_payload = json!({
30
+
"handle": offensive_handle,
31
+
"email": format!("test{}@example.com", timestamp),
32
+
"password": "TestPassword123!"
33
+
});
34
+
35
+
let res = client
36
+
.post(format!(
37
+
"{}/xrpc/com.atproto.server.createAccount",
38
+
base_url().await
39
+
))
40
+
.json(&create_payload)
41
+
.send()
42
+
.await
43
+
.expect("Request failed");
44
+
45
+
assert_eq!(res.status(), StatusCode::BAD_REQUEST);
46
+
let body: serde_json::Value = res.json().await.unwrap();
47
+
assert_eq!(body["error"], "InvalidHandle");
48
+
assert!(body["message"]
49
+
.as_str()
50
+
.unwrap_or("")
51
+
.contains("Inappropriate language"));
52
+
}
53
+
54
+
#[tokio::test]
55
+
async fn test_handle_with_normalized_slur_rejected() {
56
+
let client = client();
57
+
let timestamp = chrono::Utc::now().timestamp_millis();
58
+
let offensive_handle = format!("n-i-g-g-e-r{}", timestamp);
59
+
60
+
let create_payload = json!({
61
+
"handle": offensive_handle,
62
+
"email": format!("test{}@example.com", timestamp),
63
+
"password": "TestPassword123!"
64
+
});
65
+
66
+
let res = client
67
+
.post(format!(
68
+
"{}/xrpc/com.atproto.server.createAccount",
69
+
base_url().await
70
+
))
71
+
.json(&create_payload)
72
+
.send()
73
+
.await
74
+
.expect("Request failed");
75
+
76
+
assert_eq!(res.status(), StatusCode::BAD_REQUEST);
77
+
let body: serde_json::Value = res.json().await.unwrap();
78
+
assert_eq!(body["error"], "InvalidHandle");
79
+
}
80
+
81
+
#[tokio::test]
82
+
async fn test_handle_update_with_slur_rejected() {
83
+
let client = client();
84
+
let (_, jwt) = setup_new_user("handleupdate").await;
85
+
86
+
let update_payload = json!({
87
+
"handle": "faggots"
88
+
});
89
+
90
+
let res = client
91
+
.post(format!(
92
+
"{}/xrpc/com.atproto.identity.updateHandle",
93
+
base_url().await
94
+
))
95
+
.bearer_auth(&jwt)
96
+
.json(&update_payload)
97
+
.send()
98
+
.await
99
+
.expect("Request failed");
100
+
101
+
assert_eq!(res.status(), StatusCode::BAD_REQUEST);
102
+
let body: serde_json::Value = res.json().await.unwrap();
103
+
assert_eq!(body["error"], "InvalidHandle");
104
+
}
105
+
106
+
#[tokio::test]
107
+
async fn test_profile_displayname_with_slur_rejected() {
108
+
let client = client();
109
+
let (did, jwt) = setup_new_user("profileslur").await;
110
+
111
+
let profile = json!({
112
+
"repo": did,
113
+
"collection": "app.bsky.actor.profile",
114
+
"rkey": "self",
115
+
"record": {
116
+
"$type": "app.bsky.actor.profile",
117
+
"displayName": "I am a kike"
118
+
}
119
+
});
120
+
121
+
let res = client
122
+
.post(format!(
123
+
"{}/xrpc/com.atproto.repo.putRecord",
124
+
base_url().await
125
+
))
126
+
.bearer_auth(&jwt)
127
+
.json(&profile)
128
+
.send()
129
+
.await
130
+
.expect("Request failed");
131
+
132
+
assert_eq!(res.status(), StatusCode::BAD_REQUEST);
133
+
let body: serde_json::Value = res.json().await.unwrap();
134
+
assert_eq!(body["error"], "InvalidRecord");
135
+
}
136
+
137
+
#[tokio::test]
138
+
async fn test_profile_description_with_slur_rejected() {
139
+
let client = client();
140
+
let (did, jwt) = setup_new_user("profiledesc").await;
141
+
142
+
let profile = json!({
143
+
"repo": did,
144
+
"collection": "app.bsky.actor.profile",
145
+
"rkey": "self",
146
+
"record": {
147
+
"$type": "app.bsky.actor.profile",
148
+
"displayName": "Normal Name",
149
+
"description": "I hate all chinks"
150
+
}
151
+
});
152
+
153
+
let res = client
154
+
.post(format!(
155
+
"{}/xrpc/com.atproto.repo.putRecord",
156
+
base_url().await
157
+
))
158
+
.bearer_auth(&jwt)
159
+
.json(&profile)
160
+
.send()
161
+
.await
162
+
.expect("Request failed");
163
+
164
+
assert_eq!(res.status(), StatusCode::BAD_REQUEST);
165
+
let body: serde_json::Value = res.json().await.unwrap();
166
+
assert_eq!(body["error"], "InvalidRecord");
167
+
}
168
+
169
+
#[tokio::test]
170
+
async fn test_clean_content_allowed() {
171
+
let client = client();
172
+
let (did, jwt) = setup_new_user("cleanpost").await;
173
+
174
+
let post = json!({
175
+
"repo": did,
176
+
"collection": "app.bsky.feed.post",
177
+
"record": {
178
+
"$type": "app.bsky.feed.post",
179
+
"text": "This is a perfectly normal post about coding and technology!",
180
+
"createdAt": chrono::Utc::now().to_rfc3339()
181
+
}
182
+
});
183
+
184
+
let res = client
185
+
.post(format!(
186
+
"{}/xrpc/com.atproto.repo.createRecord",
187
+
base_url().await
188
+
))
189
+
.bearer_auth(&jwt)
190
+
.json(&post)
191
+
.send()
192
+
.await
193
+
.expect("Request failed");
194
+
195
+
assert_eq!(res.status(), StatusCode::OK);
196
+
}