Our Personal Data Server from scratch! tranquil.farm
oauth atproto pds rust postgresql objectstorage fun

feat: initial in-house cache distribution

+4668 -397
+19
.config/nextest.toml
··· 5 5 retries = 0 6 6 fail-fast = true 7 7 test-threads = "num-cpus" 8 + slow-timeout = { period = "30s", terminate-after = 4 } 8 9 9 10 [profile.ci] 10 11 retries = 2 11 12 fail-fast = false 12 13 test-threads = "num-cpus" 14 + slow-timeout = { period = "30s", terminate-after = 4 } 13 15 14 16 [test-groups] 15 17 serial-env-tests = { max-threads = 1 } 18 + heavy-load-tests = { max-threads = 4 } 16 19 17 20 [[profile.default.overrides]] 18 21 filter = "test(/import_with_verification/) | test(/plc_migration/)" 19 22 test-group = "serial-env-tests" 20 23 24 + [[profile.default.overrides]] 25 + filter = "binary(ripple_cluster)" 26 + test-group = "serial-env-tests" 27 + 28 + [[profile.default.overrides]] 29 + filter = "binary(whole_story)" 30 + test-group = "heavy-load-tests" 31 + 21 32 [[profile.ci.overrides]] 22 33 filter = "test(/import_with_verification/) | test(/plc_migration/)" 23 34 test-group = "serial-env-tests" 35 + 36 + [[profile.ci.overrides]] 37 + filter = "binary(ripple_cluster)" 38 + test-group = "serial-env-tests" 39 + 40 + [[profile.ci.overrides]] 41 + filter = "binary(whole_story)" 42 + test-group = "heavy-load-tests"
+10 -2
.env.example
··· 41 41 # BACKUP_RETENTION_COUNT=7 42 42 # BACKUP_INTERVAL_SECS=86400 43 43 # ============================================================================= 44 - # Valkey (for caching and distributed rate limiting) 44 + # Cache & Rate Limiting 45 45 # ============================================================================= 46 - # If not set, falls back to in-memory caching (single-node only) 46 + # Ripple (in-process CRDT cache) is the default. No config needed for single-node. 47 + # Set VALKEY_URL to use valkey instead (disables ripple). 47 48 # VALKEY_URL=redis://localhost:6379 49 + # 50 + # Ripple multi-node settings (only needed when clustering): 51 + # RIPPLE_BIND=0.0.0.0:7890 52 + # RIPPLE_PEERS=10.0.0.2:7890,10.0.0.3:7890 53 + # RIPPLE_MACHINE_ID=1 54 + # RIPPLE_GOSSIP_INTERVAL_MS=200 55 + # RIPPLE_CACHE_MAX_MB=256 48 56 # ============================================================================= 49 57 # Security Secrets 50 58 # =============================================================================
+70
Cargo.lock
··· 867 867 ] 868 868 869 869 [[package]] 870 + name = "bincode" 871 + version = "2.0.1" 872 + source = "registry+https://github.com/rust-lang/crates.io-index" 873 + checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" 874 + dependencies = [ 875 + "bincode_derive", 876 + "serde", 877 + "unty", 878 + ] 879 + 880 + [[package]] 881 + name = "bincode_derive" 882 + version = "2.0.1" 883 + source = "registry+https://github.com/rust-lang/crates.io-index" 884 + checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" 885 + dependencies = [ 886 + "virtue", 887 + ] 888 + 889 + [[package]] 870 890 name = "bitflags" 871 891 version = "2.10.0" 872 892 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2014 2034 version = "1.0.7" 2015 2035 source = "registry+https://github.com/rust-lang/crates.io-index" 2016 2036 checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 2037 + 2038 + [[package]] 2039 + name = "foca" 2040 + version = "1.0.0" 2041 + source = "registry+https://github.com/rust-lang/crates.io-index" 2042 + checksum = "1f59e967f3f675997e4a4a6b99d2a75148d59d64c46211b78b4f34ebb951b273" 2043 + dependencies = [ 2044 + "bincode", 2045 + "bytes", 2046 + "rand 0.9.2", 2047 + "serde", 2048 + "tracing", 2049 + ] 2017 2050 2018 2051 [[package]] 2019 2052 name = "foldhash" ··· 5957 5990 "async-trait", 5958 5991 "base64 0.22.1", 5959 5992 "redis", 5993 + "tokio-util", 5960 5994 "tracing", 5961 5995 "tranquil-infra", 5996 + "tranquil-ripple", 5962 5997 ] 5963 5998 5964 5999 [[package]] ··· 6131 6166 "tranquil-db-traits", 6132 6167 "tranquil-oauth", 6133 6168 "tranquil-repo", 6169 + "tranquil-ripple", 6134 6170 "tranquil-scopes", 6135 6171 "tranquil-storage", 6136 6172 "tranquil-types", ··· 6154 6190 ] 6155 6191 6156 6192 [[package]] 6193 + name = "tranquil-ripple" 6194 + version = "0.1.0" 6195 + dependencies = [ 6196 + "async-trait", 6197 + "backon", 6198 + "bincode", 6199 + "bytes", 6200 + "foca", 
6201 + "futures", 6202 + "parking_lot", 6203 + "rand 0.9.2", 6204 + "serde", 6205 + "thiserror 2.0.17", 6206 + "tokio", 6207 + "tokio-util", 6208 + "tracing", 6209 + "tracing-subscriber", 6210 + "tranquil-infra", 6211 + "uuid", 6212 + ] 6213 + 6214 + [[package]] 6157 6215 name = "tranquil-scopes" 6158 6216 version = "0.1.0" 6159 6217 dependencies = [ ··· 6301 6359 checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" 6302 6360 6303 6361 [[package]] 6362 + name = "unty" 6363 + version = "0.0.4" 6364 + source = "registry+https://github.com/rust-lang/crates.io-index" 6365 + checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" 6366 + 6367 + [[package]] 6304 6368 name = "ureq" 6305 6369 version = "3.1.4" 6306 6370 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 6389 6453 version = "0.9.5" 6390 6454 source = "registry+https://github.com/rust-lang/crates.io-index" 6391 6455 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" 6456 + 6457 + [[package]] 6458 + name = "virtue" 6459 + version = "0.0.18" 6460 + source = "registry+https://github.com/rust-lang/crates.io-index" 6461 + checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" 6392 6462 6393 6463 [[package]] 6394 6464 name = "vsimd"
+6 -1
Cargo.toml
··· 6 6 "crates/tranquil-crypto", 7 7 "crates/tranquil-storage", 8 8 "crates/tranquil-cache", 9 + "crates/tranquil-ripple", 9 10 "crates/tranquil-repo", 10 11 "crates/tranquil-scopes", 11 12 "crates/tranquil-auth", ··· 17 18 ] 18 19 19 20 [workspace.package] 20 - version = "0.1.0" 21 + version = "0.2.0" 21 22 edition = "2024" 22 23 license = "AGPL-3.0-or-later" 23 24 ··· 34 35 tranquil-comms = { path = "crates/tranquil-comms" } 35 36 tranquil-db-traits = { path = "crates/tranquil-db-traits" } 36 37 tranquil-db = { path = "crates/tranquil-db" } 38 + tranquil-ripple = { path = "crates/tranquil-ripple" } 37 39 38 40 aes-gcm = "0.10" 39 41 backon = "1" 42 + bincode = { version = "2", features = ["serde"] } 40 43 anyhow = "1.0" 41 44 async-trait = "0.1" 42 45 aws-config = "1.8" ··· 51 54 cid = "0.11" 52 55 dotenvy = "0.15" 53 56 ed25519-dalek = { version = "2.1", features = ["pkcs8"] } 57 + foca = { version = "1", features = ["bincode-codec", "tracing"] } 54 58 futures = "0.3" 55 59 futures-util = "0.3" 56 60 governor = "0.10" ··· 70 74 metrics = "0.24" 71 75 metrics-exporter-prometheus = { version = "0.16", default-features = false, features = ["http-listener"] } 72 76 multibase = "0.9" 77 + parking_lot = "0.12" 73 78 multihash = "0.19" 74 79 p256 = { version = "0.13", features = ["ecdsa"] } 75 80 p384 = { version = "0.13", features = ["ecdsa"] }
+1 -1
README.md
··· 14 14 15 15 It is a superset of the reference PDS, including: passkeys and 2FA (WebAuthn/FIDO2, TOTP, backup codes, trusted devices), SSO login and signup, did:web support (PDS-hosted subdomains or bring-your-own), multi-channel communication (email, discord, telegram, signal) for verification and alerts, granular OAuth scopes with a consent UI showing human-readable descriptions, app passwords with granular permissions (read-only, post-only, or custom scopes), account delegation (letting others manage an account with configurable permission levels), automatic backups (configurable retention and frequency, one-click restore), and a built-in web UI for account management, OAuth consent, repo browsing, and admin. 16 16 17 - The PDS itself is a single small binary with no node/npm runtime. It requires postgres and stores blobs on the local filesystem. Valkey is optional (enables distributed rate limiting for multi-node setups). 17 + The PDS itself is a single small binary with no node/npm runtime. It requires postgres. Blobs are stored on the local filesystem by default (S3 optional). Valkey is optional (supported as an alternative to the built-in cache). 18 18 19 19 ## Quick Start 20 20
+2
crates/tranquil-cache/Cargo.toml
··· 6 6 7 7 [dependencies] 8 8 tranquil-infra = { workspace = true } 9 + tranquil-ripple = { workspace = true } 9 10 10 11 async-trait = { workspace = true } 11 12 base64 = { workspace = true } 12 13 redis = { workspace = true } 14 + tokio-util = { workspace = true } 13 15 tracing = { workspace = true }
+58 -28
crates/tranquil-cache/src/lib.rs
··· 42 42 redis::cmd("SET") 43 43 .arg(key) 44 44 .arg(value) 45 - .arg("EX") 46 - .arg(ttl.as_secs() as i64) 45 + .arg("PX") 46 + .arg(ttl.as_millis().min(i64::MAX as u128) as i64) 47 47 .query_async::<()>(&mut conn) 48 48 .await 49 49 .map_err(|e| CacheError::Connection(e.to_string())) ··· 114 114 let mut conn = self.conn.clone(); 115 115 let full_key = format!("rl:{}", key); 116 116 let window_secs = window_ms.div_ceil(1000).max(1) as i64; 117 - let count: Result<i64, _> = redis::cmd("INCR") 118 - .arg(&full_key) 119 - .query_async(&mut conn) 120 - .await; 121 - let count = match count { 122 - Ok(c) => c, 117 + let result: Result<i64, _> = redis::Script::new( 118 + r"local c = redis.call('INCR', KEYS[1]) 119 + if c == 1 then redis.call('EXPIRE', KEYS[1], ARGV[1]) end 120 + if redis.call('TTL', KEYS[1]) == -1 then redis.call('EXPIRE', KEYS[1], ARGV[1]) end 121 + return c" 122 + ) 123 + .key(&full_key) 124 + .arg(window_secs) 125 + .invoke_async(&mut conn) 126 + .await; 127 + match result { 128 + Ok(count) => count <= limit as i64, 123 129 Err(e) => { 124 - tracing::warn!("Redis rate limit INCR failed: {}. 
Allowing request.", e); 125 - return true; 130 + tracing::warn!(error = %e, "redis rate limit script failed, allowing request"); 131 + true 126 132 } 127 - }; 128 - if count == 1 { 129 - let _: Result<bool, redis::RedisError> = redis::cmd("EXPIRE") 130 - .arg(&full_key) 131 - .arg(window_secs) 132 - .query_async(&mut conn) 133 - .await; 134 133 } 135 - count <= limit as i64 134 + } 135 + 136 + async fn peek_rate_limit_count(&self, key: &str, _window_ms: u64) -> u64 { 137 + let mut conn = self.conn.clone(); 138 + let full_key = format!("rl:{}", key); 139 + redis::cmd("GET") 140 + .arg(&full_key) 141 + .query_async::<Option<u64>>(&mut conn) 142 + .await 143 + .ok() 144 + .flatten() 145 + .unwrap_or(0) 136 146 } 137 147 } 138 148 ··· 145 155 } 146 156 } 147 157 148 - pub async fn create_cache() -> (Arc<dyn Cache>, Arc<dyn DistributedRateLimiter>) { 149 - match std::env::var("VALKEY_URL") { 150 - Ok(url) => match ValkeyCache::new(&url).await { 158 + pub async fn create_cache( 159 + shutdown: tokio_util::sync::CancellationToken, 160 + ) -> (Arc<dyn Cache>, Arc<dyn DistributedRateLimiter>) { 161 + if let Ok(url) = std::env::var("VALKEY_URL") { 162 + match ValkeyCache::new(&url).await { 151 163 Ok(cache) => { 152 - tracing::info!("Connected to Valkey cache at {}", url); 164 + tracing::info!("using valkey cache at {url}"); 153 165 let rate_limiter = Arc::new(RedisRateLimiter::new(cache.connection())); 154 - (Arc::new(cache), rate_limiter) 166 + return (Arc::new(cache), rate_limiter); 155 167 } 156 168 Err(e) => { 157 - tracing::warn!("Failed to connect to Valkey: {}. Running without cache.", e); 158 - (Arc::new(NoOpCache), Arc::new(NoOpRateLimiter)) 169 + tracing::warn!("failed to connect to valkey: {e}. 
falling back to ripple."); 170 + } 171 + } 172 + } 173 + 174 + match tranquil_ripple::RippleConfig::from_env() { 175 + Ok(config) => { 176 + let peer_count = config.seed_peers.len(); 177 + match tranquil_ripple::RippleEngine::start(config, shutdown).await { 178 + Ok((cache, rate_limiter, _bound_addr)) => { 179 + match peer_count { 180 + 0 => tracing::info!("ripple cache started (single-node)"), 181 + n => tracing::info!("ripple cache started ({n} seed peers)"), 182 + } 183 + (cache, rate_limiter) 184 + } 185 + Err(e) => { 186 + tracing::error!("ripple engine failed to start: {e}. running without cache."); 187 + (Arc::new(NoOpCache), Arc::new(NoOpRateLimiter)) 188 + } 159 189 } 160 - }, 161 - Err(_) => { 162 - tracing::info!("VALKEY_URL not set. Running without cache."); 190 + } 191 + Err(e) => { 192 + tracing::error!("ripple config error: {e}. running without cache."); 163 193 (Arc::new(NoOpCache), Arc::new(NoOpRateLimiter)) 164 194 } 165 195 }
+3
crates/tranquil-infra/src/lib.rs
··· 81 81 #[async_trait] 82 82 pub trait DistributedRateLimiter: Send + Sync { 83 83 async fn check_rate_limit(&self, key: &str, limit: u32, window_ms: u64) -> bool; 84 + async fn peek_rate_limit_count(&self, _key: &str, _window_ms: u64) -> u64 { 85 + 0 86 + } 84 87 }
+1
crates/tranquil-pds/Cargo.toml
··· 89 89 ctor = { workspace = true } 90 90 testcontainers = { workspace = true } 91 91 testcontainers-modules = { workspace = true } 92 + tranquil-ripple = { workspace = true } 92 93 wiremock = { workspace = true }
+23 -16
crates/tranquil-pds/src/api/repo/blob.rs
··· 30 30 ); 31 31 } 32 32 detected 33 - } else if client_hint == "*/*" || client_hint.is_empty() { 34 - warn!( 35 - "Could not detect MIME type and client sent invalid hint: '{}'", 36 - client_hint 37 - ); 38 - "application/octet-stream".to_string() 39 33 } else { 40 - client_hint.to_string() 34 + match client_hint { 35 + "" | "*/*" => "application/octet-stream".to_string(), 36 + hint if hint.starts_with("text/html") || hint.starts_with("application/xhtml") => { 37 + "application/octet-stream".to_string() 38 + } 39 + hint => hint.to_string(), 40 + } 41 41 } 42 42 } 43 43 ··· 85 85 .user_repo 86 86 .get_id_by_did(&did) 87 87 .await 88 - .ok() 89 - .flatten() 88 + .log_db_err("fetching user id for blob upload")? 90 89 .ok_or(ApiError::InternalError(None))?; 91 90 92 91 let temp_key = format!("temp/{}", uuid::Uuid::new_v4()); ··· 136 135 }; 137 136 let cid = Cid::new_v1(0x55, multihash); 138 137 let cid_str = cid.to_string(); 139 - let cid_link: CidLink = unsafe { CidLink::new_unchecked(&cid_str) }; 138 + let cid_link: CidLink = CidLink::new(&cid_str).map_err(|e| { 139 + error!("Failed to construct CidLink from computed CID: {:?}", e); 140 + ApiError::InternalError(Some("Failed to construct CID".into())) 141 + })?; 140 142 let storage_key = cid_str.clone(); 141 143 142 144 info!( ··· 144 146 size, cid_str 145 147 ); 146 148 147 - let was_inserted = match state 149 + match state 148 150 .blob_repo 149 151 .insert_blob(&cid_link, &mime_type, size as i64, user_id, &storage_key) 150 152 .await 151 153 { 152 - Ok(Some(_)) => true, 153 - Ok(None) => false, 154 + Ok(_) => {} 154 155 Err(e) => { 155 156 let _ = state.blob_store.delete(&temp_key).await; 156 157 error!("Failed to insert blob record: {:?}", e); ··· 158 159 } 159 160 }; 160 161 161 - if was_inserted && let Err(e) = state.blob_store.copy(&temp_key, &storage_key).await { 162 + if let Err(e) = state.blob_store.copy(&temp_key, &storage_key).await { 162 163 let _ = state.blob_store.delete(&temp_key).await; 164 + if 
let Err(db_err) = state.blob_repo.delete_blob_by_cid(&cid_link).await { 165 + error!("Failed to clean up orphaned blob record after copy failure: {:?}", db_err); 166 + } 163 167 error!("Failed to copy blob to final location: {:?}", e); 164 168 return Err(ApiError::InternalError(Some("Failed to store blob".into()))); 165 169 } ··· 167 171 let _ = state.blob_store.delete(&temp_key).await; 168 172 169 173 if let Some(ref controller) = controller_did { 170 - let _ = state 174 + if let Err(e) = state 171 175 .delegation_repo 172 176 .log_delegation_action( 173 177 &did, ··· 182 186 None, 183 187 None, 184 188 ) 185 - .await; 189 + .await 190 + { 191 + warn!("Failed to log delegation action for blob upload: {:?}", e); 192 + } 186 193 } 187 194 188 195 Ok(Json(json!({
+40 -16
crates/tranquil-pds/src/state.rs
··· 13 13 use sqlx::PgPool; 14 14 use std::error::Error; 15 15 use std::sync::Arc; 16 + use std::sync::atomic::{AtomicBool, Ordering}; 16 17 use tokio::sync::broadcast; 17 18 use tokio_util::sync::CancellationToken; 18 19 use tranquil_db::{ ··· 20 21 OAuthRepository, PostgresRepositories, RepoEventNotifier, RepoRepository, SessionRepository, 21 22 SsoRepository, UserRepository, 22 23 }; 24 + 25 + static RATE_LIMITING_DISABLED: AtomicBool = AtomicBool::new(false); 26 + 27 + pub fn init_rate_limit_override() { 28 + let disabled = std::env::var("DISABLE_RATE_LIMITING").is_ok(); 29 + RATE_LIMITING_DISABLED.store(disabled, Ordering::Relaxed); 30 + if disabled { 31 + tracing::warn!("rate limiting is DISABLED via DISABLE_RATE_LIMITING env var"); 32 + } 33 + } 23 34 24 35 #[derive(Clone)] 25 36 pub struct AppState { ··· 173 184 174 185 pub async fn from_db(db: PgPool, shutdown: CancellationToken) -> Self { 175 186 AuthConfig::init(); 187 + init_rate_limit_override(); 176 188 177 189 let repos = Arc::new(PostgresRepositories::new(db.clone())); 178 190 let block_store = PostgresBlockStore::new(db); ··· 188 200 let rate_limiters = Arc::new(RateLimiters::new()); 189 201 let repo_write_locks = Arc::new(RepoWriteLocks::new()); 190 202 let circuit_breakers = Arc::new(CircuitBreakers::new()); 191 - let (cache, distributed_rate_limiter) = create_cache().await; 203 + let (cache, distributed_rate_limiter) = create_cache(shutdown.clone()).await; 192 204 let did_resolver = Arc::new(DidResolver::new()); 193 205 let sso_config = SsoConfig::init(); 194 206 let sso_manager = SsoManager::from_config(sso_config); ··· 231 243 self 232 244 } 233 245 246 + pub fn with_cache( 247 + mut self, 248 + cache: Arc<dyn Cache>, 249 + distributed_rate_limiter: Arc<dyn DistributedRateLimiter>, 250 + ) -> Self { 251 + self.cache = cache; 252 + self.distributed_rate_limiter = distributed_rate_limiter; 253 + self 254 + } 255 + 234 256 pub fn with_circuit_breakers(mut self, circuit_breakers: CircuitBreakers) 
-> Self { 235 257 self.circuit_breakers = Arc::new(circuit_breakers); 236 258 self 237 259 } 238 260 239 261 pub async fn check_rate_limit(&self, kind: RateLimitKind, client_ip: &str) -> bool { 240 - if std::env::var("DISABLE_RATE_LIMITING").is_ok() { 262 + if RATE_LIMITING_DISABLED.load(Ordering::Relaxed) { 241 263 return true; 242 264 } 243 265 244 - let key = format!("{}:{}", kind.key_prefix(), client_ip); 245 266 let limiter_name = kind.key_prefix(); 246 - let (limit, window_ms) = kind.limit_and_window_ms(); 247 - 248 - if !self 249 - .distributed_rate_limiter 250 - .check_rate_limit(&key, limit, window_ms) 251 - .await 252 - { 253 - crate::metrics::record_rate_limit_rejection(limiter_name); 254 - return false; 255 - } 256 267 257 268 let limiter = match kind { 258 269 RateLimitKind::Login => &self.rate_limiters.login, ··· 277 288 RateLimitKind::HandleVerification => &self.rate_limiters.handle_verification, 278 289 }; 279 290 280 - let ok = limiter.check_key(&client_ip.to_string()).is_ok(); 281 - if !ok { 291 + if limiter.check_key(&client_ip.to_string()).is_err() { 292 + crate::metrics::record_rate_limit_rejection(limiter_name); 293 + return false; 294 + } 295 + 296 + let key = format!("{}:{}", kind.key_prefix(), client_ip); 297 + let (limit, window_ms) = kind.limit_and_window_ms(); 298 + 299 + if !self 300 + .distributed_rate_limiter 301 + .check_rate_limit(&key, limit, window_ms) 302 + .await 303 + { 282 304 crate::metrics::record_rate_limit_rejection(limiter_name); 305 + return false; 283 306 } 284 - ok 307 + 308 + true 285 309 } 286 310 }
+1 -1
crates/tranquil-pds/tests/admin_search.rs
··· 12 12 let (user_did, _) = setup_new_user("search-target").await; 13 13 let mut found = false; 14 14 let mut cursor: Option<String> = None; 15 - for _ in 0..10 { 15 + for _ in 0..100 { 16 16 let url = match &cursor { 17 17 Some(c) => format!( 18 18 "{}/xrpc/com.atproto.admin.searchAccounts?limit=100&cursor={}",
+270 -72
crates/tranquil-pds/tests/common/mod.rs
··· 9 9 use serde_json::{Value, json}; 10 10 use sqlx::postgres::PgPoolOptions; 11 11 use std::collections::HashMap; 12 + use std::net::SocketAddr; 12 13 use std::path::PathBuf; 13 14 use std::sync::{Arc, OnceLock, RwLock}; 14 15 #[allow(unused_imports)] 15 16 use std::time::Duration; 16 17 use tokio::net::TcpListener; 17 18 use tokio_util::sync::CancellationToken; 19 + use tranquil_pds::cache::{Cache, DistributedRateLimiter}; 18 20 use tranquil_pds::state::AppState; 19 21 use wiremock::matchers::{method, path}; 20 22 use wiremock::{Mock, MockServer, Request, Respond, ResponseTemplate}; ··· 25 27 static MOCK_PLC: OnceLock<MockServer> = OnceLock::new(); 26 28 static TEST_DB_POOL: OnceLock<sqlx::PgPool> = OnceLock::new(); 27 29 static TEST_TEMP_DIR: OnceLock<PathBuf> = OnceLock::new(); 30 + static CLUSTER: OnceLock<Vec<ServerInstance>> = OnceLock::new(); 31 + 32 + #[allow(dead_code)] 33 + pub struct ServerConfig { 34 + pub pool: sqlx::PgPool, 35 + pub cache: Option<(Arc<dyn Cache>, Arc<dyn DistributedRateLimiter>)>, 36 + } 37 + 38 + #[allow(dead_code)] 39 + #[derive(Clone)] 40 + pub struct ServerInstance { 41 + pub url: String, 42 + pub port: u16, 43 + pub cache: Option<Arc<dyn Cache>>, 44 + pub distributed_rate_limiter: Option<Arc<dyn DistributedRateLimiter>>, 45 + } 28 46 29 47 #[cfg(all(not(feature = "external-infra"), feature = "s3-storage"))] 30 48 use testcontainers::GenericImage; ··· 139 157 std::env::var("DATABASE_URL").expect("DATABASE_URL must be set when using external infra"); 140 158 let plc_url = setup_mock_plc_directory().await; 141 159 unsafe { 142 - if std::env::var("S3_ENDPOINT").is_ok() { 143 - let s3_endpoint = std::env::var("S3_ENDPOINT").unwrap(); 144 - std::env::set_var("BLOB_STORAGE_BACKEND", "s3"); 145 - std::env::set_var("BACKUP_STORAGE_BACKEND", "s3"); 146 - std::env::set_var("BACKUP_S3_BUCKET", "test-backups"); 147 - std::env::set_var( 148 - "S3_BUCKET", 149 - std::env::var("S3_BUCKET").unwrap_or_else(|_| "test-bucket".to_string()), 150 - 
); 151 - std::env::set_var( 152 - "AWS_ACCESS_KEY_ID", 153 - std::env::var("AWS_ACCESS_KEY_ID").unwrap_or_else(|_| "minioadmin".to_string()), 154 - ); 155 - std::env::set_var( 156 - "AWS_SECRET_ACCESS_KEY", 157 - std::env::var("AWS_SECRET_ACCESS_KEY").unwrap_or_else(|_| "minioadmin".to_string()), 158 - ); 159 - std::env::set_var( 160 - "AWS_REGION", 161 - std::env::var("AWS_REGION").unwrap_or_else(|_| "us-east-1".to_string()), 162 - ); 163 - std::env::set_var("S3_ENDPOINT", &s3_endpoint); 164 - } else if std::env::var("BLOB_STORAGE_PATH").is_ok() { 165 - std::env::set_var("BLOB_STORAGE_BACKEND", "filesystem"); 166 - std::env::set_var("BACKUP_STORAGE_BACKEND", "filesystem"); 167 - } else { 168 - panic!("Either S3_ENDPOINT or BLOB_STORAGE_PATH must be set for external-infra"); 169 - } 170 - std::env::set_var("MAX_IMPORT_SIZE", "100000000"); 171 - std::env::set_var("SKIP_IMPORT_VERIFICATION", "true"); 160 + configure_external_storage_env(); 172 161 std::env::set_var("PLC_DIRECTORY_URL", &plc_url); 173 162 } 174 - let mock_server = MockServer::start().await; 175 - setup_mock_appview(&mock_server).await; 176 - let mock_uri = mock_server.uri(); 177 - let mock_host = mock_uri.strip_prefix("http://").unwrap_or(&mock_uri); 178 - let mock_did = format!("did:web:{}", mock_host.replace(':', "%3A")); 179 - setup_mock_did_document(&mock_server, &mock_did, &mock_uri).await; 180 - MOCK_APPVIEW.set(mock_server).ok(); 163 + register_mock_appview().await; 181 164 spawn_app(database_url).await 182 165 } 183 166 ··· 199 182 std::env::set_var("SKIP_IMPORT_VERIFICATION", "true"); 200 183 std::env::set_var("PLC_DIRECTORY_URL", &plc_url); 201 184 } 202 - let mock_server = MockServer::start().await; 203 - setup_mock_appview(&mock_server).await; 204 - let mock_uri = mock_server.uri(); 205 - let mock_host = mock_uri.strip_prefix("http://").unwrap_or(&mock_uri); 206 - let mock_did = format!("did:web:{}", mock_host.replace(':', "%3A")); 207 - setup_mock_did_document(&mock_server, &mock_did, 
&mock_uri).await; 208 - MOCK_APPVIEW.set(mock_server).ok(); 185 + register_mock_appview().await; 209 186 let container = Postgres::default() 210 187 .with_tag("18-alpine") 211 188 .with_label("tranquil_pds_test", "true") ··· 275 252 .bucket("test-backups") 276 253 .send() 277 254 .await; 278 - let mock_server = MockServer::start().await; 279 - setup_mock_appview(&mock_server).await; 280 - let mock_uri = mock_server.uri(); 281 - let mock_host = mock_uri.strip_prefix("http://").unwrap_or(&mock_uri); 282 - let mock_did = format!("did:web:{}", mock_host.replace(':', "%3A")); 283 - setup_mock_did_document(&mock_server, &mock_did, &mock_uri).await; 284 - MOCK_APPVIEW.set(mock_server).ok(); 255 + register_mock_appview().await; 285 256 S3_CONTAINER.set(s3_container).ok(); 286 257 let container = Postgres::default() 287 258 .with_tag("18-alpine") ··· 324 295 325 296 async fn setup_mock_appview(_mock_server: &MockServer) {} 326 297 298 + async fn register_mock_appview() { 299 + let mock_server = MockServer::start().await; 300 + setup_mock_appview(&mock_server).await; 301 + let mock_uri = mock_server.uri(); 302 + let mock_host = mock_uri.strip_prefix("http://").unwrap_or(&mock_uri); 303 + let mock_did = format!("did:web:{}", mock_host.replace(':', "%3A")); 304 + setup_mock_did_document(&mock_server, &mock_did, &mock_uri).await; 305 + MOCK_APPVIEW.set(mock_server).ok(); 306 + } 307 + 308 + unsafe fn configure_external_storage_env() { 309 + unsafe { 310 + if std::env::var("S3_ENDPOINT").is_ok() { 311 + let s3_endpoint = std::env::var("S3_ENDPOINT").unwrap(); 312 + std::env::set_var("BLOB_STORAGE_BACKEND", "s3"); 313 + std::env::set_var("BACKUP_STORAGE_BACKEND", "s3"); 314 + std::env::set_var("BACKUP_S3_BUCKET", "test-backups"); 315 + std::env::set_var( 316 + "S3_BUCKET", 317 + std::env::var("S3_BUCKET").unwrap_or_else(|_| "test-bucket".to_string()), 318 + ); 319 + std::env::set_var( 320 + "AWS_ACCESS_KEY_ID", 321 + std::env::var("AWS_ACCESS_KEY_ID").unwrap_or_else(|_| 
"minioadmin".to_string()), 322 + ); 323 + std::env::set_var( 324 + "AWS_SECRET_ACCESS_KEY", 325 + std::env::var("AWS_SECRET_ACCESS_KEY").unwrap_or_else(|_| "minioadmin".to_string()), 326 + ); 327 + std::env::set_var( 328 + "AWS_REGION", 329 + std::env::var("AWS_REGION").unwrap_or_else(|_| "us-east-1".to_string()), 330 + ); 331 + std::env::set_var("S3_ENDPOINT", &s3_endpoint); 332 + } else { 333 + let process_dir = std::env::temp_dir().join(format!( 334 + "tranquil-pds-test-{}", 335 + std::process::id() 336 + )); 337 + let blob_path = process_dir.join("blobs"); 338 + let backup_path = process_dir.join("backups"); 339 + std::fs::create_dir_all(&blob_path).expect("Failed to create blob directory"); 340 + std::fs::create_dir_all(&backup_path).expect("Failed to create backup directory"); 341 + TEST_TEMP_DIR.set(process_dir).ok(); 342 + std::env::set_var("BLOB_STORAGE_BACKEND", "filesystem"); 343 + std::env::set_var("BLOB_STORAGE_PATH", blob_path.to_str().unwrap()); 344 + std::env::set_var("BACKUP_STORAGE_BACKEND", "filesystem"); 345 + std::env::set_var("BACKUP_STORAGE_PATH", backup_path.to_str().unwrap()); 346 + } 347 + std::env::set_var("MAX_IMPORT_SIZE", "100000000"); 348 + std::env::set_var("SKIP_IMPORT_VERIFICATION", "true"); 349 + } 350 + } 351 + 327 352 type PlcOperationStore = Arc<RwLock<HashMap<String, Value>>>; 328 353 329 354 struct PlcPostResponder { ··· 515 540 plc_url 516 541 } 517 542 518 - async fn spawn_app(database_url: String) -> String { 543 + async fn spawn_server(config: ServerConfig) -> ServerInstance { 519 544 use tranquil_pds::rate_limit::RateLimiters; 520 - let pool = PgPoolOptions::new() 521 - .max_connections(10) 522 - .acquire_timeout(std::time::Duration::from_secs(30)) 523 - .connect(&database_url) 524 - .await 525 - .expect("Failed to connect to Postgres. 
Make sure the database is running."); 526 - sqlx::migrate!("./migrations") 527 - .run(&pool) 528 - .await 529 - .expect("Failed to run migrations"); 530 - let test_pool = PgPoolOptions::new() 531 - .max_connections(5) 532 - .acquire_timeout(std::time::Duration::from_secs(30)) 533 - .connect(&database_url) 534 - .await 535 - .expect("Failed to create test pool"); 536 - TEST_DB_POOL.set(test_pool).ok(); 537 545 let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); 538 546 let addr = listener.local_addr().unwrap(); 539 - APP_PORT.set(addr.port()).ok(); 540 547 unsafe { 541 548 std::env::set_var("PDS_HOSTNAME", format!("pds.test:{}", addr.port())); 542 549 } ··· 547 554 .with_email_update_limit(10000) 548 555 .with_oauth_authorize_limit(10000) 549 556 .with_oauth_token_limit(10000); 550 - let state = AppState::from_db(pool, CancellationToken::new()) 557 + let cache_refs = config.cache.as_ref().map(|(c, r)| (c.clone(), r.clone())); 558 + let mut state = AppState::from_db(config.pool, CancellationToken::new()) 551 559 .await 552 560 .with_rate_limiters(rate_limiters); 561 + if let Some((cache, distributed_rate_limiter)) = config.cache { 562 + state = state.with_cache(cache, distributed_rate_limiter); 563 + } 553 564 tranquil_pds::sync::listener::start_sequencer_listener(state.clone()).await; 554 565 let app = tranquil_pds::app(state); 555 566 tokio::spawn(async move { 556 567 axum::serve(listener, app).await.unwrap(); 557 568 }); 558 - format!("http://localhost:{}", addr.port()) 569 + let (cache, distributed_rate_limiter) = cache_refs 570 + .map(|(c, r)| (Some(c), Some(r))) 571 + .unwrap_or((None, None)); 572 + ServerInstance { 573 + url: format!("http://localhost:{}", addr.port()), 574 + port: addr.port(), 575 + cache, 576 + distributed_rate_limiter, 577 + } 578 + } 579 + 580 + async fn spawn_app(database_url: String) -> String { 581 + let pool = PgPoolOptions::new() 582 + .max_connections(10) 583 + .acquire_timeout(std::time::Duration::from_secs(30)) 584 + 
.connect(&database_url) 585 + .await 586 + .expect("Failed to connect to Postgres. Make sure the database is running."); 587 + sqlx::migrate!("./migrations") 588 + .run(&pool) 589 + .await 590 + .expect("Failed to run migrations"); 591 + let test_pool = PgPoolOptions::new() 592 + .max_connections(2) 593 + .acquire_timeout(std::time::Duration::from_secs(30)) 594 + .connect(&database_url) 595 + .await 596 + .expect("Failed to create test pool"); 597 + TEST_DB_POOL.set(test_pool).ok(); 598 + let instance = spawn_server(ServerConfig { pool, cache: None }).await; 599 + APP_PORT.set(instance.port).ok(); 600 + instance.url 601 + } 602 + 603 + #[allow(dead_code)] 604 + pub async fn spawn_cluster(database_url: String, node_count: usize) -> Vec<ServerInstance> { 605 + use tranquil_ripple::{RippleConfig, RippleEngine}; 606 + 607 + let pool = PgPoolOptions::new() 608 + .max_connections(10) 609 + .acquire_timeout(std::time::Duration::from_secs(30)) 610 + .connect(&database_url) 611 + .await 612 + .expect("Failed to connect to Postgres for cluster"); 613 + sqlx::migrate!("./migrations") 614 + .run(&pool) 615 + .await 616 + .expect("Failed to run migrations for cluster"); 617 + let test_pool = PgPoolOptions::new() 618 + .max_connections(2) 619 + .acquire_timeout(std::time::Duration::from_secs(30)) 620 + .connect(&database_url) 621 + .await 622 + .expect("Failed to create test pool for cluster"); 623 + TEST_DB_POOL.set(test_pool).ok(); 624 + 625 + let shutdown = CancellationToken::new(); 626 + 627 + let mut ripple_nodes: Vec<(Arc<dyn Cache>, Arc<dyn DistributedRateLimiter>)> = 628 + Vec::with_capacity(node_count); 629 + let mut bound_addrs: Vec<SocketAddr> = Vec::with_capacity(node_count); 630 + 631 + for i in 0..node_count { 632 + let config = RippleConfig { 633 + bind_addr: "127.0.0.1:0".parse().unwrap(), 634 + seed_peers: bound_addrs.clone(), 635 + machine_id: i as u64 + 1, 636 + gossip_interval_ms: 100, 637 + cache_max_bytes: 64 * 1024 * 1024, 638 + }; 639 + let (cache, 
rate_limiter, addr) = RippleEngine::start(config, shutdown.clone()) 640 + .await 641 + .expect("failed to start ripple node"); 642 + bound_addrs.push(addr); 643 + ripple_nodes.push((cache, rate_limiter)); 644 + } 645 + 646 + let mut instances: Vec<ServerInstance> = Vec::with_capacity(node_count); 647 + for (cache, rate_limiter) in ripple_nodes { 648 + let server_config = ServerConfig { 649 + pool: pool.clone(), 650 + cache: Some((cache, rate_limiter)), 651 + }; 652 + let instance = spawn_server(server_config).await; 653 + instances.push(instance); 654 + } 655 + 656 + let first = &instances[0]; 657 + APP_PORT.set(first.port).ok(); 658 + 659 + tokio::time::sleep(Duration::from_millis(2000)).await; 660 + 661 + instances 662 + } 663 + 664 + #[allow(dead_code)] 665 + pub async fn cluster() -> &'static [ServerInstance] { 666 + CLUSTER.get_or_init(|| { 667 + let (tx, rx) = std::sync::mpsc::channel(); 668 + std::thread::spawn(move || { 669 + unsafe { 670 + std::env::set_var("TRANQUIL_PDS_ALLOW_INSECURE_SECRETS", "1"); 671 + } 672 + if std::env::var("DOCKER_HOST").is_err() 673 + && let Ok(runtime_dir) = std::env::var("XDG_RUNTIME_DIR") 674 + { 675 + let podman_sock = std::path::Path::new(&runtime_dir).join("podman/podman.sock"); 676 + if podman_sock.exists() { 677 + unsafe { 678 + std::env::set_var( 679 + "DOCKER_HOST", 680 + format!("unix://{}", podman_sock.display()), 681 + ); 682 + } 683 + } 684 + } 685 + let rt = tokio::runtime::Runtime::new().unwrap(); 686 + rt.block_on(async move { 687 + unsafe { 688 + std::env::remove_var("DISABLE_RATE_LIMITING"); 689 + } 690 + let database_url = if has_external_infra() { 691 + setup_cluster_external_infra().await 692 + } else { 693 + setup_cluster_testcontainers().await 694 + }; 695 + let nodes = spawn_cluster(database_url, 3).await; 696 + tx.send(nodes).unwrap(); 697 + std::future::pending::<()>().await; 698 + }); 699 + }); 700 + rx.recv().expect("Failed to start test cluster") 701 + }) 702 + } 703 + 704 + async fn 
setup_cluster_external_infra() -> String { 705 + let database_url = 706 + std::env::var("DATABASE_URL").expect("DATABASE_URL must be set when using external infra"); 707 + let plc_url = setup_mock_plc_directory().await; 708 + unsafe { 709 + configure_external_storage_env(); 710 + std::env::set_var("PLC_DIRECTORY_URL", &plc_url); 711 + } 712 + register_mock_appview().await; 713 + database_url 714 + } 715 + 716 + #[cfg(not(feature = "external-infra"))] 717 + async fn setup_cluster_testcontainers() -> String { 718 + let temp_dir = std::env::temp_dir().join(format!("tranquil-pds-cluster-{}", uuid::Uuid::new_v4())); 719 + let blob_path = temp_dir.join("blobs"); 720 + let backup_path = temp_dir.join("backups"); 721 + std::fs::create_dir_all(&blob_path).expect("Failed to create blob temp directory"); 722 + std::fs::create_dir_all(&backup_path).expect("Failed to create backup temp directory"); 723 + TEST_TEMP_DIR.set(temp_dir).ok(); 724 + let plc_url = setup_mock_plc_directory().await; 725 + unsafe { 726 + std::env::set_var("BLOB_STORAGE_BACKEND", "filesystem"); 727 + std::env::set_var("BLOB_STORAGE_PATH", blob_path.to_str().unwrap()); 728 + std::env::set_var("BACKUP_STORAGE_BACKEND", "filesystem"); 729 + std::env::set_var("BACKUP_STORAGE_PATH", backup_path.to_str().unwrap()); 730 + std::env::set_var("MAX_IMPORT_SIZE", "100000000"); 731 + std::env::set_var("SKIP_IMPORT_VERIFICATION", "true"); 732 + std::env::set_var("PLC_DIRECTORY_URL", &plc_url); 733 + } 734 + register_mock_appview().await; 735 + let container = Postgres::default() 736 + .with_tag("18-alpine") 737 + .with_label("tranquil_pds_test", "true") 738 + .start() 739 + .await 740 + .expect("Failed to start Postgres for cluster"); 741 + let connection_string = format!( 742 + "postgres://postgres:postgres@127.0.0.1:{}", 743 + container 744 + .get_host_port_ipv4(5432) 745 + .await 746 + .expect("Failed to get port") 747 + ); 748 + DB_CONTAINER.set(container).ok(); 749 + connection_string 750 + } 751 + 752 + 
#[cfg(feature = "external-infra")] 753 + async fn setup_cluster_testcontainers() -> String { 754 + panic!( 755 + "Testcontainers disabled with external-infra feature. Set DATABASE_URL and BLOB_STORAGE_PATH (or S3_ENDPOINT)." 756 + ); 559 757 } 560 758 561 759 #[allow(dead_code)]
+6 -1
crates/tranquil-pds/tests/firehose_validation.rs
··· 800 800 801 801 tokio::time::sleep(std::time::Duration::from_millis(100)).await; 802 802 803 - let outdated_cursor = 1i64; 803 + let pool = get_test_db_pool().await; 804 + let max_seq: i64 = sqlx::query_scalar::<_, i64>("SELECT COALESCE(MAX(seq), 0) FROM repo_seq") 805 + .fetch_one(pool) 806 + .await 807 + .unwrap(); 808 + let outdated_cursor = (max_seq - 100).max(1); 804 809 let url = format!( 805 810 "ws://127.0.0.1:{}/xrpc/com.atproto.sync.subscribeRepos?cursor={}", 806 811 app_port(),
+4 -2
crates/tranquil-pds/tests/repo_blob.rs
··· 25 25 async fn test_upload_blob_success() { 26 26 let client = client(); 27 27 let (token, _) = create_account_and_login(&client).await; 28 + let blob_data = format!("blob-{}", uuid::Uuid::new_v4()); 28 29 let res = client 29 30 .post(format!( 30 31 "{}/xrpc/com.atproto.repo.uploadBlob", ··· 32 33 )) 33 34 .header(header::CONTENT_TYPE, "text/plain") 34 35 .bearer_auth(token) 35 - .body("This is our blob data") 36 + .body(blob_data) 36 37 .send() 37 38 .await 38 39 .expect("Failed to send request"); 39 - assert_eq!(res.status(), StatusCode::OK); 40 + let status = res.status(); 40 41 let body: Value = res.json().await.expect("Response was not valid JSON"); 42 + assert_eq!(status, StatusCode::OK, "uploadBlob failed: {body}"); 41 43 assert!(body["blob"]["ref"]["$link"].as_str().is_some()); 42 44 } 43 45
+1001
crates/tranquil-pds/tests/ripple_cluster.rs
···
1 + mod common;
2 + 
3 + use reqwest::StatusCode;
4 + use serde_json::json;
5 + use std::sync::Arc;
6 + use std::time::Duration;
7 + use tranquil_pds::cache::{Cache, DistributedRateLimiter};
8 + 
9 + async fn poll_until<F, Fut>(max_ms: u64, interval_ms: u64, check_fn: F)
10 + where
11 +     F: Fn() -> Fut,
12 +     Fut: std::future::Future<Output = bool>,
13 + {
14 +     let deadline = tokio::time::Instant::now() + Duration::from_millis(max_ms);
15 +     let interval = Duration::from_millis(interval_ms);
16 + 
17 +     loop {
18 +         if check_fn().await {
19 +             return;
20 +         }
21 +         if tokio::time::Instant::now() >= deadline {
22 +             panic!("poll_until timed out after {max_ms}ms");
23 +         }
24 +         tokio::time::sleep(interval).await;
25 +     }
26 + }
27 + 
28 + fn cache_for(nodes: &[common::ServerInstance], idx: usize) -> Arc<dyn Cache> {
29 +     nodes[idx]
30 +         .cache
31 +         .clone()
32 +         .unwrap_or_else(|| panic!("node {idx} should have a cache"))
33 + }
34 + 
35 + fn rl_for(nodes: &[common::ServerInstance], idx: usize) -> Arc<dyn DistributedRateLimiter> {
36 +     nodes[idx]
37 +         .distributed_rate_limiter
38 +         .clone()
39 +         .unwrap_or_else(|| panic!("node {idx} should have a rate limiter"))
40 + }
41 + 
42 + #[tokio::test]
43 + async fn cluster_formation() {
44 +     let nodes = common::cluster().await;
45 +     assert!(nodes.len() >= 3, "expected at least 3 cluster nodes");
46 + 
47 +     let client = common::client();
48 +     let results: Vec<_> = futures::future::join_all(
49 +         nodes.iter().map(|node| {
50 +             let client = client.clone();
51 +             let url = node.url.clone();
52 +             async move {
53 +                 client
54 +                     .get(format!("{url}/xrpc/com.atproto.server.describeServer"))
55 +                     .send()
56 +                     .await
57 +             }
58 +         })
59 +     ).await;
60 + 
61 +     results.iter().enumerate().for_each(|(i, result)| {
62 +         let resp = result.as_ref().unwrap_or_else(|e| panic!("node {i} unreachable: {e}"));
63 +         assert_eq!(
64 +             resp.status(),
65 +             StatusCode::OK,
66 +             "node {i} returned non-200 status"
67 +         );
68 +     });
69 + }
70 + 
71 + #[tokio::test]
72 + async fn cluster_any_node_access() { 73 + let nodes = common::cluster().await; 74 + let client = common::client(); 75 + 76 + let handle = format!("u{}", &uuid::Uuid::new_v4().simple().to_string()[..12]); 77 + let payload = serde_json::json!({ 78 + "handle": handle, 79 + "email": format!("{handle}@example.com"), 80 + "password": "Testpass123!" 81 + }); 82 + let create_res = client 83 + .post(format!( 84 + "{}/xrpc/com.atproto.server.createAccount", 85 + nodes[0].url 86 + )) 87 + .json(&payload) 88 + .send() 89 + .await 90 + .expect("createAccount on node 0 failed"); 91 + assert_eq!(create_res.status(), StatusCode::OK); 92 + let body: serde_json::Value = create_res.json().await.expect("invalid json"); 93 + let did = body["did"].as_str().expect("no did").to_string(); 94 + let access_jwt = body["accessJwt"].as_str().expect("no accessJwt").to_string(); 95 + 96 + let pool = common::get_test_db_pool().await; 97 + let body_text: String = sqlx::query_scalar!( 98 + "SELECT body FROM comms_queue WHERE user_id = (SELECT id FROM users WHERE did = $1) AND comms_type = 'email_verification' ORDER BY created_at DESC LIMIT 1", 99 + &did 100 + ) 101 + .fetch_one(pool) 102 + .await 103 + .expect("verification code not found"); 104 + 105 + let lines: Vec<&str> = body_text.lines().collect(); 106 + let verification_code = lines 107 + .iter() 108 + .enumerate() 109 + .find(|(_, line)| line.contains("verification code is:") || line.contains("code is:")) 110 + .and_then(|(i, _)| lines.get(i + 1).map(|s| s.trim().to_string())) 111 + .or_else(|| { 112 + body_text 113 + .lines() 114 + .find(|line| line.trim().starts_with("MX")) 115 + .map(|s| s.trim().to_string()) 116 + }) 117 + .unwrap_or_else(|| body_text.clone()); 118 + 119 + let confirm_payload = serde_json::json!({ 120 + "did": did, 121 + "verificationCode": verification_code 122 + }); 123 + let confirm_res = client 124 + .post(format!( 125 + "{}/xrpc/com.atproto.server.confirmSignup", 126 + nodes[0].url 127 + )) 128 + 
.json(&confirm_payload) 129 + .send() 130 + .await 131 + .expect("confirmSignup failed"); 132 + 133 + let token = match confirm_res.status() { 134 + StatusCode::OK => { 135 + let confirm_body: serde_json::Value = 136 + confirm_res.json().await.expect("invalid json from confirmSignup"); 137 + confirm_body["accessJwt"] 138 + .as_str() 139 + .unwrap_or(&access_jwt) 140 + .to_string() 141 + } 142 + _ => access_jwt, 143 + }; 144 + 145 + let describe_res = client 146 + .get(format!( 147 + "{}/xrpc/com.atproto.server.getSession", 148 + nodes[1].url 149 + )) 150 + .bearer_auth(&token) 151 + .send() 152 + .await 153 + .expect("getSession on node 1 failed"); 154 + assert_eq!( 155 + describe_res.status(), 156 + StatusCode::OK, 157 + "session created on node 0 should be valid on node 1 (shared postgres)" 158 + ); 159 + let session: serde_json::Value = describe_res.json().await.expect("invalid json"); 160 + assert_eq!(session["did"].as_str().unwrap(), did); 161 + } 162 + 163 + #[tokio::test] 164 + async fn cache_convergence() { 165 + let nodes = common::cluster().await; 166 + 167 + let cache_a = nodes[0] 168 + .cache 169 + .as_ref() 170 + .expect("node 0 should have a cache"); 171 + let cache_b = nodes[1] 172 + .cache 173 + .as_ref() 174 + .expect("node 1 should have a cache"); 175 + 176 + let test_key = format!("ripple-test-{}", uuid::Uuid::new_v4()); 177 + let test_value = "converged-value"; 178 + 179 + cache_a 180 + .set(&test_key, test_value, Duration::from_secs(300)) 181 + .await 182 + .expect("cache set on node A failed"); 183 + 184 + let found_on_a = cache_a.get(&test_key).await; 185 + assert_eq!( 186 + found_on_a.as_deref(), 187 + Some(test_value), 188 + "value should be immediately readable on the originating node" 189 + ); 190 + 191 + let mut converged = false; 192 + let mut attempts = 0; 193 + let max_attempts = 50; 194 + while attempts < max_attempts { 195 + tokio::time::sleep(Duration::from_millis(200)).await; 196 + if let Some(val) = cache_b.get(&test_key).await { 
197 + assert_eq!(val, test_value, "converged value should match"); 198 + converged = true; 199 + break; 200 + } 201 + attempts += 1; 202 + } 203 + 204 + assert!( 205 + converged, 206 + "cache value did not converge to node B within {}ms", 207 + max_attempts * 200 208 + ); 209 + } 210 + 211 + #[tokio::test] 212 + async fn rate_limit_convergence() { 213 + let nodes = common::cluster().await; 214 + 215 + let rl_a = nodes[0] 216 + .distributed_rate_limiter 217 + .as_ref() 218 + .expect("node 0 should have a rate limiter"); 219 + let rl_b = nodes[1] 220 + .distributed_rate_limiter 221 + .as_ref() 222 + .expect("node 1 should have a rate limiter"); 223 + 224 + let test_key = format!("rl-test-{}", uuid::Uuid::new_v4()); 225 + let limit: u32 = 100; 226 + let window_ms: u64 = 600_000; 227 + let hits_on_a: u32 = 80; 228 + 229 + let mut count = 0u32; 230 + while count < hits_on_a { 231 + let allowed = rl_a.check_rate_limit(&test_key, limit, window_ms).await; 232 + assert!( 233 + allowed, 234 + "request {count} should be allowed (limit is {limit})" 235 + ); 236 + count += 1; 237 + } 238 + 239 + let rl_b2 = rl_b.clone(); 240 + let k = test_key.clone(); 241 + poll_until(15_000, 200, move || { 242 + let rl = rl_b2.clone(); 243 + let k = k.clone(); 244 + async move { rl.peek_rate_limit_count(&k, window_ms).await >= hits_on_a as u64 } 245 + }) 246 + .await; 247 + 248 + let mut allowed_on_b = 0u32; 249 + while allowed_on_b < limit { 250 + if !rl_b.check_rate_limit(&test_key, limit, window_ms).await { 251 + break; 252 + } 253 + allowed_on_b += 1; 254 + } 255 + 256 + assert!( 257 + allowed_on_b < limit, 258 + "node B should have been rate limited after convergence, but {allowed_on_b} requests were allowed (limit={limit})" 259 + ); 260 + assert!( 261 + allowed_on_b <= limit - hits_on_a + 10, 262 + "node B allowed {allowed_on_b} requests but expected at most {} (convergence margin)", 263 + limit - hits_on_a + 10 264 + ); 265 + } 266 + 267 + #[tokio::test] 268 + async fn 
delete_convergence() { 269 + let nodes = common::cluster().await; 270 + let cache_0 = cache_for(nodes, 0); 271 + let cache_1 = cache_for(nodes, 1); 272 + 273 + let key = format!("del-cluster-{}", uuid::Uuid::new_v4()); 274 + 275 + cache_0 276 + .set(&key, "to-delete", Duration::from_secs(300)) 277 + .await 278 + .expect("set on node 0 failed"); 279 + 280 + let c1 = cache_1.clone(); 281 + let k = key.clone(); 282 + poll_until(10_000, 200, move || { 283 + let c = c1.clone(); 284 + let k = k.clone(); 285 + async move { c.get(&k).await.is_some() } 286 + }) 287 + .await; 288 + 289 + cache_0.delete(&key).await.expect("delete on node 0 failed"); 290 + 291 + let c1 = cache_1.clone(); 292 + let k = key.clone(); 293 + poll_until(10_000, 200, move || { 294 + let c = c1.clone(); 295 + let k = k.clone(); 296 + async move { c.get(&k).await.is_none() } 297 + }) 298 + .await; 299 + } 300 + 301 + #[tokio::test] 302 + async fn three_node_transitive_convergence() { 303 + let nodes = common::cluster().await; 304 + let cache_0 = cache_for(nodes, 0); 305 + let cache_2 = cache_for(nodes, 2); 306 + 307 + let key = format!("trans-{}", uuid::Uuid::new_v4()); 308 + 309 + cache_0 310 + .set(&key, "reaches-all", Duration::from_secs(300)) 311 + .await 312 + .expect("set on node 0 failed"); 313 + 314 + let c2 = cache_2.clone(); 315 + let k = key.clone(); 316 + poll_until(15_000, 200, move || { 317 + let c = c2.clone(); 318 + let k = k.clone(); 319 + async move { c.get(&k).await.as_deref() == Some("reaches-all") } 320 + }) 321 + .await; 322 + } 323 + 324 + #[tokio::test] 325 + async fn cluster_overwrite_conflict_resolution() { 326 + let nodes = common::cluster().await; 327 + let cache_0 = cache_for(nodes, 0); 328 + let cache_1 = cache_for(nodes, 1); 329 + let cache_2 = cache_for(nodes, 2); 330 + 331 + let key = format!("conflict-{}", uuid::Uuid::new_v4()); 332 + 333 + cache_0 334 + .set(&key, "from-node-0", Duration::from_secs(300)) 335 + .await 336 + .expect("set on node 0 failed"); 337 + 338 + 
cache_1 339 + .set(&key, "from-node-1", Duration::from_secs(300)) 340 + .await 341 + .expect("set on node 1 failed"); 342 + 343 + let c0 = cache_0.clone(); 344 + let c1 = cache_1.clone(); 345 + let c2 = cache_2.clone(); 346 + let k = key.clone(); 347 + poll_until(15_000, 200, move || { 348 + let c0 = c0.clone(); 349 + let c1 = c1.clone(); 350 + let c2 = c2.clone(); 351 + let k = k.clone(); 352 + async move { 353 + let (v0, v1, v2) = tokio::join!(c0.get(&k), c1.get(&k), c2.get(&k)); 354 + matches!((v0, v1, v2), (Some(a), Some(b), Some(c)) if a == b && b == c) 355 + } 356 + }) 357 + .await; 358 + 359 + let v0 = cache_0.get(&key).await.expect("node 0 should have key"); 360 + let v1 = cache_1.get(&key).await.expect("node 1 should have key"); 361 + let v2 = cache_2.get(&key).await.expect("node 2 should have key"); 362 + 363 + assert_eq!(v0, v1, "node 0 and 1 must agree"); 364 + assert_eq!(v1, v2, "node 1 and 2 must agree"); 365 + } 366 + 367 + #[tokio::test] 368 + async fn cluster_bulk_key_convergence() { 369 + let nodes = common::cluster().await; 370 + let cache_0 = cache_for(nodes, 0); 371 + let cache_1 = cache_for(nodes, 1); 372 + let cache_2 = cache_for(nodes, 2); 373 + 374 + let prefix = format!("bulk-{}", uuid::Uuid::new_v4()); 375 + 376 + futures::future::join_all((0..500).map(|i| { 377 + let cache = cache_0.clone(); 378 + let p = prefix.clone(); 379 + async move { 380 + cache 381 + .set( 382 + &format!("{p}-{i}"), 383 + &format!("v-{i}"), 384 + Duration::from_secs(300), 385 + ) 386 + .await 387 + .expect("set failed"); 388 + } 389 + })) 390 + .await; 391 + 392 + let c1 = cache_1.clone(); 393 + let p = prefix.clone(); 394 + poll_until(60_000, 500, move || { 395 + let c = c1.clone(); 396 + let p = p.clone(); 397 + async move { 398 + futures::future::join_all((0..500).map(|i| { 399 + let c = c.clone(); 400 + let p = p.clone(); 401 + async move { c.get(&format!("{p}-{i}")).await.is_some() } 402 + })) 403 + .await 404 + .into_iter() 405 + .all(|v| v) 406 + } 407 + }) 
408 + .await; 409 + 410 + let spot_checks: Vec<Option<String>> = futures::future::join_all( 411 + [0, 99, 250, 499].iter().map(|&i| { 412 + let c = cache_2.clone(); 413 + let p = prefix.clone(); 414 + async move { c.get(&format!("{p}-{i}")).await } 415 + }), 416 + ) 417 + .await; 418 + 419 + spot_checks.iter().enumerate().for_each(|(idx, val)| { 420 + assert!( 421 + val.is_some(), 422 + "node 2 missing spot-check key at index {idx}" 423 + ); 424 + }); 425 + } 426 + 427 + #[tokio::test] 428 + async fn cluster_concurrent_multi_node_writes() { 429 + let nodes = common::cluster().await; 430 + let cache_0 = cache_for(nodes, 0); 431 + let cache_1 = cache_for(nodes, 1); 432 + let cache_2 = cache_for(nodes, 2); 433 + 434 + let prefix = format!("multi-{}", uuid::Uuid::new_v4()); 435 + 436 + let write_0 = { 437 + let cache = cache_0.clone(); 438 + let p = prefix.clone(); 439 + async move { 440 + futures::future::join_all((0..100).map(|i| { 441 + let cache = cache.clone(); 442 + let p = p.clone(); 443 + async move { 444 + cache 445 + .set( 446 + &format!("{p}-0-{i}"), 447 + &format!("n0-{i}"), 448 + Duration::from_secs(300), 449 + ) 450 + .await 451 + .expect("set failed"); 452 + } 453 + })) 454 + .await; 455 + } 456 + }; 457 + 458 + let write_1 = { 459 + let cache = cache_1.clone(); 460 + let p = prefix.clone(); 461 + async move { 462 + futures::future::join_all((0..100).map(|i| { 463 + let cache = cache.clone(); 464 + let p = p.clone(); 465 + async move { 466 + cache 467 + .set( 468 + &format!("{p}-1-{i}"), 469 + &format!("n1-{i}"), 470 + Duration::from_secs(300), 471 + ) 472 + .await 473 + .expect("set failed"); 474 + } 475 + })) 476 + .await; 477 + } 478 + }; 479 + 480 + let write_2 = { 481 + let cache = cache_2.clone(); 482 + let p = prefix.clone(); 483 + async move { 484 + futures::future::join_all((0..100).map(|i| { 485 + let cache = cache.clone(); 486 + let p = p.clone(); 487 + async move { 488 + cache 489 + .set( 490 + &format!("{p}-2-{i}"), 491 + &format!("n2-{i}"), 
492 + Duration::from_secs(300), 493 + ) 494 + .await 495 + .expect("set failed"); 496 + } 497 + })) 498 + .await; 499 + } 500 + }; 501 + 502 + tokio::join!(write_0, write_1, write_2); 503 + 504 + let caches: Vec<Arc<dyn Cache>> = vec![cache_0.clone(), cache_1.clone(), cache_2.clone()]; 505 + 506 + futures::future::join_all(caches.iter().enumerate().map(|(ci, cache)| { 507 + let cache = cache.clone(); 508 + let p = prefix.clone(); 509 + async move { 510 + let c = cache.clone(); 511 + let p2 = p.clone(); 512 + poll_until(60_000, 500, move || { 513 + let c = c.clone(); 514 + let p = p2.clone(); 515 + async move { 516 + let checks = futures::future::join_all((0..3u8).flat_map(|node| { 517 + let c = c.clone(); 518 + let p = p.clone(); 519 + (0..100).map(move |i| { 520 + let c = c.clone(); 521 + let p = p.clone(); 522 + async move { c.get(&format!("{p}-{node}-{i}")).await.is_some() } 523 + }) 524 + })) 525 + .await; 526 + checks.into_iter().all(|v| v) 527 + } 528 + }) 529 + .await; 530 + eprintln!("node {ci} has all 300 keys"); 531 + } 532 + })) 533 + .await; 534 + } 535 + 536 + #[tokio::test] 537 + async fn cluster_rate_limit_multi_node_convergence() { 538 + let nodes = common::cluster().await; 539 + let rl_0 = rl_for(nodes, 0); 540 + let rl_1 = rl_for(nodes, 1); 541 + let rl_2 = rl_for(nodes, 2); 542 + 543 + let key = format!("rl-multi-{}", uuid::Uuid::new_v4()); 544 + let limit: u32 = 300; 545 + let window_ms: u64 = 600_000; 546 + 547 + futures::future::join_all((0..50).map(|_| { 548 + let rl = rl_0.clone(); 549 + let k = key.clone(); 550 + async move { 551 + assert!(rl.check_rate_limit(&k, limit, window_ms).await); 552 + } 553 + })) 554 + .await; 555 + 556 + futures::future::join_all((0..40).map(|_| { 557 + let rl = rl_1.clone(); 558 + let k = key.clone(); 559 + async move { 560 + assert!(rl.check_rate_limit(&k, limit, window_ms).await); 561 + } 562 + })) 563 + .await; 564 + 565 + futures::future::join_all((0..30).map(|_| { 566 + let rl = rl_2.clone(); 567 + let k = 
key.clone(); 568 + async move { 569 + assert!(rl.check_rate_limit(&k, limit, window_ms).await); 570 + } 571 + })) 572 + .await; 573 + 574 + let rl_peek = rl_0.clone(); 575 + let k = key.clone(); 576 + poll_until(15_000, 200, move || { 577 + let rl = rl_peek.clone(); 578 + let k = k.clone(); 579 + async move { rl.peek_rate_limit_count(&k, window_ms).await >= 120 } 580 + }) 581 + .await; 582 + 583 + let mut remaining = 0u32; 584 + loop { 585 + if !rl_0.check_rate_limit(&key, limit, window_ms).await { 586 + break; 587 + } 588 + remaining += 1; 589 + if remaining > limit { 590 + panic!("rate limiter never denied - convergence failed"); 591 + } 592 + } 593 + 594 + let expected_remaining = limit - 120; 595 + let margin = 20; 596 + assert!( 597 + remaining.abs_diff(expected_remaining) <= margin, 598 + "expected ~{expected_remaining} remaining hits, got {remaining} (margin={margin})" 599 + ); 600 + } 601 + 602 + fn create_account_on_node<'a>( 603 + client: &'a reqwest::Client, 604 + node_url: &'a str, 605 + ) -> std::pin::Pin<Box<dyn std::future::Future<Output = (String, String)> + Send + 'a>> { 606 + let url = node_url.to_string(); 607 + Box::pin(async move { 608 + let handle = format!("u{}", &uuid::Uuid::new_v4().simple().to_string()[..12]); 609 + let payload = json!({ 610 + "handle": handle, 611 + "email": format!("{handle}@example.com"), 612 + "password": "Testpass123!" 
613 + }); 614 + let create_res = client 615 + .post(format!("{url}/xrpc/com.atproto.server.createAccount")) 616 + .json(&payload) 617 + .send() 618 + .await 619 + .expect("createAccount failed"); 620 + assert_eq!(create_res.status(), StatusCode::OK, "createAccount non-200"); 621 + let body: serde_json::Value = create_res.json().await.expect("invalid json"); 622 + let did = body["did"].as_str().expect("no did").to_string(); 623 + let access_jwt = body["accessJwt"].as_str().expect("no accessJwt").to_string(); 624 + 625 + let pool = common::get_test_db_pool().await; 626 + let body_text: String = sqlx::query_scalar!( 627 + "SELECT body FROM comms_queue WHERE user_id = (SELECT id FROM users WHERE did = $1) AND comms_type = 'email_verification' ORDER BY created_at DESC LIMIT 1", 628 + &did 629 + ) 630 + .fetch_one(pool) 631 + .await 632 + .expect("verification code not found"); 633 + 634 + let lines: Vec<&str> = body_text.lines().collect(); 635 + let verification_code = lines 636 + .iter() 637 + .enumerate() 638 + .find(|(_, line)| line.contains("verification code is:") || line.contains("code is:")) 639 + .and_then(|(i, _)| lines.get(i + 1).map(|s| s.trim().to_string())) 640 + .or_else(|| { 641 + body_text 642 + .lines() 643 + .find(|line| line.trim().starts_with("MX")) 644 + .map(|s| s.trim().to_string()) 645 + }) 646 + .unwrap_or_else(|| body_text.clone()); 647 + 648 + let confirm_res = client 649 + .post(format!("{url}/xrpc/com.atproto.server.confirmSignup")) 650 + .json(&json!({ "did": did, "verificationCode": verification_code })) 651 + .send() 652 + .await 653 + .expect("confirmSignup failed"); 654 + 655 + let token = match confirm_res.status() { 656 + StatusCode::OK => { 657 + let confirm_body: serde_json::Value = 658 + confirm_res.json().await.expect("invalid json from confirmSignup"); 659 + confirm_body["accessJwt"] 660 + .as_str() 661 + .unwrap_or(&access_jwt) 662 + .to_string() 663 + } 664 + _ => access_jwt, 665 + }; 666 + 667 + (token, did) 668 + }) 669 + } 
670 + 671 + #[tokio::test] 672 + async fn cross_node_rate_limit_via_login() { 673 + let nodes = common::cluster().await; 674 + let client = common::client(); 675 + 676 + let uuid_bytes = uuid::Uuid::new_v4(); 677 + let b = uuid_bytes.as_bytes(); 678 + let unique_ip = format!("10.{}.{}.{}", b[0], b[1], b[2]); 679 + 680 + let statuses: Vec<StatusCode> = futures::future::join_all((0..10).map(|_| { 681 + let client = client.clone(); 682 + let url = nodes[0].url.clone(); 683 + let ip = unique_ip.clone(); 684 + async move { 685 + client 686 + .post(format!("{url}/xrpc/com.atproto.server.createSession")) 687 + .header("X-Forwarded-For", &ip) 688 + .json(&json!({ 689 + "identifier": "nonexistent@example.com", 690 + "password": "wrongpass" 691 + })) 692 + .send() 693 + .await 694 + .expect("request failed") 695 + .status() 696 + } 697 + })) 698 + .await; 699 + 700 + statuses.iter().enumerate().for_each(|(i, status)| { 701 + assert_ne!( 702 + *status, 703 + StatusCode::TOO_MANY_REQUESTS, 704 + "request {i} should not be rate limited within first 10 attempts" 705 + ); 706 + }); 707 + 708 + let rl_1 = rl_for(nodes, 1); 709 + let rl_key = format!("login:{unique_ip}"); 710 + let rl_1c = rl_1.clone(); 711 + let k = rl_key.clone(); 712 + poll_until(30_000, 200, move || { 713 + let rl = rl_1c.clone(); 714 + let k = k.clone(); 715 + async move { rl.peek_rate_limit_count(&k, 60_000).await >= 10 } 716 + }) 717 + .await; 718 + 719 + let cross_node_res = client 720 + .post(format!( 721 + "{}/xrpc/com.atproto.server.createSession", 722 + nodes[1].url 723 + )) 724 + .header("X-Forwarded-For", &unique_ip) 725 + .json(&json!({ 726 + "identifier": "nonexistent@example.com", 727 + "password": "wrongpass" 728 + })) 729 + .send() 730 + .await 731 + .expect("cross-node request failed"); 732 + 733 + assert_eq!( 734 + cross_node_res.status(), 735 + StatusCode::TOO_MANY_REQUESTS, 736 + "node 1 should rate limit after cross-node convergence of login attempts" 737 + ); 738 + } 739 + 740 + 
#[tokio::test] 741 + async fn cross_node_handle_resolution_from_cache() { 742 + let nodes = common::cluster().await; 743 + let client = common::client(); 744 + let cache_0 = cache_for(nodes, 0); 745 + 746 + let fake_handle = format!("cached-{}.test", uuid::Uuid::new_v4().simple()); 747 + let fake_did = format!("did:plc:cached{}", &uuid::Uuid::new_v4().simple().to_string()[..16]); 748 + 749 + cache_0 750 + .set( 751 + &format!("handle:{fake_handle}"), 752 + &fake_did, 753 + Duration::from_secs(300), 754 + ) 755 + .await 756 + .expect("cache set failed"); 757 + 758 + let cache_1 = cache_for(nodes, 1); 759 + let c1 = cache_1.clone(); 760 + let k = format!("handle:{fake_handle}"); 761 + poll_until(10_000, 200, move || { 762 + let c = c1.clone(); 763 + let k = k.clone(); 764 + async move { c.get(&k).await.is_some() } 765 + }) 766 + .await; 767 + 768 + let res = client 769 + .get(format!( 770 + "{}/xrpc/com.atproto.identity.resolveHandle?handle={}", 771 + nodes[1].url, fake_handle 772 + )) 773 + .send() 774 + .await 775 + .expect("resolveHandle request failed"); 776 + 777 + assert_eq!( 778 + res.status(), 779 + StatusCode::OK, 780 + "resolveHandle should succeed from propagated cache" 781 + ); 782 + let body: serde_json::Value = res.json().await.expect("invalid json"); 783 + assert_eq!( 784 + body["did"].as_str().unwrap(), 785 + fake_did, 786 + "resolved DID should match the cache-propagated value" 787 + ); 788 + } 789 + 790 + #[tokio::test] 791 + async fn cross_node_cache_delete_observable_via_http() { 792 + let nodes = common::cluster().await; 793 + let client = common::client(); 794 + let cache_0 = cache_for(nodes, 0); 795 + let cache_1 = cache_for(nodes, 1); 796 + 797 + let fake_handle = format!("deltest-{}.test", uuid::Uuid::new_v4().simple()); 798 + let fake_did = format!("did:plc:del{}", &uuid::Uuid::new_v4().simple().to_string()[..16]); 799 + let cache_key = format!("handle:{fake_handle}"); 800 + 801 + cache_0 802 + .set(&cache_key, &fake_did, 
Duration::from_secs(300)) 803 + .await 804 + .expect("cache set failed"); 805 + 806 + let c1 = cache_1.clone(); 807 + let k = cache_key.clone(); 808 + poll_until(10_000, 200, move || { 809 + let c = c1.clone(); 810 + let k = k.clone(); 811 + async move { c.get(&k).await.is_some() } 812 + }) 813 + .await; 814 + 815 + let res = client 816 + .get(format!( 817 + "{}/xrpc/com.atproto.identity.resolveHandle?handle={}", 818 + nodes[1].url, fake_handle 819 + )) 820 + .send() 821 + .await 822 + .expect("resolveHandle request failed"); 823 + assert_eq!(res.status(), StatusCode::OK, "should resolve before delete"); 824 + 825 + cache_0 826 + .delete(&cache_key) 827 + .await 828 + .expect("cache delete failed"); 829 + 830 + let c1 = cache_1.clone(); 831 + let k = cache_key.clone(); 832 + poll_until(10_000, 200, move || { 833 + let c = c1.clone(); 834 + let k = k.clone(); 835 + async move { c.get(&k).await.is_none() } 836 + }) 837 + .await; 838 + 839 + let res = client 840 + .get(format!( 841 + "{}/xrpc/com.atproto.identity.resolveHandle?handle={}", 842 + nodes[1].url, fake_handle 843 + )) 844 + .send() 845 + .await 846 + .expect("resolveHandle request failed after delete"); 847 + assert_ne!( 848 + res.status(), 849 + StatusCode::OK, 850 + "resolveHandle should fail after cache delete propagation (handle not in DB)" 851 + ); 852 + } 853 + 854 + #[tokio::test] 855 + async fn cross_node_email_update_status() { 856 + let nodes = common::cluster().await; 857 + let client = common::client(); 858 + let cache_0 = cache_for(nodes, 0); 859 + let cache_1 = cache_for(nodes, 1); 860 + 861 + let (token, did) = create_account_on_node(&client, &nodes[0].url).await; 862 + 863 + let new_email = format!("updated-{}@example.com", uuid::Uuid::new_v4().simple()); 864 + let update_res = client 865 + .post(format!( 866 + "{}/xrpc/com.atproto.server.requestEmailUpdate", 867 + nodes[0].url 868 + )) 869 + .bearer_auth(&token) 870 + .json(&json!({ "newEmail": new_email })) 871 + .send() 872 + .await 873 + 
.expect("requestEmailUpdate failed"); 874 + assert_eq!( 875 + update_res.status(), 876 + StatusCode::OK, 877 + "requestEmailUpdate should succeed" 878 + ); 879 + let update_body: serde_json::Value = update_res.json().await.expect("invalid json"); 880 + assert_eq!( 881 + update_body["tokenRequired"].as_bool(), 882 + Some(true), 883 + "tokenRequired should be true (email is verified after confirmSignup)" 884 + ); 885 + 886 + let cache_key = format!("email_update:{did}"); 887 + let val_on_0 = cache_0.get(&cache_key).await; 888 + assert!( 889 + val_on_0.is_some(), 890 + "email_update entry should exist on node 0 immediately after requestEmailUpdate" 891 + ); 892 + 893 + let c1 = cache_1.clone(); 894 + let k = cache_key.clone(); 895 + poll_until(10_000, 200, move || { 896 + let c = c1.clone(); 897 + let k = k.clone(); 898 + async move { c.get(&k).await.is_some() } 899 + }) 900 + .await; 901 + 902 + let status_res = client 903 + .get(format!( 904 + "{}/xrpc/_account.checkEmailUpdateStatus", 905 + nodes[1].url 906 + )) 907 + .bearer_auth(&token) 908 + .send() 909 + .await 910 + .expect("checkEmailUpdateStatus on node 1 failed"); 911 + assert_eq!( 912 + status_res.status(), 913 + StatusCode::OK, 914 + "checkEmailUpdateStatus should succeed on node 1" 915 + ); 916 + let status_body: serde_json::Value = status_res.json().await.expect("invalid json"); 917 + assert_eq!( 918 + status_body["pending"].as_bool(), 919 + Some(true), 920 + "email update should be pending on node 1 via cache propagation" 921 + ); 922 + assert_eq!( 923 + status_body["newEmail"].as_str().unwrap(), 924 + new_email, 925 + "new email should match on node 1" 926 + ); 927 + } 928 + 929 + #[tokio::test] 930 + async fn cross_node_session_revocation() { 931 + let nodes = common::cluster().await; 932 + let client = common::client(); 933 + 934 + let (token, _did) = create_account_on_node(&client, &nodes[0].url).await; 935 + 936 + let session_res = client 937 + .get(format!( 938 + 
"{}/xrpc/com.atproto.server.getSession", 939 + nodes[0].url 940 + )) 941 + .bearer_auth(&token) 942 + .send() 943 + .await 944 + .expect("getSession on node 0 failed"); 945 + assert_eq!( 946 + session_res.status(), 947 + StatusCode::OK, 948 + "session should be valid on node 0" 949 + ); 950 + 951 + let client2 = client.clone(); 952 + let url1 = nodes[1].url.clone(); 953 + let t = token.clone(); 954 + poll_until(15_000, 200, move || { 955 + let c = client2.clone(); 956 + let u = url1.clone(); 957 + let t = t.clone(); 958 + async move { 959 + c.get(format!("{u}/xrpc/com.atproto.server.getSession")) 960 + .bearer_auth(&t) 961 + .send() 962 + .await 963 + .map(|r| r.status() == StatusCode::OK) 964 + .unwrap_or(false) 965 + } 966 + }) 967 + .await; 968 + 969 + let delete_res = client 970 + .post(format!( 971 + "{}/xrpc/com.atproto.server.deleteSession", 972 + nodes[0].url 973 + )) 974 + .bearer_auth(&token) 975 + .send() 976 + .await 977 + .expect("deleteSession failed"); 978 + assert_eq!( 979 + delete_res.status(), 980 + StatusCode::OK, 981 + "deleteSession should succeed" 982 + ); 983 + 984 + let client3 = client.clone(); 985 + let url1 = nodes[1].url.clone(); 986 + let t = token.clone(); 987 + poll_until(15_000, 200, move || { 988 + let c = client3.clone(); 989 + let u = url1.clone(); 990 + let t = t.clone(); 991 + async move { 992 + c.get(format!("{u}/xrpc/com.atproto.server.getSession")) 993 + .bearer_auth(&t) 994 + .send() 995 + .await 996 + .map(|r| r.status() != StatusCode::OK) 997 + .unwrap_or(false) 998 + } 999 + }) 1000 + .await; 1001 + }
+27 -14
crates/tranquil-pds/tests/sync_conformance.rs
··· 160 160 161 161 set_account_takedown(&did, Some("test-takedown-ref")).await; 162 162 163 - let res = client 164 - .get(format!( 163 + let mut cursor: Option<String> = None; 164 + let mut takendown_repo: Option<Value> = None; 165 + loop { 166 + let mut url = format!( 165 167 "{}/xrpc/com.atproto.sync.listRepos?limit=1000", 166 168 base_url().await 167 - )) 168 - .send() 169 - .await 170 - .expect("Failed to send request"); 171 - 172 - assert_eq!(res.status(), StatusCode::OK); 173 - let body: Value = res.json().await.expect("Response was not valid JSON"); 174 - let repos = body["repos"].as_array().unwrap(); 175 - 176 - let takendown_repo = repos.iter().find(|r| r["did"] == did); 169 + ); 170 + if let Some(ref c) = cursor { 171 + url.push_str(&format!("&cursor={}", c)); 172 + } 173 + let res = client 174 + .get(&url) 175 + .send() 176 + .await 177 + .expect("Failed to send request"); 178 + assert_eq!(res.status(), StatusCode::OK); 179 + let body: Value = res.json().await.expect("Response was not valid JSON"); 180 + let repos = body["repos"].as_array().unwrap(); 181 + if let Some(found) = repos.iter().find(|r| r["did"] == did) { 182 + takendown_repo = Some(found.clone()); 183 + break; 184 + } 185 + match body["cursor"].as_str() { 186 + Some(c) => cursor = Some(c.to_string()), 187 + None => break, 188 + } 189 + } 177 190 assert!(takendown_repo.is_some(), "Takendown repo should be in list"); 178 191 let repo = takendown_repo.unwrap(); 179 - assert_eq!(repo["active"], false); 180 - assert_eq!(repo["status"], "takendown"); 192 + assert_eq!(repo["active"], false, "repo should be inactive: {:?}", repo); 193 + assert_eq!(repo["status"], "takendown", "repo status should be takendown: {:?}", repo); 181 194 } 182 195 183 196 #[tokio::test]
+63 -63
crates/tranquil-pds/tests/whole_story.rs
··· 486 486 let base = base_url().await; 487 487 let (did, jwt) = setup_new_user("blob-lifecycle").await; 488 488 489 - let blob1_data = b"First blob for testing lifecycle"; 489 + let blob1_data = format!("First blob for testing lifecycle {}", uuid::Uuid::new_v4()); 490 + let blob1_data = blob1_data.as_bytes(); 490 491 let upload1_res = client 491 492 .post(format!("{}/xrpc/com.atproto.repo.uploadBlob", base)) 492 493 .header(header::CONTENT_TYPE, "text/plain") ··· 500 501 let blob1 = upload1_body["blob"].clone(); 501 502 let blob1_cid = blob1["ref"]["$link"].as_str().unwrap(); 502 503 503 - let blob2_data = b"Second blob for testing lifecycle"; 504 + let blob2_data = format!("Second blob for testing lifecycle {}", uuid::Uuid::new_v4()); 505 + let blob2_data = blob2_data.as_bytes(); 504 506 let upload2_res = client 505 507 .post(format!("{}/xrpc/com.atproto.repo.uploadBlob", base)) 506 508 .header(header::CONTENT_TYPE, "text/plain") ··· 1278 1280 let (did, jwt) = setup_new_user("scale-posts").await; 1279 1281 1280 1282 let post_count = 1000; 1281 - let post_futures: Vec<_> = (0..post_count) 1283 + futures::stream::iter(0..post_count) 1282 1284 .map(|i| { 1283 1285 let client = client.clone(); 1284 1286 let base = base.to_string(); ··· 1311 1313 ); 1312 1314 } 1313 1315 }) 1314 - .collect(); 1315 - 1316 - join_all(post_futures).await; 1316 + .buffer_unordered(50) 1317 + .collect::<Vec<()>>() 1318 + .await; 1317 1319 1318 1320 let count_res = client 1319 1321 .get(format!("{}/xrpc/com.atproto.repo.listRecords", base)) ··· 1349 1351 "All posts should have unique URIs" 1350 1352 ); 1351 1353 1352 - let delete_futures: Vec<_> = all_uris 1353 - .iter() 1354 - .take(500) 1354 + futures::stream::iter(all_uris.iter().take(500)) 1355 1355 .map(|uri| { 1356 1356 let client = client.clone(); 1357 1357 let base = base.to_string(); ··· 1373 1373 assert_eq!(res.status(), StatusCode::OK); 1374 1374 } 1375 1375 }) 1376 - .collect(); 1377 - 1378 - join_all(delete_futures).await; 
1376 + .buffer_unordered(50) 1377 + .collect::<Vec<()>>() 1378 + .await; 1379 1379 1380 1380 let final_count = count_records(&client, base, &jwt, &did, "app.bsky.feed.post").await; 1381 1381 assert_eq!( ··· 1396 1396 1397 1397 let users: Vec<(String, String)> = join_all(user_futures).await; 1398 1398 1399 - let follow_futures: Vec<_> = users 1399 + let follow_pairs: Vec<(String, String, String)> = users 1400 1400 .iter() 1401 1401 .enumerate() 1402 1402 .flat_map(|(i, (follower_did, follower_jwt))| { 1403 + users.iter().enumerate() 1404 + .filter(move |(j, _)| *j != i) 1405 + .map(|(_, (followee_did, _))| { 1406 + (follower_did.clone(), follower_jwt.clone(), followee_did.clone()) 1407 + }) 1408 + .collect::<Vec<_>>() 1409 + }) 1410 + .collect(); 1411 + futures::stream::iter(follow_pairs) 1412 + .map(|(follower_did, follower_jwt, followee_did)| { 1403 1413 let client = client.clone(); 1404 1414 let base = base.to_string(); 1405 - users.iter().enumerate().filter(move |(j, _)| *j != i).map({ 1406 - let client = client.clone(); 1407 - let base = base.clone(); 1408 - let follower_did = follower_did.clone(); 1409 - let follower_jwt = follower_jwt.clone(); 1410 - move |(_, (followee_did, _))| { 1411 - let client = client.clone(); 1412 - let base = base.clone(); 1413 - let follower_did = follower_did.clone(); 1414 - let follower_jwt = follower_jwt.clone(); 1415 - let followee_did = followee_did.clone(); 1416 - async move { 1417 - let rkey = format!( 1418 - "follow_{}", 1419 - &uuid::Uuid::new_v4().simple().to_string()[..12] 1420 - ); 1421 - let res = client 1422 - .post(format!("{}/xrpc/com.atproto.repo.putRecord", base)) 1423 - .bearer_auth(&follower_jwt) 1424 - .json(&json!({ 1425 - "repo": follower_did, 1426 - "collection": "app.bsky.graph.follow", 1427 - "rkey": rkey, 1428 - "record": { 1429 - "$type": "app.bsky.graph.follow", 1430 - "subject": followee_did, 1431 - "createdAt": Utc::now().to_rfc3339() 1432 - } 1433 - })) 1434 - .send() 1435 - .await 1436 - 
.expect("Follow failed"); 1437 - let status = res.status(); 1438 - let body: Value = res.json().await.unwrap_or_default(); 1439 - assert_eq!(status, StatusCode::OK, "Follow failed: {:?}", body); 1440 - } 1441 - } 1442 - }) 1415 + async move { 1416 + let rkey = format!( 1417 + "follow_{}", 1418 + &uuid::Uuid::new_v4().simple().to_string()[..12] 1419 + ); 1420 + let res = client 1421 + .post(format!("{}/xrpc/com.atproto.repo.putRecord", base)) 1422 + .bearer_auth(&follower_jwt) 1423 + .json(&json!({ 1424 + "repo": follower_did, 1425 + "collection": "app.bsky.graph.follow", 1426 + "rkey": rkey, 1427 + "record": { 1428 + "$type": "app.bsky.graph.follow", 1429 + "subject": followee_did, 1430 + "createdAt": Utc::now().to_rfc3339() 1431 + } 1432 + })) 1433 + .send() 1434 + .await 1435 + .expect("Follow failed"); 1436 + let status = res.status(); 1437 + let body: Value = res.json().await.unwrap_or_default(); 1438 + assert_eq!(status, StatusCode::OK, "Follow failed: {:?}", body); 1439 + } 1443 1440 }) 1444 - .collect(); 1445 - 1446 - join_all(follow_futures).await; 1441 + .buffer_unordered(50) 1442 + .collect::<Vec<()>>() 1443 + .await; 1447 1444 1448 1445 let expected_follows_per_user = user_count - 1; 1449 1446 let verify_futures: Vec<_> = users ··· 1529 1526 let (did, jwt) = setup_new_user("scale-blobs").await; 1530 1527 1531 1528 let blob_count = 300; 1532 - let blob_futures: Vec<_> = (0..blob_count) 1529 + let blobs: Vec<Value> = futures::stream::iter(0..blob_count) 1533 1530 .map(|i| { 1534 1531 let client = client.clone(); 1535 1532 let base = base.to_string(); 1536 1533 let jwt = jwt.clone(); 1537 1534 async move { 1538 - let blob_data = format!("Blob data number {} with some padding to make it realistic size for testing purposes", i); 1535 + let blob_data = format!("Blob data number {} {} with some padding to make it realistic size for testing purposes", i, uuid::Uuid::new_v4()); 1539 1536 let res = client 1540 1537 
.post(format!("{}/xrpc/com.atproto.repo.uploadBlob", base)) 1541 1538 .header(header::CONTENT_TYPE, "text/plain") ··· 1549 1546 body["blob"].clone() 1550 1547 } 1551 1548 }) 1552 - .collect(); 1549 + .buffer_unordered(50) 1550 + .collect::<Vec<Value>>() 1551 + .await; 1553 1552 1554 - let blobs: Vec<Value> = join_all(blob_futures).await; 1555 - 1556 - let post_futures: Vec<_> = blobs 1553 + let blob_chunks: Vec<(usize, Vec<Value>)> = blobs 1557 1554 .chunks(3) 1558 1555 .enumerate() 1556 + .map(|(i, chunk)| (i, chunk.to_vec())) 1557 + .collect(); 1558 + futures::stream::iter(blob_chunks) 1559 1559 .map(|(i, blob_chunk)| { 1560 1560 let client = client.clone(); 1561 1561 let base = base.to_string(); ··· 1596 1596 assert_eq!(status, StatusCode::OK, "Post with blobs failed: {:?}", body); 1597 1597 } 1598 1598 }) 1599 - .collect(); 1600 - 1601 - join_all(post_futures).await; 1599 + .buffer_unordered(50) 1600 + .collect::<Vec<()>>() 1601 + .await; 1602 1602 1603 1603 let list_blobs_res = client 1604 1604 .get(format!("{}/xrpc/com.atproto.sync.listBlobs", base))
+27
crates/tranquil-ripple/Cargo.toml
[package]
name = "tranquil-ripple"
version.workspace = true
edition.workspace = true
license.workspace = true

[dependencies]
tranquil-infra = { workspace = true }

async-trait = { workspace = true }
backon = { workspace = true }
bincode = { workspace = true }
bytes = { workspace = true }
foca = { workspace = true }
parking_lot = { workspace = true }
# NOTE(review): pinned directly instead of `{ workspace = true }` like every
# other dependency here — confirm rand is intentionally absent from the
# workspace dependency table.
rand = "0.9"
serde = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true, features = ["net", "io-util", "sync", "time"] }
tokio-util = { workspace = true }
tracing = { workspace = true }

[dev-dependencies]
futures = { workspace = true }
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
tracing-subscriber = { workspace = true }
uuid = { workspace = true }
+89
crates/tranquil-ripple/src/cache.rs
··· 1 + use crate::crdt::CrdtStore; 2 + use async_trait::async_trait; 3 + use parking_lot::RwLock; 4 + use std::sync::Arc; 5 + use std::time::Duration; 6 + use tranquil_infra::{Cache, CacheError}; 7 + 8 + pub struct RippleCache { 9 + store: Arc<RwLock<CrdtStore>>, 10 + } 11 + 12 + impl RippleCache { 13 + pub fn new(store: Arc<RwLock<CrdtStore>>) -> Self { 14 + Self { store } 15 + } 16 + } 17 + 18 + #[async_trait] 19 + impl Cache for RippleCache { 20 + async fn get(&self, key: &str) -> Option<String> { 21 + self.store 22 + .read() 23 + .cache_get(key) 24 + .and_then(|bytes| String::from_utf8(bytes).ok()) 25 + } 26 + 27 + async fn set(&self, key: &str, value: &str, ttl: Duration) -> Result<(), CacheError> { 28 + self.store 29 + .write() 30 + .cache_set(key.to_string(), value.as_bytes().to_vec(), ttl.as_millis() as u64); 31 + Ok(()) 32 + } 33 + 34 + async fn delete(&self, key: &str) -> Result<(), CacheError> { 35 + self.store.write().cache_delete(key); 36 + Ok(()) 37 + } 38 + 39 + async fn get_bytes(&self, key: &str) -> Option<Vec<u8>> { 40 + self.store.read().cache_get(key) 41 + } 42 + 43 + async fn set_bytes(&self, key: &str, value: &[u8], ttl: Duration) -> Result<(), CacheError> { 44 + self.store 45 + .write() 46 + .cache_set(key.to_string(), value.to_vec(), ttl.as_millis() as u64); 47 + Ok(()) 48 + } 49 + } 50 + 51 + #[cfg(test)] 52 + mod tests { 53 + use super::*; 54 + 55 + #[tokio::test] 56 + async fn cache_trait_roundtrip() { 57 + let store = Arc::new(RwLock::new(CrdtStore::new(1))); 58 + let cache = RippleCache::new(store); 59 + cache 60 + .set("test", "value", Duration::from_secs(60)) 61 + .await 62 + .unwrap(); 63 + assert_eq!(cache.get("test").await, Some("value".to_string())); 64 + } 65 + 66 + #[tokio::test] 67 + async fn cache_trait_bytes() { 68 + let store = Arc::new(RwLock::new(CrdtStore::new(1))); 69 + let cache = RippleCache::new(store); 70 + let data = vec![0xDE, 0xAD, 0xBE, 0xEF]; 71 + cache 72 + .set_bytes("bin", &data, Duration::from_secs(60)) 73 
+ .await 74 + .unwrap(); 75 + assert_eq!(cache.get_bytes("bin").await, Some(data)); 76 + } 77 + 78 + #[tokio::test] 79 + async fn cache_trait_delete() { 80 + let store = Arc::new(RwLock::new(CrdtStore::new(1))); 81 + let cache = RippleCache::new(store); 82 + cache 83 + .set("del", "x", Duration::from_secs(60)) 84 + .await 85 + .unwrap(); 86 + cache.delete("del").await.unwrap(); 87 + assert_eq!(cache.get("del").await, None); 88 + } 89 + }
+85
crates/tranquil-ripple/src/config.rs
··· 1 + use std::net::SocketAddr; 2 + 3 + fn fnv1a(data: &[u8]) -> u64 { 4 + data.iter().fold(0xcbf29ce484222325u64, |hash, &byte| { 5 + (hash ^ byte as u64).wrapping_mul(0x100000001b3) 6 + }) 7 + } 8 + 9 + #[derive(Debug, Clone)] 10 + pub struct RippleConfig { 11 + pub bind_addr: SocketAddr, 12 + pub seed_peers: Vec<SocketAddr>, 13 + pub machine_id: u64, 14 + pub gossip_interval_ms: u64, 15 + pub cache_max_bytes: usize, 16 + } 17 + 18 + fn parse_env_with_warning<T: std::str::FromStr>(var_name: &str, raw: &str) -> Option<T> { 19 + match raw.parse::<T>() { 20 + Ok(v) => Some(v), 21 + Err(_) => { 22 + tracing::warn!(var = var_name, value = raw, "invalid env var value, using default"); 23 + None 24 + } 25 + } 26 + } 27 + 28 + impl RippleConfig { 29 + pub fn from_env() -> Result<Self, RippleConfigError> { 30 + let bind_addr: SocketAddr = std::env::var("RIPPLE_BIND") 31 + .unwrap_or_else(|_| "0.0.0.0:0".into()) 32 + .parse() 33 + .map_err(|e| RippleConfigError::InvalidAddr(format!("{e}")))?; 34 + 35 + let seed_peers: Vec<SocketAddr> = std::env::var("RIPPLE_PEERS") 36 + .unwrap_or_default() 37 + .split(',') 38 + .filter(|s| !s.trim().is_empty()) 39 + .map(|s| { 40 + s.trim() 41 + .parse() 42 + .map_err(|e| RippleConfigError::InvalidAddr(format!("{s}: {e}"))) 43 + }) 44 + .collect::<Result<Vec<_>, _>>()?; 45 + 46 + let machine_id: u64 = std::env::var("RIPPLE_MACHINE_ID") 47 + .ok() 48 + .and_then(|v| parse_env_with_warning::<u64>("RIPPLE_MACHINE_ID", &v)) 49 + .unwrap_or_else(|| { 50 + let host_str = std::fs::read_to_string("/etc/hostname") 51 + .map(|s| s.trim().to_string()) 52 + .unwrap_or_else(|_| format!("pid-{}", std::process::id())); 53 + let input = format!("{host_str}:{bind_addr}:{}", std::process::id()); 54 + fnv1a(input.as_bytes()) 55 + }); 56 + 57 + let gossip_interval_ms: u64 = std::env::var("RIPPLE_GOSSIP_INTERVAL_MS") 58 + .ok() 59 + .and_then(|v| parse_env_with_warning::<u64>("RIPPLE_GOSSIP_INTERVAL_MS", &v)) 60 + .unwrap_or(200) 61 + .max(50); 62 + 63 + 
let cache_max_mb: usize = std::env::var("RIPPLE_CACHE_MAX_MB") 64 + .ok() 65 + .and_then(|v| parse_env_with_warning::<usize>("RIPPLE_CACHE_MAX_MB", &v)) 66 + .unwrap_or(256) 67 + .clamp(1, 16_384); 68 + 69 + let cache_max_bytes = cache_max_mb.saturating_mul(1024).saturating_mul(1024); 70 + 71 + Ok(Self { 72 + bind_addr, 73 + seed_peers, 74 + machine_id, 75 + gossip_interval_ms, 76 + cache_max_bytes, 77 + }) 78 + } 79 + } 80 + 81 + #[derive(Debug, thiserror::Error)] 82 + pub enum RippleConfigError { 83 + #[error("invalid address: {0}")] 84 + InvalidAddr(String), 85 + }
+31
crates/tranquil-ripple/src/crdt/delta.rs
··· 1 + use super::lww_map::LwwDelta; 2 + use super::g_counter::GCounterDelta; 3 + use serde::{Deserialize, Serialize}; 4 + 5 + const SCHEMA_VERSION: u8 = 1; 6 + 7 + #[derive(Debug, Clone, Serialize, Deserialize)] 8 + pub struct CrdtDelta { 9 + #[serde(default = "default_version")] 10 + pub version: u8, 11 + pub source_node: u64, 12 + pub cache_delta: Option<LwwDelta>, 13 + pub rate_limit_deltas: Vec<GCounterDelta>, 14 + } 15 + 16 + fn default_version() -> u8 { 17 + 1 18 + } 19 + 20 + impl CrdtDelta { 21 + pub fn is_empty(&self) -> bool { 22 + self.cache_delta 23 + .as_ref() 24 + .map_or(true, |d| d.entries.is_empty()) 25 + && self.rate_limit_deltas.is_empty() 26 + } 27 + 28 + pub fn is_compatible(&self) -> bool { 29 + self.version == SCHEMA_VERSION 30 + } 31 + }
+305
crates/tranquil-ripple/src/crdt/g_counter.rs
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};

/// Grow-only counter scoped to one fixed time window. Each node owns its
/// own slot in `increments`, and merges take the per-node maximum, which
/// makes the merge commutative, associative and idempotent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GCounter {
    // node_id -> number of increments recorded by that node
    pub increments: HashMap<u64, u64>,
    pub window_start_ms: u64,
    pub window_duration_ms: u64,
}

impl GCounter {
    pub fn new(window_start_ms: u64, window_duration_ms: u64) -> Self {
        Self {
            increments: HashMap::new(),
            window_start_ms,
            window_duration_ms,
        }
    }

    /// Sum of all nodes' increments, saturating instead of overflowing.
    pub fn total(&self) -> u64 {
        self.increments
            .values()
            .copied()
            .fold(0u64, u64::saturating_add)
    }

    /// Record one event attributed to `node_id`.
    pub fn increment(&mut self, node_id: u64) {
        let slot = self.increments.entry(node_id).or_insert(0);
        *slot = slot.saturating_add(1);
    }

    /// Fold `other` in by taking the per-node maximum; returns whether any
    /// slot actually grew (used by callers for dirty tracking).
    pub fn merge(&mut self, other: &GCounter) -> bool {
        let mut changed = false;
        other.increments.iter().for_each(|(&node, &count)| {
            let slot = self.increments.entry(node).or_insert(0);
            let new_val = (*slot).max(count);
            if new_val != *slot {
                *slot = new_val;
                changed = true;
            }
        });
        changed
    }

    /// A window whose duration has fully elapsed relative to `now_wall_ms`.
    pub fn is_expired(&self, now_wall_ms: u64) -> bool {
        now_wall_ms.saturating_sub(self.window_start_ms) >= self.window_duration_ms
    }
}

/// One counter keyed for transport in a gossip delta.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GCounterDelta {
    pub key: String,
    pub counter: GCounter,
}

/// Distributed fixed-window rate limiter: one [`GCounter`] per key, with a
/// dirty set tracking which counters changed since the last delta extraction.
pub struct RateLimitStore {
    counters: HashMap<String, GCounter>,
    node_id: u64,
    // keys mutated locally (or via merge) and not yet gossiped
    dirty: HashSet<String>,
}

impl RateLimitStore {
    pub fn new(node_id: u64) -> Self {
        Self {
            counters: HashMap::new(),
            node_id,
            dirty: HashSet::new(),
        }
    }

    // Align windows to wall-clock multiples of window_ms so every node
    // computes identical window boundaries; `.max(1)` guards division by 0.
    fn aligned_window_start(now_wall_ms: u64, window_ms: u64) -> u64 {
        (now_wall_ms / window_ms.max(1)) * window_ms.max(1)
    }

    /// Returns `true` and records the event when `key` is under `limit`
    /// for the current window; `false` (deny) when over limit or when
    /// `window_ms` is 0. Callers supply `now_wall_ms`.
    pub fn check_and_increment(
        &mut self,
        key: &str,
        limit: u32,
        window_ms: u64,
        now_wall_ms: u64,
    ) -> bool {
        if window_ms == 0 {
            return false;
        }
        let window_start = Self::aligned_window_start(now_wall_ms, window_ms);

        // NOTE(review): this resets the stored counter whenever its window
        // differs from the locally-aligned one — including when the stored
        // window is *newer* (e.g. just merged from a peer with a faster
        // clock via merge_counter). Confirm that discarding such a window
        // is intended.
        let counter = self
            .counters
            .entry(key.to_string())
            .and_modify(|c| {
                if c.window_start_ms != window_start {
                    *c = GCounter::new(window_start, window_ms);
                }
            })
            .or_insert_with(|| GCounter::new(window_start, window_ms));

        let current = counter.total();
        if current >= limit as u64 {
            return false;
        }
        counter.increment(self.node_id);
        self.dirty.insert(key.to_string());
        true
    }

    /// Fold a counter received from a peer into local state. Same window:
    /// CRDT merge; newer remote window: wholesale replace; unknown key:
    /// adopt; older remote window or zero duration: reject. Returns whether
    /// local state changed (and is therefore marked dirty for re-gossip).
    pub fn merge_counter(&mut self, key: String, remote: &GCounter) -> bool {
        if remote.window_duration_ms == 0 {
            return false;
        }
        match self.counters.get_mut(&key) {
            Some(local) if local.window_start_ms == remote.window_start_ms => {
                // Same window but different durations means the two nodes
                // disagree on the limit config for this key — do not merge.
                if local.window_duration_ms != remote.window_duration_ms {
                    tracing::warn!(
                        key = %key,
                        local_window = local.window_duration_ms,
                        remote_window = remote.window_duration_ms,
                        "window_duration_ms mismatch, rejecting merge"
                    );
                    return false;
                }
                let changed = local.merge(remote);
                if changed {
                    self.dirty.insert(key);
                }
                changed
            }
            Some(local) if remote.window_start_ms > local.window_start_ms => {
                self.counters.insert(key.clone(), remote.clone());
                self.dirty.insert(key);
                true
            }
            None => {
                self.counters.insert(key.clone(), remote.clone());
                self.dirty.insert(key);
                true
            }
            _ => false,
        }
    }

    /// Snapshot every dirty counter for gossip; does not clear the dirty
    /// set (callers clear after a successful send).
    pub fn extract_dirty_deltas(&self) -> Vec<GCounterDelta> {
        self.dirty
            .iter()
            .filter_map(|key| {
                self.counters
                    .get(key)
                    .map(|counter| GCounterDelta {
                        key: key.clone(),
                        counter: counter.clone(),
                    })
            })
            .collect()
    }

    /// Drop all dirty marks.
    pub fn clear_dirty(&mut self) {
        self.dirty.clear();
    }

    /// Drop dirty marks for the given keys only.
    pub fn clear_dirty_keys(&mut self, keys: impl Iterator<Item = impl AsRef<str>>) {
        keys.for_each(|k| {
            self.dirty.remove(k.as_ref());
        });
    }

    /// Current total for `key` in the window containing `now_wall_ms`, or
    /// 0 when there is no counter for that exact window.
    pub fn peek_count(&self, key: &str, window_ms: u64, now_wall_ms: u64) -> u64 {
        match self.counters.get(key) {
            Some(counter) if counter.window_start_ms == Self::aligned_window_start(now_wall_ms, window_ms) => {
                counter.total()
            }
            _ => 0,
        }
    }

    /// Borrow the counter for `key` only if it is currently dirty.
    pub fn peek_dirty_counter(&self, key: &str) -> Option<&GCounter> {
        match self.dirty.contains(key) {
            true => self.counters.get(key),
            false => None,
        }
    }

    /// Drop the dirty mark for a single key.
    pub fn clear_single_dirty(&mut self, key: &str) {
        self.dirty.remove(key);
    }

    /// Rough heuristic of resident memory (keys + per-node slots + a fixed
    /// per-counter overhead); not an exact accounting.
    pub fn estimated_bytes(&self) -> usize {
        const PER_COUNTER_OVERHEAD: usize = 128;
        self.counters
            .iter()
            .map(|(key, counter)| {
                key.len()
                    + std::mem::size_of::<GCounter>()
                    + counter.increments.len() * (std::mem::size_of::<u64>() * 2)
                    + PER_COUNTER_OVERHEAD
            })
            .fold(0usize, usize::saturating_add)
    }

    /// Remove counters whose window has fully elapsed, along with their
    /// dirty marks.
    pub fn gc_expired(&mut self, now_wall_ms: u64) {
        let expired: Vec<String> = self
            .counters
            .iter()
            .filter(|(_, c)| c.is_expired(now_wall_ms))
            .map(|(k, _)| k.clone())
            .collect();
        expired.iter().for_each(|key| {
            self.counters.remove(key);
            self.dirty.remove(key);
        });
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn increment_and_total() {
        let mut counter = GCounter::new(0, 60_000);
        counter.increment(1);
        counter.increment(1);
        counter.increment(2);
        assert_eq!(counter.total(), 3);
    }

    #[test]
    fn merge_per_node_max() {
        let mut a = GCounter::new(0, 60_000);
        a.increment(1);
        a.increment(1);
        a.increment(2);

        let mut b = GCounter::new(0, 60_000);
        b.increment(1);
        b.increment(2);
        b.increment(2);
        b.increment(2);

        // merge takes max per node: node 1 -> max(2,1), node 2 -> max(1,3)
        a.merge(&b);
        assert_eq!(*a.increments.get(&1).unwrap(), 2);
        assert_eq!(*a.increments.get(&2).unwrap(), 3);
        assert_eq!(a.total(), 5);
    }

    #[test]
    fn merge_commutativity() {
        let mut a = GCounter::new(0, 60_000);
        a.increments.insert(1, 5);
        a.increments.insert(2, 3);

        let mut b = GCounter::new(0, 60_000);
        b.increments.insert(1, 3);
        b.increments.insert(2, 7);

        // a⊔b and b⊔a must converge to the same totals
        let mut ab = a.clone();
        ab.merge(&b);
        let mut ba = b.clone();
        ba.merge(&a);
        assert_eq!(ab.total(), ba.total());
    }

    #[test]
    fn window_rollover() {
        let mut store = RateLimitStore::new(1);
        assert!(store.check_and_increment("k", 2, 1000, 500));
        assert!(store.check_and_increment("k", 2, 1000, 600));
        assert!(!store.check_and_increment("k", 2, 1000, 700));
        // t=1500 falls in the next aligned window, so the limit resets
        assert!(store.check_and_increment("k", 2, 1000, 1500));
    }

    #[test]
    fn rate_limit_enforcement() {
        let mut store = RateLimitStore::new(1);
        assert!(store.check_and_increment("k", 3, 60_000, 100));
        assert!(store.check_and_increment("k", 3, 60_000, 200));
        assert!(store.check_and_increment("k", 3, 60_000, 300));
        assert!(!store.check_and_increment("k", 3, 60_000, 400));
    }

    #[test]
    fn gc_expired_windows() {
        let mut store = RateLimitStore::new(1);
        store.check_and_increment("k", 10, 1000, 0);
        assert_eq!(store.counters.len(), 1);
        assert_eq!(store.dirty.len(), 1);
        store.gc_expired(2000);
        assert_eq!(store.counters.len(), 0);
        assert_eq!(store.dirty.len(), 0);
    }

    #[test]
    fn dirty_tracking() {
        let mut store = RateLimitStore::new(1);
        assert!(store.extract_dirty_deltas().is_empty());

        store.check_and_increment("k1", 10, 60_000, 100);
        store.check_and_increment("k2", 10, 60_000, 100);
        assert_eq!(store.extract_dirty_deltas().len(), 2);

        store.clear_dirty();
        assert!(store.extract_dirty_deltas().is_empty());

        store.check_and_increment("k1", 10, 60_000, 200);
        assert_eq!(store.extract_dirty_deltas().len(), 1);
    }
}
+206
crates/tranquil-ripple/src/crdt/hlc.rs
use serde::{Deserialize, Serialize};
use std::time::{SystemTime, UNIX_EPOCH};

/// Hybrid logical clock timestamp: wall-clock millis, a logical counter
/// ordering events within the same millisecond, and the node id as the
/// final tie-breaker so the ordering is total across nodes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct HlcTimestamp {
    pub wall_ms: u64,
    pub counter: u32,
    pub node_id: u64,
}

impl HlcTimestamp {
    /// Smallest possible timestamp; compares below every real one.
    pub const ZERO: Self = Self {
        wall_ms: 0,
        counter: 0,
        node_id: 0,
    };
}

impl PartialOrd for HlcTimestamp {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for HlcTimestamp {
    // Order by wall clock, then counter, then node id.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.wall_ms
            .cmp(&other.wall_ms)
            .then(self.counter.cmp(&other.counter))
            .then(self.node_id.cmp(&other.node_id))
    }
}

// Bump the logical counter; on u32 overflow roll into the next wall
// millisecond so issued timestamps remain strictly increasing.
fn advance_counter(wall: u64, counter: u32) -> (u64, u32) {
    match counter == u32::MAX {
        true => (wall.saturating_add(1), 0),
        false => (wall, counter + 1),
    }
}

/// Per-node hybrid logical clock. Both methods take `&mut self`, so any
/// sharing requires external synchronization by the caller.
pub struct Hlc {
    node_id: u64,
    // highest wall value handed out or observed so far
    last_wall_ms: u64,
    // counter paired with `last_wall_ms`
    last_counter: u32,
}

impl Hlc {
    pub fn new(node_id: u64) -> Self {
        Self {
            node_id,
            last_wall_ms: 0,
            last_counter: 0,
        }
    }

    // Current UNIX time in ms; a pre-epoch system clock collapses to 0
    // via unwrap_or_default instead of panicking.
    fn physical_now() -> u64 {
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_millis() as u64
    }

    /// Issue the next local timestamp, strictly greater than anything this
    /// clock has previously issued or received.
    pub fn now(&mut self) -> HlcTimestamp {
        let phys = Self::physical_now();
        // Fresh millisecond: restart the counter. Otherwise (clock stalled
        // or stepped back) stay on the remembered wall value and advance.
        let (wall, counter) = match phys > self.last_wall_ms {
            true => (phys, 0u32),
            false => advance_counter(self.last_wall_ms, self.last_counter),
        };
        self.last_wall_ms = wall;
        self.last_counter = counter;
        HlcTimestamp {
            wall_ms: wall,
            counter,
            node_id: self.node_id,
        }
    }

    /// Fold a remote timestamp into this clock, returning a local
    /// timestamp greater than both it and anything issued before.
    ///
    /// Remote wall clocks more than 60s ahead of ours are capped (with a
    /// warning) so a single skewed peer cannot drag the cluster's clocks
    /// arbitrarily far into the future.
    pub fn receive(&mut self, remote: HlcTimestamp) -> HlcTimestamp {
        let phys = Self::physical_now();
        let max_allowed = phys + 60_000;
        let capped_remote_wall = remote.wall_ms.min(max_allowed);
        if remote.wall_ms > max_allowed {
            tracing::warn!(
                remote_wall_ms = remote.wall_ms,
                local_wall_ms = phys,
                drift_ms = remote.wall_ms.saturating_sub(phys),
                capped_to = max_allowed,
                "remote HLC wall clock >60s ahead, capping"
            );
        }
        // A capped wall value invalidates the remote counter: it belonged
        // to a different (later) millisecond.
        let remote_counter = match capped_remote_wall == remote.wall_ms {
            true => remote.counter,
            false => 0u32,
        };
        let max_wall = phys.max(self.last_wall_ms).max(capped_remote_wall);
        let (wall, counter) = match max_wall {
            // Physical time strictly ahead of both logical clocks: fresh ms.
            w if w == phys && w > self.last_wall_ms && w > capped_remote_wall => (w, 0u32),
            // Local and remote tie at the max: advance past both counters.
            w if w == self.last_wall_ms && w == capped_remote_wall => {
                advance_counter(w, self.last_counter.max(remote_counter))
            }
            // Only our own clock holds the max: advance our counter.
            w if w == self.last_wall_ms => advance_counter(w, self.last_counter),
            // Only the remote holds the max: advance its counter.
            w if w == capped_remote_wall => advance_counter(w, remote_counter),
            // Remaining case: physical time alone holds the max.
            w => (w, 0u32),
        };
        self.last_wall_ms = wall;
        self.last_counter = counter;
        HlcTimestamp {
            wall_ms: wall,
            counter,
            node_id: self.node_id,
        }
    }

    pub fn node_id(&self) -> u64 {
        self.node_id
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn monotonicity() {
        let mut hlc = Hlc::new(1);
        let timestamps: Vec<HlcTimestamp> = (0..100).map(|_| hlc.now()).collect();
        timestamps.windows(2).for_each(|w| {
            assert!(w[1] > w[0], "timestamps must be strictly increasing");
        });
    }

    #[test]
    fn merge_takes_max_within_drift_cap() {
        let mut hlc = Hlc::new(1);
        let now = Hlc::physical_now();
        // 5s ahead is within the 60s drift cap, so the remote wall wins
        let remote = HlcTimestamp {
            wall_ms: now + 5000,
            counter: 10,
            node_id: 2,
        };
        let merged = hlc.receive(remote);
        assert!(merged.wall_ms >= remote.wall_ms);
        let after = hlc.now();
        assert!(after > merged);
    }

    #[test]
    fn drift_cap_limits_remote_wall() {
        let mut hlc = Hlc::new(1);
        let now = Hlc::physical_now();
        // 120s ahead exceeds the cap and must be truncated to ~now+60s
        let remote = HlcTimestamp {
            wall_ms: now + 120_000,
            counter: 50,
            node_id: 2,
        };
        let merged = hlc.receive(remote);
        assert!(merged.wall_ms <= now + 60_000 + 1);
        let after = hlc.now();
        assert!(after > merged);
    }

    #[test]
    fn total_order_across_nodes() {
        // Equal wall and counter: node id breaks the tie
        let a = HlcTimestamp {
            wall_ms: 100,
            counter: 0,
            node_id: 1,
        };
        let b = HlcTimestamp {
            wall_ms: 100,
            counter: 0,
            node_id: 2,
        };
        assert!(a < b);
        assert_ne!(a, b);
    }

    #[test]
    fn counter_overflow_bumps_wall() {
        let mut hlc = Hlc::new(1);
        let future_wall = u64::MAX / 2;
        hlc.last_wall_ms = future_wall;
        hlc.last_counter = u32::MAX;
        let ts = hlc.now();
        assert_eq!(ts.wall_ms, future_wall + 1);
        assert_eq!(ts.counter, 0);
        let ts2 = hlc.now();
        assert!(ts2 > ts);
    }

    #[test]
    fn receive_counter_overflow_bumps_wall() {
        let mut hlc = Hlc::new(1);
        let future_wall = u64::MAX / 2;
        hlc.last_wall_ms = future_wall;
        hlc.last_counter = u32::MAX;
        let remote = HlcTimestamp {
            wall_ms: future_wall,
            counter: u32::MAX,
            node_id: 2,
        };
        let merged = hlc.receive(remote);
        assert_eq!(merged.wall_ms, future_wall + 1);
        assert_eq!(merged.counter, 0);
    }
}
+388
crates/tranquil-ripple/src/crdt/lww_map.rs
··· 1 + use super::hlc::HlcTimestamp; 2 + use parking_lot::Mutex; 3 + use serde::{Deserialize, Serialize}; 4 + use std::collections::{BTreeMap, HashMap}; 5 + 6 + #[derive(Debug, Clone, Serialize, Deserialize)] 7 + pub struct LwwEntry { 8 + pub value: Option<Vec<u8>>, 9 + pub timestamp: HlcTimestamp, 10 + pub ttl_ms: u64, 11 + pub created_at_wall_ms: u64, 12 + } 13 + 14 + impl LwwEntry { 15 + fn is_expired(&self, now_wall_ms: u64) -> bool { 16 + self.ttl_ms > 0 && now_wall_ms.saturating_sub(self.created_at_wall_ms) >= self.ttl_ms 17 + } 18 + 19 + fn is_tombstone(&self) -> bool { 20 + self.value.is_none() 21 + } 22 + 23 + fn tombstone_expired(&self, now_wall_ms: u64) -> bool { 24 + self.is_tombstone() 25 + && self.ttl_ms > 0 26 + && now_wall_ms.saturating_sub(self.created_at_wall_ms) >= self.ttl_ms.saturating_mul(2) 27 + } 28 + 29 + fn entry_byte_size(&self, key: &str) -> usize { 30 + const OVERHEAD: usize = 128; 31 + key.len() 32 + + self.value.as_ref().map_or(0, Vec::len) 33 + + std::mem::size_of::<Self>() 34 + + OVERHEAD 35 + } 36 + } 37 + 38 + #[derive(Debug, Clone, Serialize, Deserialize)] 39 + pub struct LwwDelta { 40 + pub entries: Vec<(String, LwwEntry)>, 41 + } 42 + 43 + struct LruTracker { 44 + counter: u64, 45 + counter_to_key: BTreeMap<u64, String>, 46 + key_to_counter: HashMap<String, u64>, 47 + } 48 + 49 + impl LruTracker { 50 + fn new() -> Self { 51 + Self { 52 + counter: 0, 53 + counter_to_key: BTreeMap::new(), 54 + key_to_counter: HashMap::new(), 55 + } 56 + } 57 + 58 + fn promote(&mut self, key: &str) { 59 + if let Some(old_counter) = self.key_to_counter.remove(key) { 60 + self.counter_to_key.remove(&old_counter); 61 + } 62 + self.counter = self.counter.saturating_add(1); 63 + self.counter_to_key.insert(self.counter, key.to_string()); 64 + self.key_to_counter.insert(key.to_string(), self.counter); 65 + } 66 + 67 + fn remove(&mut self, key: &str) { 68 + if let Some(counter) = self.key_to_counter.remove(key) { 69 + 
self.counter_to_key.remove(&counter); 70 + } 71 + } 72 + 73 + fn pop_least_recent(&mut self) -> Option<String> { 74 + let (&counter, _) = self.counter_to_key.iter().next()?; 75 + let key = self.counter_to_key.remove(&counter)?; 76 + self.key_to_counter.remove(&key); 77 + Some(key) 78 + } 79 + } 80 + 81 + pub struct LwwMap { 82 + entries: HashMap<String, LwwEntry>, 83 + lru: Mutex<LruTracker>, 84 + estimated_bytes: usize, 85 + } 86 + 87 + impl LwwMap { 88 + pub fn new() -> Self { 89 + Self { 90 + entries: HashMap::new(), 91 + lru: Mutex::new(LruTracker::new()), 92 + estimated_bytes: 0, 93 + } 94 + } 95 + 96 + pub fn get(&self, key: &str, now_wall_ms: u64) -> Option<Vec<u8>> { 97 + let entry = self.entries.get(key)?; 98 + if entry.is_expired(now_wall_ms) || entry.is_tombstone() { 99 + return None; 100 + } 101 + let value = entry.value.clone(); 102 + self.lru.lock().promote(key); 103 + value 104 + } 105 + 106 + pub fn set(&mut self, key: String, value: Vec<u8>, timestamp: HlcTimestamp, ttl_ms: u64, wall_ms_now: u64) { 107 + let entry = LwwEntry { 108 + created_at_wall_ms: wall_ms_now, 109 + value: Some(value), 110 + timestamp, 111 + ttl_ms, 112 + }; 113 + self.remove_estimated_bytes(&key); 114 + self.estimated_bytes += entry.entry_byte_size(&key); 115 + self.entries.insert(key.clone(), entry); 116 + self.lru.lock().promote(&key); 117 + } 118 + 119 + pub fn delete(&mut self, key: &str, timestamp: HlcTimestamp, wall_ms_now: u64) { 120 + match self.entries.get(key) { 121 + Some(existing) if existing.timestamp >= timestamp => return, 122 + _ => {} 123 + } 124 + let ttl_ms = self 125 + .entries 126 + .get(key) 127 + .map_or(60_000, |e| e.ttl_ms.max(60_000)); 128 + let entry = LwwEntry { 129 + value: None, 130 + timestamp, 131 + ttl_ms, 132 + created_at_wall_ms: wall_ms_now, 133 + }; 134 + self.remove_estimated_bytes(key); 135 + self.estimated_bytes += entry.entry_byte_size(key); 136 + self.entries.insert(key.to_string(), entry); 137 + self.lru.lock().remove(key); 138 + } 
139 + 140 + pub fn merge_entry(&mut self, key: String, remote: LwwEntry) -> bool { 141 + match self.entries.get(&key) { 142 + Some(existing) if existing.timestamp >= remote.timestamp => false, 143 + _ => { 144 + let is_tombstone = remote.is_tombstone(); 145 + self.remove_estimated_bytes(&key); 146 + self.estimated_bytes += remote.entry_byte_size(&key); 147 + self.entries.insert(key.clone(), remote); 148 + let mut lru = self.lru.lock(); 149 + match is_tombstone { 150 + true => lru.remove(&key), 151 + false => lru.promote(&key), 152 + } 153 + true 154 + } 155 + } 156 + } 157 + 158 + pub fn extract_delta_since(&self, watermark: HlcTimestamp) -> LwwDelta { 159 + let entries = self 160 + .entries 161 + .iter() 162 + .filter(|(_, entry)| entry.timestamp > watermark) 163 + .map(|(k, v)| (k.clone(), v.clone())) 164 + .collect(); 165 + LwwDelta { entries } 166 + } 167 + 168 + pub fn gc_tombstones(&mut self, now_wall_ms: u64) { 169 + let expired_keys: Vec<String> = self 170 + .entries 171 + .iter() 172 + .filter(|(_, entry)| entry.tombstone_expired(now_wall_ms)) 173 + .map(|(k, _)| k.clone()) 174 + .collect(); 175 + expired_keys.iter().for_each(|key| { 176 + self.remove_estimated_bytes(key); 177 + self.entries.remove(key); 178 + }); 179 + let mut lru = self.lru.lock(); 180 + expired_keys.iter().for_each(|key| { 181 + lru.remove(key); 182 + }); 183 + } 184 + 185 + pub fn gc_expired(&mut self, now_wall_ms: u64) { 186 + let expired_keys: Vec<String> = self 187 + .entries 188 + .iter() 189 + .filter(|(_, entry)| entry.is_expired(now_wall_ms) && !entry.is_tombstone()) 190 + .map(|(k, _)| k.clone()) 191 + .collect(); 192 + expired_keys.iter().for_each(|key| { 193 + self.remove_estimated_bytes(key); 194 + self.entries.remove(key); 195 + }); 196 + let mut lru = self.lru.lock(); 197 + expired_keys.iter().for_each(|key| { 198 + lru.remove(key); 199 + }); 200 + } 201 + 202 + pub fn evict_lru(&mut self) -> Option<String> { 203 + let key = self.lru.lock().pop_least_recent()?; 204 + 
self.remove_estimated_bytes(&key); 205 + self.entries.remove(&key); 206 + Some(key) 207 + } 208 + 209 + pub fn estimated_bytes(&self) -> usize { 210 + self.estimated_bytes 211 + } 212 + 213 + pub fn len(&self) -> usize { 214 + self.entries.len() 215 + } 216 + 217 + fn remove_estimated_bytes(&mut self, key: &str) { 218 + if let Some(existing) = self.entries.get(key) { 219 + let size = existing.entry_byte_size(key); 220 + if size > self.estimated_bytes { 221 + tracing::warn!( 222 + entry_size = size, 223 + estimated_bytes = self.estimated_bytes, 224 + key = key, 225 + "estimated_bytes underflow detected, resetting to 0" 226 + ); 227 + } 228 + self.estimated_bytes = self.estimated_bytes.saturating_sub(size); 229 + } 230 + } 231 + } 232 + 233 + #[cfg(test)] 234 + mod tests { 235 + use super::*; 236 + 237 + fn ts(wall: u64, counter: u32, node: u64) -> HlcTimestamp { 238 + HlcTimestamp { 239 + wall_ms: wall, 240 + counter, 241 + node_id: node, 242 + } 243 + } 244 + 245 + #[test] 246 + fn set_and_get() { 247 + let mut map = LwwMap::new(); 248 + map.set("k1".into(), b"hello".to_vec(), ts(100, 0, 1), 60_000, 100); 249 + assert_eq!(map.get("k1", 100), Some(b"hello".to_vec())); 250 + } 251 + 252 + #[test] 253 + fn ttl_expiry() { 254 + let mut map = LwwMap::new(); 255 + map.set("k1".into(), b"hello".to_vec(), ts(100, 0, 1), 1000, 100); 256 + assert_eq!(map.get("k1", 100), Some(b"hello".to_vec())); 257 + assert_eq!(map.get("k1", 1200), None); 258 + } 259 + 260 + #[test] 261 + fn merge_higher_timestamp_wins() { 262 + let mut map = LwwMap::new(); 263 + map.set("k1".into(), b"old".to_vec(), ts(100, 0, 1), 60_000, 100); 264 + let merged = map.merge_entry( 265 + "k1".into(), 266 + LwwEntry { 267 + value: Some(b"new".to_vec()), 268 + timestamp: ts(200, 0, 2), 269 + ttl_ms: 60_000, 270 + created_at_wall_ms: 200, 271 + }, 272 + ); 273 + assert!(merged); 274 + assert_eq!(map.get("k1", 200), Some(b"new".to_vec())); 275 + } 276 + 277 + #[test] 278 + fn merge_lower_timestamp_rejected() { 
279 + let mut map = LwwMap::new(); 280 + map.set("k1".into(), b"current".to_vec(), ts(200, 0, 1), 60_000, 200); 281 + let merged = map.merge_entry( 282 + "k1".into(), 283 + LwwEntry { 284 + value: Some(b"stale".to_vec()), 285 + timestamp: ts(100, 0, 2), 286 + ttl_ms: 60_000, 287 + created_at_wall_ms: 100, 288 + }, 289 + ); 290 + assert!(!merged); 291 + assert_eq!(map.get("k1", 200), Some(b"current".to_vec())); 292 + } 293 + 294 + #[test] 295 + fn merge_commutativity() { 296 + let e1 = LwwEntry { 297 + value: Some(b"a".to_vec()), 298 + timestamp: ts(100, 0, 1), 299 + ttl_ms: 60_000, 300 + created_at_wall_ms: 100, 301 + }; 302 + let e2 = LwwEntry { 303 + value: Some(b"b".to_vec()), 304 + timestamp: ts(200, 0, 2), 305 + ttl_ms: 60_000, 306 + created_at_wall_ms: 200, 307 + }; 308 + 309 + let mut map_ab = LwwMap::new(); 310 + map_ab.merge_entry("k".into(), e1.clone()); 311 + map_ab.merge_entry("k".into(), e2.clone()); 312 + 313 + let mut map_ba = LwwMap::new(); 314 + map_ba.merge_entry("k".into(), e2); 315 + map_ba.merge_entry("k".into(), e1); 316 + 317 + assert_eq!(map_ab.get("k", 200), map_ba.get("k", 200)); 318 + } 319 + 320 + #[test] 321 + fn merge_idempotency() { 322 + let e = LwwEntry { 323 + value: Some(b"a".to_vec()), 324 + timestamp: ts(100, 0, 1), 325 + ttl_ms: 60_000, 326 + created_at_wall_ms: 100, 327 + }; 328 + let mut map = LwwMap::new(); 329 + assert!(map.merge_entry("k".into(), e.clone())); 330 + assert!(!map.merge_entry("k".into(), e)); 331 + } 332 + 333 + #[test] 334 + fn delete_creates_tombstone() { 335 + let mut map = LwwMap::new(); 336 + map.set("k1".into(), b"val".to_vec(), ts(100, 0, 1), 60_000, 100); 337 + map.delete("k1", ts(200, 0, 1), 200); 338 + assert_eq!(map.get("k1", 200), None); 339 + } 340 + 341 + #[test] 342 + fn tombstone_gc() { 343 + let mut map = LwwMap::new(); 344 + map.set("k1".into(), b"val".to_vec(), ts(100, 0, 1), 60_000, 100); 345 + map.delete("k1", ts(100, 1, 1), 100); 346 + assert_eq!(map.len(), 1); 347 + 
map.gc_tombstones(100 + 120_001); 348 + assert_eq!(map.len(), 0); 349 + } 350 + 351 + #[test] 352 + fn delta_extraction() { 353 + let mut map = LwwMap::new(); 354 + map.set("k1".into(), b"a".to_vec(), ts(100, 0, 1), 60_000, 100); 355 + map.set("k2".into(), b"b".to_vec(), ts(200, 0, 1), 60_000, 200); 356 + let delta = map.extract_delta_since(ts(150, 0, 0)); 357 + assert_eq!(delta.entries.len(), 1); 358 + assert_eq!(delta.entries[0].0, "k2"); 359 + } 360 + 361 + #[test] 362 + fn lru_eviction() { 363 + let mut map = LwwMap::new(); 364 + map.set("k1".into(), b"a".to_vec(), ts(100, 0, 1), 60_000, 100); 365 + map.set("k2".into(), b"b".to_vec(), ts(101, 0, 1), 60_000, 101); 366 + map.set("k3".into(), b"c".to_vec(), ts(102, 0, 1), 60_000, 102); 367 + let _ = map.get("k1", 102); 368 + let evicted = map.evict_lru(); 369 + assert_eq!(evicted.as_deref(), Some("k2")); 370 + } 371 + 372 + #[test] 373 + fn merged_entries_are_evictable() { 374 + let mut map = LwwMap::new(); 375 + map.merge_entry( 376 + "remote_key".into(), 377 + LwwEntry { 378 + value: Some(b"remote_val".to_vec()), 379 + timestamp: ts(100, 0, 2), 380 + ttl_ms: 60_000, 381 + created_at_wall_ms: 100, 382 + }, 383 + ); 384 + let evicted = map.evict_lru(); 385 + assert_eq!(evicted.as_deref(), Some("remote_key")); 386 + assert_eq!(map.len(), 0); 387 + } 388 + }
+214
crates/tranquil-ripple/src/crdt/mod.rs
··· 1 + pub mod delta; 2 + pub mod hlc; 3 + pub mod lww_map; 4 + pub mod g_counter; 5 + 6 + use delta::CrdtDelta; 7 + use hlc::{Hlc, HlcTimestamp}; 8 + use lww_map::LwwMap; 9 + use g_counter::RateLimitStore; 10 + use std::time::{SystemTime, UNIX_EPOCH}; 11 + 12 + pub struct CrdtStore { 13 + hlc: Hlc, 14 + cache: LwwMap, 15 + rate_limits: RateLimitStore, 16 + last_broadcast_ts: HlcTimestamp, 17 + } 18 + 19 + impl CrdtStore { 20 + pub fn new(node_id: u64) -> Self { 21 + Self { 22 + hlc: Hlc::new(node_id), 23 + cache: LwwMap::new(), 24 + rate_limits: RateLimitStore::new(node_id), 25 + last_broadcast_ts: HlcTimestamp::ZERO, 26 + } 27 + } 28 + 29 + fn wall_ms_now() -> u64 { 30 + SystemTime::now() 31 + .duration_since(UNIX_EPOCH) 32 + .unwrap_or_default() 33 + .as_millis() as u64 34 + } 35 + 36 + pub fn cache_get(&self, key: &str) -> Option<Vec<u8>> { 37 + self.cache.get(key, Self::wall_ms_now()) 38 + } 39 + 40 + pub fn cache_set(&mut self, key: String, value: Vec<u8>, ttl_ms: u64) { 41 + let ts = self.hlc.now(); 42 + self.cache.set(key, value, ts, ttl_ms, Self::wall_ms_now()); 43 + } 44 + 45 + pub fn cache_delete(&mut self, key: &str) { 46 + let ts = self.hlc.now(); 47 + self.cache.delete(key, ts, Self::wall_ms_now()); 48 + } 49 + 50 + pub fn rate_limit_peek(&self, key: &str, window_ms: u64) -> u64 { 51 + self.rate_limits 52 + .peek_count(key, window_ms, Self::wall_ms_now()) 53 + } 54 + 55 + pub fn rate_limit_check(&mut self, key: &str, limit: u32, window_ms: u64) -> bool { 56 + self.rate_limits 57 + .check_and_increment(key, limit, window_ms, Self::wall_ms_now()) 58 + } 59 + 60 + pub fn peek_broadcast_delta(&self) -> CrdtDelta { 61 + let cache_delta = { 62 + let d = self.cache.extract_delta_since(self.last_broadcast_ts); 63 + match d.entries.is_empty() { 64 + true => None, 65 + false => Some(d), 66 + } 67 + }; 68 + let rate_limit_deltas = self.rate_limits.extract_dirty_deltas(); 69 + CrdtDelta { 70 + version: 1, 71 + source_node: self.hlc.node_id(), 72 + cache_delta, 
73 + rate_limit_deltas, 74 + } 75 + } 76 + 77 + pub fn commit_broadcast(&mut self, delta: &CrdtDelta) { 78 + let max_ts = delta 79 + .cache_delta 80 + .as_ref() 81 + .and_then(|d| d.entries.iter().map(|(_, e)| e.timestamp).max()) 82 + .unwrap_or(self.last_broadcast_ts); 83 + self.last_broadcast_ts = max_ts; 84 + let committed_keys: std::collections::HashSet<&str> = delta 85 + .rate_limit_deltas 86 + .iter() 87 + .map(|d| d.key.as_str()) 88 + .collect(); 89 + committed_keys.iter().for_each(|&key| { 90 + let still_matches = self 91 + .rate_limits 92 + .peek_dirty_counter(key) 93 + .zip(delta.rate_limit_deltas.iter().find(|d| d.key == key)) 94 + .is_some_and(|(current, committed)| { 95 + current.window_start_ms == committed.counter.window_start_ms 96 + && current.total() == committed.counter.total() 97 + }); 98 + if still_matches { 99 + self.rate_limits.clear_single_dirty(key); 100 + } 101 + }); 102 + } 103 + 104 + pub fn merge_delta(&mut self, delta: &CrdtDelta) -> bool { 105 + if !delta.is_compatible() { 106 + tracing::warn!( 107 + version = delta.version, 108 + "dropping incompatible CRDT delta version" 109 + ); 110 + return false; 111 + } 112 + let mut changed = false; 113 + if let Some(ref cache_delta) = delta.cache_delta { 114 + cache_delta.entries.iter().for_each(|(key, entry)| { 115 + let _ = self.hlc.receive(entry.timestamp); 116 + if self.cache.merge_entry(key.clone(), entry.clone()) { 117 + changed = true; 118 + } 119 + }); 120 + } 121 + delta.rate_limit_deltas.iter().for_each(|rd| { 122 + if self 123 + .rate_limits 124 + .merge_counter(rd.key.clone(), &rd.counter) 125 + { 126 + changed = true; 127 + } 128 + }); 129 + changed 130 + } 131 + 132 + pub fn run_maintenance(&mut self) { 133 + let now = Self::wall_ms_now(); 134 + self.cache.gc_tombstones(now); 135 + self.cache.gc_expired(now); 136 + self.rate_limits.gc_expired(now); 137 + } 138 + 139 + pub fn cache_estimated_bytes(&self) -> usize { 140 + self.cache.estimated_bytes() 141 + } 142 + 143 + pub fn 
rate_limit_estimated_bytes(&self) -> usize { 144 + self.rate_limits.estimated_bytes() 145 + } 146 + 147 + pub fn evict_lru(&mut self) -> Option<String> { 148 + self.cache.evict_lru() 149 + } 150 + } 151 + 152 + #[cfg(test)] 153 + mod tests { 154 + use super::*; 155 + 156 + #[test] 157 + fn roundtrip_cache() { 158 + let mut store = CrdtStore::new(1); 159 + store.cache_set("key".into(), b"value".to_vec(), 60_000); 160 + assert_eq!(store.cache_get("key"), Some(b"value".to_vec())); 161 + } 162 + 163 + #[test] 164 + fn delta_merge_convergence() { 165 + let mut store_a = CrdtStore::new(1); 166 + let mut store_b = CrdtStore::new(2); 167 + 168 + store_a.cache_set("x".into(), b"from_a".to_vec(), 60_000); 169 + store_b.cache_set("y".into(), b"from_b".to_vec(), 60_000); 170 + 171 + let delta_a = store_a.peek_broadcast_delta(); 172 + store_a.commit_broadcast(&delta_a); 173 + let delta_b = store_b.peek_broadcast_delta(); 174 + store_b.commit_broadcast(&delta_b); 175 + 176 + store_b.merge_delta(&delta_a); 177 + store_a.merge_delta(&delta_b); 178 + 179 + assert_eq!(store_a.cache_get("x"), Some(b"from_a".to_vec())); 180 + assert_eq!(store_a.cache_get("y"), Some(b"from_b".to_vec())); 181 + assert_eq!(store_b.cache_get("x"), Some(b"from_a".to_vec())); 182 + assert_eq!(store_b.cache_get("y"), Some(b"from_b".to_vec())); 183 + } 184 + 185 + #[test] 186 + fn rate_limit_across_stores() { 187 + let mut store_a = CrdtStore::new(1); 188 + let mut store_b = CrdtStore::new(2); 189 + 190 + store_a.rate_limit_check("rl:test", 5, 60_000); 191 + store_a.rate_limit_check("rl:test", 5, 60_000); 192 + store_b.rate_limit_check("rl:test", 5, 60_000); 193 + 194 + let delta_a = store_a.peek_broadcast_delta(); 195 + store_a.commit_broadcast(&delta_a); 196 + store_b.merge_delta(&delta_a); 197 + 198 + let delta_b = store_b.peek_broadcast_delta(); 199 + store_b.commit_broadcast(&delta_b); 200 + store_a.merge_delta(&delta_b); 201 + } 202 + 203 + #[test] 204 + fn incompatible_version_rejected() { 205 + let 
mut store = CrdtStore::new(1); 206 + let delta = CrdtDelta { 207 + version: 255, 208 + source_node: 99, 209 + cache_delta: None, 210 + rate_limit_deltas: vec![], 211 + }; 212 + assert!(!store.merge_delta(&delta)); 213 + } 214 + }
+94
crates/tranquil-ripple/src/engine.rs
··· 1 + use crate::cache::RippleCache; 2 + use crate::config::RippleConfig; 3 + use crate::crdt::CrdtStore; 4 + use crate::eviction::MemoryBudget; 5 + use crate::gossip::{GossipEngine, PeerId}; 6 + use crate::rate_limiter::RippleRateLimiter; 7 + use crate::transport::Transport; 8 + use parking_lot::RwLock; 9 + use std::net::SocketAddr; 10 + use std::sync::Arc; 11 + use tokio_util::sync::CancellationToken; 12 + use tranquil_infra::{Cache, DistributedRateLimiter}; 13 + 14 + pub struct RippleEngine; 15 + 16 + impl RippleEngine { 17 + pub async fn start( 18 + config: RippleConfig, 19 + shutdown: CancellationToken, 20 + ) -> Result<(Arc<dyn Cache>, Arc<dyn DistributedRateLimiter>, SocketAddr), RippleStartError> { 21 + let store = Arc::new(RwLock::new(CrdtStore::new(config.machine_id))); 22 + 23 + let (transport, incoming_rx) = Transport::bind(config.bind_addr, config.machine_id, shutdown.clone()) 24 + .await 25 + .map_err(|e| RippleStartError::Bind(e.to_string()))?; 26 + 27 + let transport = Arc::new(transport); 28 + 29 + let bound_addr = transport.local_addr(); 30 + let local_id = PeerId { 31 + addr: bound_addr, 32 + machine_id: config.machine_id, 33 + generation: 0, 34 + }; 35 + 36 + let gossip = GossipEngine::new(transport, store.clone(), local_id); 37 + 38 + let gossip_handle = gossip.spawn( 39 + config.seed_peers, 40 + config.gossip_interval_ms, 41 + incoming_rx, 42 + shutdown.clone(), 43 + ); 44 + 45 + let budget = MemoryBudget::new(config.cache_max_bytes); 46 + let store_for_eviction = store.clone(); 47 + let eviction_shutdown = shutdown.clone(); 48 + let eviction_handle = tokio::spawn(async move { 49 + let mut interval = tokio::time::interval(std::time::Duration::from_secs(10)); 50 + loop { 51 + tokio::select! 
{ 52 + _ = eviction_shutdown.cancelled() => break, 53 + _ = interval.tick() => { 54 + budget.enforce(&mut store_for_eviction.write()); 55 + } 56 + } 57 + } 58 + }); 59 + 60 + let shutdown_for_monitor = shutdown.clone(); 61 + tokio::spawn(async move { 62 + shutdown_for_monitor.cancelled().await; 63 + let gossip_result = gossip_handle.await; 64 + let eviction_result = eviction_handle.await; 65 + if let Err(e) = gossip_result { 66 + tracing::error!(error = %e, "gossip task panicked"); 67 + } 68 + if let Err(e) = eviction_result { 69 + tracing::error!(error = %e, "eviction task panicked"); 70 + } 71 + }); 72 + 73 + let cache: Arc<dyn Cache> = Arc::new(RippleCache::new(store.clone())); 74 + let rate_limiter: Arc<dyn DistributedRateLimiter> = 75 + Arc::new(RippleRateLimiter::new(store)); 76 + 77 + tracing::info!( 78 + bind = %bound_addr, 79 + machine_id = config.machine_id, 80 + max_cache_mb = config.cache_max_bytes / (1024 * 1024), 81 + "ripple engine started" 82 + ); 83 + 84 + Ok((cache, rate_limiter, bound_addr)) 85 + } 86 + } 87 + 88 + #[derive(Debug, thiserror::Error)] 89 + pub enum RippleStartError { 90 + #[error("failed to bind transport: {0}")] 91 + Bind(String), 92 + #[error("configuration error: {0}")] 93 + Config(String), 94 + }
+78
crates/tranquil-ripple/src/eviction.rs
··· 1 + use crate::crdt::CrdtStore; 2 + 3 + pub struct MemoryBudget { 4 + max_bytes: usize, 5 + } 6 + 7 + impl MemoryBudget { 8 + pub fn new(max_bytes: usize) -> Self { 9 + Self { max_bytes } 10 + } 11 + 12 + pub fn enforce(&self, store: &mut CrdtStore) { 13 + store.run_maintenance(); 14 + 15 + let max_bytes = self.max_bytes; 16 + let total_bytes = store.cache_estimated_bytes().saturating_add(store.rate_limit_estimated_bytes()); 17 + let overshoot_ratio = match total_bytes > max_bytes && max_bytes > 0 { 18 + true => total_bytes / max_bytes, 19 + false => 0, 20 + }; 21 + 22 + const BASE_BATCH: usize = 256; 23 + let batch_size = match overshoot_ratio { 24 + 0..=1 => BASE_BATCH, 25 + 2..=4 => BASE_BATCH * 4, 26 + _ => BASE_BATCH * 8, 27 + }; 28 + 29 + let evicted = std::iter::from_fn(|| { 30 + let current = store.cache_estimated_bytes().saturating_add(store.rate_limit_estimated_bytes()); 31 + match current > max_bytes { 32 + true => store.evict_lru(), 33 + false => None, 34 + } 35 + }) 36 + .take(batch_size) 37 + .count(); 38 + if evicted > 0 { 39 + tracing::info!( 40 + evicted_entries = evicted, 41 + cache_bytes = store.cache_estimated_bytes(), 42 + rate_limit_bytes = store.rate_limit_estimated_bytes(), 43 + max_bytes = self.max_bytes, 44 + "memory budget eviction" 45 + ); 46 + } 47 + } 48 + } 49 + 50 + #[cfg(test)] 51 + mod tests { 52 + use super::*; 53 + 54 + #[test] 55 + fn eviction_under_budget() { 56 + let mut store = CrdtStore::new(1); 57 + let budget = MemoryBudget::new(1024 * 1024); 58 + store.cache_set("k".into(), vec![1, 2, 3], 60_000); 59 + budget.enforce(&mut store); 60 + assert!(store.cache_get("k").is_some()); 61 + } 62 + 63 + #[test] 64 + fn eviction_over_budget() { 65 + let mut store = CrdtStore::new(1); 66 + let budget = MemoryBudget::new(100); 67 + (0..50).for_each(|i| { 68 + store.cache_set( 69 + format!("key-{i}"), 70 + vec![0u8; 64], 71 + 60_000, 72 + ); 73 + }); 74 + budget.enforce(&mut store); 75 + let total = 
store.cache_estimated_bytes().saturating_add(store.rate_limit_estimated_bytes()); 76 + assert!(total <= 100); 77 + } 78 + }
+390
crates/tranquil-ripple/src/gossip.rs
use crate::crdt::delta::CrdtDelta;
use crate::crdt::CrdtStore;
use crate::transport::{ChannelTag, IncomingFrame, Transport};
use foca::{Config, Foca, Notification, Runtime, Timer};
use parking_lot::RwLock;
use rand::rngs::StdRng;
use rand::SeedableRng;
use std::collections::HashSet;
use std::fmt;
use std::net::SocketAddr;
use std::num::NonZeroUsize;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc;
use tokio_util::sync::CancellationToken;

// Upper bound on distinct node entries accepted inside one replicated
// G-counter; deltas exceeding this are dropped as invalid (see CrdtSync arm).
const MAX_GCOUNTER_NODES: usize = 256;

/// Cluster identity used by the foca SWIM membership protocol: socket
/// address + machine id + a generation bumped on identity renewal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PeerId {
    pub addr: SocketAddr,
    pub machine_id: u64,
    pub generation: u32,
}

impl fmt::Display for PeerId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}@{}(g{})", self.machine_id, self.addr, self.generation)
    }
}

impl foca::Identity for PeerId {
    type Addr = SocketAddr;

    fn addr(&self) -> SocketAddr {
        self.addr
    }

    // Renewal keeps the same addr/machine but bumps the generation so the
    // cluster can distinguish a restarted node from its previous incarnation.
    fn renew(&self) -> Option<Self> {
        Some(Self {
            addr: self.addr,
            machine_id: self.machine_id,
            generation: self.generation.saturating_add(1),
        })
    }

    // When two identities claim the same address, the higher generation wins.
    fn win_addr_conflict(&self, adversary: &Self) -> bool {
        self.generation > adversary.generation
    }
}

// Manual serde impls: serialize as a 3-tuple with the SocketAddr rendered as
// a string (SocketAddr itself is not bincode-friendly across versions).
impl serde::Serialize for PeerId {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        use serde::ser::SerializeTuple;
        let mut tup = serializer.serialize_tuple(3)?;
        tup.serialize_element(&self.addr.to_string())?;
        tup.serialize_element(&self.machine_id)?;
        tup.serialize_element(&self.generation)?;
        tup.end()
    }
}

impl<'de> serde::Deserialize<'de> for PeerId {
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        let (addr_str, machine_id, generation): (String, u64, u32) =
            serde::Deserialize::deserialize(deserializer)?;
        let addr: SocketAddr = addr_str.parse().map_err(serde::de::Error::custom)?;
        Ok(Self {
            addr,
            machine_id,
            generation,
        })
    }
}

// Side effects requested by foca during a callback; buffered so they can be
// executed after the (synchronous) foca call returns.
enum RuntimeAction {
    SendTo(PeerId, Vec<u8>),
    ScheduleTimer(Timer<PeerId>, Duration),
    MemberUp(SocketAddr),
    MemberDown(SocketAddr),
}

/// Collects foca runtime callbacks into a queue drained by
/// `drain_runtime_actions` once control returns to the event loop.
struct BufferedRuntime {
    actions: Vec<RuntimeAction>,
}

impl BufferedRuntime {
    fn new() -> Self {
        Self {
            actions: Vec::new(),
        }
    }
}

/// Set of peer addresses currently considered alive, maintained from foca
/// MemberUp/MemberDown notifications; used as the CRDT broadcast fan-out.
struct MemberTracker {
    active_addrs: HashSet<SocketAddr>,
}

impl MemberTracker {
    fn new() -> Self {
        Self {
            active_addrs: HashSet::new(),
        }
    }

    fn member_up(&mut self, addr: SocketAddr) {
        self.active_addrs.insert(addr);
    }

    fn member_down(&mut self, addr: SocketAddr) {
        self.active_addrs.remove(&addr);
    }

    fn active_peers(&self) -> impl Iterator<Item = SocketAddr> + '_ {
        self.active_addrs.iter().copied()
    }
}

// Implemented on `&mut BufferedRuntime` so call sites can pass `&mut runtime`
// directly into foca's `impl Runtime` parameters without moving the buffer.
impl Runtime<PeerId> for &mut BufferedRuntime {
    fn notify(&mut self, notification: Notification<'_, PeerId>) {
        match notification {
            Notification::MemberUp(peer) => {
                self.actions.push(RuntimeAction::MemberUp(peer.addr));
            }
            Notification::MemberDown(peer) => {
                self.actions.push(RuntimeAction::MemberDown(peer.addr));
            }
            // Other notifications (rename, idle, etc.) are intentionally ignored.
            _ => {}
        }
    }

    fn send_to(&mut self, to: PeerId, data: &[u8]) {
        self.actions
            .push(RuntimeAction::SendTo(to, data.to_vec()));
    }

    fn submit_after(&mut self, event: Timer<PeerId>, after: Duration) {
        self.actions.push(RuntimeAction::ScheduleTimer(event, after));
    }
}

/// Drives SWIM membership (via foca) and periodic CRDT delta broadcast over
/// the shared transport.
pub struct GossipEngine {
    transport: Arc<Transport>,
    store: Arc<RwLock<CrdtStore>>,
    local_id: PeerId,
}

impl GossipEngine {
    pub fn new(
        transport: Arc<Transport>,
        store: Arc<RwLock<CrdtStore>>,
        local_id: PeerId,
    ) -> Self {
        Self {
            transport,
            store,
            local_id,
        }
    }

    /// Spawn the gossip event loop as a tokio task and return its handle.
    ///
    /// The loop multiplexes: incoming frames (SWIM traffic and CRDT sync
    /// deltas), the periodic gossip tick (broadcast + foca gossip), foca
    /// timers, and a 10s maintenance tick. On `shutdown` it flushes one final
    /// best-effort delta to active peers before exiting.
    pub fn spawn(
        self,
        seed_peers: Vec<SocketAddr>,
        gossip_interval_ms: u64,
        mut incoming_rx: mpsc::Receiver<IncomingFrame>,
        shutdown: CancellationToken,
    ) -> tokio::task::JoinHandle<()> {
        let mut config = Config::simple();
        config.max_packet_size = NonZeroUsize::new(2 * 1024 * 1024).expect("nonzero");
        // Gossip membership updates to 3 members every interval; announce
        // ourselves to 3 members every 30s to keep views converging.
        config.periodic_gossip = Some(foca::PeriodicParams {
            frequency: Duration::from_millis(gossip_interval_ms),
            num_members: NonZeroUsize::new(3).expect("nonzero"),
        });
        config.periodic_announce = Some(foca::PeriodicParams {
            frequency: Duration::from_secs(30),
            num_members: NonZeroUsize::new(3).expect("nonzero"),
        });

        let rng = StdRng::from_os_rng();
        let codec = foca::BincodeCodec(bincode::config::standard());
        let mut foca: Foca<PeerId, _, _, _> = Foca::new(self.local_id.clone(), config, rng, codec);

        let transport = self.transport.clone();
        let store = self.store.clone();

        // foca timers are delivered back into the loop via this channel.
        let (timer_tx, mut timer_rx) = mpsc::channel::<(Timer<PeerId>, Duration)>(256);

        // If queueing to some peer keeps failing, force-advance the broadcast
        // watermark after this many seconds so it cannot stall forever.
        const WATERMARK_STALE_SECS: u64 = 30;

        tokio::spawn(async move {
            let mut runtime = BufferedRuntime::new();
            let mut members = MemberTracker::new();
            let mut last_commit = tokio::time::Instant::now();

            // Seed peers have unknown machine ids; machine_id 0 / generation 0
            // is a placeholder until the real identity is learned via SWIM.
            seed_peers.iter().for_each(|&addr| {
                let seed_id = PeerId {
                    addr,
                    machine_id: 0,
                    generation: 0,
                };
                if let Err(e) = foca.announce(seed_id, &mut runtime) {
                    tracing::warn!(error = %e, "failed to announce to seed peer");
                }
            });

            drain_runtime_actions(&mut runtime, &transport, &timer_tx, &mut members, &shutdown);

            let mut gossip_tick =
                tokio::time::interval(Duration::from_millis(gossip_interval_ms));
            let mut maintenance_tick = tokio::time::interval(Duration::from_secs(10));

            loop {
                tokio::select! {
                    _ = shutdown.cancelled() => {
                        tracing::info!("gossip engine shutting down, flushing final delta");
                        flush_final_delta(&store, &transport, &members);
                        break;
                    }
                    Some(frame) = incoming_rx.recv() => {
                        match frame.tag {
                            ChannelTag::Gossip => {
                                // SWIM protocol traffic goes straight to foca.
                                if let Err(e) = foca.handle_data(&frame.data, &mut runtime) {
                                    tracing::warn!(error = %e, "foca handle_data error");
                                }
                                drain_runtime_actions(&mut runtime, &transport, &timer_tx, &mut members, &shutdown);
                            }
                            ChannelTag::CrdtSync => {
                                // Sanity caps on peer-supplied deltas before merging.
                                const MAX_DELTA_ENTRIES: usize = 10_000;
                                const MAX_DELTA_RATE_LIMITS: usize = 10_000;
                                match bincode::serde::decode_from_slice::<CrdtDelta, _>(&frame.data, bincode::config::standard()) {
                                    Ok((delta, _)) => {
                                        let cache_len = delta.cache_delta.as_ref().map_or(0, |d| d.entries.len());
                                        let rl_len = delta.rate_limit_deltas.len();
                                        let gcounter_oversize = delta.rate_limit_deltas.iter().any(|rd| rd.counter.increments.len() > MAX_GCOUNTER_NODES);
                                        // NOTE(review): despite the name this flags a zero-length window, not a mismatch.
                                        let window_mismatch = delta.rate_limit_deltas.iter().any(|rd| rd.counter.window_duration_ms == 0);
                                        match cache_len > MAX_DELTA_ENTRIES || rl_len > MAX_DELTA_RATE_LIMITS || gcounter_oversize || window_mismatch {
                                            true => {
                                                tracing::warn!(
                                                    cache_entries = cache_len,
                                                    rate_limit_entries = rl_len,
                                                    gcounter_oversize = gcounter_oversize,
                                                    "dropping invalid CRDT delta"
                                                );
                                            }
                                            false => {
                                                store.write().merge_delta(&delta);
                                            }
                                        }
                                    }
                                    Err(e) => {
                                        tracing::warn!(error = %e, "failed to decode crdt sync delta");
                                    }
                                }
                            }
                            // Raft/Direct frames are not handled by the gossip loop.
                            _ => {}
                        }
                    }
                    _ = gossip_tick.tick() => {
                        // Peek (don't commit) the outgoing delta under a read lock,
                        // and serialize it outside any write lock.
                        let pending = {
                            let s = store.read();
                            let delta = s.peek_broadcast_delta();
                            match delta.is_empty() {
                                true => None,
                                false => {
                                    match bincode::serde::encode_to_vec(&delta, bincode::config::standard()) {
                                        Ok(bytes) => Some((bytes, delta)),
                                        Err(e) => {
                                            tracing::warn!(error = %e, "failed to serialize broadcast delta");
                                            None
                                        }
                                    }
                                },
                            }
                        };
                        if let Some((ref data, ref delta)) = pending {
                            let peers: Vec<SocketAddr> = members.active_peers().collect();
                            let mut all_queued = true;
                            let cancel = shutdown.clone();
                            peers.iter().for_each(|&addr| {
                                match transport.try_queue(addr, ChannelTag::CrdtSync, data) {
                                    true => {}
                                    false => {
                                        // Queue full: fall back to an async send that
                                        // aborts on shutdown rather than blocking the loop.
                                        all_queued = false;
                                        let t = transport.clone();
                                        let d = data.clone();
                                        let c = cancel.clone();
                                        tokio::spawn(async move {
                                            tokio::select! {
                                                _ = c.cancelled() => {}
                                                _ = t.send(addr, ChannelTag::CrdtSync, &d) => {}
                                            }
                                        });
                                    }
                                }
                            });
                            // Commit the watermark when every peer accepted the
                            // queued delta, there are no peers, or the staleness
                            // cap forces progress (accepting possible delta loss).
                            let stale = last_commit.elapsed() > Duration::from_secs(WATERMARK_STALE_SECS);
                            if all_queued || peers.is_empty() || stale {
                                if stale && !all_queued {
                                    tracing::warn!(
                                        elapsed_secs = last_commit.elapsed().as_secs(),
                                        "force-advancing broadcast watermark (staleness cap)"
                                    );
                                }
                                store.write().commit_broadcast(delta);
                                last_commit = tokio::time::Instant::now();
                            }
                        }
                        if let Err(e) = foca.gossip(&mut runtime) {
                            tracing::warn!(error = %e, "foca gossip error");
                        }
                        drain_runtime_actions(&mut runtime, &transport, &timer_tx, &mut members, &shutdown);
                    }
                    Some((timer, _)) = timer_rx.recv() => {
                        if let Err(e) = foca.handle_timer(timer, &mut runtime) {
                            tracing::warn!(error = %e, "foca handle_timer error");
                        }
                        drain_runtime_actions(&mut runtime, &transport, &timer_tx, &mut members, &shutdown);
                    }
                    _ = maintenance_tick.tick() => {
                        store.write().run_maintenance();
                        tracing::trace!(
                            members = foca.num_members(),
                            cache_bytes = store.read().cache_estimated_bytes(),
                            "maintenance cycle"
                        );
                    }
                }
            }
        })
    }
}

/// Best-effort final broadcast on shutdown: queue (never block on) the
/// remaining uncommitted delta to all active peers. The watermark is not
/// committed — the process is exiting anyway.
fn flush_final_delta(
    store: &Arc<RwLock<CrdtStore>>,
    transport: &Arc<Transport>,
    members: &MemberTracker,
) {
    let s = store.read();
    let delta = s.peek_broadcast_delta();
    if delta.is_empty() {
        return;
    }
    match bincode::serde::encode_to_vec(&delta, bincode::config::standard()) {
        Ok(bytes) => {
            members.active_peers().for_each(|addr| {
                let _ = transport.try_queue(addr, ChannelTag::CrdtSync, &bytes);
            });
        }
        Err(e) => {
            tracing::warn!(error = %e, "failed to serialize final delta on shutdown");
        }
    }
}

/// Execute every action foca buffered during its last callback: spawn sends,
/// schedule timer redeliveries, and update the member tracker.
fn drain_runtime_actions(
    runtime: &mut BufferedRuntime,
    transport: &Arc<Transport>,
    timer_tx: &mpsc::Sender<(Timer<PeerId>, Duration)>,
    members: &mut MemberTracker,
    shutdown: &CancellationToken,
) {
    let actions: Vec<RuntimeAction> = runtime.actions.drain(..).collect();
    actions.into_iter().for_each(|action| match action {
        RuntimeAction::SendTo(peer, data) => {
            let t = transport.clone();
            let c = shutdown.clone();
            tokio::spawn(async move {
                tokio::select! {
                    _ = c.cancelled() => {}
                    _ = t.send(peer.addr, ChannelTag::Gossip, &data) => {}
                }
            });
        }
        RuntimeAction::ScheduleTimer(timer, duration) => {
            // Deliver the timer back through the channel after `duration`.
            let tx = timer_tx.clone();
            tokio::spawn(async move {
                tokio::time::sleep(duration).await;
                let _ = tx.send((timer, duration)).await;
            });
        }
        RuntimeAction::MemberUp(addr) => {
            tracing::info!(peer = %addr, "member up");
            members.member_up(addr);
        }
        RuntimeAction::MemberDown(addr) => {
            tracing::info!(peer = %addr, "member down");
            members.member_down(addr);
        }
    });
}
+11
crates/tranquil-ripple/src/lib.rs
//! Ripple: in-process distributed cache and rate limiter built on
//! delta-state CRDTs, with SWIM membership and TCP gossip between nodes.

pub mod cache;
pub mod config;
pub mod crdt;
pub mod engine;
pub mod eviction;
pub mod gossip;
pub mod rate_limiter;
pub mod transport;

// Primary entry points re-exported at the crate root.
pub use config::RippleConfig;
pub use engine::{RippleEngine, RippleStartError};
+49
crates/tranquil-ripple/src/rate_limiter.rs
··· 1 + use crate::crdt::CrdtStore; 2 + use async_trait::async_trait; 3 + use parking_lot::RwLock; 4 + use std::sync::Arc; 5 + use tranquil_infra::DistributedRateLimiter; 6 + 7 + pub struct RippleRateLimiter { 8 + store: Arc<RwLock<CrdtStore>>, 9 + } 10 + 11 + impl RippleRateLimiter { 12 + pub fn new(store: Arc<RwLock<CrdtStore>>) -> Self { 13 + Self { store } 14 + } 15 + } 16 + 17 + #[async_trait] 18 + impl DistributedRateLimiter for RippleRateLimiter { 19 + async fn check_rate_limit(&self, key: &str, limit: u32, window_ms: u64) -> bool { 20 + self.store.write().rate_limit_check(key, limit, window_ms) 21 + } 22 + 23 + async fn peek_rate_limit_count(&self, key: &str, window_ms: u64) -> u64 { 24 + self.store.read().rate_limit_peek(key, window_ms) 25 + } 26 + } 27 + 28 + #[cfg(test)] 29 + mod tests { 30 + use super::*; 31 + 32 + #[tokio::test] 33 + async fn rate_limiter_trait_allows_within_limit() { 34 + let store = Arc::new(RwLock::new(CrdtStore::new(1))); 35 + let rl = RippleRateLimiter::new(store); 36 + assert!(rl.check_rate_limit("test", 5, 60_000).await); 37 + assert!(rl.check_rate_limit("test", 5, 60_000).await); 38 + } 39 + 40 + #[tokio::test] 41 + async fn rate_limiter_trait_blocks_over_limit() { 42 + let store = Arc::new(RwLock::new(CrdtStore::new(1))); 43 + let rl = RippleRateLimiter::new(store); 44 + assert!(rl.check_rate_limit("k", 3, 60_000).await); 45 + assert!(rl.check_rate_limit("k", 3, 60_000).await); 46 + assert!(rl.check_rate_limit("k", 3, 60_000).await); 47 + assert!(!rl.check_rate_limit("k", 3, 60_000).await); 48 + } 49 + }
+442
crates/tranquil-ripple/src/transport.rs
··· 1 + use backon::{ExponentialBuilder, Retryable}; 2 + use bytes::{Buf, BufMut, BytesMut}; 3 + use std::collections::HashMap; 4 + use std::net::SocketAddr; 5 + use std::sync::Arc; 6 + use std::sync::atomic::{AtomicUsize, Ordering}; 7 + use std::time::Duration; 8 + use tokio::io::{AsyncReadExt, AsyncWriteExt}; 9 + use tokio::net::{TcpListener, TcpStream}; 10 + use tokio::sync::mpsc; 11 + use tokio_util::sync::CancellationToken; 12 + 13 + const MAX_FRAME_SIZE: usize = 16 * 1024 * 1024; 14 + const MAX_INBOUND_CONNECTIONS: usize = 512; 15 + const WRITE_TIMEOUT: Duration = Duration::from_secs(10); 16 + 17 + #[derive(Debug, Clone, Copy, PartialEq, Eq)] 18 + #[repr(u8)] 19 + pub enum ChannelTag { 20 + Gossip = 0x01, 21 + CrdtSync = 0x02, 22 + Raft = 0x03, 23 + Direct = 0x04, 24 + } 25 + 26 + impl ChannelTag { 27 + fn from_u8(v: u8) -> Option<Self> { 28 + match v { 29 + 0x01 => Some(Self::Gossip), 30 + 0x02 => Some(Self::CrdtSync), 31 + 0x03 => Some(Self::Raft), 32 + 0x04 => Some(Self::Direct), 33 + _ => None, 34 + } 35 + } 36 + } 37 + 38 + #[derive(Debug)] 39 + pub struct IncomingFrame { 40 + pub from: SocketAddr, 41 + pub tag: ChannelTag, 42 + pub data: Vec<u8>, 43 + } 44 + 45 + struct ConnectionWriter { 46 + tx: mpsc::Sender<Vec<u8>>, 47 + } 48 + 49 + pub struct Transport { 50 + local_addr: SocketAddr, 51 + _machine_id: u64, 52 + connections: Arc<parking_lot::Mutex<HashMap<SocketAddr, ConnectionWriter>>>, 53 + connecting: Arc<parking_lot::Mutex<std::collections::HashSet<SocketAddr>>>, 54 + #[allow(dead_code)] 55 + inbound_count: Arc<AtomicUsize>, 56 + shutdown: CancellationToken, 57 + incoming_tx: mpsc::Sender<IncomingFrame>, 58 + } 59 + 60 + impl Transport { 61 + pub async fn bind( 62 + addr: SocketAddr, 63 + machine_id: u64, 64 + shutdown: CancellationToken, 65 + ) -> Result<(Self, mpsc::Receiver<IncomingFrame>), std::io::Error> { 66 + let listener = TcpListener::bind(addr).await?; 67 + let local_addr = listener.local_addr()?; 68 + let (incoming_tx, incoming_rx) = 
mpsc::channel(4096); 69 + let inbound_count = Arc::new(AtomicUsize::new(0)); 70 + 71 + let transport = Self { 72 + local_addr, 73 + _machine_id: machine_id, 74 + connections: Arc::new(parking_lot::Mutex::new(HashMap::new())), 75 + connecting: Arc::new(parking_lot::Mutex::new(std::collections::HashSet::new())), 76 + inbound_count: inbound_count.clone(), 77 + shutdown: shutdown.clone(), 78 + incoming_tx: incoming_tx.clone(), 79 + }; 80 + 81 + let cancel = shutdown.clone(); 82 + let inbound_counter = inbound_count.clone(); 83 + tokio::spawn(async move { 84 + loop { 85 + tokio::select! { 86 + _ = cancel.cancelled() => break, 87 + result = listener.accept() => { 88 + match result { 89 + Ok((stream, peer_addr)) => { 90 + let current = inbound_counter.load(Ordering::Relaxed); 91 + if current >= MAX_INBOUND_CONNECTIONS { 92 + tracing::warn!( 93 + peer = %peer_addr, 94 + count = current, 95 + max = MAX_INBOUND_CONNECTIONS, 96 + "rejecting inbound connection: limit reached" 97 + ); 98 + drop(stream); 99 + continue; 100 + } 101 + inbound_counter.fetch_add(1, Ordering::Relaxed); 102 + Self::spawn_reader( 103 + stream, 104 + peer_addr, 105 + incoming_tx.clone(), 106 + cancel.clone(), 107 + inbound_counter.clone(), 108 + ); 109 + tracing::debug!(peer = %peer_addr, "accepted inbound connection"); 110 + } 111 + Err(e) => { 112 + tracing::warn!(error = %e, "accept failed"); 113 + } 114 + } 115 + } 116 + } 117 + } 118 + }); 119 + 120 + tracing::info!(addr = %local_addr, "ripple transport bound"); 121 + Ok((transport, incoming_rx)) 122 + } 123 + 124 + pub fn local_addr(&self) -> SocketAddr { 125 + self.local_addr 126 + } 127 + 128 + pub fn try_queue(&self, target: SocketAddr, tag: ChannelTag, data: &[u8]) -> bool { 129 + let frame = match encode_frame(tag, data) { 130 + Some(f) => f, 131 + None => return false, 132 + }; 133 + let conns = self.connections.lock(); 134 + match conns.get(&target) { 135 + Some(writer) => writer.tx.try_send(frame).is_ok(), 136 + None => false, 137 + } 138 
+ } 139 + 140 + pub async fn send(&self, target: SocketAddr, tag: ChannelTag, data: &[u8]) { 141 + let frame = match encode_frame(tag, data) { 142 + Some(f) => f, 143 + None => return, 144 + }; 145 + let writer = { 146 + let conns = self.connections.lock(); 147 + conns.get(&target).map(|w| w.tx.clone()) 148 + }; 149 + match writer { 150 + Some(tx) => { 151 + if tx.send(frame).await.is_err() { 152 + self.connections.lock().remove(&target); 153 + self.connect_and_send(target, tag, data).await; 154 + } 155 + } 156 + None => { 157 + self.connect_and_send(target, tag, data).await; 158 + } 159 + } 160 + } 161 + 162 + async fn connect_and_send(&self, target: SocketAddr, tag: ChannelTag, data: &[u8]) { 163 + { 164 + let mut connecting = self.connecting.lock(); 165 + if connecting.contains(&target) { 166 + tracing::debug!(peer = %target, "connection already in-flight, dropping frame"); 167 + return; 168 + } 169 + connecting.insert(target); 170 + } 171 + 172 + let result = self.connect_and_send_inner(target, tag, data).await; 173 + self.connecting.lock().remove(&target); 174 + result 175 + } 176 + 177 + async fn connect_and_send_inner(&self, target: SocketAddr, tag: ChannelTag, data: &[u8]) { 178 + let shutdown = self.shutdown.clone(); 179 + let stream = (|| async { 180 + tokio::time::timeout(Duration::from_secs(5), TcpStream::connect(target)) 181 + .await 182 + .map_err(|_| std::io::Error::new(std::io::ErrorKind::TimedOut, "connect timeout"))? 
183 + }) 184 + .retry( 185 + ExponentialBuilder::default() 186 + .with_min_delay(Duration::from_millis(50)) 187 + .with_max_delay(Duration::from_secs(2)) 188 + .with_max_times(3), 189 + ) 190 + .when(|_| !shutdown.is_cancelled()) 191 + .await; 192 + match stream { 193 + Ok(stream) => { 194 + let (read_half, write_half) = stream.into_split(); 195 + let (write_tx, mut write_rx) = mpsc::channel::<Vec<u8>>(1024); 196 + let cancel = self.shutdown.clone(); 197 + let connections = self.connections.clone(); 198 + let peer = target; 199 + 200 + tokio::spawn(async move { 201 + let mut writer = write_half; 202 + loop { 203 + tokio::select! { 204 + _ = cancel.cancelled() => break, 205 + msg = write_rx.recv() => { 206 + match msg { 207 + Some(buf) => { 208 + let write_result = tokio::time::timeout( 209 + WRITE_TIMEOUT, 210 + writer.write_all(&buf), 211 + ).await; 212 + match write_result { 213 + Ok(Ok(())) => {} 214 + Ok(Err(e)) => { 215 + tracing::warn!(peer = %peer, error = %e, "write failed, closing connection"); 216 + break; 217 + } 218 + Err(_) => { 219 + tracing::warn!(peer = %peer, "write timed out, closing connection"); 220 + break; 221 + } 222 + } 223 + } 224 + None => break, 225 + } 226 + } 227 + } 228 + } 229 + connections.lock().remove(&peer); 230 + }); 231 + 232 + Self::spawn_reader_half(read_half, target, self.incoming_tx.clone(), self.shutdown.clone()); 233 + 234 + let frame = match encode_frame(tag, data) { 235 + Some(f) => f, 236 + None => return, 237 + }; 238 + let _ = write_tx.send(frame).await; 239 + self.connections.lock().insert( 240 + target, 241 + ConnectionWriter { tx: write_tx }, 242 + ); 243 + tracing::debug!(peer = %target, "established outbound connection"); 244 + } 245 + Err(e) => { 246 + tracing::warn!(peer = %target, error = %e, "failed to connect after retries"); 247 + } 248 + } 249 + } 250 + 251 + fn spawn_reader( 252 + stream: TcpStream, 253 + peer_addr: SocketAddr, 254 + incoming_tx: mpsc::Sender<IncomingFrame>, 255 + cancel: 
CancellationToken, 256 + inbound_counter: Arc<AtomicUsize>, 257 + ) { 258 + tokio::spawn(async move { 259 + let mut buf = BytesMut::with_capacity(8192); 260 + let mut stream = stream; 261 + loop { 262 + if buf.len() > MAX_FRAME_SIZE * 2 { 263 + tracing::warn!(peer = %peer_addr, buf_len = buf.len(), "read buffer exceeded limit, closing connection"); 264 + break; 265 + } 266 + tokio::select! { 267 + _ = cancel.cancelled() => break, 268 + n = stream.read_buf(&mut buf) => { 269 + match n { 270 + Ok(0) | Err(_) => break, 271 + Ok(_) => { 272 + if !Self::process_frames(&mut buf, peer_addr, &incoming_tx) { 273 + break; 274 + } 275 + } 276 + } 277 + } 278 + } 279 + } 280 + inbound_counter.fetch_sub(1, Ordering::Relaxed); 281 + }); 282 + } 283 + 284 + fn spawn_reader_half( 285 + read_half: tokio::net::tcp::OwnedReadHalf, 286 + peer_addr: SocketAddr, 287 + incoming_tx: mpsc::Sender<IncomingFrame>, 288 + cancel: CancellationToken, 289 + ) { 290 + tokio::spawn(async move { 291 + let mut buf = BytesMut::with_capacity(8192); 292 + let mut reader = read_half; 293 + loop { 294 + if buf.len() > MAX_FRAME_SIZE * 2 { 295 + tracing::warn!(peer = %peer_addr, buf_len = buf.len(), "read buffer exceeded limit, closing connection"); 296 + break; 297 + } 298 + tokio::select! 
{ 299 + _ = cancel.cancelled() => break, 300 + n = reader.read_buf(&mut buf) => { 301 + match n { 302 + Ok(0) | Err(_) => break, 303 + Ok(_) => { 304 + if !Self::process_frames(&mut buf, peer_addr, &incoming_tx) { 305 + break; 306 + } 307 + } 308 + } 309 + } 310 + } 311 + } 312 + }); 313 + } 314 + 315 + fn process_frames( 316 + buf: &mut BytesMut, 317 + peer_addr: SocketAddr, 318 + incoming_tx: &mpsc::Sender<IncomingFrame>, 319 + ) -> bool { 320 + loop { 321 + match decode_frame(buf) { 322 + DecodeResult::Frame(tag, data) => { 323 + if let Err(e) = incoming_tx.try_send(IncomingFrame { 324 + from: peer_addr, 325 + tag, 326 + data, 327 + }) { 328 + tracing::warn!(peer = %peer_addr, error = %e, "incoming frame channel full, dropping frame"); 329 + } 330 + } 331 + DecodeResult::NeedMoreData => return true, 332 + DecodeResult::Corrupt => return false, 333 + } 334 + } 335 + } 336 + } 337 + 338 + fn encode_frame(tag: ChannelTag, data: &[u8]) -> Option<Vec<u8>> { 339 + match data.len() > MAX_FRAME_SIZE { 340 + true => { 341 + tracing::warn!( 342 + frame_len = data.len(), 343 + max = MAX_FRAME_SIZE, 344 + "refusing to encode oversized frame" 345 + ); 346 + None 347 + } 348 + false => { 349 + let len = u32::try_from(data.len()).ok()?; 350 + let mut buf = Vec::with_capacity(5 + data.len()); 351 + buf.put_u32(len); 352 + buf.put_u8(tag as u8); 353 + buf.extend_from_slice(data); 354 + Some(buf) 355 + } 356 + } 357 + } 358 + 359 + enum DecodeResult { 360 + Frame(ChannelTag, Vec<u8>), 361 + NeedMoreData, 362 + Corrupt, 363 + } 364 + 365 + fn decode_frame(buf: &mut BytesMut) -> DecodeResult { 366 + loop { 367 + if buf.len() < 5 { 368 + return DecodeResult::NeedMoreData; 369 + } 370 + let len = u32::from_be_bytes([buf[0], buf[1], buf[2], buf[3]]) as usize; 371 + if len > MAX_FRAME_SIZE { 372 + tracing::warn!(frame_len = len, max = MAX_FRAME_SIZE, "oversized frame, closing connection"); 373 + buf.clear(); 374 + return DecodeResult::Corrupt; 375 + } 376 + if buf.len() < 5 + len { 377 
+ return DecodeResult::NeedMoreData; 378 + } 379 + buf.advance(4); 380 + let tag_byte = buf[0]; 381 + buf.advance(1); 382 + let data = buf.split_to(len).to_vec(); 383 + match ChannelTag::from_u8(tag_byte) { 384 + Some(tag) => return DecodeResult::Frame(tag, data), 385 + None => { 386 + tracing::debug!(tag = tag_byte, "skipping frame with unknown channel tag"); 387 + } 388 + } 389 + } 390 + } 391 + 392 + #[cfg(test)] 393 + mod tests { 394 + use super::*; 395 + 396 + #[test] 397 + fn frame_roundtrip() { 398 + let original = b"hello world"; 399 + let encoded = encode_frame(ChannelTag::Gossip, original).expect("should encode"); 400 + let mut buf = BytesMut::from(&encoded[..]); 401 + match decode_frame(&mut buf) { 402 + DecodeResult::Frame(tag, data) => { 403 + assert_eq!(tag, ChannelTag::Gossip); 404 + assert_eq!(data, original); 405 + } 406 + _ => panic!("expected frame"), 407 + } 408 + assert!(buf.is_empty()); 409 + } 410 + 411 + #[test] 412 + fn partial_frame_returns_need_more() { 413 + let encoded = encode_frame(ChannelTag::CrdtSync, b"test data").expect("should encode"); 414 + let mut buf = BytesMut::from(&encoded[..3]); 415 + assert!(matches!(decode_frame(&mut buf), DecodeResult::NeedMoreData)); 416 + } 417 + 418 + #[test] 419 + fn multiple_frames() { 420 + let f1 = encode_frame(ChannelTag::Gossip, b"first").expect("should encode"); 421 + let f2 = encode_frame(ChannelTag::Direct, b"second").expect("should encode"); 422 + let mut buf = BytesMut::new(); 423 + buf.extend_from_slice(&f1); 424 + buf.extend_from_slice(&f2); 425 + 426 + match decode_frame(&mut buf) { 427 + DecodeResult::Frame(tag1, data1) => { 428 + assert_eq!(tag1, ChannelTag::Gossip); 429 + assert_eq!(data1, b"first"); 430 + } 431 + _ => panic!("expected frame"), 432 + } 433 + 434 + match decode_frame(&mut buf) { 435 + DecodeResult::Frame(tag2, data2) => { 436 + assert_eq!(tag2, ChannelTag::Direct); 437 + assert_eq!(data2, b"second"); 438 + } 439 + _ => panic!("expected frame"), 440 + } 441 + } 442 + }
+610
crates/tranquil-ripple/tests/two_node_convergence.rs
··· 1 + use std::sync::Arc; 2 + use std::time::Duration; 3 + use tokio_util::sync::CancellationToken; 4 + use tranquil_infra::{Cache, DistributedRateLimiter}; 5 + use tranquil_ripple::{RippleConfig, RippleEngine}; 6 + 7 + async fn spawn_pair( 8 + shutdown: CancellationToken, 9 + ) -> ( 10 + (Arc<dyn Cache>, Arc<dyn DistributedRateLimiter>), 11 + (Arc<dyn Cache>, Arc<dyn DistributedRateLimiter>), 12 + ) { 13 + let config_a = RippleConfig { 14 + bind_addr: "127.0.0.1:0".parse().unwrap(), 15 + seed_peers: vec![], 16 + machine_id: 1, 17 + gossip_interval_ms: 100, 18 + cache_max_bytes: 64 * 1024 * 1024, 19 + }; 20 + let (cache_a, rl_a, addr_a) = RippleEngine::start(config_a, shutdown.clone()) 21 + .await 22 + .expect("node A failed to start"); 23 + 24 + let config_b = RippleConfig { 25 + bind_addr: "127.0.0.1:0".parse().unwrap(), 26 + seed_peers: vec![addr_a], 27 + machine_id: 2, 28 + gossip_interval_ms: 100, 29 + cache_max_bytes: 64 * 1024 * 1024, 30 + }; 31 + let (cache_b, rl_b, _addr_b) = RippleEngine::start(config_b, shutdown.clone()) 32 + .await 33 + .expect("node B failed to start"); 34 + 35 + tokio::time::sleep(Duration::from_millis(2000)).await; 36 + 37 + ((cache_a, rl_a), (cache_b, rl_b)) 38 + } 39 + 40 + async fn poll_until<F, Fut>(max_ms: u64, interval_ms: u64, check_fn: F) 41 + where 42 + F: Fn() -> Fut, 43 + Fut: std::future::Future<Output = bool>, 44 + { 45 + let deadline = tokio::time::Instant::now() + Duration::from_millis(max_ms); 46 + let interval = Duration::from_millis(interval_ms); 47 + 48 + loop { 49 + if check_fn().await { 50 + return; 51 + } 52 + if tokio::time::Instant::now() + interval > deadline { 53 + panic!("poll_until timed out after {max_ms}ms"); 54 + } 55 + tokio::time::sleep(interval).await; 56 + } 57 + } 58 + 59 + #[tokio::test] 60 + async fn two_node_cache_convergence() { 61 + tracing_subscriber::fmt() 62 + .with_max_level(tracing_subscriber::filter::LevelFilter::DEBUG) 63 + .with_test_writer() 64 + .try_init() 65 + .ok(); 66 + 67 + 
let shutdown = CancellationToken::new(); 68 + let ((cache_a, _rl_a), (cache_b, _rl_b)) = spawn_pair(shutdown.clone()).await; 69 + 70 + cache_a 71 + .set("test-key", "hello-from-a", Duration::from_secs(300)) 72 + .await 73 + .expect("set on A failed"); 74 + 75 + assert_eq!( 76 + cache_a.get("test-key").await.as_deref(), 77 + Some("hello-from-a"), 78 + ); 79 + 80 + let b = cache_b.clone(); 81 + poll_until(10_000, 200, || { 82 + let b = b.clone(); 83 + async move { b.get("test-key").await.as_deref() == Some("hello-from-a") } 84 + }) 85 + .await; 86 + 87 + shutdown.cancel(); 88 + } 89 + 90 + #[tokio::test] 91 + async fn two_node_delete_convergence() { 92 + tracing_subscriber::fmt() 93 + .with_max_level(tracing_subscriber::filter::LevelFilter::DEBUG) 94 + .with_test_writer() 95 + .try_init() 96 + .ok(); 97 + 98 + let shutdown = CancellationToken::new(); 99 + let ((cache_a, _), (cache_b, _)) = spawn_pair(shutdown.clone()).await; 100 + 101 + let key = format!("del-{}", uuid::Uuid::new_v4()); 102 + 103 + cache_a 104 + .set(&key, "to-be-deleted", Duration::from_secs(300)) 105 + .await 106 + .expect("set on A failed"); 107 + 108 + let b = cache_b.clone(); 109 + let k = key.clone(); 110 + poll_until(10_000, 200, move || { 111 + let b = b.clone(); 112 + let k = k.clone(); 113 + async move { b.get(&k).await.is_some() } 114 + }) 115 + .await; 116 + 117 + cache_a.delete(&key).await.expect("delete on A failed"); 118 + 119 + let b = cache_b.clone(); 120 + let k = key.clone(); 121 + poll_until(10_000, 200, move || { 122 + let b = b.clone(); 123 + let k = k.clone(); 124 + async move { b.get(&k).await.is_none() } 125 + }) 126 + .await; 127 + 128 + shutdown.cancel(); 129 + } 130 + 131 + #[tokio::test] 132 + async fn two_node_lww_conflict_resolution() { 133 + tracing_subscriber::fmt() 134 + .with_max_level(tracing_subscriber::filter::LevelFilter::DEBUG) 135 + .with_test_writer() 136 + .try_init() 137 + .ok(); 138 + 139 + let shutdown = CancellationToken::new(); 140 + let ((cache_a, _), 
(cache_b, _)) = spawn_pair(shutdown.clone()).await; 141 + 142 + let key = format!("lww-{}", uuid::Uuid::new_v4()); 143 + 144 + cache_a 145 + .set(&key, "value-from-a", Duration::from_secs(300)) 146 + .await 147 + .expect("set on A failed"); 148 + 149 + cache_b 150 + .set(&key, "value-from-b", Duration::from_secs(300)) 151 + .await 152 + .expect("set on B failed"); 153 + 154 + let a = cache_a.clone(); 155 + let b = cache_b.clone(); 156 + let k = key.clone(); 157 + poll_until(15_000, 200, move || { 158 + let a = a.clone(); 159 + let b = b.clone(); 160 + let k = k.clone(); 161 + async move { 162 + let (va, vb) = tokio::join!(a.get(&k), b.get(&k)); 163 + matches!((va, vb), (Some(a), Some(b)) if a == b) 164 + } 165 + }) 166 + .await; 167 + 168 + let val_a = cache_a.get(&key).await.expect("A should have the key"); 169 + let val_b = cache_b.get(&key).await.expect("B should have the key"); 170 + 171 + assert_eq!(val_a, val_b, "both nodes must agree on the same value after LWW resolution"); 172 + 173 + shutdown.cancel(); 174 + } 175 + 176 + #[tokio::test] 177 + async fn two_node_binary_data_convergence() { 178 + tracing_subscriber::fmt() 179 + .with_max_level(tracing_subscriber::filter::LevelFilter::DEBUG) 180 + .with_test_writer() 181 + .try_init() 182 + .ok(); 183 + 184 + let shutdown = CancellationToken::new(); 185 + let ((cache_a, _), (cache_b, _)) = spawn_pair(shutdown.clone()).await; 186 + 187 + let key = format!("bin-{}", uuid::Uuid::new_v4()); 188 + let payload: Vec<u8> = (0..=255u8).collect(); 189 + 190 + cache_a 191 + .set_bytes(&key, &payload, Duration::from_secs(300)) 192 + .await 193 + .expect("set_bytes on A failed"); 194 + 195 + let b = cache_b.clone(); 196 + let k = key.clone(); 197 + let expected = payload.clone(); 198 + poll_until(10_000, 200, move || { 199 + let b = b.clone(); 200 + let k = k.clone(); 201 + let expected = expected.clone(); 202 + async move { 203 + b.get_bytes(&k) 204 + .await 205 + .map(|v| v == expected) 206 + .unwrap_or(false) 207 + } 
208 + }) 209 + .await; 210 + 211 + shutdown.cancel(); 212 + } 213 + 214 + #[tokio::test] 215 + async fn two_node_ttl_expiration() { 216 + tracing_subscriber::fmt() 217 + .with_max_level(tracing_subscriber::filter::LevelFilter::DEBUG) 218 + .with_test_writer() 219 + .try_init() 220 + .ok(); 221 + 222 + let shutdown = CancellationToken::new(); 223 + let ((cache_a, _), (cache_b, _)) = spawn_pair(shutdown.clone()).await; 224 + 225 + let key = format!("ttl-{}", uuid::Uuid::new_v4()); 226 + 227 + cache_a 228 + .set(&key, "ephemeral", Duration::from_secs(2)) 229 + .await 230 + .expect("set on A failed"); 231 + 232 + let b = cache_b.clone(); 233 + let k = key.clone(); 234 + poll_until(10_000, 200, move || { 235 + let b = b.clone(); 236 + let k = k.clone(); 237 + async move { b.get(&k).await.is_some() } 238 + }) 239 + .await; 240 + 241 + tokio::time::sleep(Duration::from_secs(3)).await; 242 + 243 + assert!(cache_a.get(&key).await.is_none(), "A should have expired the key"); 244 + assert!(cache_b.get(&key).await.is_none(), "B should have expired the key"); 245 + 246 + shutdown.cancel(); 247 + } 248 + 249 + #[tokio::test] 250 + async fn two_node_rapid_overwrite_convergence() { 251 + tracing_subscriber::fmt() 252 + .with_max_level(tracing_subscriber::filter::LevelFilter::DEBUG) 253 + .with_test_writer() 254 + .try_init() 255 + .ok(); 256 + 257 + let shutdown = CancellationToken::new(); 258 + let ((cache_a, _), (cache_b, _)) = spawn_pair(shutdown.clone()).await; 259 + 260 + let key = format!("rapid-{}", uuid::Uuid::new_v4()); 261 + 262 + futures::future::join_all((0..50).map(|i| { 263 + let cache = cache_a.clone(); 264 + let k = key.clone(); 265 + async move { 266 + cache 267 + .set(&k, &format!("value-{i}"), Duration::from_secs(300)) 268 + .await 269 + .expect("set failed"); 270 + } 271 + })) 272 + .await; 273 + 274 + let b = cache_b.clone(); 275 + let k = key.clone(); 276 + poll_until(10_000, 200, move || { 277 + let b = b.clone(); 278 + let k = k.clone(); 279 + async move { 
b.get(&k).await.as_deref() == Some("value-49") } 280 + }) 281 + .await; 282 + 283 + shutdown.cancel(); 284 + } 285 + 286 + #[tokio::test] 287 + async fn two_node_many_keys_convergence() { 288 + tracing_subscriber::fmt() 289 + .with_max_level(tracing_subscriber::filter::LevelFilter::DEBUG) 290 + .with_test_writer() 291 + .try_init() 292 + .ok(); 293 + 294 + let shutdown = CancellationToken::new(); 295 + let ((cache_a, _), (cache_b, _)) = spawn_pair(shutdown.clone()).await; 296 + 297 + let prefix = format!("many-{}", uuid::Uuid::new_v4()); 298 + 299 + futures::future::join_all((0..200).map(|i| { 300 + let cache = cache_a.clone(); 301 + let p = prefix.clone(); 302 + async move { 303 + cache 304 + .set( 305 + &format!("{p}-{i}"), 306 + &format!("val-{i}"), 307 + Duration::from_secs(300), 308 + ) 309 + .await 310 + .expect("set failed"); 311 + } 312 + })) 313 + .await; 314 + 315 + let b = cache_b.clone(); 316 + let p = prefix.clone(); 317 + poll_until(30_000, 500, move || { 318 + let b = b.clone(); 319 + let p = p.clone(); 320 + async move { 321 + futures::future::join_all((0..200).map(|i| { 322 + let b = b.clone(); 323 + let p = p.clone(); 324 + async move { b.get(&format!("{p}-{i}")).await.is_some() } 325 + })) 326 + .await 327 + .into_iter() 328 + .all(|present| present) 329 + } 330 + }) 331 + .await; 332 + 333 + let results: Vec<Option<String>> = futures::future::join_all((0..200).map(|i| { 334 + let b = cache_b.clone(); 335 + let p = prefix.clone(); 336 + async move { b.get(&format!("{p}-{i}")).await } 337 + })) 338 + .await; 339 + 340 + results.into_iter().enumerate().for_each(|(i, val)| { 341 + assert_eq!( 342 + val.as_deref(), 343 + Some(format!("val-{i}").as_str()), 344 + "key {i} mismatch on B" 345 + ); 346 + }); 347 + 348 + shutdown.cancel(); 349 + } 350 + 351 + #[tokio::test] 352 + async fn two_node_concurrent_disjoint_writes() { 353 + tracing_subscriber::fmt() 354 + .with_max_level(tracing_subscriber::filter::LevelFilter::DEBUG) 355 + .with_test_writer() 
356 + .try_init() 357 + .ok(); 358 + 359 + let shutdown = CancellationToken::new(); 360 + let ((cache_a, _), (cache_b, _)) = spawn_pair(shutdown.clone()).await; 361 + 362 + let prefix = format!("disj-{}", uuid::Uuid::new_v4()); 363 + 364 + let write_a = { 365 + let cache = cache_a.clone(); 366 + let p = prefix.clone(); 367 + async move { 368 + futures::future::join_all((0..100).map(|i| { 369 + let cache = cache.clone(); 370 + let p = p.clone(); 371 + async move { 372 + cache 373 + .set( 374 + &format!("{p}-a-{i}"), 375 + &format!("a-{i}"), 376 + Duration::from_secs(300), 377 + ) 378 + .await 379 + .expect("set failed"); 380 + } 381 + })) 382 + .await; 383 + } 384 + }; 385 + 386 + let write_b = { 387 + let cache = cache_b.clone(); 388 + let p = prefix.clone(); 389 + async move { 390 + futures::future::join_all((0..100).map(|i| { 391 + let cache = cache.clone(); 392 + let p = p.clone(); 393 + async move { 394 + cache 395 + .set( 396 + &format!("{p}-b-{i}"), 397 + &format!("b-{i}"), 398 + Duration::from_secs(300), 399 + ) 400 + .await 401 + .expect("set failed"); 402 + } 403 + })) 404 + .await; 405 + } 406 + }; 407 + 408 + tokio::join!(write_a, write_b); 409 + 410 + let a = cache_a.clone(); 411 + let b = cache_b.clone(); 412 + let p = prefix.clone(); 413 + poll_until(30_000, 500, move || { 414 + let a = a.clone(); 415 + let b = b.clone(); 416 + let p = p.clone(); 417 + async move { 418 + let a_has_b_keys = futures::future::join_all((0..100).map(|i| { 419 + let a = a.clone(); 420 + let p = p.clone(); 421 + async move { a.get(&format!("{p}-b-{i}")).await.is_some() } 422 + })) 423 + .await 424 + .into_iter() 425 + .all(|v| v); 426 + 427 + let b_has_a_keys = futures::future::join_all((0..100).map(|i| { 428 + let b = b.clone(); 429 + let p = p.clone(); 430 + async move { b.get(&format!("{p}-a-{i}")).await.is_some() } 431 + })) 432 + .await 433 + .into_iter() 434 + .all(|v| v); 435 + 436 + a_has_b_keys && b_has_a_keys 437 + } 438 + }) 439 + .await; 440 + 441 + 
shutdown.cancel(); 442 + } 443 + 444 + #[tokio::test] 445 + async fn two_node_concurrent_same_key_writes() { 446 + tracing_subscriber::fmt() 447 + .with_max_level(tracing_subscriber::filter::LevelFilter::DEBUG) 448 + .with_test_writer() 449 + .try_init() 450 + .ok(); 451 + 452 + let shutdown = CancellationToken::new(); 453 + let ((cache_a, _), (cache_b, _)) = spawn_pair(shutdown.clone()).await; 454 + 455 + let prefix = format!("same-{}", uuid::Uuid::new_v4()); 456 + 457 + let write_a = { 458 + let cache = cache_a.clone(); 459 + let p = prefix.clone(); 460 + async move { 461 + futures::future::join_all((0..50).map(|i| { 462 + let cache = cache.clone(); 463 + let p = p.clone(); 464 + async move { 465 + cache 466 + .set( 467 + &format!("{p}-{i}"), 468 + &format!("a-{i}"), 469 + Duration::from_secs(300), 470 + ) 471 + .await 472 + .expect("set failed"); 473 + } 474 + })) 475 + .await; 476 + } 477 + }; 478 + 479 + let write_b = { 480 + let cache = cache_b.clone(); 481 + let p = prefix.clone(); 482 + async move { 483 + futures::future::join_all((0..50).map(|i| { 484 + let cache = cache.clone(); 485 + let p = p.clone(); 486 + async move { 487 + cache 488 + .set( 489 + &format!("{p}-{i}"), 490 + &format!("b-{i}"), 491 + Duration::from_secs(300), 492 + ) 493 + .await 494 + .expect("set failed"); 495 + } 496 + })) 497 + .await; 498 + } 499 + }; 500 + 501 + tokio::join!(write_a, write_b); 502 + 503 + let a = cache_a.clone(); 504 + let b = cache_b.clone(); 505 + let p = prefix.clone(); 506 + poll_until(15_000, 200, move || { 507 + let a = a.clone(); 508 + let b = b.clone(); 509 + let p = p.clone(); 510 + async move { 511 + futures::future::join_all((0..50).map(|i| { 512 + let a = a.clone(); 513 + let b = b.clone(); 514 + let p = p.clone(); 515 + async move { 516 + let va = a.get(&format!("{p}-{i}")).await.unwrap_or_default(); 517 + let vb = b.get(&format!("{p}-{i}")).await.unwrap_or_default(); 518 + !va.is_empty() && va == vb 519 + } 520 + })) 521 + .await 522 + .into_iter() 
523 + .all(|v| v) 524 + } 525 + }) 526 + .await; 527 + 528 + let results: Vec<(String, String)> = futures::future::join_all((0..50).map(|i| { 529 + let a = cache_a.clone(); 530 + let b = cache_b.clone(); 531 + let p = prefix.clone(); 532 + async move { 533 + let va = a.get(&format!("{p}-{i}")).await.unwrap_or_default(); 534 + let vb = b.get(&format!("{p}-{i}")).await.unwrap_or_default(); 535 + (va, vb) 536 + } 537 + })) 538 + .await; 539 + 540 + results.into_iter().enumerate().for_each(|(i, (va, vb))| { 541 + assert_eq!(va, vb, "key {i}: nodes disagree (A={va}, B={vb})"); 542 + }); 543 + 544 + shutdown.cancel(); 545 + } 546 + 547 + #[tokio::test] 548 + async fn two_node_rate_limit_split_increment() { 549 + tracing_subscriber::fmt() 550 + .with_max_level(tracing_subscriber::filter::LevelFilter::DEBUG) 551 + .with_test_writer() 552 + .try_init() 553 + .ok(); 554 + 555 + let shutdown = CancellationToken::new(); 556 + let ((_, rl_a), (_, rl_b)) = spawn_pair(shutdown.clone()).await; 557 + 558 + let key = format!("rl-split-{}", uuid::Uuid::new_v4()); 559 + let limit: u32 = 200; 560 + let window_ms: u64 = 600_000; 561 + 562 + futures::future::join_all((0..40).map(|_| { 563 + let rl = rl_a.clone(); 564 + let k = key.clone(); 565 + async move { 566 + let allowed = rl.check_rate_limit(&k, limit, window_ms).await; 567 + assert!(allowed, "should be allowed within limit"); 568 + } 569 + })) 570 + .await; 571 + 572 + futures::future::join_all((0..30).map(|_| { 573 + let rl = rl_b.clone(); 574 + let k = key.clone(); 575 + async move { 576 + let allowed = rl.check_rate_limit(&k, limit, window_ms).await; 577 + assert!(allowed, "should be allowed within limit"); 578 + } 579 + })) 580 + .await; 581 + 582 + let rl_peek = rl_a.clone(); 583 + let k = key.clone(); 584 + poll_until(15_000, 200, move || { 585 + let rl = rl_peek.clone(); 586 + let k = k.clone(); 587 + async move { rl.peek_rate_limit_count(&k, window_ms).await >= 70 } 588 + }) 589 + .await; 590 + 591 + let mut remaining = 
0u32; 592 + loop { 593 + if !rl_a.check_rate_limit(&key, limit, window_ms).await { 594 + break; 595 + } 596 + remaining += 1; 597 + if remaining > limit { 598 + panic!("rate limiter never denied - convergence failed"); 599 + } 600 + } 601 + 602 + let expected_remaining = limit - 70; 603 + let margin = 15; 604 + assert!( 605 + remaining.abs_diff(expected_remaining) <= margin, 606 + "expected ~{expected_remaining} remaining hits, got {remaining} (margin={margin})" 607 + ); 608 + 609 + shutdown.cancel(); 610 + }
+3 -5
deploy/quadlets/tranquil-pds-app.container
··· 1 1 [Unit] 2 2 Description=Tranquil PDS AT Protocol PDS 3 - After=tranquil-pds-db.service tranquil-pds-minio.service tranquil-pds-valkey.service 3 + After=tranquil-pds-db.service 4 4 [Container] 5 5 ContainerName=tranquil-pds-app 6 6 Image=localhost/tranquil-pds:latest ··· 8 8 EnvironmentFile=/srv/tranquil-pds/config/tranquil-pds.env 9 9 Environment=SERVER_HOST=0.0.0.0 10 10 Environment=SERVER_PORT=3000 11 - Environment=S3_ENDPOINT=http://localhost:9000 12 - Environment=AWS_REGION=us-east-1 13 - Environment=S3_BUCKET=pds-blobs 14 - Environment=VALKEY_URL=redis://localhost:6379 11 + Volume=/srv/tranquil-pds/blobs:/var/lib/tranquil/blobs:Z 12 + Volume=/srv/tranquil-pds/backups:/var/lib/tranquil/backups:Z 15 13 HealthCmd=wget -q --spider http://localhost:3000/xrpc/_health 16 14 HealthInterval=30s 17 15 HealthTimeout=10s
+7 -66
docker-compose.prod.yaml
··· 10 10 SERVER_PORT: "3000" 11 11 PDS_HOSTNAME: "${PDS_HOSTNAME:?PDS_HOSTNAME is required}" 12 12 DATABASE_URL: "postgres://tranquil_pds:${DB_PASSWORD:?DB_PASSWORD is required}@db:5432/pds" 13 - S3_ENDPOINT: "http://minio:9000" 14 - AWS_REGION: "us-east-1" 15 - S3_BUCKET: "pds-blobs" 16 - AWS_ACCESS_KEY_ID: "${MINIO_ROOT_USER:-minioadmin}" 17 - AWS_SECRET_ACCESS_KEY: "${MINIO_ROOT_PASSWORD:?MINIO_ROOT_PASSWORD is required}" 18 - VALKEY_URL: "redis://valkey:6379" 13 + BLOB_STORAGE_PATH: "/var/lib/tranquil/blobs" 14 + BACKUP_STORAGE_PATH: "/var/lib/tranquil/backups" 19 15 JWT_SECRET: "${JWT_SECRET:?JWT_SECRET is required (min 32 chars)}" 20 16 DPOP_SECRET: "${DPOP_SECRET:?DPOP_SECRET is required (min 32 chars)}" 21 17 MASTER_KEY: "${MASTER_KEY:?MASTER_KEY is required (min 32 chars)}" 22 18 CRAWLERS: "${CRAWLERS:-https://bsky.network}" 19 + volumes: 20 + - blob_data:/var/lib/tranquil/blobs 21 + - backup_data:/var/lib/tranquil/backups 23 22 depends_on: 24 23 db: 25 - condition: service_healthy 26 - minio: 27 - condition: service_healthy 28 - valkey: 29 24 condition: service_healthy 30 25 healthcheck: 31 26 test: ["CMD", "wget", "-q", "--spider", "http://localhost:3000/xrpc/_health"] ··· 81 76 reservations: 82 77 memory: 128M 83 78 84 - minio: 85 - image: cgr.dev/chainguard/minio:latest 86 - restart: unless-stopped 87 - command: server /data --console-address ":9001" 88 - environment: 89 - MINIO_ROOT_USER: "${MINIO_ROOT_USER:-minioadmin}" 90 - MINIO_ROOT_PASSWORD: "${MINIO_ROOT_PASSWORD:?MINIO_ROOT_PASSWORD is required}" 91 - volumes: 92 - - minio_data:/data 93 - deploy: 94 - resources: 95 - limits: 96 - memory: 512M 97 - reservations: 98 - memory: 128M 99 - 100 - minio-init: 101 - image: cgr.dev/chainguard/minio-client:latest-dev 102 - depends_on: 103 - - minio 104 - entrypoint: > 105 - /bin/sh -c " 106 - for i in 1 2 3 4 5 6 7 8 9 10; do 107 - mc alias set local http://minio:9000 $${MINIO_ROOT_USER} $${MINIO_ROOT_PASSWORD} && break; 108 - echo 'Waiting for 
minio...'; sleep 2; 109 - done; 110 - mc mb --ignore-existing local/pds-blobs; 111 - mc mb --ignore-existing local/pds-backups; 112 - mc anonymous set none local/pds-blobs; 113 - exit 0; 114 - " 115 - environment: 116 - MINIO_ROOT_USER: "${MINIO_ROOT_USER:-minioadmin}" 117 - MINIO_ROOT_PASSWORD: "${MINIO_ROOT_PASSWORD:?MINIO_ROOT_PASSWORD is required}" 118 - 119 - valkey: 120 - image: valkey/valkey:9-alpine 121 - restart: unless-stopped 122 - command: valkey-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru 123 - volumes: 124 - - valkey_data:/data 125 - healthcheck: 126 - test: ["CMD", "valkey-cli", "ping"] 127 - interval: 10s 128 - timeout: 5s 129 - retries: 3 130 - start_period: 5s 131 - deploy: 132 - resources: 133 - limits: 134 - memory: 300M 135 - reservations: 136 - memory: 64M 137 - 138 79 nginx: 139 80 image: nginx:1.29-alpine 140 81 restart: unless-stopped ··· 180 121 181 122 volumes: 182 123 postgres_data: 183 - minio_data: 184 - valkey_data: 124 + blob_data: 125 + backup_data: 185 126 prometheus_data: 186 127 acme_challenge:
+5 -25
docker-compose.yaml
··· 10 10 - ./.env 11 11 environment: 12 12 DATABASE_URL: postgres://postgres:postgres@db:5432/pds 13 - S3_ENDPOINT: http://objsto:9000 14 - VALKEY_URL: redis://cache:6379 13 + volumes: 14 + - blob_data:/var/lib/tranquil/blobs 15 + - backup_data:/var/lib/tranquil/backups 15 16 depends_on: 16 17 - db 17 - - objsto 18 - - cache 19 18 20 19 frontend: 21 20 build: ··· 38 37 volumes: 39 38 - postgres_data:/var/lib/postgresql 40 39 41 - objsto: 42 - image: cgr.dev/chainguard/minio:latest 43 - ports: 44 - - "9000:9000" 45 - - "9001:9001" 46 - environment: 47 - MINIO_ROOT_USER: minioadmin 48 - MINIO_ROOT_PASSWORD: minioadmin 49 - volumes: 50 - - minio_data:/data 51 - command: server /data --console-address ":9001" 52 - 53 - cache: 54 - image: valkey/valkey:9-alpine 55 - ports: 56 - - "6379:6379" 57 - volumes: 58 - - valkey_data:/data 59 - 60 40 prometheus: 61 41 image: prom/prometheus:v3.8.0 62 42 ports: ··· 72 52 73 53 volumes: 74 54 postgres_data: 75 - minio_data: 76 - valkey_data: 55 + blob_data: 56 + backup_data: 77 57 prometheus_data:
+16 -11
docs/install-containers.md
··· 43 43 44 44 ## Standalone Containers (No Compose) 45 45 46 - If you already have postgres and valkey running on the host (eg., from the [Debian install guide](install-debian.md)), you can run just the app containers. 46 + If you already have postgres running on the host (eg. from the [Debian install guide](install-debian.md)), you can run just the app containers. 47 47 48 48 Build the images: 49 49 ```sh ··· 51 51 podman build -t tranquil-pds-frontend:latest ./frontend 52 52 ``` 53 53 54 - Run the backend with host networking (so it can access postgres/valkey on localhost) and mount the blob storage: 54 + Run the backend with host networking (so it can access postgres on localhost) and mount the blob storage: 55 55 ```sh 56 56 podman run -d --name tranquil-pds \ 57 57 --network=host \ ··· 106 106 107 107 ```bash 108 108 mkdir -p /etc/containers/systemd 109 - mkdir -p /srv/tranquil-pds/{postgres,valkey,blobs,backups,certs,acme,config} 109 + mkdir -p /srv/tranquil-pds/{postgres,blobs,backups,certs,acme,config} 110 110 ``` 111 111 112 112 ## Create Environment File ··· 127 127 128 128 Copy the quadlet files from the repository: 129 129 ```bash 130 - cp /opt/tranquil-pds/deploy/quadlets/*.pod /etc/containers/systemd/ 131 - cp /opt/tranquil-pds/deploy/quadlets/*.container /etc/containers/systemd/ 130 + cp /opt/tranquil-pds/deploy/quadlets/tranquil-pds.pod /etc/containers/systemd/ 131 + cp /opt/tranquil-pds/deploy/quadlets/tranquil-pds-db.container /etc/containers/systemd/ 132 + cp /opt/tranquil-pds/deploy/quadlets/tranquil-pds-app.container /etc/containers/systemd/ 133 + cp /opt/tranquil-pds/deploy/quadlets/tranquil-pds-frontend.container /etc/containers/systemd/ 134 + cp /opt/tranquil-pds/deploy/quadlets/tranquil-pds-nginx.container /etc/containers/systemd/ 132 135 ``` 133 136 137 + Optional quadlets for valkey and minio are also available in `deploy/quadlets/` if you need them. 
138 + 134 139 Note: Systemd doesn't support shell-style variable expansion in `Environment=` lines. The quadlet files expect DATABASE_URL to be set in the environment file. 135 140 136 141 ## Create nginx Configuration ··· 160 165 161 166 ```bash 162 167 systemctl daemon-reload 163 - systemctl start tranquil-pds-db tranquil-pds-valkey 168 + systemctl start tranquil-pds-db 164 169 sleep 10 165 170 ``` 166 171 ··· 172 177 173 178 ## Obtain Wildcard SSL Certificate 174 179 175 - User handles are served as subdomains (eg., `alice.pds.example.com`), so you need a wildcard certificate. Wildcard certs require DNS-01 validation. 180 + User handles are served as subdomains (eg. `alice.pds.example.com`), so you need a wildcard certificate. Wildcard certs require DNS-01 validation. 176 181 177 182 Create temporary self-signed cert to start services: 178 183 ```bash ··· 195 200 196 201 Follow the prompts to add TXT records to your DNS. Note: manual mode doesn't auto-renew. 197 202 198 - For automated renewal, use a DNS provider plugin (eg., cloudflare, route53). 203 + For automated renewal, use a DNS provider plugin (eg. cloudflare, route53). 
199 204 200 205 Link certificates and restart: 201 206 ```bash ··· 207 212 ## Enable All Services 208 213 209 214 ```bash 210 - systemctl enable tranquil-pds-db tranquil-pds-valkey tranquil-pds-app tranquil-pds-frontend tranquil-pds-nginx 215 + systemctl enable tranquil-pds-db tranquil-pds-app tranquil-pds-frontend tranquil-pds-nginx 211 216 ``` 212 217 213 218 ## Configure Firewall ··· 252 257 253 258 ```sh 254 259 mkdir -p /srv/tranquil-pds/{data,config} 255 - mkdir -p /srv/tranquil-pds/data/{postgres,valkey,blobs,backups,certs,acme} 260 + mkdir -p /srv/tranquil-pds/data/{postgres,blobs,backups,certs,acme} 256 261 ``` 257 262 258 263 ## Clone Repository and Build Images ··· 346 351 347 352 ## Obtain Wildcard SSL Certificate 348 353 349 - User handles are served as subdomains (eg., `alice.pds.example.com`), so you need a wildcard certificate. Wildcard certs require DNS-01 validation. 354 + User handles are served as subdomains (eg. `alice.pds.example.com`), so you need a wildcard certificate. Wildcard certs require DNS-01 validation. 350 355 351 356 Create temporary self-signed cert to start services: 352 357 ```sh
-8
docs/install-debian.md
··· 46 46 47 47 We'll set ownership after creating the service user. 48 48 49 - ## Install valkey 50 - 51 - ```bash 52 - apt install -y valkey 53 - systemctl enable valkey-server 54 - systemctl start valkey-server 55 - ``` 56 - 57 49 ## Install deno (for frontend build) 58 50 59 51 ```bash
-3
docs/install-kubernetes.md
··· 3 3 If you're reaching for kubernetes for this app, you're experienced enough to know how to spin up: 4 4 5 5 - cloudnativepg (or your preferred postgres operator) 6 - - valkey 7 6 - a PersistentVolume for blob storage 8 7 - the app itself (it's just a container with some env vars) 9 8 ··· 13 12 - `DATABASE_URL` - postgres connection string 14 13 - `BLOB_STORAGE_PATH` - path to blob storage (mount a PV here) 15 14 - `BACKUP_STORAGE_PATH` - path for repo backups (optional but recommended) 16 - - `VALKEY_URL` - redis:// connection string 17 15 - `PDS_HOSTNAME` - your PDS hostname (without protocol) 18 16 - `JWT_SECRET`, `DPOP_SECRET`, `MASTER_KEY` - generate with `openssl rand -base64 48` 19 17 - `CRAWLERS` - typically `https://bsky.network` ··· 41 39 </body> 42 40 </html> 43 41 ``` 44 -
-11
scripts/install-debian.sh
··· 194 194 sudo -u postgres psql -c "GRANT ALL PRIVILEGES ON DATABASE pds TO tranquil_pds;" 195 195 log_success "postgres configured" 196 196 197 - log_info "Installing valkey..." 198 - apt install -y valkey 2>/dev/null || { 199 - log_warn "valkey not in repos, installing redis..." 200 - apt install -y redis-server 201 - systemctl enable redis-server 202 - systemctl start redis-server 203 - } 204 - systemctl enable valkey-server 2>/dev/null || true 205 - systemctl start valkey-server 2>/dev/null || true 206 - 207 197 log_info "Creating blob storage directories..." 208 198 mkdir -p /var/lib/tranquil/blobs /var/lib/tranquil/backups 209 199 log_success "Blob storage directories created" ··· 313 303 DATABASE_MIN_CONNECTIONS=10 314 304 BLOB_STORAGE_PATH=/var/lib/tranquil/blobs 315 305 BACKUP_STORAGE_PATH=/var/lib/tranquil/backups 316 - VALKEY_URL=redis://localhost:6379 317 306 JWT_SECRET=${JWT_SECRET} 318 307 DPOP_SECRET=${DPOP_SECRET} 319 308 MASTER_KEY=${MASTER_KEY}
+8 -1
scripts/run-tests.sh
··· 16 16 sqlx database create 2>/dev/null || true 17 17 sqlx migrate run --source "$PROJECT_DIR/migrations" 18 18 echo "" 19 + ulimit -n 65536 20 + 21 + echo "Building test binaries..." 22 + cargo test --no-run 2>&1 | tail -1 23 + 19 24 echo "Running tests..." 20 25 echo "" 21 - ulimit -n 65536 22 26 cargo nextest run "$@" 27 + 28 + echo "" 29 + echo "All tests passed."
+5 -50
scripts/test-infra.sh
··· 32 32 echo "Stale infra file found, cleaning up..." 33 33 rm -f "$INFRA_FILE" 34 34 fi 35 - $CONTAINER_CMD rm -f "${CONTAINER_PREFIX}-postgres" "${CONTAINER_PREFIX}-minio" "${CONTAINER_PREFIX}-valkey" 2>/dev/null || true 35 + $CONTAINER_CMD rm -f "${CONTAINER_PREFIX}-postgres" 2>/dev/null || true 36 36 echo "Starting PostgreSQL..." 37 37 $CONTAINER_CMD run -d \ 38 38 --name "${CONTAINER_PREFIX}-postgres" \ ··· 43 43 --label tranquil_pds_test=true \ 44 44 postgres:18-alpine \ 45 45 -c max_connections=500 >/dev/null 46 - echo "Starting MinIO..." 47 - $CONTAINER_CMD run -d \ 48 - --name "${CONTAINER_PREFIX}-minio" \ 49 - -e MINIO_ROOT_USER=minioadmin \ 50 - -e MINIO_ROOT_PASSWORD=minioadmin \ 51 - -p 9000 \ 52 - --label tranquil_pds_test=true \ 53 - cgr.dev/chainguard/minio:latest server /data >/dev/null 54 - echo "Starting Valkey..." 55 - $CONTAINER_CMD run -d \ 56 - --name "${CONTAINER_PREFIX}-valkey" \ 57 - -P \ 58 - --label tranquil_pds_test=true \ 59 - valkey/valkey:9-alpine >/dev/null 60 46 echo "Waiting for services to be ready..." 61 - sleep 2 62 - PG_PORT=$($CONTAINER_CMD port "${CONTAINER_PREFIX}-postgres" 5432 | head -1 | cut -d: -f2) 63 - MINIO_PORT=$($CONTAINER_CMD port "${CONTAINER_PREFIX}-minio" 9000 | head -1 | cut -d: -f2) 64 - VALKEY_PORT=$($CONTAINER_CMD port "${CONTAINER_PREFIX}-valkey" 6379 | head -1 | cut -d: -f2) 65 47 for i in {1..30}; do 66 48 if $CONTAINER_CMD exec "${CONTAINER_PREFIX}-postgres" pg_isready -U postgres >/dev/null 2>&1; then 67 49 break ··· 69 51 echo "Waiting for PostgreSQL... ($i/30)" 70 52 sleep 1 71 53 done 72 - for i in {1..30}; do 73 - if curl -s "http://127.0.0.1:${MINIO_PORT}/minio/health/live" >/dev/null 2>&1; then 74 - break 75 - fi 76 - echo "Waiting for MinIO... ($i/30)" 77 - sleep 1 78 - done 79 - for i in {1..30}; do 80 - if $CONTAINER_CMD exec "${CONTAINER_PREFIX}-valkey" valkey-cli ping 2>/dev/null | grep -q PONG; then 81 - break 82 - fi 83 - echo "Waiting for Valkey... 
($i/30)" 84 - sleep 1 85 - done 86 - echo "Creating MinIO buckets..." 87 - $CONTAINER_CMD run --rm --network host \ 88 - -e MC_HOST_minio="http://minioadmin:minioadmin@127.0.0.1:${MINIO_PORT}" \ 89 - cgr.dev/chainguard/minio-client:latest-dev mb minio/test-bucket --ignore-existing >/dev/null 2>&1 || true 90 - $CONTAINER_CMD run --rm --network host \ 91 - -e MC_HOST_minio="http://minioadmin:minioadmin@127.0.0.1:${MINIO_PORT}" \ 92 - cgr.dev/chainguard/minio-client:latest-dev mb minio/test-backups --ignore-existing >/dev/null 2>&1 || true 54 + PG_PORT=$($CONTAINER_CMD port "${CONTAINER_PREFIX}-postgres" 5432 | head -1 | cut -d: -f2) 93 55 cat > "$INFRA_FILE" << EOF 94 56 export DATABASE_URL="postgres://postgres:postgres@127.0.0.1:${PG_PORT}/postgres" 95 57 export TEST_DB_PORT="${PG_PORT}" 96 - export S3_ENDPOINT="http://127.0.0.1:${MINIO_PORT}" 97 - export S3_BUCKET="test-bucket" 98 - export BACKUP_S3_BUCKET="test-backups" 99 - export AWS_ACCESS_KEY_ID="minioadmin" 100 - export AWS_SECRET_ACCESS_KEY="minioadmin" 101 - export AWS_REGION="us-east-1" 102 - export VALKEY_URL="redis://127.0.0.1:${VALKEY_PORT}" 103 58 export TRANQUIL_PDS_TEST_INFRA_READY="1" 104 59 export TRANQUIL_PDS_ALLOW_INSECURE_SECRETS="1" 105 60 export SKIP_IMPORT_VERIFICATION="true" ··· 113 68 } 114 69 stop_infra() { 115 70 echo "Stopping test infrastructure..." 116 - $CONTAINER_CMD rm -f "${CONTAINER_PREFIX}-postgres" "${CONTAINER_PREFIX}-minio" "${CONTAINER_PREFIX}-valkey" 2>/dev/null || true 71 + $CONTAINER_CMD rm -f "${CONTAINER_PREFIX}-postgres" 2>/dev/null || true 117 72 rm -f "$INFRA_FILE" 73 + rm -rf "${TMPDIR:-/tmp}"/tranquil-pds-test-* 2>/dev/null || true 118 74 echo "Infrastructure stopped." 
119 75 } 120 76 status_infra() { ··· 124 80 echo "Config file: $INFRA_FILE" 125 81 source "$INFRA_FILE" 126 82 echo "Database URL: $DATABASE_URL" 127 - echo "S3 Endpoint: $S3_ENDPOINT" 128 83 else 129 84 echo "Config file: NOT FOUND" 130 85 fi ··· 158 113 echo "Usage: $0 {start|stop|restart|status|env}" 159 114 echo "" 160 115 echo "Commands:" 161 - echo " start - Start test infrastructure (Postgres, MinIO, Valkey)" 116 + echo " start - Start test infrastructure (Postgres)" 162 117 echo " stop - Stop and remove test containers" 163 118 echo " restart - Stop then start infrastructure" 164 119 echo " status - Show infrastructure status"