Tracks lexicons and how many times they appeared on the Jetstream.

refactor(server): use rayon to compute the blocks, then execute their insertion in a thread pool

ptr.pet 52fd5331 a820ae13

verified
+173 -83
+31
server/Cargo.lock
··· 359 359 ] 360 360 361 361 [[package]] 362 + name = "crossbeam-deque" 363 + version = "0.8.6" 364 + source = "registry+https://github.com/rust-lang/crates.io-index" 365 + checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" 366 + dependencies = [ 367 + "crossbeam-epoch", 368 + "crossbeam-utils", 369 + ] 370 + 371 + [[package]] 362 372 name = "crossbeam-epoch" 363 373 version = "0.9.18" 364 374 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1147 1157 ] 1148 1158 1149 1159 [[package]] 1160 + name = "rayon" 1161 + version = "1.10.0" 1162 + source = "registry+https://github.com/rust-lang/crates.io-index" 1163 + checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 1164 + dependencies = [ 1165 + "either", 1166 + "rayon-core", 1167 + ] 1168 + 1169 + [[package]] 1170 + name = "rayon-core" 1171 + version = "1.12.1" 1172 + source = "registry+https://github.com/rust-lang/crates.io-index" 1173 + checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 1174 + dependencies = [ 1175 + "crossbeam-deque", 1176 + "crossbeam-utils", 1177 + ] 1178 + 1179 + [[package]] 1150 1180 name = "redox_syscall" 1151 1181 version = "0.5.15" 1152 1182 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1499 1529 "itertools", 1500 1530 "ordered-varint", 1501 1531 "quanta", 1532 + "rayon", 1502 1533 "rkyv", 1503 1534 "rustls", 1504 1535 "scc",
+1
server/Cargo.toml
··· 28 28 quanta = "0.12.6" 29 29 itertools = "0.14.0" 30 30 byteview = "0.6.1" 31 + rayon = "1.10.0"
+1 -1
server/src/api.rs
··· 146 146 to: Option<u64>, 147 147 } 148 148 149 - #[derive(Serialize)] 149 + #[derive(Debug, Serialize)] 150 150 struct Hit { 151 151 timestamp: u64, 152 152 deleted: bool,
-2
server/src/db/block.rs
··· 8 8 marker::PhantomData, 9 9 }; 10 10 11 - use crate::error::AppResult; 12 - 13 11 pub struct Item<T> { 14 12 pub timestamp: u64, 15 13 data: AlignedVec,
+76 -48
server/src/db/mod.rs
··· 1 1 use std::{ 2 - io::{self, Cursor, Write}, 3 - marker::PhantomData, 2 + io::{Cursor, Write}, 4 3 ops::{Bound, Deref, RangeBounds}, 5 4 path::Path, 6 5 sync::{ 7 6 Arc, 8 - atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering as AtomicOrdering}, 7 + atomic::{AtomicU64, AtomicUsize, Ordering as AtomicOrdering}, 9 8 }, 10 9 time::Duration, 11 10 }; ··· 14 13 use fjall::{Config, Keyspace, Partition, PartitionCreateOptions, Slice}; 15 14 use itertools::{Either, Itertools}; 16 15 use ordered_varint::Variable; 16 + use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}; 17 17 use rkyv::{Archive, Deserialize, Serialize, rancor::Error}; 18 18 use smol_str::SmolStr; 19 19 use tokio::sync::broadcast; 20 - use tokio_util::bytes::{self, BufMut}; 20 + use tokio_util::sync::CancellationToken; 21 21 22 22 use crate::{ 23 23 db::block::{ReadVariableExt, WriteVariableExt}, ··· 127 127 } 128 128 } 129 129 130 + struct Block { 131 + written: usize, 132 + key: ByteView, 133 + data: ByteView, 134 + } 135 + 130 136 pub struct LexiconHandle { 131 137 tree: Partition, 138 + nsid: SmolStr, 132 139 buf: Arc<scc::Queue<EventRecord>>, 133 140 // this is stored here since scc::Queue does not have O(1) length 134 - buf_len: AtomicUsize, // relaxed 141 + buf_len: AtomicUsize, // seqcst 135 142 last_insert: AtomicU64, // relaxed 136 143 eps: DefaultRateTracker, 137 144 } ··· 141 148 let opts = PartitionCreateOptions::default().compression(fjall::CompressionType::Miniz(9)); 142 149 Self { 143 150 tree: keyspace.open_partition(nsid, opts).unwrap(), 151 + nsid: nsid.into(), 144 152 buf: Default::default(), 145 153 buf_len: AtomicUsize::new(0), 146 154 last_insert: AtomicU64::new(0), ··· 149 157 } 150 158 151 159 fn item_count(&self) -> usize { 152 - self.buf_len.load(AtomicOrdering::Relaxed) 160 + self.buf_len.load(AtomicOrdering::SeqCst) 153 161 } 154 162 155 163 fn since_last_activity(&self) -> u64 { ··· 162 170 163 171 fn insert(&self, event: EventRecord) { 164 
172 self.buf.push(event); 165 - self.buf_len.fetch_add(1, AtomicOrdering::Relaxed); 173 + self.buf_len.fetch_add(1, AtomicOrdering::SeqCst); 166 174 self.last_insert.store(CLOCK.raw(), AtomicOrdering::Relaxed); 167 175 self.eps.observe(); 168 176 } 169 177 170 - fn sync(&self, max_block_size: usize) -> AppResult<usize> { 178 + fn encode_block(&self, max_block_size: usize) -> AppResult<Option<Block>> { 171 179 let buf_size = 172 180 size_of::<u64>() + self.item_count().min(max_block_size) * size_of::<(u64, NsidHit)>(); 173 181 let mut writer = ItemEncoder::new(WritableByteView::with_size(buf_size)); ··· 192 200 written += 1; 193 201 } 194 202 if let (Some(start_timestamp), Some(end_timestamp)) = (start_timestamp, end_timestamp) { 195 - self.buf_len.store(0, AtomicOrdering::Relaxed); 203 + self.buf_len.store(0, AtomicOrdering::SeqCst); 196 204 let value = writer.finish()?; 197 - let mut key = Vec::with_capacity(size_of::<u64>() * 2); 205 + let mut key = WritableByteView::with_size(size_of::<u64>() * 2); 198 206 key.write_varint(start_timestamp)?; 199 207 key.write_varint(end_timestamp)?; 200 - self.tree.insert(key, value.into_inner())?; 208 + return Ok(Some(Block { 209 + written, 210 + key: key.into_inner(), 211 + data: value.into_inner(), 212 + })); 201 213 } 202 - Ok(written) 214 + Ok(None) 203 215 } 204 216 } 205 217 ··· 209 221 inner: Keyspace, 210 222 counts: Partition, 211 223 hits: scc::HashIndex<SmolStr, Arc<LexiconHandle>>, 212 - syncpool: threadpool::ThreadPool, 224 + sync_pool: threadpool::ThreadPool, 213 225 event_broadcaster: broadcast::Sender<(SmolStr, NsidCounts)>, 214 226 eps: RateTracker<100>, 215 - shutting_down: AtomicBool, 227 + cancel_token: CancellationToken, 216 228 min_block_size: usize, 217 229 max_block_size: usize, 218 230 max_last_activity: u64, 219 231 } 220 232 221 233 impl Db { 222 - pub fn new(path: impl AsRef<Path>) -> AppResult<Self> { 234 + pub fn new(path: impl AsRef<Path>, cancel_token: CancellationToken) -> AppResult<Self> { 223 
235 tracing::info!("opening db..."); 224 236 let ks = Config::new(path) 225 237 .cache_size(8 * 1024 * 1024) // from talna 226 238 .open()?; 227 239 Ok(Self { 228 240 hits: Default::default(), 229 - syncpool: threadpool::Builder::new().num_threads(256).build(), 241 + sync_pool: threadpool::Builder::new() 242 + .num_threads(rayon::current_num_threads() * 2) 243 + .build(), 230 244 counts: ks.open_partition( 231 245 "_counts", 232 246 PartitionCreateOptions::default().compression(fjall::CompressionType::None), ··· 234 248 inner: ks, 235 249 event_broadcaster: broadcast::channel(1000).0, 236 250 eps: RateTracker::new(Duration::from_secs(1)), 237 - shutting_down: AtomicBool::new(false), 251 + cancel_token, 238 252 min_block_size: 512, 239 253 max_block_size: 500_000, 240 254 max_last_activity: Duration::from_secs(10).as_nanos() as u64, 241 255 }) 242 256 } 243 257 244 - pub fn shutdown(&self) -> AppResult<()> { 245 - self.shutting_down.store(true, AtomicOrdering::Release); 246 - self.sync(true) 258 + pub fn shutting_down(&self) -> impl Future<Output = ()> { 259 + self.cancel_token.cancelled() 247 260 } 248 261 249 262 pub fn is_shutting_down(&self) -> bool { 250 - self.shutting_down.load(AtomicOrdering::Acquire) 263 + self.cancel_token.is_cancelled() 251 264 } 252 265 253 266 pub fn sync(&self, all: bool) -> AppResult<()> { 254 - let mut execs = Vec::with_capacity(self.hits.len()); 267 + // prepare all the data 268 + let mut data = Vec::with_capacity(self.hits.len()); 255 269 let _guard = scc::ebr::Guard::new(); 256 - for (nsid, tree) in self.hits.iter(&_guard) { 257 - let count = tree.item_count(); 258 - let is_max_block_size = count > self.min_block_size.max(tree.suggested_block_size()); 259 - let is_too_old = tree.since_last_activity() > self.max_last_activity; 260 - if count > 0 && (all || is_max_block_size || is_too_old) { 261 - let nsid = nsid.clone(); 262 - let tree = tree.clone(); 263 - let max_block_size = self.max_block_size; 264 - execs.push(move || { 265 - 
loop { 266 - let synced = match tree.sync(max_block_size) { 267 - Ok(synced) => synced, 268 - Err(err) => { 269 - tracing::error!("failed to sync {nsid}: {err}"); 270 - break; 271 - } 272 - }; 273 - if synced == 0 { 274 - break; 275 - } 276 - tracing::info!("synced {synced} of {nsid} to db"); 277 - } 278 - }); 270 + for (_, handle) in self.hits.iter(&_guard) { 271 + let block_size = self 272 + .max_block_size 273 + .min(self.min_block_size.max(handle.suggested_block_size())); 274 + let count = handle.item_count(); 275 + let data_count = count / block_size; 276 + let is_too_old = handle.since_last_activity() > self.max_last_activity; 277 + if count > 0 && (all || data_count > 0 || is_too_old) { 278 + for i in 0..data_count { 279 + data.push((i, handle.clone(), block_size)); 280 + } 281 + // only sync remainder if we haven't met block size 282 + let remainder = count % block_size; 283 + if data_count == 0 && remainder > 0 { 284 + data.push((data_count, handle.clone(), remainder)); 285 + } 279 286 } 280 287 } 281 288 drop(_guard); 282 289 283 - for exec in execs { 284 - self.syncpool.execute(exec); 290 + // process the blocks 291 + let mut blocks = Vec::with_capacity(data.len()); 292 + data.into_par_iter() 293 + .map(|(i, handle, max_block_size)| { 294 + handle 295 + .encode_block(max_block_size) 296 + .transpose() 297 + .map(|r| r.map(|block| (i, block, handle.clone()))) 298 + }) 299 + .collect_into_vec(&mut blocks); 300 + 301 + // execute into db 302 + for item in blocks.into_iter() { 303 + let Some((i, block, handle)) = item.transpose()? else { 304 + continue; 305 + }; 306 + self.sync_pool 307 + .execute(move || match handle.tree.insert(block.key, block.data) { 308 + Ok(_) => { 309 + tracing::info!("[{i}] synced {} of {} to db", block.written, handle.nsid) 310 + } 311 + Err(err) => tracing::error!("failed to sync block: {}", err), 312 + }); 285 313 } 286 - self.syncpool.join(); 314 + self.sync_pool.join(); 287 315 288 316 Ok(()) 289 317 }
+51 -20
server/src/main.rs
··· 49 49 None => {} 50 50 } 51 51 52 - let db = Arc::new(Db::new(".fjall_data").expect("couldnt create db")); 52 + let cancel_token = CancellationToken::new(); 53 + 54 + let db = 55 + Arc::new(Db::new(".fjall_data", cancel_token.child_token()).expect("couldnt create db")); 53 56 54 57 rustls::crypto::ring::default_provider() 55 58 .install_default() ··· 64 67 } 65 68 }; 66 69 67 - let cancel_token = CancellationToken::new(); 68 - 70 + let (event_tx, mut event_rx) = tokio::sync::mpsc::channel(1000); 69 71 let consume_events = tokio::spawn({ 70 72 let consume_cancel = cancel_token.child_token(); 71 - let db = db.clone(); 72 73 async move { 73 74 jetstream.connect().await?; 74 75 loop { ··· 78 79 let Some(record) = EventRecord::from_jetstream(event) else { 79 80 continue; 80 81 }; 81 - let db = db.clone(); 82 - tokio::task::spawn_blocking(move || { 83 - if let Err(err) = db.record_event(record) { 84 - tracing::error!("failed to record event: {}", err); 85 - } 86 - }); 82 + event_tx.send(record).await?; 87 83 } 88 84 Err(err) => return Err(err), 89 85 }, ··· 93 89 } 94 90 }); 95 91 96 - std::thread::spawn({ 92 + let ingest_events = std::thread::spawn({ 97 93 let db = db.clone(); 98 94 move || { 95 + let mut buffer = Vec::new(); 99 96 loop { 100 - if db.is_shutting_down() { 97 + let read = event_rx.blocking_recv_many(&mut buffer, 100); 98 + if let Err(err) = db.ingest_events(buffer.drain(..)) { 99 + tracing::error!("failed to ingest events: {}", err); 100 + } 101 + if read == 0 || db.is_shutting_down() { 101 102 break; 102 103 } 103 - match db.sync(false) { 104 - Ok(_) => (), 105 - Err(e) => tracing::error!("failed to sync db: {}", e), 104 + } 105 + } 106 + }); 107 + 108 + let sync_task = tokio::task::spawn({ 109 + let db = db.clone(); 110 + async move { 111 + loop { 112 + let sync_db = tokio::task::spawn_blocking({ 113 + let db = db.clone(); 114 + move || { 115 + if db.is_shutting_down() { 116 + return; 117 + } 118 + match db.sync(false) { 119 + Ok(_) => (), 120 + 
Err(e) => tracing::error!("failed to sync db: {}", e), 121 + } 122 + } 123 + }); 124 + tokio::select! { 125 + _ = sync_db => {} 126 + _ = db.shutting_down() => break, 127 + } 128 + tokio::select! { 129 + _ = tokio::time::sleep(std::time::Duration::from_secs(10)) => {} 130 + _ = db.shutting_down() => break, 106 131 } 107 - std::thread::sleep(std::time::Duration::from_secs(10)); 108 132 } 109 133 } 110 134 }); ··· 130 154 } 131 155 132 156 tracing::info!("shutting down..."); 133 - db.shutdown().expect("couldnt shutdown db"); 157 + cancel_token.cancel(); 158 + ingest_events.join().expect("failed to join ingest events"); 159 + sync_task.await.expect("cant join sync task"); 160 + db.sync(true).expect("cant sync db"); 134 161 } 135 162 136 163 fn debug() { 137 - let db = Db::new(".fjall_data").expect("couldnt create db"); 164 + let db = Db::new(".fjall_data", CancellationToken::new()).expect("couldnt create db"); 138 165 for nsid in db.get_nsids() { 139 166 let nsid = nsid.deref(); 140 167 for hit in db.get_hits(nsid, ..) { ··· 145 172 } 146 173 147 174 fn compact() { 148 - let from = Arc::new(Db::new(".fjall_data_from").expect("couldnt create db")); 149 - let to = Arc::new(Db::new(".fjall_data_to").expect("couldnt create db")); 175 + let cancel_token = CancellationToken::new(); 176 + let from = Arc::new( 177 + Db::new(".fjall_data_from", cancel_token.child_token()).expect("couldnt create db"), 178 + ); 179 + let to = 180 + Arc::new(Db::new(".fjall_data_to", cancel_token.child_token()).expect("couldnt create db")); 150 181 151 182 let nsids = from.get_nsids().collect::<Vec<_>>(); 152 183 let mut threads = Vec::with_capacity(nsids.len());
+13 -12
server/src/utils.rs
··· 9 9 #[derive(Debug)] 10 10 pub struct RateTracker<const BUCKET_WINDOW: u64> { 11 11 buckets: Vec<AtomicU64>, 12 + last_bucket_time: AtomicU64, 12 13 bucket_duration_nanos: u64, 13 14 window_duration: Duration, 14 - last_bucket_time: AtomicU64, 15 15 start_time: u64, // raw time when tracker was created 16 16 } 17 17 ··· 39 39 } 40 40 } 41 41 42 + #[inline(always)] 43 + fn elapsed(&self) -> u64 { 44 + CLOCK.delta_as_nanos(self.start_time, CLOCK.raw()) 45 + } 46 + 42 47 /// record an event 43 48 pub fn observe(&self) { 44 - let now = CLOCK.raw(); 45 - self.maybe_advance_buckets(now); 49 + self.maybe_advance_buckets(); 46 50 47 - let bucket_index = self.get_current_bucket_index(now); 51 + let bucket_index = self.get_current_bucket_index(); 48 52 self.buckets[bucket_index].fetch_add(1, Ordering::Relaxed); 49 53 } 50 54 51 55 /// get the current rate in events per second 52 56 pub fn rate(&self) -> f64 { 53 - let now = CLOCK.raw(); 54 - self.maybe_advance_buckets(now); 57 + self.maybe_advance_buckets(); 55 58 56 59 let total_events: u64 = self 57 60 .buckets ··· 62 65 total_events as f64 / self.window_duration.as_secs_f64() 63 66 } 64 67 65 - fn get_current_bucket_index(&self, now: u64) -> usize { 66 - let elapsed_nanos = CLOCK.delta_as_nanos(self.start_time, now); 67 - let bucket_number = elapsed_nanos / self.bucket_duration_nanos; 68 + fn get_current_bucket_index(&self) -> usize { 69 + let bucket_number = self.elapsed() / self.bucket_duration_nanos; 68 70 (bucket_number as usize) % self.buckets.len() 69 71 } 70 72 71 - fn maybe_advance_buckets(&self, now: u64) { 72 - let elapsed_nanos = CLOCK.delta_as_nanos(self.start_time, now); 73 + fn maybe_advance_buckets(&self) { 73 74 let current_bucket_time = 74 - (elapsed_nanos / self.bucket_duration_nanos) * self.bucket_duration_nanos; 75 + (self.elapsed() / self.bucket_duration_nanos) * self.bucket_duration_nanos; 75 76 let last_bucket_time = self.last_bucket_time.load(Ordering::Relaxed); 76 77 77 78 if 
current_bucket_time > last_bucket_time {