Tracks lexicons and how many times they appeared on the jetstream.

fix(server): implement max items on get_hits so we don't read every block when a limit is requested

ptr.pet f4563532 dc9df83a

verified
+14 -6
+1 -1
server/src/api.rs
··· 177 177 let from = params.to.map(Bound::Included).unwrap_or(Bound::Unbounded); 178 178 let to = params.from.map(Bound::Included).unwrap_or(Bound::Unbounded); 179 179 let maybe_hits = db 180 - .get_hits(&params.nsid, HitsRange { from, to }) 180 + .get_hits(&params.nsid, HitsRange { from, to }, MAX_HITS) 181 181 .take(MAX_HITS); 182 182 let mut hits = Vec::with_capacity(maybe_hits.size_hint().0); 183 183
+7 -3
server/src/db/mod.rs
··· 383 383 &self, 384 384 nsid: &str, 385 385 range: impl RangeBounds<u64> + std::fmt::Debug, 386 + max_items: usize, 386 387 ) -> impl Iterator<Item = AppResult<handle::Item>> { 387 388 let start_limit = match range.start_bound().cloned() { 388 389 Bound::Included(start) => start, ··· 401 402 }; 402 403 403 404 // let mut ts = CLOCK.now(); 405 + let mut current_item_count = 0; 404 406 let map_block = move |(key, val)| { 405 407 let mut key_reader = Cursor::new(key); 406 408 let start_timestamp = key_reader.read_varint::<u64>()?; 407 409 // let end_timestamp = key_reader.read_varint::<u64>()?; 408 410 if start_timestamp < start_limit { 409 411 // tracing::info!( 410 - // "skipped block with timestamps {start_timestamp}..{end_timestamp} because {start_limit} is greater" 412 + // "stopped at block with timestamps {start_timestamp}..{end_timestamp} because {start_limit} is greater" 411 413 // ); 412 414 return Ok(None); 413 - } else { 414 - // tracing::info!("using block with timestamp {start_timestamp}..{end_timestamp}"); 415 415 } 416 416 let decoder = handle::ItemDecoder::new(Cursor::new(val), start_timestamp)?; 417 + current_item_count += decoder.item_count(); 418 + if current_item_count > max_items { 419 + return Ok(None); 420 + } 417 421 // tracing::info!( 418 422 // "took {}ns to get block with size {}", 419 423 // ts.elapsed().as_nanos(),
+6 -2
server/src/main.rs
··· 1 - use std::{ops::Deref, time::Duration, u64}; 1 + use std::{ops::Deref, time::Duration, u64, usize}; 2 2 3 3 use itertools::Itertools; 4 4 use rclite::Arc; ··· 300 300 threads.push(std::thread::spawn(move || { 301 301 tracing::info!("{}: migrating...", nsid.deref()); 302 302 let mut count = 0_u64; 303 - for hits in from.get_hits(&nsid, ..).chunks(100000).into_iter() { 303 + for hits in from 304 + .get_hits(&nsid, .., usize::MAX) 305 + .chunks(100000) 306 + .into_iter() 307 + { 304 308 to.ingest_events(hits.map(|hit| { 305 309 count += 1; 306 310 let hit = hit.expect("cant decode hit");