···11use crate::types::{BroadcastEvent, RepoState};
22+use fjall::config::BlockSizePolicy;
23use fjall::{Database, Keyspace, KeyspaceCreateOptions, OwnedWriteBatch, PersistMode, Slice};
34use jacquard::IntoStatic;
45use jacquard_common::types::string::Did;
···89910use std::sync::Arc;
10111212+pub mod filter;
1113pub mod keys;
1214pub mod types;
1315···3032 pub resync_buffer: Keyspace,
3133 pub events: Keyspace,
3234 pub counts: Keyspace,
3535+ pub filter: Keyspace,
3336 pub event_tx: broadcast::Sender<BroadcastEvent>,
3437 pub next_event_id: Arc<AtomicU64>,
3538 pub counts_map: HashMap<SmolStr, u64>,
···83868487impl Db {
8588 pub fn open(cfg: &crate::config::Config) -> Result<Self> {
8989+ const fn kb(v: u32) -> u32 {
9090+ v * 1024
9191+ }
9292+8693 let db = Database::builder(&cfg.database_path)
8794 .cache_size(cfg.cache_size * 2_u64.pow(20) / 2)
8895 .manual_journal_persist(true)
···105112 let repos = open_ks(
106113 "repos",
107114 opts()
115115+ // most lookups hit since repo must exist after discovery
116116+ // we don't hit here if it's not tracked anyway (that happens in filter)
108117 .expect_point_read_hits(true)
109109- .max_memtable_size(cfg.db_repos_memtable_size_mb * 1024 * 1024),
118118+ .max_memtable_size(cfg.db_repos_memtable_size_mb * 1024 * 1024)
119119+ .data_block_size_policy(BlockSizePolicy::all(kb(4))),
110120 )?;
111121 let blocks = open_ks(
112122 "blocks",
113123 opts()
114124 // point reads are used a lot by stream
115125 .expect_point_read_hits(true)
116116- .max_memtable_size(cfg.db_blocks_memtable_size_mb * 1024 * 1024),
126126+ .max_memtable_size(cfg.db_blocks_memtable_size_mb * 1024 * 1024)
127127+ // 32 - 64 kb is probably fine, as the newer blocks will be in the first levels
128128+ // and any consumers will probably be streaming the newer events...
129129+ .data_block_size_policy(BlockSizePolicy::new([kb(4), kb(8), kb(32), kb(64)])),
117130 )?;
118118- let cursors = open_ks("cursors", opts().expect_point_read_hits(true))?;
131131+ let records = open_ks(
132132+ "records",
133133+ // point reads might miss when using getRecord
134134+        // but we assume that's not going to be used much... (todo: should be a config option maybe?)
135135+ // since this keyspace is big, turning off bloom filters will help a lot
136136+ opts()
137137+ .expect_point_read_hits(true)
138138+ .max_memtable_size(cfg.db_records_memtable_size_mb * 1024 * 1024)
139139+ .data_block_size_policy(BlockSizePolicy::all(kb(8))),
140140+ )?;
141141+ let cursors = open_ks(
142142+ "cursors",
143143+ opts()
144144+ // cursor point reads hit almost 100% of the time
145145+ .expect_point_read_hits(true)
146146+ .data_block_size_policy(BlockSizePolicy::all(kb(1))),
147147+ )?;
119148 let pending = open_ks(
120149 "pending",
121121- opts().max_memtable_size(cfg.db_pending_memtable_size_mb * 1024 * 1024),
150150+ opts()
151151+                // iterated over as a queue; no point reads are used, so bloom filters are disabled
152152+ .expect_point_read_hits(true)
153153+ .max_memtable_size(cfg.db_pending_memtable_size_mb * 1024 * 1024)
154154+ .data_block_size_policy(BlockSizePolicy::all(kb(4))),
122155 )?;
123123- let resync = open_ks("resync", opts())?;
124124- let resync_buffer = open_ks("resync_buffer", opts())?;
156156+ // resync point reads often miss (because most repos aren't resyncing), so keeping the bloom filter helps avoid disk hits
157157+ let resync = open_ks(
158158+ "resync",
159159+ opts().data_block_size_policy(BlockSizePolicy::all(kb(8))),
160160+ )?;
161161+ let resync_buffer = open_ks(
162162+ "resync_buffer",
163163+ opts()
164164+ // iterated during backfill, no point reads
165165+ .expect_point_read_hits(true)
166166+ .data_block_size_policy(BlockSizePolicy::all(kb(32))),
167167+ )?;
125168 let events = open_ks(
126169 "events",
127127- opts().max_memtable_size(cfg.db_events_memtable_size_mb * 1024 * 1024),
170170+ opts()
171171+ // only iterators are used here, no point reads
172172+ .expect_point_read_hits(true)
173173+ .max_memtable_size(cfg.db_events_memtable_size_mb * 1024 * 1024)
174174+ .data_block_size_policy(BlockSizePolicy::new([kb(16), kb(32)])),
128175 )?;
129129- let counts = open_ks("counts", opts().expect_point_read_hits(true))?;
176176+ let counts = open_ks(
177177+ "counts",
178178+ opts()
179179+ // count increments hit because counters are mostly pre-initialized
180180+ .expect_point_read_hits(true)
181181+ // the data is very small
182182+ .data_block_size_policy(BlockSizePolicy::all(kb(1))),
183183+ )?;
130184131131- let records = open_ks(
132132- "records",
133133- opts().max_memtable_size(cfg.db_records_memtable_size_mb * 1024 * 1024),
185185+ // filter handles high-volume point reads (checking explicit DID includes and excludes from firehose)
186186+ // so it needs the bloom filter
187187+ let filter = open_ks(
188188+ "filter",
189189+            // this can be pretty small since the DIDs won't be compressed that well anyhow
190190+ opts().data_block_size_policy(BlockSizePolicy::all(kb(1))),
134191 )?;
135192136193 let mut last_id = 0;
···170227 resync_buffer,
171228 events,
172229 counts,
230230+ filter,
173231 event_tx,
174232 counts_map,
175233 next_event_id: Arc::new(AtomicU64::new(last_id + 1)),