this repo has no description

WIP: bsky post UF data structure

+532 -65
+532 -65
indexer/src/storage/mod.rs
use std::{
    collections::{hash_map::Entry, HashMap},
    hash::Hash,
};

// pub mod memory;

/// Identifier of a post: a `did` string plus an `rkey` string.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct BskyPostId {
    did: String,
    rkey: String,
}

impl BskyPostId {
    /// Builds an id by copying `did` and `rkey` into owned strings.
    fn from(did: &str, rkey: &str) -> BskyPostId {
        BskyPostId { did: String::from(did), rkey: String::from(rkey) }
    }
}

/// Selects which of the three tracked graphs a query refers to.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SubgraphType {
    /// Components formed by reply edges only.
    Reply,
    /// Components formed by quote edges only.
    Quote,
    /// Components formed by reply and quote edges together.
    ReplyQuote,
}

/// Reply reference carried by a post: the post it answers and the root it claims.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct BskyPostReplyTo<Id> {
    target: Id,
    root: Id,
}

/// The parts of a post record that matter for graph connectivity.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct BskyPostRecord<Id> {
    id: Id,
    reply_to: Option<BskyPostReplyTo<Id>>,
    quote_of: Option<Id>,
}

type UFIndex = u32;
type UFSize = u32;

/// Sparse union-find over `UFIndex` values (union by size, path compression).
///
/// Invariant: every key of `parents` reaches a representative `r` with
/// `parents[r] == r`, and `sizes` has an entry for exactly those representatives.
#[derive(Debug, Default)]
struct DisjointSet {
    parents: HashMap<UFIndex, UFIndex>,
    sizes: HashMap<UFIndex, UFSize>,
}

impl DisjointSet {
    /// Starts tracking `idx` as a singleton component; no-op if already tracked.
    fn ensure(&mut self, idx: UFIndex) {
        if let Entry::Vacant(slot) = self.parents.entry(idx) {
            slot.insert(idx);
            self.sizes.insert(idx, 1);
        }
    }

    /// Returns the representative of `idx`, or `None` when `idx` is not tracked.
    fn find(&mut self, idx: UFIndex) -> Option<UFIndex> {
        // First pass: walk up to the representative.
        let mut rep = idx;
        loop {
            let parent = *self.parents.get(&rep)?;
            if parent == rep {
                break;
            }
            rep = parent;
        }

        // Second pass: point every node on the walked path straight at the representative.
        let mut curr = idx;
        while curr != rep {
            // `insert` hands back the previous parent, which is the next node on the path.
            curr = self.parents.insert(curr, rep).unwrap_or(rep);
        }
        Some(rep)
    }

    /// Merges the components of `a` and `b`, tracking either one first if needed.
    ///
    /// The larger component's representative survives; on a tie `a`'s does.
    fn union(&mut self, a: UFIndex, b: UFIndex) {
        self.ensure(a);
        self.ensure(b);

        let (root_a, root_b) = match (self.find(a), self.find(b)) {
            (Some(ra), Some(rb)) if ra != rb => (ra, rb),
            _ => return,
        };

        let size_a = self.sizes.get(&root_a).copied().unwrap_or(1);
        let size_b = self.sizes.get(&root_b).copied().unwrap_or(1);
        let (winner, loser) = if size_a >= size_b { (root_a, root_b) } else { (root_b, root_a) };

        self.parents.insert(loser, winner);
        self.sizes.remove(&loser);
        self.sizes.insert(winner, size_a + size_b);
    }

    /// Size of the component containing `idx`, or `None` when `idx` is not tracked.
    fn component_size(&mut self, idx: UFIndex) -> Option<UFSize> {
        let rep = self.find(idx)?;
        self.sizes.get(&rep).copied()
    }
}

/// Tracks connected components of posts in three graphs at once: reply edges,
/// quote edges, and the union of both.
///
/// Ids are mapped to dense `UFIndex` values on first sight; each graph only
/// contains the ids that appeared on one of its edges.
#[derive(Debug)]
struct BskyPostUnionFind<Id> {
    id_to_index: HashMap<Id, UFIndex>,
    replies: DisjointSet,
    quotes: DisjointSet,
    reply_quotes: DisjointSet,
    next_index: UFIndex,
}

impl<Id: Clone + Eq + Hash> BskyPostUnionFind<Id> {
    /// Creates an empty structure with no known posts.
    pub fn new() -> BskyPostUnionFind<Id> {
        BskyPostUnionFind {
            id_to_index: HashMap::new(),
            replies: DisjointSet::default(),
            quotes: DisjointSet::default(),
            reply_quotes: DisjointSet::default(),
            next_index: 0,
        }
    }

    /// Returns the index assigned to `id`, allocating the next free one on first sight.
    ///
    /// # Panics
    ///
    /// Panics when the `UFIndex` counter cannot be advanced any further; wrapping
    /// around would silently alias two different posts to the same index.
    fn get_index(&mut self, id: &Id) -> UFIndex {
        if let Some(&idx) = self.id_to_index.get(id) {
            return idx;
        }
        let idx = self.next_index;
        self.next_index = idx.checked_add(1).expect("UFIndex space exhausted");
        // Clone only on the insertion path; lookups of known ids stay allocation-free.
        self.id_to_index.insert(id.clone(), idx);
        idx
    }

    /// Records the reply and quote edges of `record`.
    ///
    /// Returns `false` (and indexes nothing) when the record has neither a reply
    /// reference nor a quote; returns `true` otherwise.
    ///
    /// A reply joins the post, its target, and its claimed root in the reply graph
    /// and in the combined graph. A quote joins the post and the quoted post in the
    /// quote graph and in the combined graph. Records are merged as given: a
    /// self-referencing record, or a claimed root that was previously seen as a
    /// reply, merges components instead of panicking.
    pub fn ingest_post(&mut self, record: BskyPostRecord<Id>) -> bool {
        let BskyPostRecord { id, reply_to, quote_of } = record;
        if reply_to.is_none() && quote_of.is_none() {
            return false;
        }
        let post_idx = self.get_index(&id);

        if let Some(reply_to) = reply_to {
            let root_idx = self.get_index(&reply_to.root);
            let parent_idx = self.get_index(&reply_to.target);

            self.replies.union(root_idx, parent_idx);
            self.replies.union(parent_idx, post_idx);
            self.reply_quotes.union(root_idx, parent_idx);
            self.reply_quotes.union(parent_idx, post_idx);
        }

        if let Some(quoted) = quote_of {
            let quoted_idx = self.get_index(&quoted);

            self.quotes.union(quoted_idx, post_idx);
            self.reply_quotes.union(quoted_idx, post_idx);
        }

        true
    }

    /// Number of posts in the component of `id` within the selected graph.
    ///
    /// Returns `None` when `id` has never been seen, or when it is known but has
    /// no edge in the selected graph (for example a quote-only post queried with
    /// `SubgraphType::Reply`). Takes `&mut self` because lookups compress paths.
    pub fn component_size(&mut self, subgraph_type: SubgraphType, id: &Id) -> Option<UFSize> {
        let idx = *self.id_to_index.get(id)?;
        let graph = match subgraph_type {
            SubgraphType::Reply => &mut self.replies,
            SubgraphType::Quote => &mut self.quotes,
            SubgraphType::ReplyQuote => &mut self.reply_quotes,
        };
        graph.component_size(idx)
    }
}

#[cfg(test)]
mod tests {
    use super::{BskyPostId, BskyPostRecord, BskyPostReplyTo, BskyPostUnionFind, SubgraphType};

    /// The four ids shared by every scenario, in `id1..id4` order.
    fn sample_ids() -> [BskyPostId; 4] {
        [
            BskyPostId::from("a", "1"),
            BskyPostId::from("b", "1"),
            BskyPostId::from("a", "2"),
            BskyPostId::from("a", "3"),
        ]
    }

    fn reply(post: &BskyPostId, target: &BskyPostId, root: &BskyPostId) -> BskyPostRecord<BskyPostId> {
        BskyPostRecord {
            id: post.clone(),
            reply_to: Some(BskyPostReplyTo { target: target.clone(), root: root.clone() }),
            quote_of: None,
        }
    }

    fn quote(post: &BskyPostId, quoted: &BskyPostId) -> BskyPostRecord<BskyPostId> {
        BskyPostRecord { id: post.clone(), reply_to: None, quote_of: Some(quoted.clone()) }
    }

    fn assert_all_sizes(
        uf: &mut BskyPostUnionFind<BskyPostId>,
        kinds: &[SubgraphType],
        ids: &[BskyPostId],
        expected: Option<u32>,
    ) {
        for &kind in kinds {
            for id in ids {
                assert_eq!(uf.component_size(kind, id), expected, "{:?} size of {:?}", kind, id);
            }
        }
    }

    /// Ingests `(post, target)` replies (1-based ids, root is always id1) in the given order.
    fn check_reply_thread(order: [(usize, usize); 3]) {
        let ids = sample_ids();
        let mut uf = BskyPostUnionFind::new();
        for &(post, target) in order.iter() {
            uf.ingest_post(reply(&ids[post - 1], &ids[target - 1], &ids[0]));
        }
        assert_all_sizes(&mut uf, &[SubgraphType::Reply, SubgraphType::ReplyQuote], &ids, Some(4));
    }

    /// Ingests `(post, quoted)` quotes (1-based ids) in the given order.
    fn check_quote_chain(order: [(usize, usize); 3]) {
        let ids = sample_ids();
        let mut uf = BskyPostUnionFind::new();
        for &(post, quoted) in order.iter() {
            uf.ingest_post(quote(&ids[post - 1], &ids[quoted - 1]));
        }
        assert_all_sizes(&mut uf, &[SubgraphType::Quote, SubgraphType::ReplyQuote], &ids, Some(4));
    }

    #[test]
    fn test_simple_reply_thread_1() {
        check_reply_thread([(2, 1), (3, 2), (4, 3)]);
    }

    #[test]
    fn test_simple_reply_thread_2() {
        check_reply_thread([(4, 3), (3, 2), (2, 1)]);
    }

    #[test]
    fn test_simple_quote_chain_1() {
        check_quote_chain([(2, 1), (3, 2), (4, 3)]);
    }

    #[test]
    fn test_simple_quote_chain_2() {
        check_quote_chain([(2, 1), (4, 3), (3, 2)]);
    }

    #[test]
    fn test_simple_quote_chain_3() {
        check_quote_chain([(3, 2), (2, 1), (4, 3)]);
    }

    #[test]
    fn test_simple_quote_chain_4() {
        check_quote_chain([(3, 2), (4, 3), (2, 1)]);
    }

    #[test]
    fn test_simple_quote_chain_5() {
        check_quote_chain([(4, 3), (2, 1), (3, 2)]);
    }

    #[test]
    fn test_simple_quote_chain_6() {
        check_quote_chain([(4, 3), (3, 2), (2, 1)]);
    }

    #[test]
    fn test_known_id_outside_subgraph() {
        let [id1, id2, id3, id4] = sample_ids();
        let mut uf = BskyPostUnionFind::new();
        uf.ingest_post(quote(&id2, &id1));
        uf.ingest_post(reply(&id4, &id3, &id3));

        // Quote-only posts are not part of the reply graph and vice versa.
        assert_eq!(uf.component_size(SubgraphType::Reply, &id1), None);
        assert_eq!(uf.component_size(SubgraphType::Reply, &id2), None);
        assert_eq!(uf.component_size(SubgraphType::Quote, &id3), None);
        assert_eq!(uf.component_size(SubgraphType::Quote, &id4), None);
        assert_eq!(uf.component_size(SubgraphType::Quote, &id1), Some(2));
        assert_eq!(uf.component_size(SubgraphType::Reply, &id4), Some(2));
    }

    #[test]
    fn test_conflicting_root_claims() {
        let [id1, id2, id3, _] = sample_ids();
        let mut uf = BskyPostUnionFind::new();
        uf.ingest_post(reply(&id2, &id1, &id1));
        // id2 was seen as a reply, yet this record claims it as a thread root.
        uf.ingest_post(reply(&id3, &id2, &id2));

        assert_all_sizes(
            &mut uf,
            &[SubgraphType::Reply, SubgraphType::ReplyQuote],
            &[id1, id2, id3],
            Some(3),
        );
    }

    #[test]
    fn test_post_without_edges_is_ignored() {
        let [id1, _, _, _] = sample_ids();
        let mut uf = BskyPostUnionFind::new();
        let ingested = uf.ingest_post(BskyPostRecord { id: id1.clone(), reply_to: None, quote_of: None });

        assert!(!ingested);
        assert_eq!(uf.component_size(SubgraphType::ReplyQuote, &id1), None);
    }
}