Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

Add support for reverse ordering in link queries (Issue #1) #6

closed opened by seoul.systems targeting main from seoul.systems/microcosm-rs: order_query

The following adds support for reverse-chronological ordering when fetching back-links.

We add a non-required "reverse" query parameter that allows reversing the default order of returned back-links.

Supports both current storage backends (mem_store and rocks_store).

Labels

None yet.

Participants 2
AT URI
at://did:plc:53wellrw53o7sw4zlpfenvuh/sh.tangled.repo.pull/3majn54wt2l22
+405 -42
Diff #1
+31 -4
constellation/src/server/mod.rs
··· 17 17 use tokio::task::spawn_blocking; 18 18 use tokio_util::sync::CancellationToken; 19 19 20 - use crate::storage::{LinkReader, StorageStats}; 20 + use crate::storage::{LinkReader, Order, StorageStats}; 21 21 use crate::{CountsByCount, Did, RecordId}; 22 22 23 23 mod acceptable; ··· 239 239 /// Set the max number of links to return per page of results 240 240 #[serde(default = "get_default_cursor_limit")] 241 241 limit: u64, 242 + /// Allow returning links in reverse order (default: false) 243 + #[serde(default)] 244 + reverse: bool, 242 245 } 243 246 #[derive(Serialize)] 244 247 struct OtherSubjectCount { ··· 295 298 296 299 let path_to_other = format!(".{}", query.path_to_other); 297 300 301 + let order = if query.reverse { 302 + Order::OldestToNewest 303 + } else { 304 + Order::NewestToOldest 305 + }; 306 + 298 307 let paged = store 299 308 .get_many_to_many_counts( 300 309 &query.subject, 301 310 collection, 302 311 &path, 303 312 &path_to_other, 313 + order, 304 314 limit, 305 315 cursor_key, 306 316 &filter_dids, ··· 409 419 /// Set the max number of links to return per page of results 410 420 #[serde(default = "get_default_cursor_limit")] 411 421 limit: u64, 412 - // TODO: allow reverse (er, forward) order as well 422 + /// Allow returning links in reverse order (default: false) 423 + #[serde(default)] 424 + reverse: bool, 413 425 } 414 426 #[derive(Template, Serialize)] 415 427 #[template(path = "get-backlinks.html.j2")] ··· 455 467 }; 456 468 let path = format!(".{path}"); 457 469 470 + let order = if query.reverse { 471 + Order::OldestToNewest 472 + } else { 473 + Order::NewestToOldest 474 + }; 475 + 458 476 let paged = store 459 477 .get_links( 460 478 &query.subject, 461 479 collection, 462 480 &path, 481 + order, 463 482 limit, 464 483 until, 465 484 &filter_dids, ··· 508 527 from_dids: Option<String>, // comma separated: gross 509 528 #[serde(default = "get_default_cursor_limit")] 510 529 limit: u64, 511 - // TODO: allow reverse (er, forward) order as well 530 + /// Allow returning links in reverse order (default: false) 531 + #[serde(default)] 532 + reverse: bool, 512 533 } 513 534 #[derive(Template, Serialize)] 514 535 #[template(path = "links.html.j2")] ··· 557 578 } 558 579 } 559 580 581 + let order = if query.reverse { 582 + Order::OldestToNewest 583 + } else { 584 + Order::NewestToOldest 585 + }; 586 + 560 587 let paged = store 561 588 .get_links( 562 589 &query.target, 563 590 &query.collection, 564 591 &query.path, 592 + order, 565 593 limit, 566 594 until, 567 595 &filter_dids, ··· 594 622 path: String, 595 623 cursor: Option<OpaqueApiCursor>, 596 624 limit: Option<u64>, 597 - // TODO: allow reverse (er, forward) order as well 598 625 } 599 626 #[derive(Template, Serialize)] 600 627 #[template(path = "dids.html.j2")]
+50 -12
constellation/src/storage/mem_store.rs
··· 1 1 use super::{ 2 - LinkReader, LinkStorage, PagedAppendingCollection, PagedOrderedCollection, StorageStats, 2 + LinkReader, LinkStorage, Order, PagedAppendingCollection, PagedOrderedCollection, 3 + StorageStats, 3 4 }; 4 5 use crate::{ActionableEvent, CountsByCount, Did, RecordId}; 5 6 use anyhow::Result; ··· 140 141 collection: &str, 141 142 path: &str, 142 143 path_to_other: &str, 144 + order: Order, 143 145 limit: u64, 144 146 after: Option<String>, 145 147 filter_dids: &HashSet<Did>, ··· 157 159 let filter_to_targets: HashSet<Target> = 158 160 HashSet::from_iter(filter_to_targets.iter().map(|s| Target::new(s))); 159 161 160 - let mut grouped_counts: HashMap<Target, (u64, HashSet<Did>)> = HashMap::new(); 161 - for (did, rkey) in linkers.iter().flatten().cloned() { 162 + // the last type field here acts as an index to allow keeping track of the order in which 163 + // we encountred single elements 164 + let mut grouped_counts: HashMap<Target, (u64, HashSet<Did>, usize)> = HashMap::new(); 165 + for (idx, (did, rkey)) in linkers.iter().flatten().cloned().enumerate() { 162 166 if !filter_dids.is_empty() && !filter_dids.contains(&did) { 163 167 continue; 164 168 } ··· 184 188 .take(1) 185 189 .next() 186 190 { 187 - let e = grouped_counts.entry(fwd_target.clone()).or_default(); 191 + let e = 192 + grouped_counts 193 + .entry(fwd_target.clone()) 194 + .or_insert((0, HashSet::new(), idx)); 188 195 e.0 += 1; 189 196 e.1.insert(did.clone()); 190 197 } 191 198 } 192 199 let mut items: Vec<(String, u64, u64)> = grouped_counts 193 200 .iter() 194 - .map(|(k, (n, u))| (k.0.clone(), *n, u.len() as u64)) 201 + .map(|(k, (n, u, _))| (k.0.clone(), *n, u.len() as u64)) 195 202 .collect(); 196 - items.sort(); 203 + // Sort based on order: OldestToNewest uses descending order, NewestToOldest uses ascending 204 + match order { 205 + Order::OldestToNewest => items.sort_by(|a, b| b.cmp(a)), 206 + Order::NewestToOldest => items.sort(), 207 + } 197 208 items = items 198 209 .into_iter() 199 210 .skip_while(|(t, _, _)| after.as_ref().map(|a| t <= a).unwrap_or(false)) ··· 239 250 target: &str, 240 251 collection: &str, 241 252 path: &str, 253 + order: Order, 242 254 limit: u64, 243 255 until: Option<u64>, 244 256 filter_dids: &HashSet<Did>, ··· 276 288 }; 277 289 278 290 let total = did_rkeys.len(); 279 - let end = until 280 - .map(|u| std::cmp::min(u as usize, total)) 281 - .unwrap_or(total); 282 - let begin = end.saturating_sub(limit as usize); 283 - let next = if begin == 0 { None } else { Some(begin as u64) }; 291 + 292 + let begin: usize; 293 + let end: usize; 294 + let next: Option<u64>; 295 + 296 + match order { 297 + // OldestToNewest: start from the beginning, paginate forward 298 + Order::OldestToNewest => { 299 + begin = until.map(|u| (u) as usize).unwrap_or(0); 300 + end = std::cmp::min(begin + limit as usize, total); 301 + 302 + next = if end < total { 303 + Some(end as u64 + 1) 304 + } else { 305 + None 306 + }; 307 + } 308 + // NewestToOldest: start from the end, paginate backward 309 + Order::NewestToOldest => { 310 + end = until 311 + .map(|u| std::cmp::min(u as usize, total)) 312 + .unwrap_or(total); 313 + begin = end.saturating_sub(limit as usize); 314 + next = if begin == 0 { None } else { Some(begin as u64) }; 315 + } 316 + } 284 317 285 318 let alive = did_rkeys.iter().flatten().count(); 286 319 let gone = total - alive; 287 320 288 - let items: Vec<_> = did_rkeys[begin..end] 321 + let mut items: Vec<_> = did_rkeys[begin..end] 289 322 .iter() 290 323 .rev() 291 324 .flatten() ··· 296 329 collection: collection.to_string(), 297 330 }) 298 331 .collect(); 332 + 333 + // For OldestToNewest, reverse the items to maintain forward chronological order 334 + if order == Order::OldestToNewest { 335 + items.reverse(); 336 + } 299 337 300 338 Ok(PagedAppendingCollection { 301 339 version: (total as u64, gone as u64),
+256 -10
constellation/src/storage/mod.rs
··· 11 11 #[cfg(feature = "rocks")] 12 12 pub use rocks_store::RocksStorage; 13 13 14 + /// Ordering for paginated link queries 15 + #[derive(Debug, Clone, Copy, PartialEq, Eq)] 16 + pub enum Order { 17 + /// Newest links first (default) 18 + NewestToOldest, 19 + /// Oldest links first 20 + OldestToNewest, 21 + } 22 + 14 23 #[derive(Debug, PartialEq)] 15 24 pub struct PagedAppendingCollection<T> { 16 25 pub version: (u64, u64), // (collection length, deleted item count) // TODO: change to (total, active)? since dedups isn't "deleted" ··· 72 81 collection: &str, 73 82 path: &str, 74 83 path_to_other: &str, 84 + order: Order, 75 85 limit: u64, 76 86 after: Option<String>, 77 87 filter_dids: &HashSet<Did>, ··· 87 97 target: &str, 88 98 collection: &str, 89 99 path: &str, 100 + order: Order, 90 101 limit: u64, 91 102 until: Option<u64>, 92 103 filter_dids: &HashSet<Did>, ··· 180 191 "a.com", 181 192 "app.t.c", 182 193 ".abc.uri", 194 + Order::NewestToOldest, 183 195 100, 184 196 None, 185 197 &HashSet::default() ··· 683 695 "a.com", 684 696 "app.t.c", 685 697 ".abc.uri", 698 + Order::NewestToOldest, 686 699 100, 687 700 None, 688 701 &HashSet::default() ··· 727 740 0, 728 741 )?; 729 742 } 730 - let links = 731 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 743 + let links = storage.get_links( 744 + "a.com", 745 + "app.t.c", 746 + ".abc.uri", 747 + Order::NewestToOldest, 748 + 2, 749 + None, 750 + &HashSet::default(), 751 + )?; 732 752 let dids = storage.get_distinct_dids("a.com", "app.t.c", ".abc.uri", 2, None)?; 733 753 assert_eq!( 734 754 links, ··· 763 783 "a.com", 764 784 "app.t.c", 765 785 ".abc.uri", 786 + Order::NewestToOldest, 766 787 2, 767 788 links.next, 768 789 &HashSet::default(), ··· 801 822 "a.com", 802 823 "app.t.c", 803 824 ".abc.uri", 825 + Order::NewestToOldest, 804 826 2, 805 827 links.next, 806 828 &HashSet::default(), ··· 831 853 assert_stats(storage.get_stats()?, 5..=5, 1..=1, 5..=5); 832 854 }); 833 855 856 + test_each_storage!(get_links_reverse_order, |storage| { 857 + for i in 1..=5 { 858 + storage.push( 859 + &ActionableEvent::CreateLinks { 860 + record_id: RecordId { 861 + did: format!("did:plc:asdf-{i}").into(), 862 + collection: "app.t.c".into(), 863 + rkey: "asdf".into(), 864 + }, 865 + links: vec![CollectedLink { 866 + target: Link::Uri("a.com".into()), 867 + path: ".abc.uri".into(), 868 + }], 869 + }, 870 + 0, 871 + )?; 872 + } 873 + 874 + // Test OldestToNewest order (oldest first) 875 + let links = storage.get_links( 876 + "a.com", 877 + "app.t.c", 878 + ".abc.uri", 879 + Order::OldestToNewest, 880 + 2, 881 + None, 882 + &HashSet::default(), 883 + )?; 884 + assert_eq!( 885 + links, 886 + PagedAppendingCollection { 887 + version: (5, 0), 888 + items: vec![ 889 + RecordId { 890 + did: "did:plc:asdf-1".into(), 891 + collection: "app.t.c".into(), 892 + rkey: "asdf".into(), 893 + }, 894 + RecordId { 895 + did: "did:plc:asdf-2".into(), 896 + collection: "app.t.c".into(), 897 + rkey: "asdf".into(), 898 + }, 899 + ], 900 + next: Some(3), 901 + total: 5, 902 + } 903 + ); 904 + // Test NewestToOldest order (newest first) 905 + let links = storage.get_links( 906 + "a.com", 907 + "app.t.c", 908 + ".abc.uri", 909 + Order::NewestToOldest, 910 + 2, 911 + None, 912 + &HashSet::default(), 913 + )?; 914 + assert_eq!( 915 + links, 916 + PagedAppendingCollection { 917 + version: (5, 0), 918 + items: vec![ 919 + RecordId { 920 + did: "did:plc:asdf-5".into(), 921 + collection: "app.t.c".into(), 922 + rkey: "asdf".into(), 923 + }, 924 + RecordId { 925 + did: "did:plc:asdf-4".into(), 926 + collection: "app.t.c".into(), 927 + rkey: "asdf".into(), 928 + }, 929 + ], 930 + next: Some(3), 931 + total: 5, 932 + } 933 + ); 934 + assert_stats(storage.get_stats()?, 5..=5, 1..=1, 5..=5); 935 + }); 936 + 834 937 test_each_storage!(get_filtered_links, |storage| { 835 938 let links = storage.get_links( 836 939 "a.com", 837 940 "app.t.c", 838 941 ".abc.uri", 942 + Order::NewestToOldest, 839 943 2, 840 944 None, 841 945 &HashSet::from([Did("did:plc:linker".to_string())]), ··· 869 973 "a.com", 870 974 "app.t.c", 871 975 ".abc.uri", 976 + Order::NewestToOldest, 872 977 2, 873 978 None, 874 979 &HashSet::from([Did("did:plc:linker".to_string())]), ··· 891 996 "a.com", 892 997 "app.t.c", 893 998 ".abc.uri", 999 + Order::NewestToOldest, 894 1000 2, 895 1001 None, 896 1002 &HashSet::from([Did("did:plc:someone-else".to_string())]), ··· 938 1044 "a.com", 939 1045 "app.t.c", 940 1046 ".abc.uri", 1047 + Order::NewestToOldest, 941 1048 2, 942 1049 None, 943 1050 &HashSet::from([Did("did:plc:linker".to_string())]), ··· 967 1074 "a.com", 968 1075 "app.t.c", 969 1076 ".abc.uri", 1077 + Order::NewestToOldest, 970 1078 2, 971 1079 None, 972 1080 &HashSet::from([ ··· 999 1107 "a.com", 1000 1108 "app.t.c", 1001 1109 ".abc.uri", 1110 + Order::NewestToOldest, 1002 1111 2, 1003 1112 None, 1004 1113 &HashSet::from([Did("did:plc:someone-unknown".to_string())]), ··· 1031 1140 0, 1032 1141 )?; 1033 1142 } 1034 - let links = 1035 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 1143 + let links = storage.get_links( 1144 + "a.com", 1145 + "app.t.c", 1146 + ".abc.uri", 1147 + Order::NewestToOldest, 1148 + 2, 1149 + None, 1150 + &HashSet::default(), 1151 + )?; 1036 1152 assert_eq!( 1037 1153 links, 1038 1154 PagedAppendingCollection { ··· 1057 1173 "a.com", 1058 1174 "app.t.c", 1059 1175 ".abc.uri", 1176 + Order::NewestToOldest, 1060 1177 2, 1061 1178 links.next, 1062 1179 &HashSet::default(), ··· 1101 1218 0, 1102 1219 )?; 1103 1220 } 1104 - let links = 1105 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 1221 + let links = storage.get_links( 1222 + "a.com", 1223 + "app.t.c", 1224 + ".abc.uri", 1225 + Order::NewestToOldest, 1226 + 2, 1227 + None, 1228 + &HashSet::default(), 1229 + )?; 1106 1230 assert_eq!( 1107 1231 links, 1108 1232 PagedAppendingCollection { ··· 1141 1265 "a.com", 1142 1266 "app.t.c", 1143 1267 ".abc.uri", 1268 + Order::NewestToOldest, 1144 1269 2, 1145 1270 links.next, 1146 1271 &HashSet::default(), ··· 1185 1310 0, 1186 1311 )?; 1187 1312 } 1188 - let links = 1189 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 1313 + let links = storage.get_links( 1314 + "a.com", 1315 + "app.t.c", 1316 + ".abc.uri", 1317 + Order::NewestToOldest, 1318 + 2, 1319 + None, 1320 + &HashSet::default(), 1321 + )?; 1190 1322 assert_eq!( 1191 1323 links, 1192 1324 PagedAppendingCollection { ··· 1219 1351 "a.com", 1220 1352 "app.t.c", 1221 1353 ".abc.uri", 1354 + Order::NewestToOldest, 1222 1355 2, 1223 1356 links.next, 1224 1357 &HashSet::default(), ··· 1256 1389 0, 1257 1390 )?; 1258 1391 } 1259 - let links = 1260 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 1392 + let links = storage.get_links( 1393 + "a.com", 1394 + "app.t.c", 1395 + ".abc.uri", 1396 + Order::NewestToOldest, 1397 + 2, 1398 + None, 1399 + &HashSet::default(), 1400 + )?; 1261 1401 assert_eq!( 1262 1402 links, 1263 1403 PagedAppendingCollection { ··· 1286 1426 "a.com", 1287 1427 "app.t.c", 1288 1428 ".abc.uri", 1429 + Order::NewestToOldest, 1289 1430 2, 1290 1431 links.next, 1291 1432 &HashSet::default(), ··· 1367 1508 "a.b.c", 1368 1509 ".d.e", 1369 1510 ".f.g", 1511 + Order::NewestToOldest, 1370 1512 10, 1371 1513 None, 1372 1514 &HashSet::new(), ··· 1410 1552 "app.t.c", 1411 1553 ".abc.uri", 1412 1554 ".def.uri", 1555 + Order::NewestToOldest, 1413 1556 10, 1414 1557 None, 1415 1558 &HashSet::new(), ··· 1509 1652 "app.t.c", 1510 1653 ".abc.uri", 1511 1654 ".def.uri", 1655 + Order::NewestToOldest, 1512 1656 10, 1513 1657 None, 1514 1658 &HashSet::new(), ··· 1525 1669 "app.t.c", 1526 1670 ".abc.uri", 1527 1671 ".def.uri", 1672 + Order::NewestToOldest, 1528 1673 10, 1529 1674 None, 1530 1675 &HashSet::from_iter([Did("did:plc:fdsa".to_string())]), ··· 1541 1686 "app.t.c", 1542 1687 ".abc.uri", 1543 1688 ".def.uri", 1689 + Order::NewestToOldest, 1544 1690 10, 1545 1691 None, 1546 1692 &HashSet::new(), ··· 1551 1697 next: None, 1552 1698 } 1553 1699 ); 1700 + }); 1701 + 1702 + test_each_storage!(get_m2m_counts_reverse_order, |storage| { 1703 + // Create links from different DIDs to different targets 1704 + storage.push( 1705 + &ActionableEvent::CreateLinks { 1706 + record_id: RecordId { 1707 + did: "did:plc:user1".into(), 1708 + collection: "app.t.c".into(), 1709 + rkey: "post1".into(), 1710 + }, 1711 + links: vec![ 1712 + CollectedLink { 1713 + target: Link::Uri("a.com".into()), 1714 + path: ".abc.uri".into(), 1715 + }, 1716 + CollectedLink { 1717 + target: Link::Uri("b.com".into()), 1718 + path: ".def.uri".into(), 1719 + }, 1720 + ], 1721 + }, 1722 + 0, 1723 + )?; 1724 + storage.push( 1725 + &ActionableEvent::CreateLinks { 1726 + record_id: RecordId { 1727 + did: "did:plc:user2".into(), 1728 + collection: "app.t.c".into(), 1729 + rkey: "post1".into(), 1730 + }, 1731 + links: vec![ 1732 + CollectedLink { 1733 + target: Link::Uri("a.com".into()), 1734 + path: ".abc.uri".into(), 1735 + }, 1736 + CollectedLink { 1737 + target: Link::Uri("c.com".into()), 1738 + path: ".def.uri".into(), 1739 + }, 1740 + ], 1741 + }, 1742 + 1, 1743 + )?; 1744 + storage.push( 1745 + &ActionableEvent::CreateLinks { 1746 + record_id: RecordId { 1747 + did: "did:plc:user3".into(), 1748 + collection: "app.t.c".into(), 1749 + rkey: "post1".into(), 1750 + }, 1751 + links: vec![ 1752 + CollectedLink { 1753 + target: Link::Uri("a.com".into()), 1754 + path: ".abc.uri".into(), 1755 + }, 1756 + CollectedLink { 1757 + target: Link::Uri("d.com".into()), 1758 + path: ".def.uri".into(), 1759 + }, 1760 + ], 1761 + }, 1762 + 2, 1763 + )?; 1764 + 1765 + // Test NewestToOldest order (default order - by target ascending) 1766 + let counts = storage.get_many_to_many_counts( 1767 + "a.com", 1768 + "app.t.c", 1769 + ".abc.uri", 1770 + ".def.uri", 1771 + Order::NewestToOldest, 1772 + 10, 1773 + None, 1774 + &HashSet::new(), 1775 + &HashSet::new(), 1776 + )?; 1777 + assert_eq!(counts.items.len(), 3); 1778 + // Should be sorted by target in ascending order (alphabetical) 1779 + assert_eq!(counts.items[0].0, "b.com"); 1780 + assert_eq!(counts.items[1].0, "c.com"); 1781 + assert_eq!(counts.items[2].0, "d.com"); 1782 + 1783 + // Test OldestToNewest order (descending order - by target descending) 1784 + let counts = storage.get_many_to_many_counts( 1785 + "a.com", 1786 + "app.t.c", 1787 + ".abc.uri", 1788 + ".def.uri", 1789 + Order::OldestToNewest, 1790 + 10, 1791 + None, 1792 + &HashSet::new(), 1793 + &HashSet::new(), 1794 + )?; 1795 + assert_eq!(counts.items.len(), 3); 1796 + // Should be sorted by target in descending order (reverse alphabetical) 1797 + assert_eq!(counts.items[0].0, "d.com"); 1798 + assert_eq!(counts.items[1].0, "c.com"); 1799 + assert_eq!(counts.items[2].0, "b.com"); 1554 1800 }); 1555 1801 }
+41 -6
constellation/src/storage/rocks_store.rs
··· 1 1 use super::{ 2 - ActionableEvent, LinkReader, LinkStorage, PagedAppendingCollection, PagedOrderedCollection, 3 - StorageStats, 2 + ActionableEvent, LinkReader, LinkStorage, Order, PagedAppendingCollection, 3 + PagedOrderedCollection, StorageStats, 4 4 }; 5 5 use crate::{CountsByCount, Did, RecordId}; 6 6 use anyhow::{bail, Result}; ··· 941 941 collection: &str, 942 942 path: &str, 943 943 path_to_other: &str, 944 + order: Order, 944 945 limit: u64, 945 946 after: Option<String>, 946 947 filter_dids: &HashSet<Did>, ··· 1071 1072 } 1072 1073 1073 1074 let mut items: Vec<(String, u64, u64)> = Vec::with_capacity(grouped_counts.len()); 1075 + 1074 1076 for (target_id, (n, dids)) in &grouped_counts { 1075 1077 let Some(target) = self 1076 1078 .target_id_table ··· 1082 1084 items.push((target.0 .0, *n, dids.len() as u64)); 1083 1085 } 1084 1086 1087 + // Sort based on order: OldestToNewest uses descending order, NewestToOldest uses ascending 1088 + match order { 1089 + Order::OldestToNewest => items.sort_by(|a, b| b.cmp(a)), // descending 1090 + Order::NewestToOldest => items.sort(), // ascending 1091 + } 1092 + 1085 1093 let next = if grouped_counts.len() as u64 >= limit { 1086 1094 // yeah.... it's a number saved as a string......sorry 1087 1095 grouped_counts ··· 1127 1135 target: &str, 1128 1136 collection: &str, 1129 1137 path: &str, 1138 + order: Order, 1130 1139 limit: u64, 1131 1140 until: Option<u64>, 1132 1141 filter_dids: &HashSet<Did>, ··· 1167 1176 1168 1177 let (alive, gone) = linkers.count(); 1169 1178 let total = alive + gone; 1170 - let end = until.map(|u| std::cmp::min(u, total)).unwrap_or(total) as usize; 1171 - let begin = end.saturating_sub(limit as usize); 1172 - let next = if begin == 0 { None } else { Some(begin as u64) }; 1179 + 1180 + let end: usize; 1181 + let begin: usize; 1182 + let next: Option<u64>; 1183 + 1184 + match order { 1185 + // OldestToNewest: start from the beginning, paginate forward 1186 + Order::OldestToNewest => { 1187 + begin = until.map(|u| (u - 1) as usize).unwrap_or(0); 1188 + end = std::cmp::min(begin + limit as usize, total as usize); 1189 + 1190 + next = if end < total as usize { 1191 + Some(end as u64 + 1) 1192 + } else { 1193 + None 1194 + } 1195 + } 1196 + // NewestToOldest: start from the end, paginate backward 1197 + Order::NewestToOldest => { 1198 + end = until.map(|u| std::cmp::min(u, total)).unwrap_or(total) as usize; 1199 + begin = end.saturating_sub(limit as usize); 1200 + next = if begin == 0 { None } else { Some(begin as u64) }; 1201 + } 1202 + } 1173 1203 1174 - let did_id_rkeys = linkers.0[begin..end].iter().rev().collect::<Vec<_>>(); 1204 + let mut did_id_rkeys = linkers.0[begin..end].iter().rev().collect::<Vec<_>>(); 1205 + 1206 + // For OldestToNewest, reverse the items to maintain forward chronological order 1207 + if order == Order::OldestToNewest { 1208 + did_id_rkeys.reverse(); 1209 + } 1175 1210 1176 1211 let mut items = Vec::with_capacity(did_id_rkeys.len()); 1177 1212 // TODO: use get-many (or multi-get or whatever it's called)
+4
constellation/templates/base.html.j2
··· 40 40 padding: 0.5em 0.3em; 41 41 max-width: 100%; 42 42 } 43 + pre.code input { 44 + margin: 0; 45 + padding: 0; 46 + } 43 47 .stat { 44 48 color: #f90; 45 49 font-size: 1.618rem;
+2 -1
constellation/templates/get-backlinks.html.j2
··· 6 6 7 7 {% block content %} 8 8 9 - {% call try_it::get_backlinks(query.subject, query.source, query.did, query.limit) %} 9 + {% call try_it::get_backlinks(query.subject, query.source, query.did, query.limit, query.reverse) %} 10 10 11 11 <h2> 12 12 Links to <code>{{ query.subject }}</code> ··· 40 40 <input type="hidden" name="did" value="{{ did }}" /> 41 41 {% endfor %} 42 42 <input type="hidden" name="cursor" value={{ c|json|safe }} /> 43 + <input type="hidden" name="reverse" value="{{ query.reverse }}"> 43 44 <button type="submit">next page&hellip;</button> 44 45 </form> 45 46 {% else %}
+2
constellation/templates/get-many-to-many-counts.html.j2
··· 13 13 query.did, 14 14 query.other_subject, 15 15 query.limit, 16 + query.reverse, 16 17 ) %} 17 18 18 19 <h2> ··· 53 54 {% endfor %} 54 55 <input type="hidden" name="limit" value="{{ query.limit }}" /> 55 56 <input type="hidden" name="cursor" value={{ c|json|safe }} /> 57 + <input type="hidden" name="reverse" value="{{ query.reverse }}"> 56 58 <button type="submit">next page&hellip;</button> 57 59 </form> 58 60 {% else %}
+7 -2
constellation/templates/hello.html.j2
··· 49 49 <li><p><code>source</code>: required. Example: <code>app.bsky.feed.like:subject.uri</code></p></li> 50 50 <li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 51 51 <li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li> 52 + <li><p><code>reverse</code>: optional, return links in reverse order. Default: <code>false</code></p></li> 52 53 </ul> 53 54 54 55 <p style="margin-bottom: 0"><strong>Try it:</strong></p> 55 - {% call try_it::get_backlinks("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like:subject.uri", [""], 16) %} 56 + {% call 57 + try_it::get_backlinks("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like:subject.uri", [""], 16, false) %} 56 58 57 59 58 60 <h3 class="route"><code>GET /xrpc/blue.microcosm.links.getManyToManyCounts</code></h3> ··· 68 70 <li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 69 71 <li><p><code>otherSubject</code>: optional, filter secondary links to specific subjects. Include multiple times to filter by multiple users. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></p></li> 70 72 <li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li> 73 + <li><p><code>reverse</code>: optional, return links in reverse order. Default: <code>false</code></p></li> 71 74 </ul> 72 75 73 76 <p style="margin-bottom: 0"><strong>Try it:</strong></p> ··· 78 81 [""], 79 82 [""], 80 83 25, 84 + false, 81 85 ) %} 82 86 83 87 ··· 96 100 <li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 97 101 <li><p><code>from_dids</code> [deprecated]: optional. Use <code>did</code> instead. Example: <code>from_dids=did:plc:vc7f4oafdgxsihk4cry2xpze,did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 98 102 <li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li> 103 + <li><p><code>reverse</code>: optional, return links in reverse order. Default: <code>false</code></p></li> 99 104 </ul> 100 105 101 106 <p style="margin-bottom: 0"><strong>Try it:</strong></p> 102 - {% call try_it::links("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like", ".subject.uri", [""], 16) %} 107 + {% call try_it::links("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like", ".subject.uri", [""], 16, false) %} 103 108 104 109 105 110 <h3 class="route"><code>GET /links/distinct-dids</code></h3>
+2 -1
constellation/templates/links.html.j2
··· 6 6 7 7 {% block content %} 8 8 9 - {% call try_it::links(query.target, query.collection, query.path, query.did, query.limit) %} 9 + {% call try_it::links(query.target, query.collection, query.path, query.did, query.limit, query.reverse) %} 10 10 11 11 <h2> 12 12 Links to <code>{{ query.target }}</code> ··· 37 37 <input type="hidden" name="collection" value="{{ query.collection }}" /> 38 38 <input type="hidden" name="path" value="{{ query.path }}" /> 39 39 <input type="hidden" name="cursor" value={{ c|json|safe }} /> 40 + <input type="hidden" name="reverse" value="{{ query.reverse }}"> 40 41 <button type="submit">next page&hellip;</button> 41 42 </form> 42 43 {% else %}
+10 -6
constellation/templates/try-it-macros.html.j2
··· 1 - {% macro get_backlinks(subject, source, dids, limit) %} 1 + {% macro get_backlinks(subject, source, dids, limit, reverse) %} 2 2 <form method="get" action="/xrpc/blue.microcosm.links.getBacklinks"> 3 3 <pre class="code"><strong>GET</strong> /xrpc/blue.microcosm.links.getBacklinks 4 4 ?subject= <input type="text" name="subject" value="{{ subject }}" placeholder="at-uri, did, uri..." /> ··· 6 6 {%- for did in dids %}{% if !did.is_empty() %} 7 7 &did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %} 8 8 <span id="did-placeholder"></span> <button id="add-did">+ did filter</button> 9 - &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre> 9 + &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> 10 + &reverse= <input type="checkbox" name="reverse" value="true" checked="false"><button type="submit">get links</button></pre> 10 11 </form> 11 12 <script> 12 13 const addDidButton = document.getElementById('add-did'); ··· 24 25 </script> 25 26 {% endmacro %} 26 27 27 - {% macro get_many_to_many_counts(subject, source, pathToOther, dids, otherSubjects, limit) %} 28 + {% macro get_many_to_many_counts(subject, source, pathToOther, dids, otherSubjects, limit, reverse) %} 28 29 <form method="get" action="/xrpc/blue.microcosm.links.getManyToManyCounts"> 29 30 <pre class="code"><strong>GET</strong> /xrpc/blue.microcosm.links.getManyToManyCounts 30 31 ?subject= <input type="text" name="subject" value="{{ subject }}" placeholder="at-uri, did, uri..." /> ··· 36 37 {%- for otherSubject in otherSubjects %}{% if !otherSubject.is_empty() %} 37 38 &otherSubject= <input type="text" name="did" value="{{ otherSubject }}" placeholder="at-uri, did, uri..." />{% endif %}{% endfor %} 38 39 <span id="m2m-did-placeholder"></span> <button id="m2m-add-did">+ did filter</button> 39 - &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre> 40 + &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> 41 + &reverse= <input type="checkbox" name="reverse" value="true" checked="false"><button type="submit">get links</button></pre> 40 42 </form> 41 43 <script> 42 44 const m2mAddDidButton = document.getElementById('m2m-add-did'); ··· 66 68 </script> 67 69 {% endmacro %} 68 70 69 - {% macro links(target, collection, path, dids, limit) %} 71 + {% macro links(target, collection, path, dids, limit, reverse) %} 70 72 <form method="get" action="/links"> 71 73 <pre class="code"><strong>GET</strong> /links 72 74 ?target= <input type="text" name="target" value="{{ target }}" placeholder="target" /> ··· 75 77 {%- for did in dids %}{% if !did.is_empty() %} 76 78 &did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %} 77 79 <span id="did-placeholder"></span> <button id="add-did">+ did filter</button> 78 - &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre> 80 + &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> 81 + &reverse= <input type="checkbox" name="reverse" value="true" checked="false"> 82 + <button type="submit">get links</button></pre> 79 83 </form> 80 84 <script> 81 85 const addDidButton = document.getElementById('add-did');

History

2 rounds 23 comments
sign up or login to add to the discussion
6 commits
expand
Add reverse ordering support to link query endpoints
Fix failing tests
Format tests
Add tests for reverse ordering in link queries
Fix pagination logic for reverse-ordered link queries
Replace boolean reverse parameter with Order enum
expand 16 comments

Damn, I just realized I have not yet added a lexicon for the new endpoint. Will do that now. At least the endpoint logic itself should not be affected by that and thus be ready for review, given its similarity to the existing getManyToManyCounts endpoint.

As Lexicons do not offer support for tuples as primitive field types I had to convert the return to use a new RecordsBySubject struct instead, which closely matches what you already did for the m2m counts endpoint.

The comments obviously don't belong here... please refer to the comments made for the PR #7

alright! reading back my own code gave me such a headache with this!

i'm reverting a few parts of this:

  • deprecated links endpoint: going to hard-code it to the existing order (deprecated endpoints don't get new features)

  • reverse ordering on many-to-many counts.

the many-to-many counts endpoint is a weird one to think about because it's not obvious what an "order" on it means. especially since the counts are aggregated, which row should be first in the response?

the current behaviour is deterministic but arbitrary: items are sorted by the other subject's target id (ie., the first time we saw the joined thingy), which is potentially unrelated to the actual many-to-many records themselves. if someone later creates a many-to-many record linking an earlier other subject, it'll become first in the list.

so: the determinism of that other-subject-target-id sorting is what enables paging to work at this endpoint, but it doesn't impose a client-meaningful order of items in the response. reversing an arbitrary order just gives you a different arbitrary order.

hopefully that makes sense ha

(additionally: there is a bit of optimization in the main join loop in the rocksdb implementation that assumes target-id-order to save work from unused pages, and i could be wrong, but i think it wouldn't stay valid under reverse paging)

(oh but i do think we can add a reverse option to the distinct-dids endpoint! probably under a new xrpc version.)

Just so that I understand you correctly here: We decided to not proceed with the implementation of a reverse order parameter for many-to-many related things as the current order is deterministic but arbitrary in nature, and thus reversing is essentially not providing any more information that is meaningful in the end.

If it's not too much trouble, do you mind explaining why you used Iterator::skip and ::take for the slicing? Is this simply more idiomatic or more performant even? :)

Really liked the way you resolved empty results with the empty impl by the way. Definitely less verbose and even clearer than what I opted for haha

many-to-many: yes exactly. it's not clear what expectation we would be trying to meet for a client requesting the many-to-many counts in reverse.

i'm open to being wrong here! when thinking about ordering, i was focused on the order of the many-to-many join-records themselves. which is where things felt meaningless/arbitrary because a) they're aggregated together (at this endpoint) and b) their order of creation is independent of the order this endpoint returns its results in.

but maybe having results ordered by the other-target-id does have client-relevant meaning. joined subjects are sorted by their age in a global sense?

thinking through it with concrete examples, i'm more inclined to bring it back:

  • joining tangled issues to a label: issues are ordered oldest-to-newest (independent from when the label was added)
  • joining bluesky users to a bluesky list: users are ordered by the age of their account (again independent from when they were added to the list)

hmm.

the thing that still feels off to me for these is the fact that this endpoint is an aggregate of many-to-many counts. that implies to me that the count is the thing of interest, and the order of responses in relation to the count is arbitrary again.

trying to keep the non-aggregated version of this endpoint in view, does ordering by the linked subject age make sense then? (ordering by the link actually feels more natural to me in that case) (i think i just talked myself out of and then back into bringing back your ordering here!!)

skip and take: yeah, it's a little more functional-style which works well for my brain. i like a big pipeline of chained transformations operating on sequences so i took the chance to do a little refactor with it. especially since it's immediately setting up an iterator right after slicing, it feels more cohesive to me.

it can be technically the other things too:

indexing ranges like thing[start..end] can panic if either index is out of bounds. in some cases the compiler can prove we already did the bounds checks and remove the panic hooks, but i suspect it can't here. even though we can prove that the indices are in range by reasoning about the code, it's nice not to have things-that-can-panic in the code.

i think it eliminated a Vec::reverse() in favour of an Iterator::rev(), which is potentially a very tiny efficiency improvement

m2m: I think this just boils down to how we communicate this fact to users of our API imo. As your internal back and forth shows there can be valid arguments in both ways. I think most users might naively - as I did at first - simply expect the links/records to be returned in the order they were created in. But, clearly documenting this in the endpoint description seems like a good idea here to make sure we don't leave any kind of ambiguity on the table.

skip and take: That's actually a great argument and I tend to agree with you on both fronts. Having worked professionally more with Typescript than, say, C, I always liked that you could chain transformations to a specific object together (map, filter and so on). The additional safety we get here from using iterators to slice data is new for me but definitely worth it.

m2m (again): Depending on how you see this I might reopen the PR again and introduce the parameter again. I let you make the final judgement call ofc :)

thanks for following up! i really appreciate the discussion!

100% agree the most important thing is communicating and setting accurate expectations in the api docs regardless of how this lands.

I really had convinced myself that the order was "meaningless" for clients, but then every example i was thinking of was like, welllll it kind of is actually meaningful. After a few days I'm still feeling that way, so I'm interested in bringing this back.

(the other thing that got me was switching to the other M2M PR, and remembering that this endpoint is useful as a "distinct" version of that query. it's not only useful for its counts)

i do think there's some extra logic needed in one of the loops that currently tries to avoid some work based on cursor assumptions.

aaaaaand i think there might be a bug in the existing logic, so this will be extra fun sorry! (i will see if i can finally get that to a proper repro or disprove it, so we can hopefully fix that first)

Sounds great! I think no matter how you look at it, the order is not entirely arbitrary as we collect results on the order they're stored.

Do you mind elaborating a bit further on your last two remarks before I open the PR again?! Might be easier for me to know what to look for then!

closed without merging
5 commits
expand
Add reverse ordering support to link query endpoints
Fix failing tests
Format tests
Add tests for reverse ordering in link queries
Fix pagination logic for reverse-ordered link queries
expand 7 comments

Looks like we might need a rebase, seems like this picked up (and is trying to undo) the changes from your other two PRs

โœ… naming the query param reverse sounds good to me since it's consistent with com.atproto.repo.listRecords (and i assume/hopefully most other atproto lexicons)

i think we double-reverse for reverse-order backlinks:

https://tangled.org/microcosm.blue/microcosm-rs/pulls/6/round/0#constellation%2fsrc%2fstorage%2frocks_store.rs-N1200

could probably work it so we only call reverse once (when "forward". not confusing at all.)

Looks like we might need a rebase, seems like this picked up (and is trying to undo) the changes from your other two PRs

I think merging upstream into the fork might work as well and makes things easier for you as the reviewer. I don't mind rebasing ofc.

i think we double-reverse for reverse-order backlinks:

Yeah the existing .rev() calls were a bit confusing at first. Maybe changing the default order to return the oldest links per default might make things a bit clearer? Though I think this is mostly helping us as the maintainers as users might expect to get served the newest ones most of the time.

Another option could be introducing an "order" query parameter with two possible values asc and desc (or something similar) to make things a bit more clear what the default order is, but then we would lose the consistency between our endpoint and com.atproto.repo.listRecords.

We could reduce the number of times we reverse using the following I think:

let mut did_id_rkeys = linkers.0[begin..end].iter().collect::<Vec<_>>();

if !reverse {
    did_id_rkeys.reverse();
}

Some comments might be justified though. That conditional looks super confusing lol

Upon thinking about this a bit more I think we should keep the reverse query parameter as is and don't touch the default order as this would be an unnecessary API break. Instead we could just introduce an enum and convert the query parameter to an enum value immediately after receiving. Other operations would be clearer then. We should still add some more context somewhere in the comments though. I had something like this in mind. What do you think?

// As backlinks are stored chronologically (oldest โ†’ newest) we need to reverse for newest-first queries
pub enum Order {
    NewestToOldest,  // default (reverse=false)
    OldestToNewest,  // reverse=true
}

impl From<bool> for Order {
    fn from(reverse: bool) -> Self {
        if reverse {
            Order::OldestToNewest
        } else {
            Order::NewestToOldest
        }
    }
}

// In server/mod.rs, where we receive the query parameter
fn get_links(
    accept: ExtractAccept,
    query: axum_extra::extract::Query<GetLinkItemsQuery>,
    store: impl LinkReader,
) -> Result<impl IntoResponse, http::StatusCode> {
    // Convert boolean to enum right at the boundary
    let order = Order::from(query.reverse);

    // Now pass the enum to storage layer
    let paged = store.get_links(
        &query.target,
        &query.collection,
        &query.path,
        order,  // โ† Clean enum instead of mysterious boolean
        limit,
        until,
        &filter_dids,
    )?;
    // ...
}

// In rocks_store.rs get_links() - this replaces the confusing double-reverse!
let did_id_rkeys = match order {
    Order::OldestToNewest => {
        begin = until.map(|u| (u - 1) as usize).unwrap_or(0);
        end = std::cmp::min(begin + limit as usize, total as usize);
        next = if end < total as usize {
            Some(end as u64 + 1)
        } else {
            None
        };
        // No reversal - storage is already chronological
        linkers.0[begin..end].iter().collect::<Vec<_>>()
    }
    Order::NewestToOldest => {
        end = until.map(|u| std::cmp::min(u, total)).unwrap_or(total) as usize;
        begin = end.saturating_sub(limit as usize);
        next = if begin == 0 { None } else { Some(begin as u64) };
        // Reverse chronological storage to get newest-to-oldest
        linkers.0[begin..end].iter().rev().collect::<Vec<_>>()
    }
};

Thanks for the follow-up!

For the API, I agree with where you landed: keep it newest-first by default, optional reverse param being true makes it chronological. Small nit would be to skip the From<bool> impl since a bool itself doesn't inherently carry directional meaning; the endpoint gives it that meaning, so we can put the let order = if query.reverse { Order::OldestToNewest } ... directly in the endpoint.

i think you're right to just put the whole construction of the vec and cursor into branches on Order. part of me wants to try and encapsulate it a bit and switch the indexing to use linkers.0.iter().skip(begin).take(end) or whatever but the .rev() in there will still require a branch and/or make type things annoying and probably all of it less clear. i like your code.

comments about order are excellent thank you :)

Rebased the PR to match the current state of main and introduced the Order enum to make the sorting of items a bit clearer throughout the handlers that use the reverse parameter already (followed your suggestion regarding the From<bool> and put the definition of the order parameter directly in the endpoint itself before passing it as a parameter to the underlying handler).