APIs for links and references in the ATmosphere

Merge pull request #12 from at-microcosm/ufos-new-data-model

Ufos new data model

authored by bad-example.com and committed by

GitHub da339cd9 5c377566

+5429 -1776
+205 -110
Cargo.lock
··· 116 116 checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" 117 117 118 118 [[package]] 119 + name = "arbitrary" 120 + version = "1.4.1" 121 + source = "registry+https://github.com/rust-lang/crates.io-index" 122 + checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" 123 + 124 + [[package]] 119 125 name = "arrayvec" 120 126 version = "0.7.6" 121 127 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 219 225 220 226 [[package]] 221 227 name = "atrium-api" 222 - version = "0.25.0" 228 + version = "0.25.2" 223 229 source = "registry+https://github.com/rust-lang/crates.io-index" 224 - checksum = "ea3ea578c768ec91082e424a8d139517b2cb5c75149bf3cec04371a1e74f00f2" 230 + checksum = "0d4eb9b4787aba546015c8ccda1d3924c157cee13d67848997fba74ac8144a07" 225 231 dependencies = [ 226 232 "atrium-common", 227 233 "atrium-xrpc", ··· 239 245 240 246 [[package]] 241 247 name = "atrium-common" 242 - version = "0.1.0" 248 + version = "0.1.1" 243 249 source = "registry+https://github.com/rust-lang/crates.io-index" 244 - checksum = "168e558408847bfed69df1033a32fd051f7a037ebc90ea46e588ccb2bfbd7233" 250 + checksum = "ba30d2f9e1a8b3db8fc97d0a5f91ee5a28f8acdddb771ad74c1b08eda357ca3d" 245 251 dependencies = [ 246 252 "dashmap", 247 253 "lru", ··· 254 260 255 261 [[package]] 256 262 name = "atrium-xrpc" 257 - version = "0.12.1" 263 + version = "0.12.2" 258 264 source = "registry+https://github.com/rust-lang/crates.io-index" 259 - checksum = "6b4956d94147cfbb669c68f654eb4fd6a1d00648c810cec79d04ec5425b8f378" 265 + checksum = "18a9e526cb2ed3e0a2ca78c3ce2a943d9041a68e067dadf42923b523771e07df" 260 266 dependencies = [ 261 267 "http", 262 268 "serde", ··· 274 280 275 281 [[package]] 276 282 name = "axum" 277 - version = "0.8.1" 283 + version = "0.8.3" 278 284 source = "registry+https://github.com/rust-lang/crates.io-index" 279 - checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8" 285 + checksum = 
"de45108900e1f9b9242f7f2e254aa3e2c029c921c258fe9e6b4217eeebd54288" 280 286 dependencies = [ 281 287 "axum-core", 282 288 "bytes", ··· 308 314 309 315 [[package]] 310 316 name = "axum-core" 311 - version = "0.5.0" 317 + version = "0.5.2" 312 318 source = "registry+https://github.com/rust-lang/crates.io-index" 313 - checksum = "df1362f362fd16024ae199c1970ce98f9661bf5ef94b9808fee734bc3698b733" 319 + checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6" 314 320 dependencies = [ 315 321 "bytes", 316 - "futures-util", 322 + "futures-core", 317 323 "http", 318 324 "http-body", 319 325 "http-body-util", ··· 328 334 329 335 [[package]] 330 336 name = "axum-extra" 331 - version = "0.10.0" 337 + version = "0.10.1" 332 338 source = "registry+https://github.com/rust-lang/crates.io-index" 333 - checksum = "460fc6f625a1f7705c6cf62d0d070794e94668988b1c38111baeec177c715f7b" 339 + checksum = "45bf463831f5131b7d3c756525b305d40f1185b688565648a92e1392ca35713d" 334 340 dependencies = [ 335 341 "axum", 336 342 "axum-core", ··· 342 348 "http-body-util", 343 349 "mime", 344 350 "pin-project-lite", 351 + "rustversion", 345 352 "serde", 346 353 "tower", 347 354 "tower-layer", ··· 531 538 532 539 [[package]] 533 540 name = "byteview" 534 - version = "0.5.4" 541 + version = "0.6.1" 535 542 source = "registry+https://github.com/rust-lang/crates.io-index" 536 - checksum = "7a4516a8561bff0598c45512f90ee04ed62cee2cb36839e650a0a0704d5f741f" 543 + checksum = "6236364b88b9b6d0bc181ba374cf1ab55ba3ef97a1cb6f8cddad48a273767fb5" 537 544 538 545 [[package]] 539 546 name = "bzip2-sys" ··· 555 562 ] 556 563 557 564 [[package]] 565 + name = "cardinality-estimator" 566 + version = "1.0.2" 567 + source = "registry+https://github.com/rust-lang/crates.io-index" 568 + checksum = "6ae5e12c435064f9e8ec53c5a782ca9a362702a4863fe1b6448f524ecede8fe3" 569 + dependencies = [ 570 + "enum_dispatch", 571 + "serde", 572 + "wyhash", 573 + ] 574 + 575 + [[package]] 558 576 name = "cc" 559 - version = 
"1.2.17" 577 + version = "1.2.18" 560 578 source = "registry+https://github.com/rust-lang/crates.io-index" 561 - checksum = "1fcb57c740ae1daf453ae85f16e37396f672b039e00d9d866e07ddb24e328e3a" 579 + checksum = "525046617d8376e3db1deffb079e91cef90a89fc3ca5c185bbf8c9ecdd15cd5c" 562 580 dependencies = [ 563 581 "jobserver", 564 582 "libc", ··· 628 646 629 647 [[package]] 630 648 name = "clap" 631 - version = "4.5.33" 649 + version = "4.5.35" 632 650 source = "registry+https://github.com/rust-lang/crates.io-index" 633 - checksum = "e2c80cae4c3350dd8f1272c73e83baff9a6ba550b8bfbe651b3c45b78cd1751e" 651 + checksum = "d8aa86934b44c19c50f87cc2790e19f54f7a67aedb64101c2e1a2e5ecfb73944" 634 652 dependencies = [ 635 653 "clap_builder", 636 654 "clap_derive", ··· 638 656 639 657 [[package]] 640 658 name = "clap_builder" 641 - version = "4.5.33" 659 + version = "4.5.35" 642 660 source = "registry+https://github.com/rust-lang/crates.io-index" 643 - checksum = "0123e386f691c90aa228219b5b1ee72d465e8e231c79e9c82324f016a62a741c" 661 + checksum = "2414dbb2dd0695280da6ea9261e327479e9d37b0630f6b53ba2a11c60c679fd9" 644 662 dependencies = [ 645 663 "anstream", 646 664 "anstyle", ··· 813 831 814 832 [[package]] 815 833 name = "ctrlc" 816 - version = "3.4.5" 834 + version = "3.4.6" 817 835 source = "registry+https://github.com/rust-lang/crates.io-index" 818 - checksum = "90eeab0aa92f3f9b4e87f258c72b139c207d251f9cbc1080a0086b86a8870dd3" 836 + checksum = "697b5419f348fd5ae2478e8018cb016c00a5881c7f46c717de98ffd135a5651c" 819 837 dependencies = [ 820 838 "nix", 821 839 "windows-sys 0.59.0", ··· 823 841 824 842 [[package]] 825 843 name = "darling" 826 - version = "0.20.10" 844 + version = "0.20.11" 827 845 source = "registry+https://github.com/rust-lang/crates.io-index" 828 - checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" 846 + checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" 829 847 dependencies = [ 830 848 "darling_core", 831 849 
"darling_macro", ··· 833 851 834 852 [[package]] 835 853 name = "darling_core" 836 - version = "0.20.10" 854 + version = "0.20.11" 837 855 source = "registry+https://github.com/rust-lang/crates.io-index" 838 - checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" 856 + checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" 839 857 dependencies = [ 840 858 "fnv", 841 859 "ident_case", ··· 847 865 848 866 [[package]] 849 867 name = "darling_macro" 850 - version = "0.20.10" 868 + version = "0.20.11" 851 869 source = "registry+https://github.com/rust-lang/crates.io-index" 852 - checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" 870 + checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" 853 871 dependencies = [ 854 872 "darling_core", 855 873 "quote", ··· 904 922 905 923 [[package]] 906 924 name = "deranged" 907 - version = "0.4.1" 925 + version = "0.4.0" 908 926 source = "registry+https://github.com/rust-lang/crates.io-index" 909 - checksum = "28cfac68e08048ae1883171632c2aef3ebc555621ae56fbccce1cbf22dd7f058" 927 + checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" 910 928 dependencies = [ 911 929 "powerfmt", 912 930 "serde", ··· 976 994 "dropshot_endpoint", 977 995 "form_urlencoded", 978 996 "futures", 979 - "hostname 0.4.0", 997 + "hostname 0.4.1", 980 998 "http", 981 999 "http-body-util", 982 1000 "hyper", 983 1001 "hyper-util", 984 - "indexmap 2.8.0", 1002 + "indexmap 2.9.0", 985 1003 "multer", 986 1004 "openapiv3", 987 1005 "paste", ··· 1070 1088 1071 1089 [[package]] 1072 1090 name = "env_logger" 1073 - version = "0.11.7" 1091 + version = "0.11.8" 1074 1092 source = "registry+https://github.com/rust-lang/crates.io-index" 1075 - checksum = "c3716d7a920fb4fac5d84e9d4bce8ceb321e9414b4409da61b07b75c1e3d0697" 1093 + checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" 1076 1094 dependencies = [ 1077 1095 "anstream", 1078 
1096 "anstyle", ··· 1089 1107 1090 1108 [[package]] 1091 1109 name = "errno" 1092 - version = "0.3.10" 1110 + version = "0.3.11" 1093 1111 source = "registry+https://github.com/rust-lang/crates.io-index" 1094 - checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" 1112 + checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e" 1095 1113 dependencies = [ 1096 1114 "libc", 1097 1115 "windows-sys 0.59.0", ··· 1110 1128 1111 1129 [[package]] 1112 1130 name = "event-listener-strategy" 1113 - version = "0.5.3" 1131 + version = "0.5.4" 1114 1132 source = "registry+https://github.com/rust-lang/crates.io-index" 1115 - checksum = "3c3e4e0dd3673c1139bf041f3008816d9cf2946bbfac2945c09e523b8d7b05b2" 1133 + checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" 1116 1134 dependencies = [ 1117 1135 "event-listener", 1118 1136 "pin-project-lite", ··· 1126 1144 1127 1145 [[package]] 1128 1146 name = "fjall" 1129 - version = "2.7.0" 1130 - source = "git+https://github.com/fjall-rs/fjall.git?branch=fix%2Flockless-ranges#d2102006958b0b30bdde0f7315b9b22539bb5f89" 1147 + version = "2.8.0" 1148 + source = "registry+https://github.com/rust-lang/crates.io-index" 1149 + checksum = "26b2ced3483989a62b3533c9f99054d73b527c6c0045cf22b00fe87956f1a46f" 1131 1150 dependencies = [ 1132 1151 "byteorder", 1133 1152 "byteview", ··· 1304 1323 "libc", 1305 1324 "log", 1306 1325 "rustversion", 1307 - "windows 0.58.0", 1326 + "windows", 1308 1327 ] 1309 1328 1310 1329 [[package]] ··· 1370 1389 "futures-core", 1371 1390 "futures-sink", 1372 1391 "http", 1373 - "indexmap 2.8.0", 1392 + "indexmap 2.9.0", 1374 1393 "slab", 1375 1394 "tokio", 1376 1395 "tokio-util", ··· 1466 1485 1467 1486 [[package]] 1468 1487 name = "hostname" 1469 - version = "0.4.0" 1488 + version = "0.4.1" 1470 1489 source = "registry+https://github.com/rust-lang/crates.io-index" 1471 - checksum = "f9c7c7c8ac16c798734b8a24560c1362120597c40d5e1459f09498f8f6c8f2ba" 1490 + 
checksum = "a56f203cd1c76362b69e3863fd987520ac36cf70a8c92627449b2f64a8cf7d65" 1472 1491 dependencies = [ 1473 1492 "cfg-if", 1474 1493 "libc", 1475 - "windows 0.52.0", 1494 + "windows-link", 1476 1495 ] 1477 1496 1478 1497 [[package]] ··· 1553 1572 1554 1573 [[package]] 1555 1574 name = "hyper-util" 1556 - version = "0.1.10" 1575 + version = "0.1.11" 1557 1576 source = "registry+https://github.com/rust-lang/crates.io-index" 1558 - checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" 1577 + checksum = "497bbc33a26fdd4af9ed9c70d63f61cf56a938375fbb32df34db9b1cd6d643f2" 1559 1578 dependencies = [ 1560 1579 "bytes", 1561 1580 "futures-channel", ··· 1563 1582 "http", 1564 1583 "http-body", 1565 1584 "hyper", 1585 + "libc", 1566 1586 "pin-project-lite", 1567 1587 "socket2", 1568 1588 "tokio", ··· 1572 1592 1573 1593 [[package]] 1574 1594 name = "iana-time-zone" 1575 - version = "0.1.62" 1595 + version = "0.1.63" 1576 1596 source = "registry+https://github.com/rust-lang/crates.io-index" 1577 - checksum = "b2fd658b06e56721792c5df4475705b6cda790e9298d19d2f8af083457bcd127" 1597 + checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" 1578 1598 dependencies = [ 1579 1599 "android_system_properties", 1580 1600 "core-foundation-sys", ··· 1582 1602 "js-sys", 1583 1603 "log", 1584 1604 "wasm-bindgen", 1585 - "windows-core 0.52.0", 1605 + "windows-core 0.61.0", 1586 1606 ] 1587 1607 1588 1608 [[package]] ··· 1752 1772 1753 1773 [[package]] 1754 1774 name = "indexmap" 1755 - version = "2.8.0" 1775 + version = "2.9.0" 1756 1776 source = "registry+https://github.com/rust-lang/crates.io-index" 1757 - checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" 1777 + checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" 1758 1778 dependencies = [ 1759 1779 "equivalent", 1760 1780 "hashbrown 0.15.2", ··· 1850 1870 1851 1871 [[package]] 1852 1872 name = "jiff" 1853 - version = "0.2.5" 1873 + version 
= "0.2.6" 1854 1874 source = "registry+https://github.com/rust-lang/crates.io-index" 1855 - checksum = "c102670231191d07d37a35af3eb77f1f0dbf7a71be51a962dcd57ea607be7260" 1875 + checksum = "1f33145a5cbea837164362c7bd596106eb7c5198f97d1ba6f6ebb3223952e488" 1856 1876 dependencies = [ 1857 1877 "jiff-static", 1858 1878 "log", ··· 1863 1883 1864 1884 [[package]] 1865 1885 name = "jiff-static" 1866 - version = "0.2.5" 1886 + version = "0.2.6" 1867 1887 source = "registry+https://github.com/rust-lang/crates.io-index" 1868 - checksum = "4cdde31a9d349f1b1f51a0b3714a5940ac022976f4b49485fc04be052b183b4c" 1888 + checksum = "43ce13c40ec6956157a3635d97a1ee2df323b263f09ea14165131289cb0f5c19" 1869 1889 dependencies = [ 1870 1890 "proc-macro2", 1871 1891 "quote", ··· 1874 1894 1875 1895 [[package]] 1876 1896 name = "jobserver" 1877 - version = "0.1.32" 1897 + version = "0.1.33" 1878 1898 source = "registry+https://github.com/rust-lang/crates.io-index" 1879 - checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" 1899 + checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" 1880 1900 dependencies = [ 1901 + "getrandom 0.3.2", 1881 1902 "libc", 1882 1903 ] 1883 1904 ··· 1919 1940 checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" 1920 1941 1921 1942 [[package]] 1943 + name = "libfuzzer-sys" 1944 + version = "0.4.9" 1945 + source = "registry+https://github.com/rust-lang/crates.io-index" 1946 + checksum = "cf78f52d400cf2d84a3a973a78a592b4adc535739e0a5597a0da6f0c357adc75" 1947 + dependencies = [ 1948 + "arbitrary", 1949 + "cc", 1950 + ] 1951 + 1952 + [[package]] 1922 1953 name = "libloading" 1923 1954 version = "0.8.6" 1924 1955 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2000 2031 2001 2032 [[package]] 2002 2033 name = "linux-raw-sys" 2003 - version = "0.9.3" 2034 + version = "0.9.4" 2004 2035 source = "registry+https://github.com/rust-lang/crates.io-index" 2005 - checksum = 
"fe7db12097d22ec582439daf8618b8fdd1a7bef6270e9af3b1ebcd30893cf413" 2036 + checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" 2006 2037 2007 2038 [[package]] 2008 2039 name = "litemap" ··· 2050 2081 2051 2082 [[package]] 2052 2083 name = "lsm-tree" 2053 - version = "2.7.0" 2054 - source = "git+https://github.com/fjall-rs/lsm-tree.git?branch=fix%2Flockless-ranges#c1684bdf57488a6195942fde5ea0c756dc0b6035" 2084 + version = "2.8.0" 2085 + source = "registry+https://github.com/rust-lang/crates.io-index" 2086 + checksum = "d0a63a5e98a38b51765274137d8aedfbd848da5f4d016867e186b673fcc06a8c" 2055 2087 dependencies = [ 2056 2088 "byteorder", 2057 2089 "crossbeam-skiplist", ··· 2060 2092 "guardian", 2061 2093 "interval-heap", 2062 2094 "log", 2095 + "lz4_flex", 2063 2096 "path-absolutize", 2064 2097 "quick_cache", 2065 2098 "rustc-hash 2.1.1", ··· 2079 2112 "cc", 2080 2113 "libc", 2081 2114 ] 2115 + 2116 + [[package]] 2117 + name = "lz4_flex" 2118 + version = "0.11.3" 2119 + source = "registry+https://github.com/rust-lang/crates.io-index" 2120 + checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" 2082 2121 2083 2122 [[package]] 2084 2123 name = "mach2" ··· 2142 2181 "http-body-util", 2143 2182 "hyper", 2144 2183 "hyper-util", 2145 - "indexmap 2.8.0", 2184 + "indexmap 2.9.0", 2146 2185 "ipnet", 2147 2186 "metrics", 2148 2187 "metrics-util", ··· 2165 2204 "once_cell", 2166 2205 "procfs", 2167 2206 "rlimit", 2168 - "windows 0.58.0", 2207 + "windows", 2169 2208 ] 2170 2209 2171 2210 [[package]] ··· 2208 2247 2209 2248 [[package]] 2210 2249 name = "miniz_oxide" 2211 - version = "0.8.5" 2250 + version = "0.8.8" 2212 2251 source = "registry+https://github.com/rust-lang/crates.io-index" 2213 - checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" 2252 + checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" 2214 2253 dependencies = [ 2215 2254 "adler2", 2216 2255 ] ··· 2381 2420 2382 
2421 [[package]] 2383 2422 name = "once_cell" 2384 - version = "1.21.1" 2423 + version = "1.21.3" 2385 2424 source = "registry+https://github.com/rust-lang/crates.io-index" 2386 - checksum = "d75b0bedcc4fe52caa0e03d9f1151a323e4aa5e2d78ba3580400cd3c9e2bc4bc" 2425 + checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 2387 2426 2388 2427 [[package]] 2389 2428 name = "openapiv3" ··· 2391 2430 source = "registry+https://github.com/rust-lang/crates.io-index" 2392 2431 checksum = "cc02deea53ffe807708244e5914f6b099ad7015a207ee24317c22112e17d9c5c" 2393 2432 dependencies = [ 2394 - "indexmap 2.8.0", 2433 + "indexmap 2.9.0", 2395 2434 "serde", 2396 2435 "serde_json", 2397 2436 ] ··· 2430 2469 2431 2470 [[package]] 2432 2471 name = "openssl-src" 2433 - version = "300.4.2+3.4.1" 2472 + version = "300.5.0+3.5.0" 2434 2473 source = "registry+https://github.com/rust-lang/crates.io-index" 2435 - checksum = "168ce4e058f975fe43e89d9ccf78ca668601887ae736090aacc23ae353c298e2" 2474 + checksum = "e8ce546f549326b0e6052b649198487d91320875da901e7bd11a06d1ee3f9c2f" 2436 2475 dependencies = [ 2437 2476 "cc", 2438 2477 ] ··· 2725 2764 2726 2765 [[package]] 2727 2766 name = "redox_syscall" 2728 - version = "0.5.10" 2767 + version = "0.5.11" 2729 2768 source = "registry+https://github.com/rust-lang/crates.io-index" 2730 - checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1" 2769 + checksum = "d2f103c6d277498fbceb16e84d317e2a400f160f46904d5f5410848c829511a3" 2731 2770 dependencies = [ 2732 2771 "bitflags", 2733 2772 ] ··· 2882 2921 2883 2922 [[package]] 2884 2923 name = "rustix" 2885 - version = "1.0.3" 2924 + version = "1.0.5" 2886 2925 source = "registry+https://github.com/rust-lang/crates.io-index" 2887 - checksum = "e56a18552996ac8d29ecc3b190b4fdbb2d91ca4ec396de7bbffaf43f3d637e96" 2926 + checksum = "d97817398dd4bb2e6da002002db259209759911da105da92bec29ccb12cf58bf" 2888 2927 dependencies = [ 2889 2928 "bitflags", 2890 2929 "errno", 2891 2930 
"libc", 2892 - "linux-raw-sys 0.9.3", 2931 + "linux-raw-sys 0.9.4", 2893 2932 "windows-sys 0.59.0", 2894 2933 ] 2895 2934 ··· 3073 3112 checksum = "9d2de91cf02bbc07cde38891769ccd5d4f073d22a40683aa4bc7a95781aaa2c4" 3074 3113 dependencies = [ 3075 3114 "form_urlencoded", 3076 - "indexmap 2.8.0", 3115 + "indexmap 2.9.0", 3077 3116 "itoa", 3078 3117 "ryu", 3079 3118 "serde", ··· 3144 3183 "chrono", 3145 3184 "hex", 3146 3185 "indexmap 1.9.3", 3147 - "indexmap 2.8.0", 3186 + "indexmap 2.9.0", 3148 3187 "serde", 3149 3188 "serde_derive", 3150 3189 "serde_json", ··· 3271 3310 3272 3311 [[package]] 3273 3312 name = "smallvec" 3274 - version = "1.14.0" 3313 + version = "1.15.0" 3275 3314 source = "registry+https://github.com/rust-lang/crates.io-index" 3276 - checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" 3315 + checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" 3277 3316 3278 3317 [[package]] 3279 3318 name = "socket2" 3280 - version = "0.5.8" 3319 + version = "0.5.9" 3281 3320 source = "registry+https://github.com/rust-lang/crates.io-index" 3282 - checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" 3321 + checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef" 3283 3322 dependencies = [ 3284 3323 "libc", 3285 3324 "windows-sys 0.52.0", ··· 3367 3406 "fastrand", 3368 3407 "getrandom 0.3.2", 3369 3408 "once_cell", 3370 - "rustix 1.0.3", 3409 + "rustix 1.0.5", 3371 3410 "windows-sys 0.59.0", 3372 3411 ] 3373 3412 ··· 3605 3644 source = "registry+https://github.com/rust-lang/crates.io-index" 3606 3645 checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" 3607 3646 dependencies = [ 3608 - "indexmap 2.8.0", 3647 + "indexmap 2.9.0", 3609 3648 "serde", 3610 3649 "serde_spanned", 3611 3650 "toml_datetime", ··· 3751 3790 version = "0.1.0" 3752 3791 dependencies = [ 3753 3792 "anyhow", 3793 + "async-trait", 3754 3794 "bincode 2.0.1", 3795 + 
"cardinality-estimator", 3755 3796 "clap", 3756 3797 "dropshot", 3757 3798 "env_logger", ··· 3763 3804 "semver", 3764 3805 "serde", 3765 3806 "serde_json", 3807 + "tempfile", 3766 3808 "thiserror 2.0.12", 3767 3809 "tikv-jemallocator", 3768 3810 "tokio", 3769 3811 ] 3770 3812 3771 3813 [[package]] 3814 + name = "ufos-fuzz" 3815 + version = "0.0.0" 3816 + dependencies = [ 3817 + "bincode 2.0.1", 3818 + "cardinality-estimator", 3819 + "jetstream", 3820 + "libfuzzer-sys", 3821 + "tikv-jemallocator", 3822 + "ufos", 3823 + ] 3824 + 3825 + [[package]] 3772 3826 name = "unicase" 3773 3827 version = "2.8.1" 3774 3828 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3851 3905 3852 3906 [[package]] 3853 3907 name = "value-log" 3854 - version = "1.7.2" 3908 + version = "1.8.0" 3855 3909 source = "registry+https://github.com/rust-lang/crates.io-index" 3856 - checksum = "d65573c63cf768179763226edb8d614d8b314130a3f50422d6d375d3947c529f" 3910 + checksum = "fd29b17c041f94e0885179637289815cd038f0c9fc19c4549d5a97017404fb7d" 3857 3911 dependencies = [ 3858 3912 "byteorder", 3859 - "bytes", 3860 3913 "byteview", 3861 3914 "interval-heap", 3862 3915 "log", 3863 3916 "path-absolutize", 3864 - "quick_cache", 3865 3917 "rustc-hash 2.1.1", 3866 3918 "tempfile", 3867 3919 "varint-rs", ··· 4027 4079 4028 4080 [[package]] 4029 4081 name = "windows" 4030 - version = "0.52.0" 4082 + version = "0.58.0" 4031 4083 source = "registry+https://github.com/rust-lang/crates.io-index" 4032 - checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" 4084 + checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" 4033 4085 dependencies = [ 4034 - "windows-core 0.52.0", 4086 + "windows-core 0.58.0", 4035 4087 "windows-targets", 4036 4088 ] 4037 4089 4038 4090 [[package]] 4039 - name = "windows" 4091 + name = "windows-core" 4040 4092 version = "0.58.0" 4041 4093 source = "registry+https://github.com/rust-lang/crates.io-index" 4042 - checksum = 
"dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" 4094 + checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" 4043 4095 dependencies = [ 4044 - "windows-core 0.58.0", 4096 + "windows-implement 0.58.0", 4097 + "windows-interface 0.58.0", 4098 + "windows-result 0.2.0", 4099 + "windows-strings 0.1.0", 4045 4100 "windows-targets", 4046 4101 ] 4047 4102 4048 4103 [[package]] 4049 4104 name = "windows-core" 4050 - version = "0.52.0" 4105 + version = "0.61.0" 4051 4106 source = "registry+https://github.com/rust-lang/crates.io-index" 4052 - checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" 4107 + checksum = "4763c1de310c86d75a878046489e2e5ba02c649d185f21c67d4cf8a56d098980" 4053 4108 dependencies = [ 4054 - "windows-targets", 4109 + "windows-implement 0.60.0", 4110 + "windows-interface 0.59.1", 4111 + "windows-link", 4112 + "windows-result 0.3.2", 4113 + "windows-strings 0.4.0", 4055 4114 ] 4056 4115 4057 4116 [[package]] 4058 - name = "windows-core" 4117 + name = "windows-implement" 4059 4118 version = "0.58.0" 4060 4119 source = "registry+https://github.com/rust-lang/crates.io-index" 4061 - checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" 4120 + checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" 4062 4121 dependencies = [ 4063 - "windows-implement", 4064 - "windows-interface", 4065 - "windows-result", 4066 - "windows-strings", 4067 - "windows-targets", 4122 + "proc-macro2", 4123 + "quote", 4124 + "syn", 4068 4125 ] 4069 4126 4070 4127 [[package]] 4071 4128 name = "windows-implement" 4072 - version = "0.58.0" 4129 + version = "0.60.0" 4073 4130 source = "registry+https://github.com/rust-lang/crates.io-index" 4074 - checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" 4131 + checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" 4075 4132 dependencies = [ 4076 4133 "proc-macro2", 4077 4134 "quote", ··· 
4090 4147 ] 4091 4148 4092 4149 [[package]] 4150 + name = "windows-interface" 4151 + version = "0.59.1" 4152 + source = "registry+https://github.com/rust-lang/crates.io-index" 4153 + checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" 4154 + dependencies = [ 4155 + "proc-macro2", 4156 + "quote", 4157 + "syn", 4158 + ] 4159 + 4160 + [[package]] 4093 4161 name = "windows-link" 4094 4162 version = "0.1.1" 4095 4163 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4105 4173 ] 4106 4174 4107 4175 [[package]] 4176 + name = "windows-result" 4177 + version = "0.3.2" 4178 + source = "registry+https://github.com/rust-lang/crates.io-index" 4179 + checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" 4180 + dependencies = [ 4181 + "windows-link", 4182 + ] 4183 + 4184 + [[package]] 4108 4185 name = "windows-strings" 4109 4186 version = "0.1.0" 4110 4187 source = "registry+https://github.com/rust-lang/crates.io-index" 4111 4188 checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" 4112 4189 dependencies = [ 4113 - "windows-result", 4190 + "windows-result 0.2.0", 4114 4191 "windows-targets", 4192 + ] 4193 + 4194 + [[package]] 4195 + name = "windows-strings" 4196 + version = "0.4.0" 4197 + source = "registry+https://github.com/rust-lang/crates.io-index" 4198 + checksum = "7a2ba9642430ee452d5a7aa78d72907ebe8cfda358e8cb7918a2050581322f97" 4199 + dependencies = [ 4200 + "windows-link", 4115 4201 ] 4116 4202 4117 4203 [[package]] ··· 4198 4284 4199 4285 [[package]] 4200 4286 name = "winnow" 4201 - version = "0.7.4" 4287 + version = "0.7.6" 4202 4288 source = "registry+https://github.com/rust-lang/crates.io-index" 4203 - checksum = "0e97b544156e9bebe1a0ffbc03484fc1ffe3100cbce3ffb17eac35f7cdd7ab36" 4289 + checksum = "63d3fcd9bba44b03821e7d699eeee959f3126dcc4aa8e4ae18ec617c2a5cea10" 4204 4290 dependencies = [ 4205 4291 "memchr", 4206 4292 ] ··· 4225 4311 version = "0.5.5" 4226 4312 source = 
"registry+https://github.com/rust-lang/crates.io-index" 4227 4313 checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" 4314 + 4315 + [[package]] 4316 + name = "wyhash" 4317 + version = "0.5.0" 4318 + source = "registry+https://github.com/rust-lang/crates.io-index" 4319 + checksum = "baf6e163c25e3fac820b4b453185ea2dea3b6a3e0a721d4d23d75bd33734c295" 4320 + dependencies = [ 4321 + "rand_core 0.6.4", 4322 + ] 4228 4323 4229 4324 [[package]] 4230 4325 name = "xxhash-rust"
+1 -3
Cargo.toml
··· 5 5 "constellation", 6 6 "jetstream", 7 7 "ufos", 8 + "ufos/fuzz", 8 9 ] 9 - 10 - [patch.crates-io] 11 - lsm-tree = { git = "https://github.com/fjall-rs/lsm-tree.git", branch = "fix/lockless-ranges" }
+1 -1
constellation/templates/base.html.j2
··· 55 55 </style> 56 56 </head> 57 57 <body class="{% block body_classes %}{% endblock %}"> 58 - <h1><a href="/">This</a> is a <a href="https://github.com/at-ucosm/links/tree/main/constellation">constellation 🌌</a> API server from <a href="https://github.com/at-microcosm">microcosm</a> ✨</h1> 58 + <h1><a href="/">This</a> is a <a href="https://github.com/at-microcosm/links/tree/main/constellation">constellation 🌌</a> API server from <a href="https://github.com/at-microcosm">microcosm</a> ✨</h1> 59 59 {% block content %}{% endblock %} 60 60 61 61 <footer>
+2 -2
jetstream/Cargo.toml
··· 10 10 11 11 [dependencies] 12 12 async-trait = "0.1.83" 13 - atrium-api = { version = "0.25", default-features = false, features = [ 13 + atrium-api = { version = "0.25.2", default-features = false, features = [ 14 14 "namespace-appbsky", 15 15 ] } 16 16 tokio = { version = "1.44.2", features = ["full", "sync", "time"] } ··· 22 22 futures-util = "0.3.31" 23 23 url = "2.5.4" 24 24 serde = { version = "1.0.215", features = ["derive"] } 25 - serde_json = "1.0.132" 25 + serde_json = { version = "1.0.140", features = ["raw_value"] } 26 26 chrono = "0.4.38" 27 27 zstd = "0.13.2" 28 28 thiserror = "2.0.3"
+22 -5
jetstream/examples/arbitrary_record.rs
··· 5 5 use clap::Parser; 6 6 use jetstream::{ 7 7 events::{ 8 - commit::CommitEvent, 9 - JetstreamEvent::Commit, 8 + CommitOp, 9 + EventKind, 10 + JetstreamEvent, 10 11 }, 11 12 DefaultJetstreamEndpoints, 12 13 JetstreamCompression, ··· 30 31 let args = Args::parse(); 31 32 32 33 let dids = args.did.unwrap_or_default(); 33 - let config: JetstreamConfig<serde_json::Value> = JetstreamConfig { 34 + let config: JetstreamConfig = JetstreamConfig { 34 35 endpoint: DefaultJetstreamEndpoints::USEastOne.into(), 35 36 wanted_collections: vec![args.nsid.clone()], 36 37 wanted_dids: dids.clone(), ··· 48 49 ); 49 50 50 51 while let Some(event) = receiver.recv().await { 51 - if let Commit(CommitEvent::CreateOrUpdate { commit, .. }) = event { 52 - println!("got record: {:?}", commit.record); 52 + if let JetstreamEvent { 53 + kind: EventKind::Commit, 54 + commit: Some(commit), 55 + .. 56 + } = event 57 + { 58 + if commit.collection != args.nsid { 59 + continue; 60 + } 61 + if !(commit.operation == CommitOp::Create || commit.operation == CommitOp::Update) { 62 + continue; 63 + } 64 + let Some(rec) = commit.record else { continue }; 65 + println!( 66 + "New or updated record! ({})\n{:?}\n", 67 + commit.rkey.as_str(), 68 + rec.get() 69 + ); 53 70 } 54 71 } 55 72
+20 -31
jetstream/examples/basic.rs
··· 7 7 use clap::Parser; 8 8 use jetstream::{ 9 9 events::{ 10 - commit::{ 11 - CommitEvent, 12 - CommitType, 13 - }, 14 - JetstreamEvent::Commit, 10 + CommitEvent, 11 + CommitOp, 12 + EventKind, 13 + JetstreamEvent, 15 14 }, 16 15 DefaultJetstreamEndpoints, 17 16 JetstreamCompression, ··· 25 24 /// The DIDs to listen for events on, if not provided we will listen for all DIDs. 26 25 #[arg(short, long)] 27 26 did: Option<Vec<string::Did>>, 28 - /// The NSID for the collection to listen for (e.g. `app.bsky.feed.post`). 29 - #[arg(short, long)] 30 - nsid: string::Nsid, 31 27 } 32 28 33 29 #[tokio::main] ··· 37 33 let dids = args.did.unwrap_or_default(); 38 34 let config = JetstreamConfig { 39 35 endpoint: DefaultJetstreamEndpoints::USEastOne.into(), 40 - wanted_collections: vec![args.nsid.clone()], 36 + wanted_collections: vec![string::Nsid::new("app.bsky.feed.post".to_string()).unwrap()], 41 37 wanted_dids: dids.clone(), 42 38 compression: JetstreamCompression::Zstd, 43 39 ..Default::default() ··· 46 42 let jetstream = JetstreamConnector::new(config)?; 47 43 let mut receiver = jetstream.connect().await?; 48 44 49 - println!( 50 - "Listening for '{}' events on DIDs: {:?}", 51 - args.nsid.as_str(), 52 - dids 53 - ); 45 + println!("Listening for 'app.bsky.feed.post' events on DIDs: {dids:?}"); 54 46 55 47 while let Some(event) = receiver.recv().await { 56 - if let Commit(commit) = event { 57 - match commit { 58 - CommitEvent::CreateOrUpdate { info: _, commit } 59 - if commit.info.operation == CommitType::Create => 60 - { 61 - if let AppBskyFeedPost(record) = commit.record { 62 - println!( 63 - "New post created! ({})\n\n'{}'", 64 - commit.info.rkey.as_str(), 65 - record.text 66 - ); 67 - } 68 - } 69 - CommitEvent::Delete { info: _, commit } => { 70 - println!("A post has been deleted. 
({})", commit.rkey.as_str()); 71 - } 72 - _ => {} 48 + if let JetstreamEvent { 49 + kind: EventKind::Commit, 50 + commit: 51 + Some(CommitEvent { 52 + operation: CommitOp::Create, 53 + rkey, 54 + record: Some(record), 55 + .. 56 + }), 57 + .. 58 + } = event 59 + { 60 + if let Ok(AppBskyFeedPost(rec)) = serde_json::from_str(record.get()) { 61 + println!("New post created! ({})\n{:?}\n", rkey.as_str(), rec.text); 73 62 } 74 63 } 75 64 }
+205
jetstream/src/events.rs
··· 1 + use std::time::{ 2 + Duration, 3 + SystemTime, 4 + SystemTimeError, 5 + UNIX_EPOCH, 6 + }; 7 + 8 + use chrono::Utc; 9 + use serde::{ 10 + Deserialize, 11 + Serialize, 12 + }; 13 + use serde_json::value::RawValue; 14 + 15 + use crate::exports; 16 + 17 + /// Opaque wrapper for the time_us cursor used by jetstream 18 + #[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq, PartialOrd)] 19 + pub struct Cursor(u64); 20 + 21 + #[derive(Debug, Deserialize)] 22 + #[serde(rename_all = "snake_case")] 23 + pub struct JetstreamEvent { 24 + #[serde(rename = "time_us")] 25 + pub cursor: Cursor, 26 + pub did: exports::Did, 27 + pub kind: EventKind, 28 + pub commit: Option<CommitEvent>, 29 + pub identity: Option<IdentityEvent>, 30 + pub account: Option<AccountEvent>, 31 + } 32 + 33 + #[derive(Debug, Deserialize, PartialEq)] 34 + #[serde(rename_all = "snake_case")] 35 + pub enum EventKind { 36 + Commit, 37 + Identity, 38 + Account, 39 + } 40 + 41 + #[derive(Debug, Deserialize)] 42 + #[serde(rename_all = "snake_case")] 43 + pub struct CommitEvent { 44 + pub collection: exports::Nsid, 45 + pub rkey: exports::RecordKey, 46 + pub rev: String, 47 + pub operation: CommitOp, 48 + pub record: Option<Box<RawValue>>, 49 + pub cid: Option<exports::Cid>, 50 + } 51 + 52 + #[derive(Debug, Deserialize, PartialEq)] 53 + #[serde(rename_all = "snake_case")] 54 + pub enum CommitOp { 55 + Create, 56 + Update, 57 + Delete, 58 + } 59 + 60 + #[derive(Debug, Deserialize, PartialEq)] 61 + pub struct IdentityEvent { 62 + pub did: exports::Did, 63 + pub handle: Option<exports::Handle>, 64 + pub seq: u64, 65 + pub time: chrono::DateTime<Utc>, 66 + } 67 + 68 + #[derive(Debug, Deserialize, PartialEq)] 69 + pub struct AccountEvent { 70 + pub active: bool, 71 + pub did: exports::Did, 72 + pub seq: u64, 73 + pub time: chrono::DateTime<Utc>, 74 + pub status: Option<String>, 75 + } 76 + 77 + impl Cursor { 78 + /// Get a cursor that will consume all available jetstream replay 79 + /// 80 + /// This 
sets the cursor to zero. 81 + /// 82 + /// Jetstream instances typically only have a few days of replay. 83 + pub fn from_start() -> Self { 84 + Self(0) 85 + } 86 + /// Get a cursor for a specific time 87 + /// 88 + /// Panics: if t is older than the unix epoch: Jan 1, 1970. 89 + /// 90 + /// If you want to receive all available jetstream replay (typically a few days), use 91 + /// .from_start() 92 + /// 93 + /// Warning: this exploits the internal implementation detail of jetstream cursors 94 + /// being ~microsecond timestamps. 95 + pub fn at(t: SystemTime) -> Self { 96 + let unix_dt = t 97 + .duration_since(UNIX_EPOCH) 98 + .expect("cannot set jetstream cursor earlier than unix epoch"); 99 + Self(unix_dt.as_micros() as u64) 100 + } 101 + /// Get a cursor rewound from now by this amount 102 + /// 103 + /// Panics: if d is greater than the time since the unix epoch: Jan 1, 1970. 104 + /// 105 + /// Jetstream instances typically only have a few days of replay. 106 + /// 107 + /// Warning: this exploits the internal implementation detail of jetstream cursors 108 + /// being ~microsecond timestamps. 109 + pub fn back_by(d: Duration) -> Self { 110 + Self::at(SystemTime::now() - d) 111 + } 112 + /// Get a Cursor from a raw u64 113 + /// 114 + /// For example, from a jetstream event's `time_us` field. 115 + pub fn from_raw_u64(time_us: u64) -> Self { 116 + Self(time_us) 117 + } 118 + /// Get the raw u64 value from this cursor. 119 + pub fn to_raw_u64(&self) -> u64 { 120 + self.0 121 + } 122 + /// Format the cursor value for use in a jetstream connection url querystring 123 + pub fn to_jetstream(&self) -> String { 124 + self.0.to_string() 125 + } 126 + /// Compute the time span since an earlier cursor or [SystemTime] 127 + /// 128 + /// Warning: this exploits the internal implementation detail of jetstream cursors 129 + /// being ~microsecond timestamps. 
130 + pub fn duration_since( 131 + &self, 132 + earlier: impl Into<SystemTime>, 133 + ) -> Result<Duration, SystemTimeError> { 134 + let t: SystemTime = self.into(); 135 + t.duration_since(earlier.into()) 136 + } 137 + /// Compute the age of the cursor vs the local clock 138 + /// 139 + /// Warning: this exploits the internal implementation detail of jetstream cursors 140 + pub fn elapsed(&self) -> Result<Duration, SystemTimeError> { 141 + let t: SystemTime = self.into(); 142 + t.elapsed() 143 + } 144 + /// Get the immediate next cursor value 145 + /// 146 + /// This is possible for the implementation of jetstream cursors 147 + pub fn next(&self) -> Cursor { 148 + Self(self.0 + 1) 149 + } 150 + } 151 + 152 + impl From<&Cursor> for SystemTime { 153 + /// Convert a cursor directly to a [SystemTime] 154 + /// 155 + /// Warning: this exploits the internal implementation detail of jetstream cursors 156 + /// being ~microsecond timestamps. 157 + fn from(c: &Cursor) -> Self { 158 + UNIX_EPOCH + Duration::from_micros(c.0) 159 + } 160 + } 161 + 162 + #[cfg(test)] 163 + mod test { 164 + use super::*; 165 + 166 + #[test] 167 + fn test_parse_commit_event() -> anyhow::Result<()> { 168 + let json = r#"{ 169 + "rev":"3llrdsginou2i", 170 + "operation":"create", 171 + "collection":"app.bsky.feed.post", 172 + "rkey":"3llrdsglqdc2s", 173 + "cid": "bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy", 174 + "record": {"$type":"app.bsky.feed.post","createdAt":"2025-04-01T16:58:06.154Z","langs":["en"],"text":"I wish apirl 1st would stop existing lol"} 175 + }"#; 176 + let commit: CommitEvent = serde_json::from_str(json)?; 177 + assert_eq!( 178 + commit.cid.unwrap(), 179 + "bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy".parse()? 
180 + ); 181 + assert_eq!( 182 + commit.record.unwrap().get(), 183 + r#"{"$type":"app.bsky.feed.post","createdAt":"2025-04-01T16:58:06.154Z","langs":["en"],"text":"I wish apirl 1st would stop existing lol"}"# 184 + ); 185 + Ok(()) 186 + } 187 + 188 + #[test] 189 + fn test_parse_whole_event() -> anyhow::Result<()> { 190 + let json = r#"{"did":"did:plc:ai3dzf35cth7s3st7n7jsd7r","time_us":1743526687419798,"kind":"commit","commit":{"rev":"3llrdsginou2i","operation":"create","collection":"app.bsky.feed.post","rkey":"3llrdsglqdc2s","record":{"$type":"app.bsky.feed.post","createdAt":"2025-04-01T16:58:06.154Z","langs":["en"],"text":"I wish apirl 1st would stop existing lol"},"cid":"bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy"}}"#; 191 + let event: JetstreamEvent = serde_json::from_str(json)?; 192 + assert_eq!(event.kind, EventKind::Commit); 193 + assert!(event.commit.is_some()); 194 + let commit = event.commit.unwrap(); 195 + assert_eq!( 196 + commit.cid.unwrap(), 197 + "bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy".parse()? 198 + ); 199 + assert_eq!( 200 + commit.record.unwrap().get(), 201 + r#"{"$type":"app.bsky.feed.post","createdAt":"2025-04-01T16:58:06.154Z","langs":["en"],"text":"I wish apirl 1st would stop existing lol"}"# 202 + ); 203 + Ok(()) 204 + } 205 + }
-40
jetstream/src/events/account.rs
··· 1 - use chrono::Utc; 2 - use serde::Deserialize; 3 - 4 - use crate::{ 5 - events::EventInfo, 6 - exports, 7 - }; 8 - 9 - /// An event representing a change to an account. 10 - #[derive(Deserialize, Debug)] 11 - pub struct AccountEvent { 12 - /// Basic metadata included with every event. 13 - #[serde(flatten)] 14 - pub info: EventInfo, 15 - /// Account specific data bundled with this event. 16 - pub account: AccountData, 17 - } 18 - 19 - /// Account specific data bundled with an account event. 20 - #[derive(Deserialize, Debug)] 21 - pub struct AccountData { 22 - /// Whether the account is currently active. 23 - pub active: bool, 24 - /// The DID of the account. 25 - pub did: exports::Did, 26 - pub seq: u64, 27 - pub time: chrono::DateTime<Utc>, 28 - /// If `active` is `false` this will be present to explain why the account is inactive. 29 - pub status: Option<AccountStatus>, 30 - } 31 - 32 - /// The possible reasons an account might be listed as inactive. 33 - #[derive(Deserialize, Debug)] 34 - #[serde(rename_all = "lowercase")] 35 - pub enum AccountStatus { 36 - Deactivated, 37 - Deleted, 38 - Suspended, 39 - TakenDown, 40 - }
-55
jetstream/src/events/commit.rs
··· 1 - use serde::Deserialize; 2 - 3 - use crate::{ 4 - events::EventInfo, 5 - exports, 6 - }; 7 - 8 - /// An event representing a repo commit, which can be a `create`, `update`, or `delete` operation. 9 - #[derive(Deserialize, Debug)] 10 - #[serde(untagged, rename_all = "snake_case")] 11 - pub enum CommitEvent<R> { 12 - CreateOrUpdate { 13 - #[serde(flatten)] 14 - info: EventInfo, 15 - commit: CommitData<R>, 16 - }, 17 - Delete { 18 - #[serde(flatten)] 19 - info: EventInfo, 20 - commit: CommitInfo, 21 - }, 22 - } 23 - 24 - /// The type of commit operation that was performed. 25 - #[derive(Deserialize, Debug, PartialEq)] 26 - #[serde(rename_all = "snake_case")] 27 - pub enum CommitType { 28 - Create, 29 - Update, 30 - Delete, 31 - } 32 - 33 - /// Basic commit specific info bundled with every event, also the only data included with a `delete` 34 - /// operation. 35 - #[derive(Deserialize, Debug)] 36 - pub struct CommitInfo { 37 - /// The type of commit operation that was performed. 38 - pub operation: CommitType, 39 - pub rev: String, 40 - pub rkey: exports::RecordKey, 41 - /// The NSID of the record type that this commit is associated with. 42 - pub collection: exports::Nsid, 43 - } 44 - 45 - /// Detailed data bundled with a commit event. This data is only included when the event is 46 - /// `create` or `update`. 47 - #[derive(Deserialize, Debug)] 48 - pub struct CommitData<R> { 49 - #[serde(flatten)] 50 - pub info: CommitInfo, 51 - /// The CID of the record that was operated on. 52 - pub cid: exports::Cid, 53 - /// The record that was operated on. 54 - pub record: R, 55 - }
-28
jetstream/src/events/identity.rs
··· 1 - use chrono::Utc; 2 - use serde::Deserialize; 3 - 4 - use crate::{ 5 - events::EventInfo, 6 - exports, 7 - }; 8 - 9 - /// An event representing a change to an identity. 10 - #[derive(Deserialize, Debug)] 11 - pub struct IdentityEvent { 12 - /// Basic metadata included with every event. 13 - #[serde(flatten)] 14 - pub info: EventInfo, 15 - /// Identity specific data bundled with this event. 16 - pub identity: IdentityData, 17 - } 18 - 19 - /// Identity specific data bundled with an identity event. 20 - #[derive(Deserialize, Debug)] 21 - pub struct IdentityData { 22 - /// The DID of the identity. 23 - pub did: exports::Did, 24 - /// The handle associated with the identity. 25 - pub handle: Option<exports::Handle>, 26 - pub seq: u64, 27 - pub time: chrono::DateTime<Utc>, 28 - }
-138
jetstream/src/events/mod.rs
··· 1 - pub mod account; 2 - pub mod commit; 3 - pub mod identity; 4 - 5 - use std::time::{ 6 - Duration, 7 - SystemTime, 8 - SystemTimeError, 9 - UNIX_EPOCH, 10 - }; 11 - 12 - use serde::Deserialize; 13 - 14 - use crate::exports; 15 - 16 - /// Opaque wrapper for the time_us cursor used by jetstream 17 - /// 18 - /// Generally, you should use a cursor 19 - #[derive(Deserialize, Debug, Clone, PartialEq, PartialOrd)] 20 - pub struct Cursor(u64); 21 - 22 - /// Basic data that is included with every event. 23 - #[derive(Deserialize, Debug)] 24 - pub struct EventInfo { 25 - pub did: exports::Did, 26 - pub time_us: Cursor, 27 - pub kind: EventKind, 28 - } 29 - 30 - #[derive(Deserialize, Debug)] 31 - #[serde(untagged)] 32 - pub enum JetstreamEvent<R> { 33 - Commit(commit::CommitEvent<R>), 34 - Identity(identity::IdentityEvent), 35 - Account(account::AccountEvent), 36 - } 37 - 38 - #[derive(Deserialize, Debug)] 39 - #[serde(rename_all = "snake_case")] 40 - pub enum EventKind { 41 - Commit, 42 - Identity, 43 - Account, 44 - } 45 - 46 - impl<R> JetstreamEvent<R> { 47 - pub fn cursor(&self) -> Cursor { 48 - match self { 49 - JetstreamEvent::Commit(commit::CommitEvent::CreateOrUpdate { info, .. }) => { 50 - info.time_us.clone() 51 - } 52 - JetstreamEvent::Commit(commit::CommitEvent::Delete { info, .. }) => { 53 - info.time_us.clone() 54 - } 55 - JetstreamEvent::Identity(e) => e.info.time_us.clone(), 56 - JetstreamEvent::Account(e) => e.info.time_us.clone(), 57 - } 58 - } 59 - } 60 - 61 - impl Cursor { 62 - /// Get a cursor that will consume all available jetstream replay 63 - /// 64 - /// This sets the cursor to zero. 65 - /// 66 - /// Jetstream instances typically only have a few days of replay. 67 - pub fn from_start() -> Self { 68 - Self(0) 69 - } 70 - /// Get a cursor for a specific time 71 - /// 72 - /// Panics: if t is older than the unix epoch: Jan 1, 1970. 
73 - /// 74 - /// If you want to receive all available jetstream replay (typically a few days), use 75 - /// .from_start() 76 - /// 77 - /// Warning: this exploits the internal implementation detail of jetstream cursors 78 - /// being ~microsecond timestamps. 79 - pub fn at(t: SystemTime) -> Self { 80 - let unix_dt = t 81 - .duration_since(UNIX_EPOCH) 82 - .expect("cannot set jetstream cursor earlier than unix epoch"); 83 - Self(unix_dt.as_micros() as u64) 84 - } 85 - /// Get a cursor rewound from now by this amount 86 - /// 87 - /// Panics: if d is greater than the time since the unix epoch: Jan 1, 1970. 88 - /// 89 - /// Jetstream instances typically only have a few days of replay. 90 - /// 91 - /// Warning: this exploits the internal implementation detail of jetstream cursors 92 - /// being ~microsecond timestamps. 93 - pub fn back_by(d: Duration) -> Self { 94 - Self::at(SystemTime::now() - d) 95 - } 96 - /// Get a Cursor from a raw u64 97 - /// 98 - /// For example, from a jetstream event's `time_us` field. 99 - pub fn from_raw_u64(time_us: u64) -> Self { 100 - Self(time_us) 101 - } 102 - /// Get the raw u64 value from this cursor. 103 - pub fn to_raw_u64(&self) -> u64 { 104 - self.0 105 - } 106 - /// Format the cursor value for use in a jetstream connection url querystring 107 - pub fn to_jetstream(&self) -> String { 108 - self.0.to_string() 109 - } 110 - /// Compute the time span since an earlier cursor or [SystemTime] 111 - /// 112 - /// Warning: this exploits the internal implementation detail of jetstream cursors 113 - /// being ~microsecond timestamps. 
114 - pub fn duration_since( 115 - &self, 116 - earlier: impl Into<SystemTime>, 117 - ) -> Result<Duration, SystemTimeError> { 118 - let t: SystemTime = self.into(); 119 - t.duration_since(earlier.into()) 120 - } 121 - /// Compute the age of the cursor vs the local clock 122 - /// 123 - /// Warning: this exploits the internal implementation detail of jetstream cursors 124 - pub fn elapsed(&self) -> Result<Duration, SystemTimeError> { 125 - let t: SystemTime = self.into(); 126 - t.elapsed() 127 - } 128 - } 129 - 130 - impl From<&Cursor> for SystemTime { 131 - /// Convert a cursor directly to a [SystemTime] 132 - /// 133 - /// Warning: this exploits the internal implementation detail of jetstream cursors 134 - /// being ~microsecond timestamps. 135 - fn from(c: &Cursor) -> Self { 136 - UNIX_EPOCH + Duration::from_micros(c.0) 137 - } 138 - }
+21 -40
jetstream/src/lib.rs
··· 3 3 pub mod exports; 4 4 5 5 use std::{ 6 - io::{ 7 - Cursor as IoCursor, 8 - Read, 9 - }, 10 - marker::PhantomData, 6 + io::Cursor as IoCursor, 11 7 time::{ 12 8 Duration, 13 9 Instant, 14 10 }, 15 11 }; 16 12 17 - use atrium_api::record::KnownRecord; 18 13 use futures_util::{ 19 14 stream::StreamExt, 20 15 SinkExt, 21 16 }; 22 - use serde::de::DeserializeOwned; 23 17 use tokio::{ 24 18 net::TcpStream, 25 19 sync::mpsc::{ ··· 124 118 const JETSTREAM_ZSTD_DICTIONARY: &[u8] = include_bytes!("../zstd/dictionary"); 125 119 126 120 /// A receiver channel for consuming Jetstream events. 127 - pub type JetstreamReceiver<R> = Receiver<JetstreamEvent<R>>; 121 + pub type JetstreamReceiver = Receiver<JetstreamEvent>; 128 122 129 123 /// An internal sender channel for sending Jetstream events to [JetstreamReceiver]'s. 130 - type JetstreamSender<R> = Sender<JetstreamEvent<R>>; 124 + type JetstreamSender = Sender<JetstreamEvent>; 131 125 132 126 /// A wrapper connector type for working with a WebSocket connection to a Jetstream instance to 133 127 /// receive and consume events. See [JetstreamConnector::connect] for more info. 134 - pub struct JetstreamConnector<R: DeserializeOwned> { 128 + pub struct JetstreamConnector { 135 129 /// The configuration for the Jetstream connection. 136 - config: JetstreamConfig<R>, 130 + config: JetstreamConfig, 137 131 } 138 132 139 133 pub enum JetstreamCompression { ··· 163 157 } 164 158 } 165 159 166 - pub struct JetstreamConfig<R: DeserializeOwned = KnownRecord> { 160 + pub struct JetstreamConfig { 167 161 /// A Jetstream endpoint to connect to with a WebSocket Scheme i.e. 168 162 /// `wss://jetstream1.us-east.bsky.network/subscribe`. 169 163 pub endpoint: String, ··· 200 194 /// can help prevent that if your consumer sometimes pauses, at a cost of higher memory 201 195 /// usage while events are buffered. 202 196 pub channel_size: usize, 203 - /// Marker for record deserializable type. 
204 - /// 205 - /// See examples/arbitrary_record.rs for an example using serde_json::Value 206 - /// 207 - /// You can omit this if you construct `JetstreamConfig { a: b, ..Default::default() }. 208 - /// If you have to specify it, use `std::marker::PhantomData` with no type parameters. 209 - pub record_type: PhantomData<R>, 210 197 } 211 198 212 - impl<R: DeserializeOwned> Default for JetstreamConfig<R> { 199 + impl Default for JetstreamConfig { 213 200 fn default() -> Self { 214 201 JetstreamConfig { 215 202 endpoint: DefaultJetstreamEndpoints::USEastOne.into(), ··· 220 207 omit_user_agent_jetstream_info: false, 221 208 replay_on_reconnect: false, 222 209 channel_size: 4096, // a few seconds of firehose buffer 223 - record_type: PhantomData, 224 210 } 225 211 } 226 212 } 227 213 228 - impl<R: DeserializeOwned> JetstreamConfig<R> { 214 + impl JetstreamConfig { 229 215 /// Constructs a new endpoint URL with the given [JetstreamConfig] applied. 230 216 pub fn get_request_builder( 231 217 &self, ··· 313 299 } 314 300 } 315 301 316 - impl<R: DeserializeOwned + Send + 'static> JetstreamConnector<R> { 302 + impl JetstreamConnector { 317 303 /// Create a Jetstream connector with a valid [JetstreamConfig]. 318 304 /// 319 305 /// After creation, you can call [connect] to connect to the provided Jetstream instance. 320 - pub fn new(config: JetstreamConfig<R>) -> Result<Self, ConfigValidationError> { 306 + pub fn new(config: JetstreamConfig) -> Result<Self, ConfigValidationError> { 321 307 // We validate the configuration here so any issues are caught early. 322 308 config.validate()?; 323 309 Ok(JetstreamConnector { config }) ··· 327 313 /// 328 314 /// A [JetstreamReceiver] is returned which can be used to respond to events. When all instances 329 315 /// of this receiver are dropped, the connection and task are automatically closed. 
330 - pub async fn connect(&self) -> Result<JetstreamReceiver<R>, ConnectionError> { 316 + pub async fn connect(&self) -> Result<JetstreamReceiver, ConnectionError> { 331 317 self.connect_cursor(None).await 332 318 } 333 319 ··· 343 329 pub async fn connect_cursor( 344 330 &self, 345 331 cursor: Option<Cursor>, 346 - ) -> Result<JetstreamReceiver<R>, ConnectionError> { 332 + ) -> Result<JetstreamReceiver, ConnectionError> { 347 333 // We validate the config again for good measure. Probably not necessary but it can't hurt. 348 334 self.config 349 335 .validate() ··· 365 351 loop { 366 352 let dict = DecoderDictionary::copy(JETSTREAM_ZSTD_DICTIONARY); 367 353 368 - let req = match build_request(connect_cursor.clone()) { 354 + let req = match build_request(connect_cursor) { 369 355 Ok(req) => req, 370 356 Err(e) => { 371 357 log::error!("Could not build jetstream websocket request: {e:?}"); ··· 373 359 } 374 360 }; 375 361 376 - let mut last_cursor = connect_cursor.clone(); 362 + let mut last_cursor = connect_cursor; 377 363 retry_attempt += 1; 378 364 if let Ok((ws_stream, _)) = connect_async(req).await { 379 365 let t_connected = Instant::now(); ··· 424 410 425 411 /// The main task that handles the WebSocket connection and sends [JetstreamEvent]'s to any 426 412 /// receivers that are listening for them. 427 - async fn websocket_task<R: DeserializeOwned>( 413 + async fn websocket_task( 428 414 dictionary: DecoderDictionary<'_>, 429 415 ws: WebSocketStream<MaybeTlsStream<TcpStream>>, 430 - send_channel: JetstreamSender<R>, 416 + send_channel: JetstreamSender, 431 417 last_cursor: &mut Option<Cursor>, 432 418 ) -> Result<(), JetstreamEventError> { 433 419 // TODO: Use the write half to allow the user to change configuration settings on the fly. 
··· 439 425 Some(Ok(message)) => { 440 426 match message { 441 427 Message::Text(json) => { 442 - let event: JetstreamEvent<R> = serde_json::from_str(&json) 428 + let event: JetstreamEvent = serde_json::from_str(&json) 443 429 .map_err(JetstreamEventError::ReceivedMalformedJSON)?; 444 - let event_cursor = event.cursor(); 430 + let event_cursor = event.cursor; 445 431 446 432 if let Some(last) = last_cursor { 447 433 if event_cursor <= *last { ··· 464 450 } 465 451 Message::Binary(zstd_json) => { 466 452 let mut cursor = IoCursor::new(zstd_json); 467 - let mut decoder = zstd::stream::Decoder::with_prepared_dictionary( 453 + let decoder = zstd::stream::Decoder::with_prepared_dictionary( 468 454 &mut cursor, 469 455 &dictionary, 470 456 ) 471 457 .map_err(JetstreamEventError::CompressionDictionaryError)?; 472 458 473 - let mut json = String::new(); 474 - decoder 475 - .read_to_string(&mut json) 476 - .map_err(JetstreamEventError::CompressionDecoderError)?; 477 - 478 - let event: JetstreamEvent<R> = serde_json::from_str(&json) 459 + let event: JetstreamEvent = serde_json::from_reader(decoder) 479 460 .map_err(JetstreamEventError::ReceivedMalformedJSON)?; 480 - let event_cursor = event.cursor(); 461 + let event_cursor = event.cursor; 481 462 482 463 if let Some(last) = last_cursor { 483 464 if event_cursor <= *last {
+6 -6
ufos/Cargo.toml
··· 5 5 6 6 [dependencies] 7 7 anyhow = "1.0.97" 8 + async-trait = "0.1.88" 8 9 bincode = { version = "2.0.1", features = ["serde"] } 10 + cardinality-estimator = { version = "1.0.2", features = ["with_serde"] } 9 11 clap = { version = "4.5.31", features = ["derive"] } 10 12 dropshot = "0.16.0" 11 13 env_logger = "0.11.7" 14 + fjall = { version = "2.8.0", features = ["lz4"] } 12 15 jetstream = { path = "../jetstream" } 13 16 log = "0.4.26" 14 17 lsm-tree = "2.6.6" 15 - schemars = "0.8.22" 18 + schemars = { version = "0.8.22", features = ["raw_value"] } 16 19 semver = "1.0.26" 17 20 serde = "1.0.219" 18 21 serde_json = "1.0.140" ··· 22 25 [target.'cfg(not(target_env = "msvc"))'.dependencies] 23 26 tikv-jemallocator = "0.6.0" 24 27 25 - [dependencies.fjall] 26 - git = "https://github.com/fjall-rs/fjall.git" 27 - branch = "fix/lockless-ranges" 28 - features = ["bytes", "single_writer_tx"] 29 - default-features = false 28 + [dev-dependencies] 29 + tempfile = "3.19.1"
+4
ufos/fuzz/.gitignore
··· 1 + target 2 + corpus 3 + artifacts 4 + coverage
+39
ufos/fuzz/Cargo.toml
··· 1 + [package] 2 + name = "ufos-fuzz" 3 + version = "0.0.0" 4 + publish = false 5 + edition = "2021" 6 + 7 + [package.metadata] 8 + cargo-fuzz = true 9 + 10 + [dependencies] 11 + libfuzzer-sys = "0.4" 12 + ufos = { path = ".." } 13 + jetstream = { path = "../../jetstream" } 14 + bincode = { version = "2.0.1", features = ["serde"] } 15 + cardinality-estimator = { version = "1.0.2", features = ["with_serde"] } 16 + 17 + [target.'cfg(not(target_env = "msvc"))'.dependencies] 18 + tikv-jemallocator = "0.6.0" 19 + 20 + [[bin]] 21 + name = "counts_value" 22 + path = "fuzz_targets/counts_value.rs" 23 + test = false 24 + doc = false 25 + bench = false 26 + 27 + [[bin]] 28 + name = "estimated_dids_value" 29 + path = "fuzz_targets/estimated_dids_value.rs" 30 + test = false 31 + doc = false 32 + bench = false 33 + 34 + [[bin]] 35 + name = "cardinality_estimator" 36 + path = "fuzz_targets/cardinality_estimator.rs" 37 + test = false 38 + doc = false 39 + bench = false
+20
ufos/fuzz/fuzz_targets/cardinality_estimator.rs
··· 1 + #![no_main] 2 + 3 + use bincode::config::{Configuration, LittleEndian, Varint, Limit, standard}; 4 + use bincode::serde::decode_from_slice; 5 + use cardinality_estimator::CardinalityEstimator; 6 + use libfuzzer_sys::fuzz_target; 7 + 8 + type C = Configuration<LittleEndian, Varint, Limit<1048576>>; 9 + static BINCODE_CONF: C = standard().with_limit::<1048576>(); 10 + 11 + fuzz_target!(|data: &[u8]| { 12 + if let Ok((mut estimator, _n)) = decode_from_slice::<CardinalityEstimator<String>, C>( 13 + data, 14 + BINCODE_CONF, 15 + ) { 16 + // crash happens *much* faster if we just do kinda anything with the estimator 17 + estimator.insert(&"asdf".to_string()); 18 + assert!(estimator.estimate() > 0); 19 + } 20 + });
+25
ufos/fuzz/fuzz_targets/counts_value.rs
··· 1 + #![no_main] 2 + 3 + // use jetstream::exports::Did; 4 + use ufos::db_types::DbBytes; 5 + use ufos::store_types::CountsValue; 6 + use libfuzzer_sys::fuzz_target; 7 + 8 + #[cfg(not(target_env = "msvc"))] 9 + use tikv_jemallocator::Jemalloc; 10 + 11 + #[cfg(not(target_env = "msvc"))] 12 + #[global_allocator] 13 + static GLOBAL: Jemalloc = Jemalloc; 14 + 15 + fuzz_target!(|data: &[u8]| { 16 + if let Ok((counts_value, n)) = CountsValue::from_db_bytes(data) { 17 + assert!(n <= data.len()); 18 + let serialized = counts_value.to_db_bytes().unwrap(); 19 + assert_eq!(serialized.len(), n); 20 + let (and_back, n_again) = CountsValue::from_db_bytes(&serialized).unwrap(); 21 + assert_eq!(n_again, n); 22 + assert_eq!(and_back.records(), counts_value.records()); 23 + assert_eq!(and_back.dids().estimate(), counts_value.dids().estimate()); 24 + } 25 + });
+24
ufos/fuzz/fuzz_targets/estimated_dids_value.rs
··· 1 + #![no_main] 2 + 3 + // use jetstream::exports::Did; 4 + use ufos::db_types::DbBytes; 5 + use ufos::store_types::EstimatedDidsValue; 6 + use libfuzzer_sys::fuzz_target; 7 + 8 + #[cfg(not(target_env = "msvc"))] 9 + use tikv_jemallocator::Jemalloc; 10 + 11 + #[cfg(not(target_env = "msvc"))] 12 + #[global_allocator] 13 + static GLOBAL: Jemalloc = Jemalloc; 14 + 15 + fuzz_target!(|data: &[u8]| { 16 + if let Ok((counts_value, n)) = EstimatedDidsValue::from_db_bytes(data) { 17 + assert!(n <= data.len()); 18 + let serialized = counts_value.to_db_bytes().unwrap(); 19 + assert_eq!(serialized.len(), n); 20 + let (and_back, n_again) = EstimatedDidsValue::from_db_bytes(&serialized).unwrap(); 21 + assert_eq!(n_again, n); 22 + assert_eq!(and_back.0.estimate(), counts_value.0.estimate()); 23 + } 24 + });
+15
ufos/readme.md
··· 122 122 ```bash 123 123 sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1 124 124 ``` 125 + 126 + 127 + --- 128 + 129 + ## fuzzing 130 + 131 + We got bit by https://github.com/cloudflare/cardinality-estimator/pull/12, so now we have fuzz targets. 132 + 133 + Install cargo-fuzz, then: 134 + 135 + ```bash 136 + RUSTFLAGS="-Z sanitizer=address" cargo +nightly fuzz run cardinality_estimator 137 + ``` 138 + 139 + to fuzz the cardinality estimator. Swap in `counts_value` or `estimated_dids_value` to fuzz the serialized count-value round-trips instead.
+92 -140
ufos/src/consumer.rs
··· 1 1 use jetstream::{ 2 - events::{ 3 - account::AccountEvent, 4 - commit::{CommitData, CommitEvent, CommitInfo, CommitType}, 5 - Cursor, EventInfo, JetstreamEvent, 6 - }, 7 - exports::Did, 2 + events::{Cursor, EventKind, JetstreamEvent}, 3 + exports::{Did, Nsid}, 8 4 DefaultJetstreamEndpoints, JetstreamCompression, JetstreamConfig, JetstreamConnector, 9 5 JetstreamReceiver, 10 6 }; ··· 12 8 use std::time::Duration; 13 9 use tokio::sync::mpsc::{channel, Receiver, Sender}; 14 10 15 - use crate::{CreateRecord, DeleteAccount, DeleteRecord, EventBatch, ModifyRecord, UpdateRecord}; 11 + use crate::error::{BatchInsertError, FirehoseEventError}; 12 + use crate::{DeleteAccount, EventBatch, UFOsCommit}; 16 13 17 - const MAX_BATCHED_RECORDS: usize = 128; // *non-blocking* limit. drops oldest batched record per collection once reached. 18 - const MAX_BATCHED_MODIFIES: usize = 512; // hard limit, total updates and deletes across all collections. 19 - const MAX_ACCOUNT_REMOVES: usize = 512; // hard limit, total account deletions. actually the least frequent event, but tiny. 20 - const MAX_BATCHED_COLLECTIONS: usize = 64; // hard limit, MAX_BATCHED_RECORDS applies per collection 21 - const MIN_BATCH_SPAN_SECS: f64 = 2.; // try to get a bit of rest a bit. 22 - const MAX_BATCH_SPAN_SECS: f64 = 60.; // hard limit of duration from oldest to latest event cursor within a batch, in seconds. 14 + pub const MAX_BATCHED_RECORDS: usize = 128; // *non-blocking* limit. drops oldest batched record per collection once reached. 
15 + pub const MAX_ACCOUNT_REMOVES: usize = 1024; // hard limit, extremely unlikely to reach, but just in case 16 + pub const MAX_BATCHED_COLLECTIONS: usize = 64; // hard limit, MAX_BATCHED_RECORDS applies per-collection 17 + pub const MIN_BATCH_SPAN_SECS: f64 = 2.; // breathe 18 + pub const MAX_BATCH_SPAN_SECS: f64 = 60.; // hard limit, pause consumer if we're unable to send by now 19 + pub const SEND_TIMEOUT_S: f64 = 15.; // if the channel is blocked longer than this, something is probably up 20 + pub const BATCH_QUEUE_SIZE: usize = 1; // nearly-rendez-vous 23 21 24 - const SEND_TIMEOUT_S: f64 = 60.; 25 - const BATCH_QUEUE_SIZE: usize = 512; // 4096 got OOM'd. update: 1024 also got OOM'd during L0 compaction blocking 22 + pub type LimitedBatch = EventBatch<MAX_BATCHED_RECORDS>; 23 + 24 + #[derive(Debug, Default)] 25 + struct CurrentBatch { 26 + initial_cursor: Option<Cursor>, 27 + batch: LimitedBatch, 28 + } 26 29 27 30 #[derive(Debug)] 28 - struct Batcher { 29 - jetstream_receiver: JetstreamReceiver<serde_json::Value>, 30 - batch_sender: Sender<EventBatch>, 31 - current_batch: EventBatch, 31 + pub struct Batcher { 32 + jetstream_receiver: JetstreamReceiver, 33 + batch_sender: Sender<LimitedBatch>, 34 + current_batch: CurrentBatch, 32 35 } 33 36 34 37 pub async fn consume( 35 38 jetstream_endpoint: &str, 36 39 cursor: Option<Cursor>, 37 40 no_compress: bool, 38 - ) -> anyhow::Result<Receiver<EventBatch>> { 41 + ) -> anyhow::Result<Receiver<LimitedBatch>> { 39 42 let endpoint = DefaultJetstreamEndpoints::endpoint_or_shortcut(jetstream_endpoint); 40 43 if endpoint == jetstream_endpoint { 41 - eprintln!("connecting to jetstream at {endpoint}"); 44 + log::info!("connecting to jetstream at {endpoint}"); 42 45 } else { 43 - eprintln!("connecting to jetstream at {jetstream_endpoint} => {endpoint}"); 46 + log::info!("connecting to jetstream at {jetstream_endpoint} => {endpoint}"); 44 47 } 45 - let config: JetstreamConfig<serde_json::Value> = JetstreamConfig { 48 + let 
config: JetstreamConfig = JetstreamConfig { 46 49 endpoint, 47 50 compression: if no_compress { 48 51 JetstreamCompression::None 49 52 } else { 50 53 JetstreamCompression::Zstd 51 54 }, 52 - channel_size: 64, // small because we'd rather buffer events into batches 55 + replay_on_reconnect: true, 56 + channel_size: 1024, // buffer up to ~1s of jetstream events 53 57 ..Default::default() 54 58 }; 55 59 let jetstream_receiver = JetstreamConnector::new(config)? 56 60 .connect_cursor(cursor) 57 61 .await?; 58 - let (batch_sender, batch_reciever) = channel::<EventBatch>(BATCH_QUEUE_SIZE); 62 + let (batch_sender, batch_reciever) = channel::<LimitedBatch>(BATCH_QUEUE_SIZE); 59 63 let mut batcher = Batcher::new(jetstream_receiver, batch_sender); 60 64 tokio::task::spawn(async move { batcher.run().await }); 61 65 Ok(batch_reciever) 62 66 } 63 67 64 68 impl Batcher { 65 - fn new( 66 - jetstream_receiver: JetstreamReceiver<serde_json::Value>, 67 - batch_sender: Sender<EventBatch>, 68 - ) -> Self { 69 + pub fn new(jetstream_receiver: JetstreamReceiver, batch_sender: Sender<LimitedBatch>) -> Self { 69 70 Self { 70 71 jetstream_receiver, 71 72 batch_sender, ··· 73 74 } 74 75 } 75 76 76 - async fn run(&mut self) -> anyhow::Result<()> { 77 + pub async fn run(&mut self) -> anyhow::Result<()> { 77 78 loop { 78 79 if let Some(event) = self.jetstream_receiver.recv().await { 79 80 self.handle_event(event).await? ··· 83 84 } 84 85 } 85 86 86 - async fn handle_event( 87 - &mut self, 88 - event: JetstreamEvent<serde_json::Value>, 89 - ) -> anyhow::Result<()> { 90 - let event_cursor = event.cursor(); 91 - 92 - if let Some(earliest) = &self.current_batch.first_jetstream_cursor { 93 - if event_cursor.duration_since(earliest)? > Duration::from_secs_f64(MAX_BATCH_SPAN_SECS) 87 + async fn handle_event(&mut self, event: JetstreamEvent) -> anyhow::Result<()> { 88 + if let Some(earliest) = &self.current_batch.initial_cursor { 89 + if event.cursor.duration_since(earliest)? 
> Duration::from_secs_f64(MAX_BATCH_SPAN_SECS) 94 90 { 95 - self.send_current_batch_now().await?; 91 + self.send_current_batch_now(false).await?; 96 92 } 97 93 } else { 98 - self.current_batch.first_jetstream_cursor = Some(event_cursor.clone()); 94 + self.current_batch.initial_cursor = Some(event.cursor); 99 95 } 100 96 101 - match event { 102 - JetstreamEvent::Commit(CommitEvent::CreateOrUpdate { commit, info }) => { 103 - match commit.info.operation { 104 - CommitType::Create => self.handle_create_record(commit, info).await?, 105 - CommitType::Update => { 106 - self.handle_modify_record(modify_update(commit, info)) 107 - .await? 108 - } 109 - CommitType::Delete => { 110 - panic!("jetstream Commit::CreateOrUpdate had Delete operation type") 111 - } 97 + match event.kind { 98 + EventKind::Commit => { 99 + let commit = event 100 + .commit 101 + .ok_or(FirehoseEventError::CommitEventMissingCommit)?; 102 + let (commit, nsid) = UFOsCommit::from_commit_info(commit, event.did, event.cursor)?; 103 + self.handle_commit(commit, nsid).await?; 104 + } 105 + EventKind::Account => { 106 + let account = event 107 + .account 108 + .ok_or(FirehoseEventError::AccountEventMissingAccount)?; 109 + if !account.active { 110 + self.handle_delete_account(event.did, event.cursor).await?; 112 111 } 113 112 } 114 - JetstreamEvent::Commit(CommitEvent::Delete { commit, info }) => { 115 - self.handle_modify_record(modify_delete(commit, info)) 116 - .await? 117 - } 118 - JetstreamEvent::Account(AccountEvent { info, account }) if !account.active => { 119 - self.handle_remove_account(info.did, info.time_us).await? 120 - } 121 - JetstreamEvent::Account(_) => {} // ignore account *activations* 122 - JetstreamEvent::Identity(_) => {} // identity events are noops for us 123 - }; 124 - self.current_batch.last_jetstream_cursor = Some(event_cursor.clone()); 113 + _ => {} 114 + } 125 115 126 116 // if the queue is empty and we have enough, send immediately. otherewise, let the current batch fill up. 
127 - if let Some(earliest) = &self.current_batch.first_jetstream_cursor { 128 - if event_cursor.duration_since(earliest)?.as_secs_f64() > MIN_BATCH_SPAN_SECS 117 + if let Some(earliest) = &self.current_batch.initial_cursor { 118 + if event.cursor.duration_since(earliest)?.as_secs_f64() > MIN_BATCH_SPAN_SECS 129 119 && self.batch_sender.capacity() == BATCH_QUEUE_SIZE 130 120 { 131 - log::trace!("queue empty: immediately sending batch."); 132 - if let Err(send_err) = self 133 - .batch_sender 134 - .send(mem::take(&mut self.current_batch)) 135 - .await 136 - { 137 - anyhow::bail!("Could not send batch, likely because the receiver closed or dropped: {send_err:?}"); 138 - } 121 + self.send_current_batch_now(true).await?; 139 122 } 140 123 } 141 124 Ok(()) 142 125 } 143 126 144 - // holds up all consumer progress until it can send to the channel 145 - // use this when the current batch is too full to add more to it 146 - async fn send_current_batch_now(&mut self) -> anyhow::Result<()> { 147 - log::warn!( 148 - "attempting to send batch now (capacity: {})", 149 - self.batch_sender.capacity() 127 + async fn handle_commit(&mut self, commit: UFOsCommit, collection: Nsid) -> anyhow::Result<()> { 128 + let optimistic_res = self.current_batch.batch.insert_commit_by_nsid( 129 + &collection, 130 + commit, 131 + MAX_BATCHED_COLLECTIONS, 150 132 ); 151 - self.batch_sender 152 - .send_timeout( 153 - mem::take(&mut self.current_batch), 154 - Duration::from_secs_f64(SEND_TIMEOUT_S), 155 - ) 156 - .await?; 157 - Ok(()) 158 - } 159 133 160 - async fn handle_create_record( 161 - &mut self, 162 - commit: CommitData<serde_json::Value>, 163 - info: EventInfo, 164 - ) -> anyhow::Result<()> { 165 - if !self 166 - .current_batch 167 - .record_creates 168 - .contains_key(&commit.info.collection) 169 - && self.current_batch.record_creates.len() >= MAX_BATCHED_COLLECTIONS 170 - { 171 - self.send_current_batch_now().await?; 134 + if let Err(BatchInsertError::BatchFull(commit)) = optimistic_res { 
135 + self.send_current_batch_now(false).await?; 136 + self.current_batch.batch.insert_commit_by_nsid( 137 + &collection, 138 + commit, 139 + MAX_BATCHED_COLLECTIONS, 140 + )?; 141 + } else { 142 + optimistic_res?; 172 143 } 173 - let record = CreateRecord { 174 - did: info.did, 175 - rkey: commit.info.rkey, 176 - record: commit.record, 177 - cursor: info.time_us, 178 - }; 179 - let collection = self 180 - .current_batch 181 - .record_creates 182 - .entry(commit.info.collection) 183 - .or_default(); 184 - collection.total_seen += 1; 185 - collection.samples.push_front(record); 186 - collection.samples.truncate(MAX_BATCHED_RECORDS); 187 - Ok(()) 188 - } 189 144 190 - async fn handle_modify_record(&mut self, modify_record: ModifyRecord) -> anyhow::Result<()> { 191 - if self.current_batch.record_modifies.len() >= MAX_BATCHED_MODIFIES { 192 - self.send_current_batch_now().await?; 193 - } 194 - self.current_batch.record_modifies.push(modify_record); 195 145 Ok(()) 196 146 } 197 147 198 - async fn handle_remove_account(&mut self, did: Did, cursor: Cursor) -> anyhow::Result<()> { 199 - if self.current_batch.account_removes.len() >= MAX_ACCOUNT_REMOVES { 200 - self.send_current_batch_now().await?; 148 + async fn handle_delete_account(&mut self, did: Did, cursor: Cursor) -> anyhow::Result<()> { 149 + if self.current_batch.batch.account_removes.len() >= MAX_ACCOUNT_REMOVES { 150 + self.send_current_batch_now(false).await?; 201 151 } 202 152 self.current_batch 153 + .batch 203 154 .account_removes 204 155 .push(DeleteAccount { did, cursor }); 205 156 Ok(()) 206 157 } 207 - } 208 158 209 - fn modify_update(commit: CommitData<serde_json::Value>, info: EventInfo) -> ModifyRecord { 210 - ModifyRecord::Update(UpdateRecord { 211 - did: info.did, 212 - collection: commit.info.collection, 213 - rkey: commit.info.rkey, 214 - record: commit.record, 215 - cursor: info.time_us, 216 - }) 217 - } 218 - 219 - fn modify_delete(commit_info: CommitInfo, info: EventInfo) -> ModifyRecord { 220 - 
ModifyRecord::Delete(DeleteRecord { 221 - did: info.did, 222 - collection: commit_info.collection, 223 - rkey: commit_info.rkey, 224 - cursor: info.time_us, 225 - }) 159 + // holds up all consumer progress until it can send to the channel 160 + // use this when the current batch is too full to add more to it 161 + async fn send_current_batch_now(&mut self, small: bool) -> anyhow::Result<()> { 162 + let beginning = match self.current_batch.initial_cursor.map(|c| c.elapsed()) { 163 + None => "unknown".to_string(), 164 + Some(Ok(t)) => format!("{:?}", t), 165 + Some(Err(e)) => format!("+{:?}", e.duration()), 166 + }; 167 + log::info!( 168 + "sending batch now from {beginning}, {}, queue capacity: {}", 169 + if small { "small" } else { "full" }, 170 + self.batch_sender.capacity(), 171 + ); 172 + let current = mem::take(&mut self.current_batch); 173 + self.batch_sender 174 + .send_timeout(current.batch, Duration::from_secs_f64(SEND_TIMEOUT_S)) 175 + .await?; 176 + Ok(()) 177 + } 226 178 }
+51 -5
ufos/src/db_types.rs
··· 32 32 UnterminatedString, 33 33 #[error("could not convert from utf8: {0}")] 34 34 NotUtf8(#[from] std::str::Utf8Error), 35 + #[error("could not convert from utf8: {0}")] 36 + NotUtf8String(#[from] std::string::FromUtf8Error), 35 37 #[error("could not get array from slice: {0}")] 36 38 BadSlice(#[from] std::array::TryFromSliceError), 37 39 #[error("wrong static prefix. expected {1:?}, found {0:?}")] ··· 42 44 DecodeTooManyBytes(usize), 43 45 #[error("expected exclusive bound from lsm_tree (likely bug)")] 44 46 BadRangeBound, 47 + #[error("expected a truncated u64 for mod {0}, found remainder: {1}")] 48 + InvalidTruncated(u64, u64), 45 49 } 46 50 47 51 fn bincode_conf() -> impl Config { 48 - standard().with_big_endian().with_fixed_int_encoding() 52 + standard() 53 + .with_big_endian() 54 + .with_fixed_int_encoding() 55 + .with_limit::<{ 2_usize.pow(20) }>() // 1MB 49 56 } 50 57 51 58 pub trait DbBytes { ··· 73 80 pub fn to_prefix_db_bytes(&self) -> Result<Vec<u8>, EncodingError> { 74 81 self.prefix.to_db_bytes() 75 82 } 76 - pub fn range_end(&self) -> Result<Vec<u8>, EncodingError> { 77 - let prefix_bytes = self.prefix.to_db_bytes()?; 83 + pub fn prefix_range_end(prefix: &P) -> Result<Vec<u8>, EncodingError> { 84 + let prefix_bytes = prefix.to_db_bytes()?; 78 85 let (_, Bound::Excluded(range_end)) = prefix_to_range(&prefix_bytes) else { 79 86 return Err(EncodingError::BadRangeBound); 80 87 }; 81 88 Ok(range_end.to_vec()) 82 89 } 90 + pub fn range_end(&self) -> Result<Vec<u8>, EncodingError> { 91 + Self::prefix_range_end(&self.prefix) 92 + } 83 93 pub fn range(&self) -> Result<Range<Vec<u8>>, EncodingError> { 84 94 let prefix_bytes = self.prefix.to_db_bytes()?; 85 95 let (Bound::Included(start), Bound::Excluded(end)) = prefix_to_range(&prefix_bytes) else { ··· 109 119 Self: Sized, 110 120 { 111 121 let (prefix, eaten) = P::from_db_bytes(bytes)?; 122 + assert!( 123 + eaten <= bytes.len(), 124 + "eaten({}) < len({})", 125 + eaten, 126 + bytes.len() 127 + ); 112 128 
let Some(suffix_bytes) = bytes.get(eaten..) else { 113 129 return Err(EncodingError::DecodeMissingSuffix); 114 130 }; 131 + if suffix_bytes.is_empty() { 132 + return Err(EncodingError::DecodeMissingSuffix); 133 + }; 115 134 let (suffix, also_eaten) = S::from_db_bytes(suffix_bytes)?; 135 + assert!( 136 + also_eaten <= suffix_bytes.len(), 137 + "also eaten({}) < suffix len({})", 138 + also_eaten, 139 + suffix_bytes.len() 140 + ); 116 141 Ok((Self { prefix, suffix }, eaten + also_eaten)) 117 142 } 118 143 } ··· 174 199 175 200 impl<T> DbBytes for T 176 201 where 177 - T: BincodeEncode + BincodeDecode<()> + UseBincodePlz + Sized, 202 + T: BincodeEncode + BincodeDecode<()> + UseBincodePlz + Sized + std::fmt::Debug, 178 203 { 179 204 fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> { 180 205 Ok(encode_to_vec(self, bincode_conf())?) ··· 184 209 } 185 210 } 186 211 212 + /// helper trait: impl on a type to get helpers to implement DbBytes 213 + pub trait SerdeBytes: serde::Serialize + for<'a> serde::Deserialize<'a> { 214 + fn to_bytes(&self) -> Result<Vec<u8>, EncodingError> 215 + where 216 + Self: std::fmt::Debug, 217 + { 218 + Ok(bincode::serde::encode_to_vec(self, bincode_conf())?) 219 + } 220 + fn from_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> { 221 + Ok(bincode::serde::decode_from_slice(bytes, bincode_conf())?) 
222 + } 223 + } 224 + 187 225 ////// 226 + 227 + impl DbBytes for Vec<u8> { 228 + fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> { 229 + Ok(self.to_vec()) 230 + } 231 + fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> { 232 + Ok((bytes.to_owned(), bytes.len())) 233 + } 234 + } 188 235 189 236 /// Lexicographic-sort-friendly null-terminating serialization for String 190 237 /// ··· 358 405 (1234, "", "empty string"), 359 406 (789, "aaaaa", "string and cursor"), 360 407 ] { 361 - eprintln!("{desc}"); 362 408 let original = TwoThings { 363 409 prefix: Cursor::from_raw_u64(tired_prefix), 364 410 suffix: sad_suffix.to_string(),
+41
ufos/src/error.rs
··· 1 + use crate::db_types::EncodingError; 2 + use crate::UFOsCommit; 3 + use thiserror::Error; 4 + 5 + #[derive(Debug, Error)] 6 + pub enum FirehoseEventError { 7 + #[error("Create/Update commit operation missing record data")] 8 + CruMissingRecord, 9 + #[error("Account event missing account info")] 10 + AccountEventMissingAccount, 11 + #[error("Commit event missing commit info")] 12 + CommitEventMissingCommit, 13 + } 14 + 15 + #[derive(Debug, Error)] 16 + pub enum BatchInsertError { 17 + #[error("Batch is full and no creates are left to be truncated")] 18 + BatchFull(UFOsCommit), 19 + #[error("Bug: tried to index beyond batch limit: {0}")] 20 + BatchOverflow(usize), 21 + #[error("Bug: non-terminating head advancement??")] 22 + BatchForever, 23 + } 24 + 25 + #[derive(Debug, Error)] 26 + pub enum StorageError { 27 + #[error("Failed to initialize: {0}")] 28 + InitError(String), 29 + #[error("DB seems to be in a bad state: {0}")] 30 + BadStateError(String), 31 + #[error("Fjall error")] 32 + FjallError(#[from] fjall::Error), 33 + #[error("LSM-tree error (from fjall)")] 34 + FjallLsmError(#[from] fjall::LsmError), 35 + #[error("Bytes encoding error")] 36 + EncodingError(#[from] EncodingError), 37 + #[error("If you ever see this, there's a bug in the code. The error was stolen")] 38 + Stolen, 39 + #[error("Failed to join tokio task: {0}")] 40 + JoinError(#[from] tokio::task::JoinError), 41 + }
+32
ufos/src/file_consumer.rs
··· 1 + use crate::consumer::{Batcher, LimitedBatch, BATCH_QUEUE_SIZE}; 2 + use anyhow::Result; 3 + use jetstream::{error::JetstreamEventError, events::JetstreamEvent}; 4 + use std::path::PathBuf; 5 + use tokio::{ 6 + fs::File, 7 + io::{AsyncBufReadExt, BufReader}, 8 + sync::mpsc::{channel, Receiver, Sender}, 9 + }; 10 + 11 + async fn read_jsonl(f: File, sender: Sender<JetstreamEvent>) -> Result<()> { 12 + let mut lines = BufReader::new(f).lines(); 13 + while let Some(line) = lines.next_line().await? { 14 + let event: JetstreamEvent = 15 + serde_json::from_str(&line).map_err(JetstreamEventError::ReceivedMalformedJSON)?; 16 + if sender.send(event).await.is_err() { 17 + log::warn!("All receivers for the jsonl fixture have been dropped, bye."); 18 + return Err(JetstreamEventError::ReceiverClosedError.into()); 19 + } 20 + } 21 + Ok(()) 22 + } 23 + 24 + pub async fn consume(p: PathBuf) -> Result<Receiver<LimitedBatch>> { 25 + let f = File::open(p).await?; 26 + let (jsonl_sender, jsonl_receiver) = channel::<JetstreamEvent>(16); 27 + let (batch_sender, batch_reciever) = channel::<LimitedBatch>(BATCH_QUEUE_SIZE); 28 + let mut batcher = Batcher::new(jsonl_receiver, batch_sender); 29 + tokio::task::spawn(async move { read_jsonl(f, jsonl_sender).await }); 30 + tokio::task::spawn(async move { batcher.run().await }); 31 + Ok(batch_reciever) 32 + }
+429 -33
ufos/src/lib.rs
··· 1 1 pub mod consumer; 2 2 pub mod db_types; 3 + pub mod error; 4 + pub mod file_consumer; 3 5 pub mod server; 4 - pub mod store; 6 + pub mod storage; 7 + pub mod storage_fjall; 8 + pub mod storage_mem; 5 9 pub mod store_types; 6 10 7 - use jetstream::events::Cursor; 11 + use crate::error::BatchInsertError; 12 + use cardinality_estimator::CardinalityEstimator; 13 + use error::FirehoseEventError; 14 + use jetstream::events::{CommitEvent, CommitOp, Cursor}; 8 15 use jetstream::exports::{Did, Nsid, RecordKey}; 9 - use std::collections::{HashMap, VecDeque}; 10 - 11 - #[derive(Debug, Clone)] 12 - pub struct CreateRecord { 13 - pub did: Did, 14 - // collection omitted because the batch keys off it 15 - pub rkey: RecordKey, 16 - pub record: serde_json::Value, 17 - pub cursor: Cursor, 18 - } 16 + use schemars::JsonSchema; 17 + use serde::Serialize; 18 + use serde_json::value::RawValue; 19 + use std::collections::HashMap; 19 20 20 21 #[derive(Debug, Default, Clone)] 21 - pub struct CollectionSamples { 22 + pub struct CollectionCommits<const LIMIT: usize> { 22 23 pub total_seen: usize, 23 - pub samples: VecDeque<CreateRecord>, 24 + pub dids_estimate: CardinalityEstimator<Did>, 25 + pub commits: Vec<UFOsCommit>, 26 + head: usize, 27 + non_creates: usize, 28 + } 29 + 30 + impl<const LIMIT: usize> CollectionCommits<LIMIT> { 31 + fn advance_head(&mut self) { 32 + self.head += 1; 33 + if self.head > LIMIT { 34 + self.head = 0; 35 + } 36 + } 37 + pub fn truncating_insert(&mut self, commit: UFOsCommit) -> Result<(), BatchInsertError> { 38 + if self.non_creates == LIMIT { 39 + return Err(BatchInsertError::BatchFull(commit)); 40 + } 41 + let did = commit.did.clone(); 42 + let is_create = commit.action.is_create(); 43 + if self.commits.len() < LIMIT { 44 + self.commits.push(commit); 45 + if self.commits.capacity() > LIMIT { 46 + self.commits.shrink_to(LIMIT); // save mem?????? maybe?? 
47 + } 48 + } else { 49 + let head_started_at = self.head; 50 + loop { 51 + let candidate = self 52 + .commits 53 + .get_mut(self.head) 54 + .ok_or(BatchInsertError::BatchOverflow(self.head))?; 55 + if candidate.action.is_create() { 56 + *candidate = commit; 57 + break; 58 + } 59 + self.advance_head(); 60 + if self.head == head_started_at { 61 + return Err(BatchInsertError::BatchForever); 62 + } 63 + } 64 + } 65 + 66 + if is_create { 67 + self.total_seen += 1; 68 + self.dids_estimate.insert(&did); 69 + } else { 70 + self.non_creates += 1; 71 + } 72 + 73 + Ok(()) 74 + } 24 75 } 25 76 26 77 #[derive(Debug, Clone)] 27 - pub struct UpdateRecord { 78 + pub struct DeleteAccount { 28 79 pub did: Did, 29 - pub collection: Nsid, 30 - pub rkey: RecordKey, 31 - pub record: serde_json::Value, 32 80 pub cursor: Cursor, 33 81 } 34 82 35 83 #[derive(Debug, Clone)] 36 - pub struct DeleteRecord { 37 - pub did: Did, 38 - pub collection: Nsid, 39 - pub rkey: RecordKey, 40 - pub cursor: Cursor, 84 + pub enum CommitAction { 85 + Put(PutAction), 86 + Cut, 87 + } 88 + impl CommitAction { 89 + pub fn is_create(&self) -> bool { 90 + match self { 91 + CommitAction::Put(PutAction { is_update, .. }) => !is_update, 92 + CommitAction::Cut => false, 93 + } 94 + } 41 95 } 42 96 43 97 #[derive(Debug, Clone)] 44 - pub enum ModifyRecord { 45 - Update(UpdateRecord), 46 - Delete(DeleteRecord), 98 + pub struct PutAction { 99 + record: Box<RawValue>, 100 + is_update: bool, 47 101 } 48 102 49 103 #[derive(Debug, Clone)] 50 - pub struct DeleteAccount { 104 + pub struct UFOsCommit { 105 + cursor: Cursor, 106 + did: Did, 107 + rkey: RecordKey, 108 + rev: String, 109 + action: CommitAction, 110 + } 111 + 112 + #[derive(Debug, Clone, Serialize)] 113 + pub struct UFOsRecord { 114 + pub cursor: Cursor, 51 115 pub did: Did, 52 - pub cursor: Cursor, 116 + pub collection: Nsid, 117 + pub rkey: RecordKey, 118 + pub rev: String, 119 + // TODO: cid? 
120 + pub record: Box<RawValue>, 121 + pub is_update: bool, 122 + } 123 + 124 + impl UFOsCommit { 125 + pub fn from_commit_info( 126 + commit: CommitEvent, 127 + did: Did, 128 + cursor: Cursor, 129 + ) -> Result<(Self, Nsid), FirehoseEventError> { 130 + let action = match commit.operation { 131 + CommitOp::Delete => CommitAction::Cut, 132 + cru => CommitAction::Put(PutAction { 133 + record: commit.record.ok_or(FirehoseEventError::CruMissingRecord)?, 134 + is_update: cru == CommitOp::Update, 135 + }), 136 + }; 137 + let batched = Self { 138 + cursor, 139 + did, 140 + rkey: commit.rkey, 141 + rev: commit.rev, 142 + action, 143 + }; 144 + Ok((batched, commit.collection)) 145 + } 53 146 } 54 147 55 148 #[derive(Debug, Default, Clone)] 56 - pub struct EventBatch { 57 - pub record_creates: HashMap<Nsid, CollectionSamples>, 58 - pub record_modifies: Vec<ModifyRecord>, 149 + pub struct EventBatch<const LIMIT: usize> { 150 + pub commits_by_nsid: HashMap<Nsid, CollectionCommits<LIMIT>>, 59 151 pub account_removes: Vec<DeleteAccount>, 60 - pub first_jetstream_cursor: Option<Cursor>, 61 - pub last_jetstream_cursor: Option<Cursor>, 152 + } 153 + 154 + impl<const LIMIT: usize> EventBatch<LIMIT> { 155 + pub fn insert_commit_by_nsid( 156 + &mut self, 157 + collection: &Nsid, 158 + commit: UFOsCommit, 159 + max_collections: usize, 160 + ) -> Result<(), BatchInsertError> { 161 + let map = &mut self.commits_by_nsid; 162 + if !map.contains_key(collection) && map.len() >= max_collections { 163 + return Err(BatchInsertError::BatchFull(commit)); 164 + } 165 + map.entry(collection.clone()) 166 + .or_default() 167 + .truncating_insert(commit)?; 168 + Ok(()) 169 + } 170 + pub fn total_records(&self) -> usize { 171 + self.commits_by_nsid.values().map(|v| v.commits.len()).sum() 172 + } 173 + pub fn total_seen(&self) -> usize { 174 + self.commits_by_nsid.values().map(|v| v.total_seen).sum() 175 + } 176 + pub fn total_collections(&self) -> usize { 177 + self.commits_by_nsid.len() 178 + } 179 + 
pub fn account_removes(&self) -> usize { 180 + self.account_removes.len() 181 + } 182 + pub fn estimate_dids(&self) -> usize { 183 + let mut estimator = CardinalityEstimator::<Did>::new(); 184 + for commits in self.commits_by_nsid.values() { 185 + estimator.merge(&commits.dids_estimate); 186 + } 187 + estimator.estimate() 188 + } 189 + pub fn latest_cursor(&self) -> Option<Cursor> { 190 + let mut oldest = Cursor::from_start(); 191 + for commits in self.commits_by_nsid.values() { 192 + for commit in &commits.commits { 193 + if commit.cursor > oldest { 194 + oldest = commit.cursor; 195 + } 196 + } 197 + } 198 + if let Some(del) = self.account_removes.last() { 199 + if del.cursor > oldest { 200 + oldest = del.cursor; 201 + } 202 + } 203 + if oldest > Cursor::from_start() { 204 + Some(oldest) 205 + } else { 206 + None 207 + } 208 + } 209 + pub fn is_empty(&self) -> bool { 210 + self.commits_by_nsid.is_empty() && self.account_removes.is_empty() 211 + } 212 + } 213 + 214 + #[derive(Debug, Serialize, JsonSchema)] 215 + pub enum ConsumerInfo { 216 + Jetstream { 217 + endpoint: String, 218 + started_at: u64, 219 + latest_cursor: Option<u64>, 220 + }, 221 + } 222 + 223 + #[derive(Debug, Default, PartialEq, Serialize, JsonSchema)] 224 + pub struct TopCollections { 225 + total_records: u64, 226 + dids_estimate: u64, 227 + nsid_child_segments: HashMap<String, TopCollections>, 228 + } 229 + 230 + // this is not safe from ~DOS 231 + // todo: remove this and just iterate the all-time rollups to get nsids? (or recent rollups?) 
232 + impl From<TopCollections> for Vec<String> { 233 + fn from(tc: TopCollections) -> Self { 234 + let mut me = vec![]; 235 + for (segment, children) in tc.nsid_child_segments { 236 + let child_segments: Self = children.into(); 237 + if child_segments.is_empty() { 238 + me.push(segment); 239 + } else { 240 + for ch in child_segments { 241 + let nsid = format!("{segment}.{ch}"); 242 + me.push(nsid); 243 + } 244 + } 245 + } 246 + me 247 + } 248 + } 249 + 250 + #[cfg(test)] 251 + mod tests { 252 + use super::*; 253 + 254 + #[test] 255 + fn test_top_collections_to_nsids() { 256 + let empty_tc = TopCollections::default(); 257 + assert_eq!(Into::<Vec<String>>::into(empty_tc), Vec::<String>::new()); 258 + 259 + let tc = TopCollections { 260 + nsid_child_segments: HashMap::from([ 261 + ( 262 + "a".to_string(), 263 + TopCollections { 264 + nsid_child_segments: HashMap::from([ 265 + ("b".to_string(), TopCollections::default()), 266 + ("c".to_string(), TopCollections::default()), 267 + ]), 268 + ..Default::default() 269 + }, 270 + ), 271 + ("z".to_string(), TopCollections::default()), 272 + ]), 273 + ..Default::default() 274 + }; 275 + 276 + let mut nsids: Vec<String> = tc.into(); 277 + nsids.sort(); 278 + assert_eq!(nsids, ["a.b", "a.c", "z"]); 279 + } 280 + 281 + #[test] 282 + fn test_truncating_insert_truncates() -> anyhow::Result<()> { 283 + let mut commits: CollectionCommits<2> = Default::default(); 284 + 285 + commits.truncating_insert(UFOsCommit { 286 + cursor: Cursor::from_raw_u64(100), 287 + did: Did::new("did:plc:whatever".to_string()).unwrap(), 288 + rkey: RecordKey::new("rkey-asdf-a".to_string()).unwrap(), 289 + rev: "rev-asdf".to_string(), 290 + action: CommitAction::Put(PutAction { 291 + record: RawValue::from_string("{}".to_string())?, 292 + is_update: false, 293 + }), 294 + })?; 295 + 296 + commits.truncating_insert(UFOsCommit { 297 + cursor: Cursor::from_raw_u64(101), 298 + did: Did::new("did:plc:whatever".to_string()).unwrap(), 299 + rkey: 
RecordKey::new("rkey-asdf-b".to_string()).unwrap(), 300 + rev: "rev-asdg".to_string(), 301 + action: CommitAction::Put(PutAction { 302 + record: RawValue::from_string("{}".to_string())?, 303 + is_update: false, 304 + }), 305 + })?; 306 + 307 + commits.truncating_insert(UFOsCommit { 308 + cursor: Cursor::from_raw_u64(102), 309 + did: Did::new("did:plc:whatever".to_string()).unwrap(), 310 + rkey: RecordKey::new("rkey-asdf-c".to_string()).unwrap(), 311 + rev: "rev-asdh".to_string(), 312 + action: CommitAction::Put(PutAction { 313 + record: RawValue::from_string("{}".to_string())?, 314 + is_update: false, 315 + }), 316 + })?; 317 + 318 + assert_eq!(commits.total_seen, 3); 319 + assert_eq!(commits.dids_estimate.estimate(), 1); 320 + assert_eq!(commits.commits.len(), 2); 321 + 322 + let mut found_first = false; 323 + let mut found_last = false; 324 + for commit in commits.commits { 325 + match commit.rev.as_ref() { 326 + "rev-asdf" => { 327 + found_first = true; 328 + } 329 + "rev-asdh" => { 330 + found_last = true; 331 + } 332 + _ => {} 333 + } 334 + } 335 + assert!(!found_first); 336 + assert!(found_last); 337 + 338 + Ok(()) 339 + } 340 + 341 + #[test] 342 + fn test_truncating_insert_does_not_truncate_deletes() -> anyhow::Result<()> { 343 + let mut commits: CollectionCommits<2> = Default::default(); 344 + 345 + commits.truncating_insert(UFOsCommit { 346 + cursor: Cursor::from_raw_u64(100), 347 + did: Did::new("did:plc:whatever".to_string()).unwrap(), 348 + rkey: RecordKey::new("rkey-asdf-a".to_string()).unwrap(), 349 + rev: "rev-asdf".to_string(), 350 + action: CommitAction::Cut, 351 + })?; 352 + 353 + commits.truncating_insert(UFOsCommit { 354 + cursor: Cursor::from_raw_u64(101), 355 + did: Did::new("did:plc:whatever".to_string()).unwrap(), 356 + rkey: RecordKey::new("rkey-asdf-b".to_string()).unwrap(), 357 + rev: "rev-asdg".to_string(), 358 + action: CommitAction::Put(PutAction { 359 + record: RawValue::from_string("{}".to_string())?, 360 + is_update: false, 361 + 
}), 362 + })?; 363 + 364 + commits.truncating_insert(UFOsCommit { 365 + cursor: Cursor::from_raw_u64(102), 366 + did: Did::new("did:plc:whatever".to_string()).unwrap(), 367 + rkey: RecordKey::new("rkey-asdf-c".to_string()).unwrap(), 368 + rev: "rev-asdh".to_string(), 369 + action: CommitAction::Put(PutAction { 370 + record: RawValue::from_string("{}".to_string())?, 371 + is_update: false, 372 + }), 373 + })?; 374 + 375 + assert_eq!(commits.total_seen, 2); 376 + assert_eq!(commits.dids_estimate.estimate(), 1); 377 + assert_eq!(commits.commits.len(), 2); 378 + 379 + let mut found_first = false; 380 + let mut found_last = false; 381 + let mut found_delete = false; 382 + for commit in commits.commits { 383 + match commit.rev.as_ref() { 384 + "rev-asdg" => { 385 + found_first = true; 386 + } 387 + "rev-asdh" => { 388 + found_last = true; 389 + } 390 + _ => {} 391 + } 392 + if let CommitAction::Cut = commit.action { 393 + found_delete = true; 394 + } 395 + } 396 + assert!(!found_first); 397 + assert!(found_last); 398 + assert!(found_delete); 399 + 400 + Ok(()) 401 + } 402 + 403 + #[test] 404 + fn test_truncating_insert_maxes_out_deletes() -> anyhow::Result<()> { 405 + let mut commits: CollectionCommits<2> = Default::default(); 406 + 407 + commits 408 + .truncating_insert(UFOsCommit { 409 + cursor: Cursor::from_raw_u64(100), 410 + did: Did::new("did:plc:whatever".to_string()).unwrap(), 411 + rkey: RecordKey::new("rkey-asdf-a".to_string()).unwrap(), 412 + rev: "rev-asdf".to_string(), 413 + action: CommitAction::Cut, 414 + }) 415 + .unwrap(); 416 + 417 + // this create will just be discarded 418 + commits 419 + .truncating_insert(UFOsCommit { 420 + cursor: Cursor::from_raw_u64(80), 421 + did: Did::new("did:plc:whatever".to_string()).unwrap(), 422 + rkey: RecordKey::new("rkey-asdf-zzz".to_string()).unwrap(), 423 + rev: "rev-asdzzz".to_string(), 424 + action: CommitAction::Put(PutAction { 425 + record: RawValue::from_string("{}".to_string())?, 426 + is_update: false, 427 + 
}), 428 + }) 429 + .unwrap(); 430 + 431 + commits 432 + .truncating_insert(UFOsCommit { 433 + cursor: Cursor::from_raw_u64(101), 434 + did: Did::new("did:plc:whatever".to_string()).unwrap(), 435 + rkey: RecordKey::new("rkey-asdf-b".to_string()).unwrap(), 436 + rev: "rev-asdg".to_string(), 437 + action: CommitAction::Cut, 438 + }) 439 + .unwrap(); 440 + 441 + let res = commits.truncating_insert(UFOsCommit { 442 + cursor: Cursor::from_raw_u64(102), 443 + did: Did::new("did:plc:whatever".to_string()).unwrap(), 444 + rkey: RecordKey::new("rkey-asdf-c".to_string()).unwrap(), 445 + rev: "rev-asdh".to_string(), 446 + action: CommitAction::Cut, 447 + }); 448 + 449 + assert!(res.is_err()); 450 + let overflowed = match res { 451 + Err(BatchInsertError::BatchFull(c)) => c, 452 + e => panic!("expected overflow but a different error happened: {e:?}"), 453 + }; 454 + assert_eq!(overflowed.rev, "rev-asdh"); 455 + 456 + Ok(()) 457 + } 62 458 }
+86 -33
ufos/src/main.rs
··· 1 1 use clap::Parser; 2 + use jetstream::events::Cursor; 2 3 use std::path::PathBuf; 3 - use ufos::{consumer, server, store}; 4 + use ufos::consumer; 5 + use ufos::error::StorageError; 6 + use ufos::file_consumer; 7 + use ufos::server; 8 + use ufos::storage::{StorageWhatever, StoreReader, StoreWriter}; 9 + use ufos::storage_fjall::FjallStorage; 10 + use ufos::storage_mem::MemStorage; 4 11 5 12 #[cfg(not(target_env = "msvc"))] 6 13 use tikv_jemallocator::Jemalloc; ··· 29 36 #[arg(long)] 30 37 data: PathBuf, 31 38 /// DEBUG: don't start the jetstream consumer or its write loop 39 + /// todo: restore this 32 40 #[arg(long, action)] 33 41 pause_writer: bool, 34 42 /// DEBUG: force the rw loop to fall behind by pausing it 43 + /// todo: restore this 35 44 #[arg(long, action)] 36 45 pause_rw: bool, 46 + /// DEBUG: use an in-memory store instead of fjall 47 + #[arg(long, action)] 48 + in_mem: bool, 49 + /// DEBUG: interpret jetstream as a file fixture 50 + #[arg(long, action)] 51 + jetstream_fixture: bool, 37 52 } 38 53 39 54 // #[tokio::main(flavor = "current_thread")] // TODO: move this to config via args ··· 42 57 env_logger::init(); 43 58 44 59 let args = Args::parse(); 45 - let (storage, cursor) = 46 - store::Storage::open(args.data, &args.jetstream, args.jetstream_force).await?; 60 + let jetstream = args.jetstream.clone(); 61 + if args.in_mem { 62 + let (read_store, write_store, cursor) = MemStorage::init( 63 + args.data, 64 + jetstream, 65 + args.jetstream_force, 66 + Default::default(), 67 + )?; 68 + go( 69 + args.jetstream, 70 + args.jetstream_fixture, 71 + args.pause_writer, 72 + read_store, 73 + write_store, 74 + cursor, 75 + ) 76 + .await?; 77 + } else { 78 + let (read_store, write_store, cursor) = FjallStorage::init( 79 + args.data, 80 + jetstream, 81 + args.jetstream_force, 82 + Default::default(), 83 + )?; 84 + go( 85 + args.jetstream, 86 + args.jetstream_fixture, 87 + args.pause_writer, 88 + read_store, 89 + write_store, 90 + cursor, 91 + ) 92 + 
.await?; 93 + } 47 94 95 + Ok(()) 96 + } 97 + 98 + async fn go( 99 + jetstream: String, 100 + jetstream_fixture: bool, 101 + pause_writer: bool, 102 + read_store: impl StoreReader + 'static, 103 + mut write_store: impl StoreWriter + 'static, 104 + cursor: Option<Cursor>, 105 + ) -> anyhow::Result<()> { 48 106 println!("starting server with storage..."); 49 - let serving = server::serve(storage.clone()); 107 + let serving = server::serve(read_store); 50 108 51 109 let t1 = tokio::task::spawn(async { 52 110 let r = serving.await; ··· 54 112 }); 55 113 56 114 let t2: tokio::task::JoinHandle<anyhow::Result<()>> = tokio::task::spawn({ 57 - let storage = storage.clone(); 58 115 async move { 59 - if !args.pause_writer { 116 + if !pause_writer { 60 117 println!( 61 118 "starting consumer with cursor: {cursor:?} from {:?} ago", 62 - cursor.clone().map(|c| c.elapsed()) 119 + cursor.map(|c| c.elapsed()) 63 120 ); 64 - let batches = 65 - consumer::consume(&args.jetstream, cursor, args.jetstream_no_zstd).await?; 66 - let r = storage.receive(batches).await; 67 - log::warn!("storage.receive ended with: {r:?}"); 121 + let mut batches = if jetstream_fixture { 122 + file_consumer::consume(jetstream.into()).await? 123 + } else { 124 + consumer::consume(&jetstream, cursor, false).await? 
125 + }; 126 + 127 + tokio::task::spawn_blocking(move || { 128 + while let Some(event_batch) = batches.blocking_recv() { 129 + write_store.insert_batch(event_batch)?; 130 + write_store 131 + .step_rollup() 132 + .inspect_err(|e| log::error!("laksjdfl: {e:?}"))?; 133 + } 134 + Ok::<(), StorageError>(()) 135 + }) 136 + .await??; 137 + 138 + log::warn!("storage.receive ended with"); 68 139 } else { 69 140 log::info!("not starting jetstream or the write loop."); 70 141 } ··· 72 143 } 73 144 }); 74 145 75 - let t3 = tokio::task::spawn(async move { 76 - if !args.pause_rw { 77 - let r = storage.rw_loop().await; 78 - log::warn!("storage.rw_loop ended with: {r:?}"); 79 - } else { 80 - log::info!("not starting rw loop."); 81 - } 82 - }); 83 - 84 - // tokio::select! { 85 - // // v = serving => eprintln!("serving ended: {v:?}"), 86 - // v = storage.receive(batches) => eprintln!("storage consumer ended: {v:?}"), 87 - // v = storage.rw_loop() => eprintln!("storage rw-loop ended: {v:?}"), 88 - // }; 89 - 90 - log::trace!("tasks running. waiting."); 91 - t1.await?; 92 - log::trace!("serve task ended."); 93 - t2.await??; 94 - log::trace!("storage receive task ended."); 95 - t3.await?; 96 - log::trace!("storage rw task ended."); 146 + tokio::select! { 147 + z = t1 => log::warn!("serve task ended: {z:?}"), 148 + z = t2 => log::warn!("storage task ended: {z:?}"), 149 + }; 97 150 98 151 println!("bye!"); 99 152
+91 -92
ufos/src/server.rs
··· 1 - use crate::store::{Storage, StorageInfo}; 2 - use crate::{CreateRecord, Nsid}; 1 + use crate::storage::StoreReader; 2 + use crate::{ConsumerInfo, Nsid, TopCollections, UFOsRecord}; 3 3 use dropshot::endpoint; 4 4 use dropshot::ApiDescription; 5 5 use dropshot::ConfigDropshot; ··· 16 16 use std::collections::HashMap; 17 17 use std::sync::Arc; 18 18 19 - #[derive(Clone)] 20 19 struct Context { 21 20 pub spec: Arc<serde_json::Value>, 22 - storage: Storage, 21 + storage: Box<dyn StoreReader>, 23 22 } 24 23 25 24 /// Meta: get the openapi spec for this api ··· 34 33 35 34 #[derive(Debug, Serialize, JsonSchema)] 36 35 struct MetaInfo { 37 - storage_info: StorageInfo, 38 - jetstream_endpoint: Option<String>, 39 - jetstream_cursor: Option<u64>, 40 - mod_cursor: Option<u64>, 36 + storage: serde_json::Value, 37 + consumer: ConsumerInfo, 41 38 } 42 39 /// Get meta information about UFOs itself 43 40 #[endpoint { ··· 46 43 }] 47 44 async fn get_meta_info(ctx: RequestContext<Context>) -> OkCorsResponse<MetaInfo> { 48 45 let Context { storage, .. } = ctx.context(); 49 - 50 46 let failed_to_get = 51 47 |what| move |e| HttpError::for_internal_error(format!("failed to get {what}: {e:?}")); 52 48 53 49 let storage_info = storage 54 - .get_meta_info() 55 - .await 56 - .map_err(failed_to_get("meta info"))?; 57 - 58 - let jetstream_endpoint = storage 59 - .get_jetstream_endpoint() 60 - .await 61 - .map_err(failed_to_get("jetstream endpoint"))? 62 - .map(|v| v.0); 63 - 64 - let jetstream_cursor = storage 65 - .get_jetstream_cursor() 50 + .get_storage_stats() 66 51 .await 67 - .map_err(failed_to_get("jetstream cursor"))? 68 - .map(|c| c.to_raw_u64()); 52 + .map_err(failed_to_get("storage info"))?; 69 53 70 - let mod_cursor = storage 71 - .get_mod_cursor() 54 + let consumer = storage 55 + .get_consumer_info() 72 56 .await 73 - .map_err(failed_to_get("jetstream cursor"))? 
74 - .map(|c| c.to_raw_u64()); 57 + .map_err(failed_to_get("consumer info"))?; 75 58 76 59 ok_cors(MetaInfo { 77 - storage_info, 78 - jetstream_endpoint, 79 - jetstream_cursor, 80 - mod_cursor, 60 + storage: storage_info, 61 + consumer, 81 62 }) 82 63 } 64 + fn to_multiple_nsids(s: &str) -> Result<Vec<Nsid>, String> { 65 + let mut out = Vec::new(); 66 + for collection in s.split(',') { 67 + let Ok(nsid) = Nsid::new(collection.to_string()) else { 68 + return Err(format!("collection {collection:?} was not a valid NSID")); 69 + }; 70 + out.push(nsid); 71 + } 72 + Ok(out) 73 + } 83 74 84 75 #[derive(Debug, Deserialize, JsonSchema)] 85 - struct CollectionsQuery { 86 - collection: String, // JsonSchema not implemented for Nsid :( 87 - } 88 - impl CollectionsQuery { 89 - fn to_multiple_nsids(&self) -> Result<Vec<Nsid>, String> { 90 - let mut out = Vec::with_capacity(self.collection.len()); 91 - for collection in self.collection.split(',') { 92 - let Ok(nsid) = Nsid::new(collection.to_string()) else { 93 - return Err(format!("collection {collection:?} was not a valid NSID")); 94 - }; 95 - out.push(nsid); 96 - } 97 - Ok(out) 98 - } 76 + struct RecordsCollectionsQuery { 77 + collection: Option<String>, // JsonSchema not implemented for Nsid :( 99 78 } 100 79 #[derive(Debug, Serialize, JsonSchema)] 101 80 struct ApiRecord { 102 81 did: String, 103 82 collection: String, 104 83 rkey: String, 105 - record: serde_json::Value, 84 + record: Box<serde_json::value::RawValue>, 106 85 time_us: u64, 107 86 } 108 - impl ApiRecord { 109 - fn from_create_record(create_record: CreateRecord, collection: &Nsid) -> Self { 110 - let CreateRecord { 111 - did, 112 - rkey, 113 - record, 114 - cursor, 115 - } = create_record; 87 + impl From<UFOsRecord> for ApiRecord { 88 + fn from(ufo: UFOsRecord) -> Self { 116 89 Self { 117 - did: did.to_string(), 118 - collection: collection.to_string(), 119 - rkey: rkey.to_string(), 120 - record, 121 - time_us: cursor.to_raw_u64(), 90 + did: 
ufo.did.to_string(), 91 + collection: ufo.collection.to_string(), 92 + rkey: ufo.rkey.to_string(), 93 + record: ufo.record, 94 + time_us: ufo.cursor.to_raw_u64(), 122 95 } 123 96 } 124 97 } ··· 130 103 method = GET, 131 104 path = "/records", 132 105 }] 133 - async fn get_records_by_collection( 106 + async fn get_records_by_collections( 134 107 ctx: RequestContext<Context>, 135 - collection_query: Query<CollectionsQuery>, 108 + collection_query: Query<RecordsCollectionsQuery>, 136 109 ) -> OkCorsResponse<Vec<ApiRecord>> { 137 110 let Context { storage, .. } = ctx.context(); 111 + let mut limit = 42; 112 + let query = collection_query.into_inner(); 113 + let collections = if let Some(provided_collection) = query.collection { 114 + to_multiple_nsids(&provided_collection) 115 + .map_err(|reason| HttpError::for_bad_request(None, reason))? 116 + } else { 117 + let all_collections_should_be_nsids: Vec<String> = storage 118 + .get_top_collections() 119 + .await 120 + .map_err(|e| { 121 + HttpError::for_internal_error(format!("failed to get top collections: {e:?}")) 122 + })? 123 + .into(); 124 + let mut all_collections = Vec::with_capacity(all_collections_should_be_nsids.len()); 125 + for raw_nsid in all_collections_should_be_nsids { 126 + let nsid = Nsid::new(raw_nsid).map_err(|e| { 127 + HttpError::for_internal_error(format!("failed to parse nsid: {e:?}")) 128 + })?; 129 + all_collections.push(nsid); 130 + } 138 131 139 - let collections = collection_query 140 - .into_inner() 141 - .to_multiple_nsids() 142 - .map_err(|reason| HttpError::for_bad_request(None, reason))?; 132 + limit = 12; 133 + all_collections 134 + }; 143 135 144 - let mut api_records = Vec::new(); 136 + let records = storage 137 + .get_records_by_collections(&collections, limit, true) 138 + .await 139 + .map_err(|e| HttpError::for_internal_error(e.to_string()))? 
140 + .into_iter() 141 + .map(|r| r.into()) 142 + .collect(); 145 143 146 - // TODO: set up multiple db iterators and iterate them together with merge sort 147 - for collection in &collections { 148 - let records = storage 149 - .get_collection_records(collection, 100) 150 - .await 151 - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; 144 + ok_cors(records) 145 + } 152 146 153 - for record in records { 154 - let api_record = ApiRecord::from_create_record(record, collection); 155 - api_records.push(api_record); 156 - } 157 - } 158 - 159 - ok_cors(api_records) 147 + #[derive(Debug, Deserialize, JsonSchema)] 148 + struct TotalSeenCollectionsQuery { 149 + collection: String, // JsonSchema not implemented for Nsid :( 150 + } 151 + #[derive(Debug, Serialize, JsonSchema)] 152 + struct TotalCounts { 153 + total_records: u64, 154 + dids_estimate: u64, 160 155 } 161 - 162 156 /// Get total records seen by collection 163 157 #[endpoint { 164 158 method = GET, ··· 166 160 }] 167 161 async fn get_records_total_seen( 168 162 ctx: RequestContext<Context>, 169 - collection_query: Query<CollectionsQuery>, 170 - ) -> OkCorsResponse<HashMap<String, u64>> { 163 + collection_query: Query<TotalSeenCollectionsQuery>, 164 + ) -> OkCorsResponse<HashMap<String, TotalCounts>> { 171 165 let Context { storage, .. 
} = ctx.context(); 172 166 173 - let collections = collection_query 174 - .into_inner() 175 - .to_multiple_nsids() 167 + let query = collection_query.into_inner(); 168 + let collections = to_multiple_nsids(&query.collection) 176 169 .map_err(|reason| HttpError::for_bad_request(None, reason))?; 177 170 178 171 let mut seen_by_collection = HashMap::with_capacity(collections.len()); 179 172 180 173 for collection in &collections { 181 - let total = storage 182 - .get_collection_total_seen(collection) 174 + let (total_records, dids_estimate) = storage 175 + .get_counts_by_collection(collection) 183 176 .await 184 177 .map_err(|e| HttpError::for_internal_error(format!("boooo: {e:?}")))?; 185 178 186 - seen_by_collection.insert(collection.to_string(), total); 179 + seen_by_collection.insert( 180 + collection.to_string(), 181 + TotalCounts { 182 + total_records, 183 + dids_estimate, 184 + }, 185 + ); 187 186 } 188 187 189 188 ok_cors(seen_by_collection) ··· 194 193 method = GET, 195 194 path = "/collections" 196 195 }] 197 - async fn get_top_collections(ctx: RequestContext<Context>) -> OkCorsResponse<HashMap<String, u64>> { 196 + async fn get_top_collections(ctx: RequestContext<Context>) -> OkCorsResponse<TopCollections> { 198 197 let Context { storage, .. 
} = ctx.context(); 199 198 let collections = storage 200 199 .get_top_collections() ··· 204 203 ok_cors(collections) 205 204 } 206 205 207 - pub async fn serve(storage: Storage) -> Result<(), String> { 206 + pub async fn serve(storage: impl StoreReader + 'static) -> Result<(), String> { 208 207 let log = ConfigLogging::StderrTerminal { 209 208 level: ConfigLoggingLevel::Info, 210 209 } ··· 215 214 216 215 api.register(get_openapi).unwrap(); 217 216 api.register(get_meta_info).unwrap(); 218 - api.register(get_records_by_collection).unwrap(); 217 + api.register(get_records_by_collections).unwrap(); 219 218 api.register(get_records_total_seen).unwrap(); 220 219 api.register(get_top_collections).unwrap(); 221 220 ··· 225 224 .json() 226 225 .map_err(|e| e.to_string())?, 227 226 ), 228 - storage, 227 + storage: Box::new(storage), 229 228 }; 230 229 231 230 ServerBuilder::new(api, context, log)
+49
ufos/src/storage.rs
··· 1 + // use crate::store_types::CountsValue; 2 + use crate::{error::StorageError, ConsumerInfo, Cursor, EventBatch, TopCollections, UFOsRecord}; 3 + use async_trait::async_trait; 4 + use jetstream::exports::{Did, Nsid}; 5 + use std::path::Path; 6 + 7 + pub type StorageResult<T> = Result<T, StorageError>; 8 + 9 + pub trait StorageWhatever<R: StoreReader, W: StoreWriter, C> { 10 + fn init( 11 + path: impl AsRef<Path>, 12 + endpoint: String, 13 + force_endpoint: bool, 14 + config: C, 15 + ) -> StorageResult<(R, W, Option<Cursor>)> 16 + where 17 + Self: Sized; 18 + } 19 + 20 + pub trait StoreWriter: Send + Sync { 21 + fn insert_batch<const LIMIT: usize>( 22 + &mut self, 23 + event_batch: EventBatch<LIMIT>, 24 + ) -> StorageResult<()>; 25 + 26 + fn step_rollup(&mut self) -> StorageResult<usize>; 27 + 28 + fn trim_collection(&mut self, collection: &Nsid, limit: usize) -> StorageResult<()>; 29 + 30 + fn delete_account(&mut self, did: &Did) -> StorageResult<usize>; 31 + } 32 + 33 + #[async_trait] 34 + pub trait StoreReader: Send + Sync { 35 + async fn get_storage_stats(&self) -> StorageResult<serde_json::Value>; 36 + 37 + async fn get_consumer_info(&self) -> StorageResult<ConsumerInfo>; 38 + 39 + async fn get_top_collections(&self) -> StorageResult<TopCollections>; 40 + 41 + async fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)>; 42 + 43 + async fn get_records_by_collections( 44 + &self, 45 + collections: &[Nsid], 46 + limit: usize, 47 + expand_each_collection: bool, 48 + ) -> StorageResult<Vec<UFOsRecord>>; 49 + }
+1795
ufos/src/storage_fjall.rs
··· 1 + use crate::db_types::{db_complete, DbBytes, DbStaticStr, StaticStr}; 2 + use crate::error::StorageError; 3 + use crate::storage::{StorageResult, StorageWhatever, StoreReader, StoreWriter}; 4 + use crate::store_types::{ 5 + AllTimeRollupKey, CountsValue, DeleteAccountQueueKey, DeleteAccountQueueVal, 6 + HourTruncatedCursor, HourlyRollupKey, JetstreamCursorKey, JetstreamCursorValue, 7 + JetstreamEndpointKey, JetstreamEndpointValue, LiveCountsKey, NewRollupCursorKey, 8 + NewRollupCursorValue, NsidRecordFeedKey, NsidRecordFeedVal, RecordLocationKey, 9 + RecordLocationMeta, RecordLocationVal, RecordRawValue, TakeoffKey, TakeoffValue, 10 + WeekTruncatedCursor, WeeklyRollupKey, 11 + }; 12 + use crate::{CommitAction, ConsumerInfo, Did, EventBatch, Nsid, TopCollections, UFOsRecord}; 13 + use async_trait::async_trait; 14 + use fjall::{Batch as FjallBatch, Config, Keyspace, PartitionCreateOptions, PartitionHandle}; 15 + use jetstream::events::Cursor; 16 + use std::collections::HashMap; 17 + use std::path::Path; 18 + use std::time::SystemTime; 19 + 20 + const MAX_BATCHED_CLEANUP_SIZE: usize = 1024; // try to commit progress for longer feeds 21 + const MAX_BATCHED_ACCOUNT_DELETE_RECORDS: usize = 1024; 22 + const MAX_BATCHED_ROLLUP_COUNTS: usize = 256; 23 + 24 + /// 25 + /// new data format, roughly: 26 + /// 27 + /// Partion: 'global' 28 + /// 29 + /// - Global sequence counter (is the jetstream cursor -- monotonic with many gaps) 30 + /// - key: "js_cursor" (literal) 31 + /// - val: u64 32 + /// 33 + /// - Jetstream server endpoint (persisted because the cursor can't be used on another instance without data loss) 34 + /// - key: "js_endpoint" (literal) 35 + /// - val: string (URL of the instance) 36 + /// 37 + /// - Launch date 38 + /// - key: "takeoff" (literal) 39 + /// - val: u64 (micros timestamp, not from jetstream for now so not precise) 40 + /// 41 + /// - Rollup cursor (bg work: roll stats into hourlies, delete accounts, old record deletes) 42 + /// - key: 
"rollup_cursor" (literal) 43 + /// - val: u64 (tracks behind js_cursor) 44 + /// 45 + /// 46 + /// Partition: 'feed' 47 + /// 48 + /// - Per-collection list of record references ordered by jetstream cursor 49 + /// - key: nullstr || u64 (collection nsid null-terminated, jetstream cursor) 50 + /// - val: nullstr || nullstr || nullstr (did, rkey, rev. rev is mostly a sanity-check for now.) 51 + /// 52 + /// 53 + /// Partition: 'records' 54 + /// 55 + /// - Actual records by their atproto location 56 + /// - key: nullstr || nullstr || nullstr (did, collection, rkey) 57 + /// - val: u64 || bool || nullstr || rawval (js_cursor, is_update, rev, actual record) 58 + /// 59 + /// 60 + /// Partition: 'rollups' 61 + /// 62 + /// - Live (batched) records counts and dids estimate per collection 63 + /// - key: "live_counts" || u64 || nullstr (js_cursor, nsid) 64 + /// - val: u64 || HLL (count (not cursor), estimator) 65 + /// 66 + /// - Hourly total record counts and dids estimate per collection 67 + /// - key: "hourly_counts" || u64 || nullstr (hour, nsid) 68 + /// - val: u64 || HLL (count (not cursor), estimator) 69 + /// 70 + /// - Weekly total record counts and dids estimate per collection 71 + /// - key: "weekly_counts" || u64 || nullstr (hour, nsid) 72 + /// - val: u64 || HLL (count (not cursor), estimator) 73 + /// 74 + /// - All-time total record counts and dids estimate per collection 75 + /// - key: "ever_counts" || nullstr (nsid) 76 + /// - val: u64 || HLL (count (not cursor), estimator) 77 + /// 78 + /// - TODO: sorted indexes for all-times? 79 + /// 80 + /// 81 + /// Partition: 'queues' 82 + /// 83 + /// - Delete account queue 84 + /// - key: "delete_acount" || u64 (js_cursor) 85 + /// - val: nullstr (did) 86 + /// 87 + /// 88 + /// TODO: moderation actions 89 + /// TODO: account privacy preferences. Might wait for the protocol-level (PDS-level?) stuff to land. Will probably do lazy fetching + caching on read. 
90 + #[derive(Debug)] 91 + pub struct FjallStorage {} 92 + 93 + #[derive(Debug, Default)] 94 + pub struct FjallConfig { 95 + /// drop the db when the storage is dropped 96 + /// 97 + /// this is only meant for tests 98 + #[cfg(test)] 99 + pub temp: bool, 100 + } 101 + 102 + impl StorageWhatever<FjallReader, FjallWriter, FjallConfig> for FjallStorage { 103 + fn init( 104 + path: impl AsRef<Path>, 105 + endpoint: String, 106 + force_endpoint: bool, 107 + _config: FjallConfig, 108 + ) -> StorageResult<(FjallReader, FjallWriter, Option<Cursor>)> { 109 + let keyspace = { 110 + let config = Config::new(path); 111 + 112 + #[cfg(not(test))] 113 + let config = config.fsync_ms(Some(4_000)); 114 + 115 + config.open()? 116 + }; 117 + 118 + let global = keyspace.open_partition("global", PartitionCreateOptions::default())?; 119 + let feeds = keyspace.open_partition("feeds", PartitionCreateOptions::default())?; 120 + let records = keyspace.open_partition("records", PartitionCreateOptions::default())?; 121 + let rollups = keyspace.open_partition("rollups", PartitionCreateOptions::default())?; 122 + let queues = keyspace.open_partition("queues", PartitionCreateOptions::default())?; 123 + 124 + let js_cursor = get_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)?; 125 + 126 + if js_cursor.is_some() { 127 + let stored_endpoint = 128 + get_static_neu::<JetstreamEndpointKey, JetstreamEndpointValue>(&global)?; 129 + 130 + let JetstreamEndpointValue(stored) = stored_endpoint.ok_or(StorageError::InitError( 131 + "found cursor but missing js_endpoint, refusing to start.".to_string(), 132 + ))?; 133 + 134 + if stored != endpoint { 135 + if force_endpoint { 136 + log::warn!("forcing a jetstream switch from {stored:?} to {endpoint:?}"); 137 + insert_static_neu::<JetstreamEndpointKey>( 138 + &global, 139 + JetstreamEndpointValue(endpoint.to_string()), 140 + )?; 141 + } else { 142 + return Err(StorageError::InitError(format!( 143 + "stored js_endpoint {stored:?} differs from 
provided {endpoint:?}, refusing to start."))); 144 + } 145 + } 146 + } else { 147 + insert_static_neu::<JetstreamEndpointKey>( 148 + &global, 149 + JetstreamEndpointValue(endpoint.to_string()), 150 + )?; 151 + insert_static_neu::<TakeoffKey>(&global, Cursor::at(SystemTime::now()))?; 152 + insert_static_neu::<NewRollupCursorKey>(&global, Cursor::from_start())?; 153 + } 154 + 155 + let reader = FjallReader { 156 + keyspace: keyspace.clone(), 157 + global: global.clone(), 158 + feeds: feeds.clone(), 159 + records: records.clone(), 160 + rollups: rollups.clone(), 161 + }; 162 + let writer = FjallWriter { 163 + keyspace, 164 + global, 165 + feeds, 166 + records, 167 + rollups, 168 + queues, 169 + }; 170 + Ok((reader, writer, js_cursor)) 171 + } 172 + } 173 + 174 + type FjallRKV = fjall::Result<(fjall::Slice, fjall::Slice)>; 175 + 176 + #[derive(Clone)] 177 + pub struct FjallReader { 178 + keyspace: Keyspace, 179 + global: PartitionHandle, 180 + feeds: PartitionHandle, 181 + records: PartitionHandle, 182 + rollups: PartitionHandle, 183 + } 184 + 185 + /// An iterator that knows how to skip over deleted/invalidated records 186 + struct RecordIterator { 187 + db_iter: Box<dyn Iterator<Item = FjallRKV>>, 188 + records: PartitionHandle, 189 + limit: usize, 190 + fetched: usize, 191 + } 192 + impl RecordIterator { 193 + pub fn new( 194 + feeds: &PartitionHandle, 195 + records: PartitionHandle, 196 + collection: &Nsid, 197 + limit: usize, 198 + ) -> StorageResult<Self> { 199 + let prefix = NsidRecordFeedKey::from_prefix_to_db_bytes(collection)?; 200 + let db_iter = feeds.prefix(prefix).rev(); 201 + Ok(Self { 202 + db_iter: Box::new(db_iter), 203 + records, 204 + limit, 205 + fetched: 0, 206 + }) 207 + } 208 + fn get_record(&self, db_next: FjallRKV) -> StorageResult<Option<UFOsRecord>> { 209 + let (key_bytes, val_bytes) = db_next?; 210 + let feed_key = db_complete::<NsidRecordFeedKey>(&key_bytes)?; 211 + let feed_val = db_complete::<NsidRecordFeedVal>(&val_bytes)?; 212 + let 
location_key: RecordLocationKey = (&feed_key, &feed_val).into(); 213 + 214 + let Some(location_val_bytes) = self.records.get(location_key.to_db_bytes()?)? else { 215 + // record was deleted (hopefully) 216 + return Ok(None); 217 + }; 218 + 219 + let (meta, n) = RecordLocationMeta::from_db_bytes(&location_val_bytes)?; 220 + 221 + if meta.cursor() != feed_key.cursor() { 222 + // older/different version 223 + return Ok(None); 224 + } 225 + if meta.rev != feed_val.rev() { 226 + // weird... 227 + log::warn!("record lookup: cursor match but rev did not...? excluding."); 228 + return Ok(None); 229 + } 230 + let Some(raw_value_bytes) = location_val_bytes.get(n..) else { 231 + log::warn!( 232 + "record lookup: found record but could not get bytes to decode the record??" 233 + ); 234 + return Ok(None); 235 + }; 236 + let rawval = db_complete::<RecordRawValue>(raw_value_bytes)?; 237 + Ok(Some(UFOsRecord { 238 + collection: feed_key.collection().clone(), 239 + cursor: feed_key.cursor(), 240 + did: feed_val.did().clone(), 241 + rkey: feed_val.rkey().clone(), 242 + rev: meta.rev.to_string(), 243 + record: rawval.try_into()?, 244 + is_update: meta.is_update, 245 + })) 246 + } 247 + } 248 + impl Iterator for RecordIterator { 249 + type Item = StorageResult<Option<UFOsRecord>>; 250 + fn next(&mut self) -> Option<Self::Item> { 251 + if self.fetched == self.limit { 252 + return Some(Ok(None)); 253 + } 254 + let record = loop { 255 + let db_next = self.db_iter.next()?; // None short-circuits here 256 + match self.get_record(db_next) { 257 + Err(e) => return Some(Err(e)), 258 + Ok(Some(record)) => break record, 259 + Ok(None) => continue, 260 + } 261 + }; 262 + self.fetched += 1; 263 + Some(Ok(Some(record))) 264 + } 265 + } 266 + 267 + impl FjallReader { 268 + fn get_storage_stats(&self) -> StorageResult<serde_json::Value> { 269 + let rollup_cursor = 270 + get_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&self.global)? 
271 + .map(|c| c.to_raw_u64()); 272 + 273 + Ok(serde_json::json!({ 274 + "keyspace_disk_space": self.keyspace.disk_space(), 275 + "keyspace_journal_count": self.keyspace.journal_count(), 276 + "keyspace_sequence": self.keyspace.instant(), 277 + "rollup_cursor": rollup_cursor, 278 + })) 279 + } 280 + 281 + fn get_consumer_info(&self) -> StorageResult<ConsumerInfo> { 282 + let global = self.global.snapshot(); 283 + 284 + let endpoint = 285 + get_snapshot_static_neu::<JetstreamEndpointKey, JetstreamEndpointValue>(&global)? 286 + .ok_or(StorageError::BadStateError( 287 + "Could not find jetstream endpoint".to_string(), 288 + ))? 289 + .0; 290 + 291 + let started_at = get_snapshot_static_neu::<TakeoffKey, TakeoffValue>(&global)? 292 + .ok_or(StorageError::BadStateError( 293 + "Could not find jetstream takeoff time".to_string(), 294 + ))? 295 + .to_raw_u64(); 296 + 297 + let latest_cursor = 298 + get_snapshot_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)? 299 + .map(|c| c.to_raw_u64()); 300 + 301 + Ok(ConsumerInfo::Jetstream { 302 + endpoint, 303 + started_at, 304 + latest_cursor, 305 + }) 306 + } 307 + 308 + fn get_top_collections(&self) -> Result<TopCollections, StorageError> { 309 + // TODO: limit nsid traversal depth 310 + // TODO: limit nsid traversal breadth 311 + // TODO: be serious about anything 312 + 313 + // TODO: probably use a stack of segments to reduce to ~log-n merges 314 + 315 + #[derive(Default)] 316 + struct Blah { 317 + counts: CountsValue, 318 + children: HashMap<String, Blah>, 319 + } 320 + impl From<&Blah> for TopCollections { 321 + fn from(bla: &Blah) -> Self { 322 + Self { 323 + total_records: bla.counts.records(), 324 + dids_estimate: bla.counts.dids().estimate() as u64, 325 + nsid_child_segments: HashMap::from_iter( 326 + bla.children.iter().map(|(k, v)| (k.to_string(), v.into())), 327 + ), 328 + } 329 + } 330 + } 331 + 332 + let mut b = Blah::default(); 333 + let prefix = 
AllTimeRollupKey::from_prefix_to_db_bytes(&Default::default())?; 334 + for kv in self.rollups.prefix(&prefix.to_db_bytes()?) { 335 + let (key_bytes, val_bytes) = kv?; 336 + let key = db_complete::<AllTimeRollupKey>(&key_bytes)?; 337 + let val = db_complete::<CountsValue>(&val_bytes)?; 338 + 339 + let mut node = &mut b; 340 + node.counts.merge(&val); 341 + for segment in key.collection().split('.') { 342 + node = node.children.entry(segment.to_string()).or_default(); 343 + node.counts.merge(&val); 344 + } 345 + } 346 + 347 + Ok((&b).into()) 348 + } 349 + 350 + fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> { 351 + // 0. grab a snapshot in case rollups happen while we're working 352 + let instant = self.keyspace.instant(); 353 + let global = self.global.snapshot_at(instant); 354 + let rollups = self.rollups.snapshot_at(instant); 355 + 356 + // 1. all-time counts 357 + let all_time_key = AllTimeRollupKey::new(collection).to_db_bytes()?; 358 + let mut total_counts = rollups 359 + .get(&all_time_key)? 360 + .as_deref() 361 + .map(db_complete::<CountsValue>) 362 + .transpose()? 363 + .unwrap_or_default(); 364 + 365 + // 2. live counts that haven't been rolled into all-time yet. 
366 + let rollup_cursor = 367 + get_snapshot_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&global)?.ok_or( 368 + StorageError::BadStateError("Could not find current rollup cursor".to_string()), 369 + )?; 370 + 371 + let full_range = LiveCountsKey::range_from_cursor(rollup_cursor)?; 372 + for kv in rollups.range(full_range) { 373 + let (key_bytes, val_bytes) = kv?; 374 + let key = db_complete::<LiveCountsKey>(&key_bytes)?; 375 + if key.collection() == collection { 376 + let counts = db_complete::<CountsValue>(&val_bytes)?; 377 + total_counts.merge(&counts); 378 + } 379 + } 380 + Ok(( 381 + total_counts.records(), 382 + total_counts.dids().estimate() as u64, 383 + )) 384 + } 385 + 386 + fn get_records_by_collections( 387 + &self, 388 + collections: &[Nsid], 389 + limit: usize, 390 + expand_each_collection: bool, 391 + ) -> StorageResult<Vec<UFOsRecord>> { 392 + if collections.is_empty() { 393 + return Ok(vec![]); 394 + } 395 + let mut record_iterators = Vec::new(); 396 + for collection in collections { 397 + let iter = RecordIterator::new(&self.feeds, self.records.clone(), collection, limit)?; 398 + record_iterators.push(iter.peekable()); 399 + } 400 + let mut merged = Vec::new(); 401 + loop { 402 + let mut latest: Option<(Cursor, usize)> = None; // ugh 403 + for (i, iter) in record_iterators.iter_mut().enumerate() { 404 + let Some(it) = iter.peek_mut() else { 405 + continue; 406 + }; 407 + let it = match it { 408 + Ok(v) => v, 409 + Err(e) => Err(std::mem::replace(e, StorageError::Stolen))?, 410 + }; 411 + let Some(rec) = it else { 412 + if expand_each_collection { 413 + continue; 414 + } else { 415 + break; 416 + } 417 + }; 418 + if let Some((cursor, _)) = latest { 419 + if rec.cursor > cursor { 420 + latest = Some((rec.cursor, i)) 421 + } 422 + } else { 423 + latest = Some((rec.cursor, i)); 424 + } 425 + } 426 + let Some((_, idx)) = latest else { 427 + break; 428 + }; 429 + // yeah yeah whateverrrrrrrrrrrrrrrr 430 + 
merged.push(record_iterators[idx].next().unwrap().unwrap().unwrap()); 431 + } 432 + Ok(merged) 433 + } 434 + } 435 + 436 + #[async_trait] 437 + impl StoreReader for FjallReader { 438 + async fn get_storage_stats(&self) -> StorageResult<serde_json::Value> { 439 + let s = self.clone(); 440 + tokio::task::spawn_blocking(move || FjallReader::get_storage_stats(&s)).await? 441 + } 442 + async fn get_consumer_info(&self) -> StorageResult<ConsumerInfo> { 443 + let s = self.clone(); 444 + tokio::task::spawn_blocking(move || FjallReader::get_consumer_info(&s)).await? 445 + } 446 + async fn get_top_collections(&self) -> Result<TopCollections, StorageError> { 447 + let s = self.clone(); 448 + tokio::task::spawn_blocking(move || FjallReader::get_top_collections(&s)).await? 449 + } 450 + async fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> { 451 + let s = self.clone(); 452 + let collection = collection.clone(); 453 + tokio::task::spawn_blocking(move || FjallReader::get_counts_by_collection(&s, &collection)) 454 + .await? 455 + } 456 + async fn get_records_by_collections( 457 + &self, 458 + collections: &[Nsid], 459 + limit: usize, 460 + expand_each_collection: bool, 461 + ) -> StorageResult<Vec<UFOsRecord>> { 462 + let s = self.clone(); 463 + let collections = collections.to_vec(); 464 + tokio::task::spawn_blocking(move || { 465 + FjallReader::get_records_by_collections(&s, &collections, limit, expand_each_collection) 466 + }) 467 + .await? 
468 + } 469 + } 470 + 471 + pub struct FjallWriter { 472 + keyspace: Keyspace, 473 + global: PartitionHandle, 474 + feeds: PartitionHandle, 475 + records: PartitionHandle, 476 + rollups: PartitionHandle, 477 + queues: PartitionHandle, 478 + } 479 + 480 + impl FjallWriter { 481 + fn rollup_delete_account( 482 + &mut self, 483 + cursor: Cursor, 484 + key_bytes: &[u8], 485 + val_bytes: &[u8], 486 + ) -> StorageResult<usize> { 487 + let did = db_complete::<DeleteAccountQueueVal>(val_bytes)?; 488 + self.delete_account(&did)?; 489 + let mut batch = self.keyspace.batch(); 490 + batch.remove(&self.queues, key_bytes); 491 + insert_batch_static_neu::<NewRollupCursorKey>(&mut batch, &self.global, cursor)?; 492 + batch.commit()?; 493 + Ok(1) 494 + } 495 + 496 + fn rollup_live_counts( 497 + &mut self, 498 + timelies: impl Iterator<Item = Result<(fjall::Slice, fjall::Slice), fjall::Error>>, 499 + cursor_exclusive_limit: Option<Cursor>, 500 + rollup_limit: usize, 501 + ) -> StorageResult<usize> { 502 + // current strategy is to buffer counts in mem before writing the rollups 503 + // we *could* read+write every single batch to rollup.. but their merge is associative so 504 + // ...so save the db some work up front? is this worth it? who knows... 
505 + 506 + #[derive(Eq, Hash, PartialEq)] 507 + enum Rollup { 508 + Hourly(HourTruncatedCursor), 509 + Weekly(WeekTruncatedCursor), 510 + AllTime, 511 + } 512 + 513 + let mut batch = self.keyspace.batch(); 514 + let mut cursors_advanced = 0; 515 + let mut last_cursor = Cursor::from_start(); 516 + let mut counts_by_rollup: HashMap<(Nsid, Rollup), CountsValue> = HashMap::new(); 517 + 518 + for (i, kv) in timelies.enumerate() { 519 + if i >= rollup_limit { 520 + break; 521 + } 522 + 523 + let (key_bytes, val_bytes) = kv?; 524 + let key = db_complete::<LiveCountsKey>(&key_bytes)?; 525 + 526 + if cursor_exclusive_limit 527 + .map(|limit| key.cursor() > limit) 528 + .unwrap_or(false) 529 + { 530 + break; 531 + } 532 + 533 + batch.remove(&self.rollups, key_bytes); 534 + let val = db_complete::<CountsValue>(&val_bytes)?; 535 + counts_by_rollup 536 + .entry(( 537 + key.collection().clone(), 538 + Rollup::Hourly(key.cursor().into()), 539 + )) 540 + .or_default() 541 + .merge(&val); 542 + counts_by_rollup 543 + .entry(( 544 + key.collection().clone(), 545 + Rollup::Weekly(key.cursor().into()), 546 + )) 547 + .or_default() 548 + .merge(&val); 549 + counts_by_rollup 550 + .entry((key.collection().clone(), Rollup::AllTime)) 551 + .or_default() 552 + .merge(&val); 553 + 554 + cursors_advanced += 1; 555 + last_cursor = key.cursor(); 556 + } 557 + 558 + for ((nsid, rollup), counts) in counts_by_rollup { 559 + let key_bytes = match rollup { 560 + Rollup::Hourly(hourly_cursor) => { 561 + let k = HourlyRollupKey::new(hourly_cursor, &nsid); 562 + k.to_db_bytes()? 563 + } 564 + Rollup::Weekly(weekly_cursor) => { 565 + let k = WeeklyRollupKey::new(weekly_cursor, &nsid); 566 + k.to_db_bytes()? 567 + } 568 + Rollup::AllTime => { 569 + let k = AllTimeRollupKey::new(&nsid); 570 + k.to_db_bytes()? 571 + } 572 + }; 573 + let mut rolled: CountsValue = self 574 + .rollups 575 + .get(&key_bytes)? 576 + .as_deref() 577 + .map(db_complete::<CountsValue>) 578 + .transpose()? 
579 + .unwrap_or_default(); 580 + 581 + // try to round-trip before inserting, for funsies 582 + let tripppin = counts.to_db_bytes()?; 583 + let (and_back, n) = CountsValue::from_db_bytes(&tripppin)?; 584 + assert_eq!(n, tripppin.len()); 585 + assert_eq!(counts.prefix, and_back.prefix); 586 + assert_eq!(counts.dids().estimate(), and_back.dids().estimate()); 587 + if counts.records() > 200_000_000_000 { 588 + panic!("COUNTS maybe wtf? {counts:?}") 589 + } 590 + 591 + rolled.merge(&counts); 592 + batch.insert(&self.rollups, &key_bytes, &rolled.to_db_bytes()?); 593 + } 594 + 595 + insert_batch_static_neu::<NewRollupCursorKey>(&mut batch, &self.global, last_cursor)?; 596 + 597 + batch.commit()?; 598 + Ok(cursors_advanced) 599 + } 600 + } 601 + 602 + impl StoreWriter for FjallWriter { 603 + fn insert_batch<const LIMIT: usize>( 604 + &mut self, 605 + event_batch: EventBatch<LIMIT>, 606 + ) -> StorageResult<()> { 607 + if event_batch.is_empty() { 608 + return Ok(()); 609 + } 610 + 611 + let mut batch = self.keyspace.batch(); 612 + 613 + // would be nice not to have to iterate everything at once here 614 + let latest = event_batch.latest_cursor().unwrap(); 615 + 616 + for (nsid, commits) in event_batch.commits_by_nsid { 617 + for commit in commits.commits { 618 + let location_key: RecordLocationKey = (&commit, &nsid).into(); 619 + 620 + match commit.action { 621 + CommitAction::Cut => { 622 + batch.remove(&self.records, &location_key.to_db_bytes()?); 623 + } 624 + CommitAction::Put(put_action) => { 625 + let feed_key = NsidRecordFeedKey::from_pair(nsid.clone(), commit.cursor); 626 + let feed_val: NsidRecordFeedVal = 627 + (&commit.did, &commit.rkey, commit.rev.as_str()).into(); 628 + batch.insert( 629 + &self.feeds, 630 + feed_key.to_db_bytes()?, 631 + feed_val.to_db_bytes()?, 632 + ); 633 + 634 + let location_val: RecordLocationVal = 635 + (commit.cursor, commit.rev.as_str(), put_action).into(); 636 + batch.insert( 637 + &self.records, 638 + &location_key.to_db_bytes()?, 
639 + &location_val.to_db_bytes()?, 640 + ); 641 + } 642 + } 643 + } 644 + let live_counts_key: LiveCountsKey = (latest, &nsid).into(); 645 + let counts_value = CountsValue::new(commits.total_seen as u64, commits.dids_estimate); 646 + batch.insert( 647 + &self.rollups, 648 + &live_counts_key.to_db_bytes()?, 649 + &counts_value.to_db_bytes()?, 650 + ); 651 + } 652 + 653 + for remove in event_batch.account_removes { 654 + let queue_key = DeleteAccountQueueKey::new(remove.cursor); 655 + let queue_val: DeleteAccountQueueVal = remove.did; 656 + batch.insert( 657 + &self.queues, 658 + &queue_key.to_db_bytes()?, 659 + &queue_val.to_db_bytes()?, 660 + ); 661 + } 662 + 663 + batch.insert( 664 + &self.global, 665 + DbStaticStr::<JetstreamCursorKey>::default().to_db_bytes()?, 666 + latest.to_db_bytes()?, 667 + ); 668 + 669 + batch.commit()?; 670 + Ok(()) 671 + } 672 + 673 + fn step_rollup(&mut self) -> StorageResult<usize> { 674 + let rollup_cursor = 675 + get_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&self.global)?.ok_or( 676 + StorageError::BadStateError("Could not find current rollup cursor".to_string()), 677 + )?; 678 + 679 + // timelies 680 + let live_counts_range = LiveCountsKey::range_from_cursor(rollup_cursor)?; 681 + let mut timely_iter = self.rollups.range(live_counts_range).peekable(); 682 + 683 + let timely_next_cursor = timely_iter 684 + .peek_mut() 685 + .map(|kv| -> StorageResult<Cursor> { 686 + match kv { 687 + Err(e) => Err(std::mem::replace(e, fjall::Error::Poisoned))?, 688 + Ok((key_bytes, _)) => { 689 + let key = db_complete::<LiveCountsKey>(key_bytes)?; 690 + Ok(key.cursor()) 691 + } 692 + } 693 + }) 694 + .transpose()?; 695 + 696 + // delete accounts 697 + let delete_accounts_range = 698 + DeleteAccountQueueKey::new(rollup_cursor).range_to_prefix_end()?; 699 + 700 + let next_delete = self 701 + .queues 702 + .range(delete_accounts_range) 703 + .next() 704 + .transpose()? 
705 + .map(|(key_bytes, val_bytes)| { 706 + db_complete::<DeleteAccountQueueKey>(&key_bytes) 707 + .map(|k| (k.suffix, key_bytes, val_bytes)) 708 + }) 709 + .transpose()?; 710 + 711 + let cursors_stepped = match (timely_next_cursor, next_delete) { 712 + ( 713 + Some(timely_next_cursor), 714 + Some((delete_cursor, delete_key_bytes, delete_val_bytes)), 715 + ) => { 716 + if timely_next_cursor < delete_cursor { 717 + self.rollup_live_counts( 718 + timely_iter, 719 + Some(delete_cursor), 720 + MAX_BATCHED_ROLLUP_COUNTS, 721 + )? 722 + } else { 723 + self.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes)? 724 + } 725 + } 726 + (Some(_), None) => { 727 + self.rollup_live_counts(timely_iter, None, MAX_BATCHED_ROLLUP_COUNTS)? 728 + } 729 + (None, Some((delete_cursor, delete_key_bytes, delete_val_bytes))) => { 730 + self.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes)? 731 + } 732 + (None, None) => 0, 733 + }; 734 + 735 + Ok(cursors_stepped) 736 + } 737 + 738 + fn trim_collection( 739 + &mut self, 740 + collection: &Nsid, 741 + limit: usize, 742 + // TODO: could add a start cursor limit to avoid iterating deleted stuff at the start (/end) 743 + ) -> StorageResult<()> { 744 + let mut dangling_feed_keys_cleaned = 0; 745 + let mut records_deleted = 0; 746 + 747 + let mut batch = self.keyspace.batch(); 748 + 749 + let prefix = NsidRecordFeedKey::from_prefix_to_db_bytes(collection)?; 750 + let mut found = 0; 751 + for kv in self.feeds.prefix(prefix).rev() { 752 + let (key_bytes, val_bytes) = kv?; 753 + let feed_key = db_complete::<NsidRecordFeedKey>(&key_bytes)?; 754 + let feed_val = db_complete::<NsidRecordFeedVal>(&val_bytes)?; 755 + let location_key: RecordLocationKey = (&feed_key, &feed_val).into(); 756 + let location_key_bytes = location_key.to_db_bytes()?; 757 + 758 + let Some(location_val_bytes) = self.records.get(&location_key_bytes)? 
else { 759 + // record was deleted (hopefully) 760 + batch.remove(&self.feeds, &location_key_bytes); 761 + dangling_feed_keys_cleaned += 1; 762 + continue; 763 + }; 764 + 765 + let (meta, _) = RecordLocationMeta::from_db_bytes(&location_val_bytes)?; 766 + 767 + if meta.cursor() != feed_key.cursor() { 768 + // older/different version 769 + batch.remove(&self.feeds, &location_key_bytes); 770 + dangling_feed_keys_cleaned += 1; 771 + continue; 772 + } 773 + if meta.rev != feed_val.rev() { 774 + // weird... 775 + log::warn!("record lookup: cursor match but rev did not...? removing."); 776 + batch.remove(&self.feeds, &location_key_bytes); 777 + dangling_feed_keys_cleaned += 1; 778 + continue; 779 + } 780 + 781 + if batch.len() >= MAX_BATCHED_CLEANUP_SIZE { 782 + batch.commit()?; 783 + batch = self.keyspace.batch(); 784 + } 785 + 786 + found += 1; 787 + if found <= limit { 788 + continue; 789 + } 790 + 791 + batch.remove(&self.feeds, &location_key_bytes); 792 + batch.remove(&self.records, &location_key_bytes); 793 + records_deleted += 1; 794 + } 795 + 796 + batch.commit()?; 797 + 798 + log::info!("trim_collection ({collection:?}) removed {dangling_feed_keys_cleaned} dangling feed entries and {records_deleted} records"); 799 + Ok(()) 800 + } 801 + 802 + fn delete_account(&mut self, did: &Did) -> Result<usize, StorageError> { 803 + let mut records_deleted = 0; 804 + let mut batch = self.keyspace.batch(); 805 + let prefix = RecordLocationKey::from_prefix_to_db_bytes(did)?; 806 + for kv in self.records.prefix(prefix) { 807 + let (key_bytes, _) = kv?; 808 + batch.remove(&self.records, key_bytes); 809 + records_deleted += 1; 810 + if batch.len() >= MAX_BATCHED_ACCOUNT_DELETE_RECORDS { 811 + batch.commit()?; 812 + batch = self.keyspace.batch(); 813 + } 814 + } 815 + batch.commit()?; 816 + Ok(records_deleted) 817 + } 818 + } 819 + 820 + /// Get a value from a fixed key 821 + fn get_static_neu<K: StaticStr, V: DbBytes>(global: &PartitionHandle) -> StorageResult<Option<V>> { 822 + 
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?; 823 + let value = global 824 + .get(&key_bytes)? 825 + .map(|value_bytes| db_complete(&value_bytes)) 826 + .transpose()?; 827 + Ok(value) 828 + } 829 + 830 + /// Get a value from a fixed key 831 + fn get_snapshot_static_neu<K: StaticStr, V: DbBytes>( 832 + global: &fjall::Snapshot, 833 + ) -> StorageResult<Option<V>> { 834 + let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?; 835 + let value = global 836 + .get(&key_bytes)? 837 + .map(|value_bytes| db_complete(&value_bytes)) 838 + .transpose()?; 839 + Ok(value) 840 + } 841 + 842 + /// Set a value to a fixed key 843 + fn insert_static_neu<K: StaticStr>( 844 + global: &PartitionHandle, 845 + value: impl DbBytes, 846 + ) -> StorageResult<()> { 847 + let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?; 848 + let value_bytes = value.to_db_bytes()?; 849 + global.insert(&key_bytes, &value_bytes)?; 850 + Ok(()) 851 + } 852 + 853 + /// Set a value to a fixed key 854 + fn insert_batch_static_neu<K: StaticStr>( 855 + batch: &mut FjallBatch, 856 + global: &PartitionHandle, 857 + value: impl DbBytes, 858 + ) -> StorageResult<()> { 859 + let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?; 860 + let value_bytes = value.to_db_bytes()?; 861 + batch.insert(global, &key_bytes, &value_bytes); 862 + Ok(()) 863 + } 864 + 865 + #[derive(Debug, serde::Serialize, schemars::JsonSchema)] 866 + pub struct StorageInfo { 867 + pub keyspace_disk_space: u64, 868 + pub keyspace_journal_count: usize, 869 + pub keyspace_sequence: u64, 870 + pub global_approximate_len: usize, 871 + } 872 + 873 + ////////// temp stuff to remove: 874 + 875 + // fn summarize_batch<const LIMIT: usize>(batch: &EventBatch<LIMIT>) -> String { 876 + // format!( 877 + // "batch of {: >3} samples from {: >4} records in {: >2} collections from ~{: >4} DIDs, {} acct removes, cursor {: <12?}", 878 + // batch.total_records(), 879 + // batch.total_seen(), 880 + // batch.total_collections(), 881 + // 
batch.estimate_dids(), 882 + // batch.account_removes(), 883 + // batch.latest_cursor().map(|c| c.elapsed()), 884 + // ) 885 + // } 886 + 887 + #[cfg(test)] 888 + mod tests { 889 + use super::*; 890 + use crate::{DeleteAccount, RecordKey, UFOsCommit}; 891 + use jetstream::events::{CommitEvent, CommitOp}; 892 + use jetstream::exports::Cid; 893 + use serde_json::value::RawValue; 894 + 895 + fn fjall_db() -> (FjallReader, FjallWriter) { 896 + let (read, write, _) = FjallStorage::init( 897 + tempfile::tempdir().unwrap(), 898 + "offline test (no real jetstream endpoint)".to_string(), 899 + false, 900 + FjallConfig { temp: true }, 901 + ) 902 + .unwrap(); 903 + (read, write) 904 + } 905 + 906 + const TEST_BATCH_LIMIT: usize = 16; 907 + 908 + #[derive(Debug, Default)] 909 + struct TestBatch { 910 + pub batch: EventBatch<TEST_BATCH_LIMIT>, 911 + } 912 + 913 + impl TestBatch { 914 + #[allow(clippy::too_many_arguments)] 915 + pub fn create( 916 + &mut self, 917 + did: &str, 918 + collection: &str, 919 + rkey: &str, 920 + record: &str, 921 + rev: Option<&str>, 922 + cid: Option<Cid>, 923 + cursor: u64, 924 + ) -> Nsid { 925 + let did = Did::new(did.to_string()).unwrap(); 926 + let collection = Nsid::new(collection.to_string()).unwrap(); 927 + let record = RawValue::from_string(record.to_string()).unwrap(); 928 + let cid = cid.unwrap_or( 929 + "bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy" 930 + .parse() 931 + .unwrap(), 932 + ); 933 + 934 + let event = CommitEvent { 935 + collection, 936 + rkey: RecordKey::new(rkey.to_string()).unwrap(), 937 + rev: rev.unwrap_or("asdf").to_string(), 938 + operation: CommitOp::Create, 939 + record: Some(record), 940 + cid: Some(cid), 941 + }; 942 + 943 + let (commit, collection) = 944 + UFOsCommit::from_commit_info(event, did.clone(), Cursor::from_raw_u64(cursor)) 945 + .unwrap(); 946 + 947 + self.batch 948 + .commits_by_nsid 949 + .entry(collection.clone()) 950 + .or_default() 951 + .truncating_insert(commit) 952 + .unwrap(); 
953 + 954 + collection 955 + } 956 + #[allow(clippy::too_many_arguments)] 957 + pub fn update( 958 + &mut self, 959 + did: &str, 960 + collection: &str, 961 + rkey: &str, 962 + record: &str, 963 + rev: Option<&str>, 964 + cid: Option<Cid>, 965 + cursor: u64, 966 + ) -> Nsid { 967 + let did = Did::new(did.to_string()).unwrap(); 968 + let collection = Nsid::new(collection.to_string()).unwrap(); 969 + let record = RawValue::from_string(record.to_string()).unwrap(); 970 + let cid = cid.unwrap_or( 971 + "bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy" 972 + .parse() 973 + .unwrap(), 974 + ); 975 + 976 + let event = CommitEvent { 977 + collection, 978 + rkey: RecordKey::new(rkey.to_string()).unwrap(), 979 + rev: rev.unwrap_or("asdf").to_string(), 980 + operation: CommitOp::Update, 981 + record: Some(record), 982 + cid: Some(cid), 983 + }; 984 + 985 + let (commit, collection) = 986 + UFOsCommit::from_commit_info(event, did.clone(), Cursor::from_raw_u64(cursor)) 987 + .unwrap(); 988 + 989 + self.batch 990 + .commits_by_nsid 991 + .entry(collection.clone()) 992 + .or_default() 993 + .truncating_insert(commit) 994 + .unwrap(); 995 + 996 + collection 997 + } 998 + #[allow(clippy::too_many_arguments)] 999 + pub fn delete( 1000 + &mut self, 1001 + did: &str, 1002 + collection: &str, 1003 + rkey: &str, 1004 + rev: Option<&str>, 1005 + cursor: u64, 1006 + ) -> Nsid { 1007 + let did = Did::new(did.to_string()).unwrap(); 1008 + let collection = Nsid::new(collection.to_string()).unwrap(); 1009 + let event = CommitEvent { 1010 + collection, 1011 + rkey: RecordKey::new(rkey.to_string()).unwrap(), 1012 + rev: rev.unwrap_or("asdf").to_string(), 1013 + operation: CommitOp::Delete, 1014 + record: None, 1015 + cid: None, 1016 + }; 1017 + 1018 + let (commit, collection) = 1019 + UFOsCommit::from_commit_info(event, did, Cursor::from_raw_u64(cursor)).unwrap(); 1020 + 1021 + self.batch 1022 + .commits_by_nsid 1023 + .entry(collection.clone()) 1024 + .or_default() 1025 + 
.truncating_insert(commit) 1026 + .unwrap(); 1027 + 1028 + collection 1029 + } 1030 + pub fn delete_account(&mut self, did: &str, cursor: u64) -> Did { 1031 + let did = Did::new(did.to_string()).unwrap(); 1032 + self.batch.account_removes.push(DeleteAccount { 1033 + did: did.clone(), 1034 + cursor: Cursor::from_raw_u64(cursor), 1035 + }); 1036 + did 1037 + } 1038 + } 1039 + 1040 + #[test] 1041 + fn test_hello() -> anyhow::Result<()> { 1042 + let (read, mut write) = fjall_db(); 1043 + write.insert_batch::<TEST_BATCH_LIMIT>(EventBatch::default())?; 1044 + let (records, dids) = 1045 + read.get_counts_by_collection(&Nsid::new("a.b.c".to_string()).unwrap())?; 1046 + assert_eq!(records, 0); 1047 + assert_eq!(dids, 0); 1048 + Ok(()) 1049 + } 1050 + 1051 + #[test] 1052 + fn test_insert_one() -> anyhow::Result<()> { 1053 + let (read, mut write) = fjall_db(); 1054 + 1055 + let mut batch = TestBatch::default(); 1056 + let collection = batch.create( 1057 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1058 + "a.b.c", 1059 + "asdf", 1060 + "{}", 1061 + Some("rev-z"), 1062 + None, 1063 + 100, 1064 + ); 1065 + write.insert_batch(batch.batch)?; 1066 + 1067 + let (records, dids) = read.get_counts_by_collection(&collection)?; 1068 + assert_eq!(records, 1); 1069 + assert_eq!(dids, 1); 1070 + let (records, dids) = 1071 + read.get_counts_by_collection(&Nsid::new("d.e.f".to_string()).unwrap())?; 1072 + assert_eq!(records, 0); 1073 + assert_eq!(dids, 0); 1074 + 1075 + let records = read.get_records_by_collections(&[collection], 2, false)?; 1076 + assert_eq!(records.len(), 1); 1077 + let rec = &records[0]; 1078 + assert_eq!(rec.record.get(), "{}"); 1079 + assert!(!rec.is_update); 1080 + 1081 + let records = 1082 + read.get_records_by_collections(&[Nsid::new("d.e.f".to_string()).unwrap()], 2, false)?; 1083 + assert_eq!(records.len(), 0); 1084 + 1085 + Ok(()) 1086 + } 1087 + 1088 + #[test] 1089 + fn test_get_multi_collection() -> anyhow::Result<()> { 1090 + let (read, mut write) = fjall_db(); 1091 + 
1092 + let mut batch = TestBatch::default(); 1093 + batch.create( 1094 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1095 + "a.a.a", 1096 + "aaa", 1097 + r#""earliest""#, 1098 + Some("rev-a"), 1099 + None, 1100 + 100, 1101 + ); 1102 + batch.create( 1103 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1104 + "a.a.b", 1105 + "aab", 1106 + r#""in between""#, 1107 + Some("rev-ab"), 1108 + None, 1109 + 101, 1110 + ); 1111 + batch.create( 1112 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1113 + "a.a.a", 1114 + "aaa-2", 1115 + r#""last""#, 1116 + Some("rev-a-2"), 1117 + None, 1118 + 102, 1119 + ); 1120 + write.insert_batch(batch.batch)?; 1121 + 1122 + let records = read.get_records_by_collections( 1123 + &[ 1124 + Nsid::new("a.a.a".to_string()).unwrap(), 1125 + Nsid::new("a.a.b".to_string()).unwrap(), 1126 + Nsid::new("a.a.c".to_string()).unwrap(), 1127 + ], 1128 + 100, 1129 + false, 1130 + )?; 1131 + assert_eq!(records.len(), 3); 1132 + assert_eq!(records[0].record.get(), r#""last""#); 1133 + assert_eq!( 1134 + records[0].collection, 1135 + Nsid::new("a.a.a".to_string()).unwrap() 1136 + ); 1137 + assert_eq!(records[1].record.get(), r#""in between""#); 1138 + assert_eq!( 1139 + records[1].collection, 1140 + Nsid::new("a.a.b".to_string()).unwrap() 1141 + ); 1142 + assert_eq!(records[2].record.get(), r#""earliest""#); 1143 + assert_eq!( 1144 + records[2].collection, 1145 + Nsid::new("a.a.a".to_string()).unwrap() 1146 + ); 1147 + 1148 + Ok(()) 1149 + } 1150 + 1151 + #[test] 1152 + fn test_get_multi_collection_expanded() -> anyhow::Result<()> { 1153 + let (read, mut write) = fjall_db(); 1154 + 1155 + let mut batch = TestBatch::default(); 1156 + // insert some older ones in aab 1157 + for i in 1..=3 { 1158 + batch.create( 1159 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1160 + "a.a.b", 1161 + &format!("aab-{i}"), 1162 + &format!(r#""b {i}""#), 1163 + Some(&format!("rev-b-{i}")), 1164 + None, 1165 + 100 + i, 1166 + ); 1167 + } 1168 + // and some newer ones in aaa 1169 + for i in 1..=3 { 1170 + batch.create( 
1171 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1172 + "a.a.a", 1173 + &format!("aaa-{i}"), 1174 + &format!(r#""a {i}""#), 1175 + Some(&format!("rev-a-{i}")), 1176 + None, 1177 + 200 + i, 1178 + ); 1179 + } 1180 + write.insert_batch(batch.batch)?; 1181 + 1182 + let records = read.get_records_by_collections( 1183 + &[ 1184 + Nsid::new("a.a.a".to_string()).unwrap(), 1185 + Nsid::new("a.a.b".to_string()).unwrap(), 1186 + Nsid::new("a.a.c".to_string()).unwrap(), 1187 + ], 1188 + 2, 1189 + true, 1190 + )?; 1191 + assert_eq!(records.len(), 4); 1192 + assert_eq!(records[0].record.get(), r#""a 3""#); 1193 + assert_eq!( 1194 + records[0].collection, 1195 + Nsid::new("a.a.a".to_string()).unwrap() 1196 + ); 1197 + 1198 + assert_eq!(records[3].record.get(), r#""b 2""#); 1199 + assert_eq!( 1200 + records[3].collection, 1201 + Nsid::new("a.a.b".to_string()).unwrap() 1202 + ); 1203 + 1204 + Ok(()) 1205 + } 1206 + 1207 + #[test] 1208 + fn test_update_one() -> anyhow::Result<()> { 1209 + let (read, mut write) = fjall_db(); 1210 + 1211 + let mut batch = TestBatch::default(); 1212 + let collection = batch.create( 1213 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1214 + "a.b.c", 1215 + "rkey-asdf", 1216 + "{}", 1217 + Some("rev-a"), 1218 + None, 1219 + 100, 1220 + ); 1221 + write.insert_batch(batch.batch)?; 1222 + 1223 + let mut batch = TestBatch::default(); 1224 + batch.update( 1225 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1226 + "a.b.c", 1227 + "rkey-asdf", 1228 + r#"{"ch": "ch-ch-ch-changes"}"#, 1229 + Some("rev-z"), 1230 + None, 1231 + 101, 1232 + ); 1233 + write.insert_batch(batch.batch)?; 1234 + 1235 + let (records, dids) = read.get_counts_by_collection(&collection)?; 1236 + assert_eq!(records, 1); 1237 + assert_eq!(dids, 1); 1238 + 1239 + let records = read.get_records_by_collections(&[collection], 2, false)?; 1240 + assert_eq!(records.len(), 1); 1241 + let rec = &records[0]; 1242 + assert_eq!(rec.record.get(), r#"{"ch": "ch-ch-ch-changes"}"#); 1243 + assert!(rec.is_update); 1244 + Ok(()) 
1245 + } 1246 + 1247 + #[test] 1248 + fn test_delete_one() -> anyhow::Result<()> { 1249 + let (read, mut write) = fjall_db(); 1250 + 1251 + let mut batch = TestBatch::default(); 1252 + let collection = batch.create( 1253 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1254 + "a.b.c", 1255 + "rkey-asdf", 1256 + "{}", 1257 + Some("rev-a"), 1258 + None, 1259 + 100, 1260 + ); 1261 + write.insert_batch(batch.batch)?; 1262 + 1263 + let mut batch = TestBatch::default(); 1264 + batch.delete( 1265 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1266 + "a.b.c", 1267 + "rkey-asdf", 1268 + Some("rev-z"), 1269 + 101, 1270 + ); 1271 + write.insert_batch(batch.batch)?; 1272 + 1273 + let (records, dids) = read.get_counts_by_collection(&collection)?; 1274 + assert_eq!(records, 1); 1275 + assert_eq!(dids, 1); 1276 + 1277 + let records = read.get_records_by_collections(&[collection], 2, false)?; 1278 + assert_eq!(records.len(), 0); 1279 + 1280 + Ok(()) 1281 + } 1282 + 1283 + #[test] 1284 + fn test_collection_trim() -> anyhow::Result<()> { 1285 + let (read, mut write) = fjall_db(); 1286 + 1287 + let mut batch = TestBatch::default(); 1288 + batch.create( 1289 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1290 + "a.a.a", 1291 + "rkey-aaa", 1292 + "{}", 1293 + Some("rev-aaa"), 1294 + None, 1295 + 10_000, 1296 + ); 1297 + let mut last_b_cursor; 1298 + for i in 1..=10 { 1299 + last_b_cursor = 11_000 + i; 1300 + batch.create( 1301 + &format!("did:plc:inze6wrmsm7pjl7yta3oig7{}", i % 3), 1302 + "a.a.b", 1303 + &format!("rkey-bbb-{i}"), 1304 + &format!(r#"{{"n": {i}}}"#), 1305 + Some(&format!("rev-bbb-{i}")), 1306 + None, 1307 + last_b_cursor, 1308 + ); 1309 + } 1310 + batch.create( 1311 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1312 + "a.a.c", 1313 + "rkey-ccc", 1314 + "{}", 1315 + Some("rev-ccc"), 1316 + None, 1317 + 12_000, 1318 + ); 1319 + 1320 + write.insert_batch(batch.batch)?; 1321 + 1322 + let records = read.get_records_by_collections( 1323 + &[Nsid::new("a.a.a".to_string()).unwrap()], 1324 + 100, 1325 + false, 1326 
+ )?; 1327 + assert_eq!(records.len(), 1); 1328 + let records = read.get_records_by_collections( 1329 + &[Nsid::new("a.a.b".to_string()).unwrap()], 1330 + 100, 1331 + false, 1332 + )?; 1333 + assert_eq!(records.len(), 10); 1334 + let records = read.get_records_by_collections( 1335 + &[Nsid::new("a.a.c".to_string()).unwrap()], 1336 + 100, 1337 + false, 1338 + )?; 1339 + assert_eq!(records.len(), 1); 1340 + let records = read.get_records_by_collections( 1341 + &[Nsid::new("a.a.d".to_string()).unwrap()], 1342 + 100, 1343 + false, 1344 + )?; 1345 + assert_eq!(records.len(), 0); 1346 + 1347 + write.trim_collection(&Nsid::new("a.a.a".to_string()).unwrap(), 6)?; 1348 + write.trim_collection(&Nsid::new("a.a.b".to_string()).unwrap(), 6)?; 1349 + write.trim_collection(&Nsid::new("a.a.c".to_string()).unwrap(), 6)?; 1350 + write.trim_collection(&Nsid::new("a.a.d".to_string()).unwrap(), 6)?; 1351 + 1352 + let records = read.get_records_by_collections( 1353 + &[Nsid::new("a.a.a".to_string()).unwrap()], 1354 + 100, 1355 + false, 1356 + )?; 1357 + assert_eq!(records.len(), 1); 1358 + let records = read.get_records_by_collections( 1359 + &[Nsid::new("a.a.b".to_string()).unwrap()], 1360 + 100, 1361 + false, 1362 + )?; 1363 + assert_eq!(records.len(), 6); 1364 + let records = read.get_records_by_collections( 1365 + &[Nsid::new("a.a.c".to_string()).unwrap()], 1366 + 100, 1367 + false, 1368 + )?; 1369 + assert_eq!(records.len(), 1); 1370 + let records = read.get_records_by_collections( 1371 + &[Nsid::new("a.a.d".to_string()).unwrap()], 1372 + 100, 1373 + false, 1374 + )?; 1375 + assert_eq!(records.len(), 0); 1376 + 1377 + Ok(()) 1378 + } 1379 + 1380 + #[test] 1381 + fn test_delete_account() -> anyhow::Result<()> { 1382 + let (read, mut write) = fjall_db(); 1383 + 1384 + let mut batch = TestBatch::default(); 1385 + batch.create( 1386 + "did:plc:person-a", 1387 + "a.a.a", 1388 + "rkey-aaa", 1389 + "{}", 1390 + Some("rev-aaa"), 1391 + None, 1392 + 10_000, 1393 + ); 1394 + for i in 1..=2 { 
1395 + batch.create( 1396 + "did:plc:person-b", 1397 + "a.a.a", 1398 + &format!("rkey-bbb-{i}"), 1399 + &format!(r#"{{"n": {i}}}"#), 1400 + Some(&format!("rev-bbb-{i}")), 1401 + None, 1402 + 11_000 + i, 1403 + ); 1404 + } 1405 + write.insert_batch(batch.batch)?; 1406 + 1407 + let records = read.get_records_by_collections( 1408 + &[Nsid::new("a.a.a".to_string()).unwrap()], 1409 + 100, 1410 + false, 1411 + )?; 1412 + assert_eq!(records.len(), 3); 1413 + 1414 + let records_deleted = 1415 + write.delete_account(&Did::new("did:plc:person-b".to_string()).unwrap())?; 1416 + assert_eq!(records_deleted, 2); 1417 + 1418 + let records = read.get_records_by_collections( 1419 + &[Nsid::new("a.a.a".to_string()).unwrap()], 1420 + 100, 1421 + false, 1422 + )?; 1423 + assert_eq!(records.len(), 1); 1424 + 1425 + Ok(()) 1426 + } 1427 + 1428 + #[test] 1429 + fn rollup_delete_account_removes_record() -> anyhow::Result<()> { 1430 + let (read, mut write) = fjall_db(); 1431 + 1432 + let mut batch = TestBatch::default(); 1433 + batch.create( 1434 + "did:plc:person-a", 1435 + "a.a.a", 1436 + "rkey-aaa", 1437 + "{}", 1438 + Some("rev-aaa"), 1439 + None, 1440 + 10_000, 1441 + ); 1442 + write.insert_batch(batch.batch)?; 1443 + 1444 + let mut batch = TestBatch::default(); 1445 + batch.delete_account("did:plc:person-a", 9_999); // queue it before the rollup 1446 + write.insert_batch(batch.batch)?; 1447 + 1448 + write.step_rollup()?; 1449 + 1450 + let records = 1451 + read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?; 1452 + assert_eq!(records.len(), 0); 1453 + 1454 + Ok(()) 1455 + } 1456 + 1457 + #[test] 1458 + fn rollup_delete_live_count_step() -> anyhow::Result<()> { 1459 + let (read, mut write) = fjall_db(); 1460 + 1461 + let mut batch = TestBatch::default(); 1462 + batch.create( 1463 + "did:plc:person-a", 1464 + "a.a.a", 1465 + "rkey-aaa", 1466 + "{}", 1467 + Some("rev-aaa"), 1468 + None, 1469 + 10_000, 1470 + ); 1471 + write.insert_batch(batch.batch)?; 
1472 + 1473 + let n = write.step_rollup()?; 1474 + assert_eq!(n, 1); 1475 + 1476 + let mut batch = TestBatch::default(); 1477 + batch.delete_account("did:plc:person-a", 10_001); 1478 + write.insert_batch(batch.batch)?; 1479 + 1480 + let records = 1481 + read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?; 1482 + assert_eq!(records.len(), 1); 1483 + 1484 + let n = write.step_rollup()?; 1485 + assert_eq!(n, 1); 1486 + 1487 + let records = 1488 + read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?; 1489 + assert_eq!(records.len(), 0); 1490 + 1491 + let mut batch = TestBatch::default(); 1492 + batch.delete_account("did:plc:person-a", 9_999); 1493 + write.insert_batch(batch.batch)?; 1494 + 1495 + let n = write.step_rollup()?; 1496 + assert_eq!(n, 0); 1497 + 1498 + Ok(()) 1499 + } 1500 + 1501 + #[test] 1502 + fn rollup_multiple_count_batches() -> anyhow::Result<()> { 1503 + let (_read, mut write) = fjall_db(); 1504 + 1505 + let mut batch = TestBatch::default(); 1506 + batch.create( 1507 + "did:plc:person-a", 1508 + "a.a.a", 1509 + "rkey-aaa", 1510 + "{}", 1511 + Some("rev-aaa"), 1512 + None, 1513 + 10_000, 1514 + ); 1515 + write.insert_batch(batch.batch)?; 1516 + 1517 + let mut batch = TestBatch::default(); 1518 + batch.create( 1519 + "did:plc:person-a", 1520 + "a.a.a", 1521 + "rkey-aab", 1522 + "{}", 1523 + Some("rev-aab"), 1524 + None, 1525 + 10_001, 1526 + ); 1527 + write.insert_batch(batch.batch)?; 1528 + 1529 + let n = write.step_rollup()?; 1530 + assert_eq!(n, 2); 1531 + 1532 + let n = write.step_rollup()?; 1533 + assert_eq!(n, 0); 1534 + 1535 + Ok(()) 1536 + } 1537 + 1538 + #[test] 1539 + fn counts_before_and_after_rollup() -> anyhow::Result<()> { 1540 + let (read, mut write) = fjall_db(); 1541 + 1542 + let mut batch = TestBatch::default(); 1543 + batch.create( 1544 + "did:plc:person-a", 1545 + "a.a.a", 1546 + "rkey-aaa", 1547 + "{}", 1548 + Some("rev-aaa"), 1549 + None, 1550 + 10_000, 1551 + ); 
1552 + batch.create( 1553 + "did:plc:person-b", 1554 + "a.a.a", 1555 + "rkey-bbb", 1556 + "{}", 1557 + Some("rev-bbb"), 1558 + None, 1559 + 10_001, 1560 + ); 1561 + write.insert_batch(batch.batch)?; 1562 + 1563 + let mut batch = TestBatch::default(); 1564 + batch.delete_account("did:plc:person-a", 11_000); 1565 + write.insert_batch(batch.batch)?; 1566 + 1567 + let mut batch = TestBatch::default(); 1568 + batch.create( 1569 + "did:plc:person-a", 1570 + "a.a.a", 1571 + "rkey-aac", 1572 + "{}", 1573 + Some("rev-aac"), 1574 + None, 1575 + 12_000, 1576 + ); 1577 + write.insert_batch(batch.batch)?; 1578 + 1579 + // before any rollup 1580 + let (records, dids) = 1581 + read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?; 1582 + assert_eq!(records, 3); 1583 + assert_eq!(dids, 2); 1584 + 1585 + // first batch rolled up 1586 + let n = write.step_rollup()?; 1587 + assert_eq!(n, 1); 1588 + 1589 + let (records, dids) = 1590 + read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?; 1591 + assert_eq!(records, 3); 1592 + assert_eq!(dids, 2); 1593 + 1594 + // delete account rolled up 1595 + let n = write.step_rollup()?; 1596 + assert_eq!(n, 1); 1597 + 1598 + let (records, dids) = 1599 + read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?; 1600 + assert_eq!(records, 3); 1601 + assert_eq!(dids, 2); 1602 + 1603 + // second batch rolled up 1604 + let n = write.step_rollup()?; 1605 + assert_eq!(n, 1); 1606 + 1607 + let (records, dids) = 1608 + read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?; 1609 + assert_eq!(records, 3); 1610 + assert_eq!(dids, 2); 1611 + 1612 + // no more rollups left 1613 + let n = write.step_rollup()?; 1614 + assert_eq!(n, 0); 1615 + 1616 + Ok(()) 1617 + } 1618 + 1619 + #[test] 1620 + fn get_top_collections() -> anyhow::Result<()> { 1621 + let (read, mut write) = fjall_db(); 1622 + 1623 + let mut batch = TestBatch::default(); 1624 + batch.create( 1625 + "did:plc:person-a", 1626 + 
"a.a.a", 1627 + "rkey-aaa", 1628 + "{}", 1629 + Some("rev-aaa"), 1630 + None, 1631 + 10_000, 1632 + ); 1633 + batch.create( 1634 + "did:plc:person-b", 1635 + "a.a.b", 1636 + "rkey-bbb", 1637 + "{}", 1638 + Some("rev-bbb"), 1639 + None, 1640 + 10_001, 1641 + ); 1642 + batch.create( 1643 + "did:plc:person-c", 1644 + "a.b.c", 1645 + "rkey-ccc", 1646 + "{}", 1647 + Some("rev-ccc"), 1648 + None, 1649 + 10_002, 1650 + ); 1651 + batch.create( 1652 + "did:plc:person-a", 1653 + "a.a.a", 1654 + "rkey-aaa-2", 1655 + "{}", 1656 + Some("rev-aaa-2"), 1657 + None, 1658 + 10_003, 1659 + ); 1660 + write.insert_batch(batch.batch)?; 1661 + 1662 + let n = write.step_rollup()?; 1663 + assert_eq!(n, 3); // 3 collections 1664 + 1665 + let tops = read.get_top_collections()?; 1666 + assert_eq!( 1667 + tops, 1668 + TopCollections { 1669 + total_records: 4, 1670 + dids_estimate: 3, 1671 + nsid_child_segments: HashMap::from([( 1672 + "a".to_string(), 1673 + TopCollections { 1674 + total_records: 4, 1675 + dids_estimate: 3, 1676 + nsid_child_segments: HashMap::from([ 1677 + ( 1678 + "a".to_string(), 1679 + TopCollections { 1680 + total_records: 3, 1681 + dids_estimate: 2, 1682 + nsid_child_segments: HashMap::from([ 1683 + ( 1684 + "a".to_string(), 1685 + TopCollections { 1686 + total_records: 2, 1687 + dids_estimate: 1, 1688 + nsid_child_segments: HashMap::from([]), 1689 + }, 1690 + ), 1691 + ( 1692 + "b".to_string(), 1693 + TopCollections { 1694 + total_records: 1, 1695 + dids_estimate: 1, 1696 + nsid_child_segments: HashMap::from([]), 1697 + } 1698 + ), 1699 + ]), 1700 + }, 1701 + ), 1702 + ( 1703 + "b".to_string(), 1704 + TopCollections { 1705 + total_records: 1, 1706 + dids_estimate: 1, 1707 + nsid_child_segments: HashMap::from([( 1708 + "c".to_string(), 1709 + TopCollections { 1710 + total_records: 1, 1711 + dids_estimate: 1, 1712 + nsid_child_segments: HashMap::from([]), 1713 + }, 1714 + ),]), 1715 + }, 1716 + ), 1717 + ]), 1718 + }, 1719 + ),]), 1720 + } 1721 + ); 1722 + Ok(()) 1723 + } 
1724 + 1725 + #[test] 1726 + fn get_top_collections_with_parent_nsid() -> anyhow::Result<()> { 1727 + let (read, mut write) = fjall_db(); 1728 + 1729 + let mut batch = TestBatch::default(); 1730 + batch.create( 1731 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1732 + "a.a.a.a", 1733 + "aaaa", 1734 + r#""child nsid""#, 1735 + Some("rev-aaaa"), 1736 + None, 1737 + 100, 1738 + ); 1739 + batch.create( 1740 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1741 + "a.a.a", 1742 + "aaa", 1743 + r#""parent nsid""#, 1744 + Some("rev-aaa"), 1745 + None, 1746 + 101, 1747 + ); 1748 + write.insert_batch(batch.batch)?; 1749 + 1750 + let n = write.step_rollup()?; 1751 + assert_eq!(n, 2); // 3 collections 1752 + 1753 + let tops = read.get_top_collections()?; 1754 + assert_eq!( 1755 + tops, 1756 + TopCollections { 1757 + total_records: 2, 1758 + dids_estimate: 1, 1759 + nsid_child_segments: HashMap::from([( 1760 + "a".to_string(), 1761 + TopCollections { 1762 + total_records: 2, 1763 + dids_estimate: 1, 1764 + nsid_child_segments: HashMap::from([( 1765 + "a".to_string(), 1766 + TopCollections { 1767 + total_records: 2, 1768 + dids_estimate: 1, 1769 + nsid_child_segments: HashMap::from([( 1770 + "a".to_string(), 1771 + TopCollections { 1772 + total_records: 2, 1773 + dids_estimate: 1, 1774 + nsid_child_segments: HashMap::from([( 1775 + "a".to_string(), 1776 + TopCollections { 1777 + total_records: 1, 1778 + dids_estimate: 1, 1779 + nsid_child_segments: HashMap::from([]), 1780 + }, 1781 + ),]), 1782 + }, 1783 + ),]), 1784 + }, 1785 + ),]), 1786 + }, 1787 + ),]), 1788 + } 1789 + ); 1790 + 1791 + // TODO: handle leaf node counts explicitly, since parent NSIDs can be leaves themselves 1792 + 1793 + Ok(()) 1794 + } 1795 + }
+1841
ufos/src/storage_mem.rs
··· 1 + use std::ops::Bound; 2 + use std::sync::Arc; 3 + 4 + use crate::db_types::{db_complete, DbBytes, DbStaticStr, StaticStr}; 5 + use crate::error::StorageError; 6 + use crate::storage::{StorageResult, StorageWhatever, StoreReader, StoreWriter}; 7 + use crate::store_types::{ 8 + AllTimeRollupKey, CountsValue, DeleteAccountQueueKey, DeleteAccountQueueVal, 9 + HourTruncatedCursor, HourlyRollupKey, JetstreamCursorKey, JetstreamCursorValue, 10 + JetstreamEndpointKey, JetstreamEndpointValue, LiveCountsKey, NewRollupCursorKey, 11 + NewRollupCursorValue, NsidRecordFeedKey, NsidRecordFeedVal, RecordLocationKey, 12 + RecordLocationMeta, RecordLocationVal, RecordRawValue, TakeoffKey, TakeoffValue, 13 + WeekTruncatedCursor, WeeklyRollupKey, 14 + }; 15 + use crate::{CommitAction, ConsumerInfo, Did, EventBatch, Nsid, TopCollections, UFOsRecord}; 16 + use async_trait::async_trait; 17 + use jetstream::events::Cursor; 18 + use lsm_tree::range::prefix_to_range; 19 + use std::collections::BTreeMap; 20 + use std::collections::HashMap; 21 + use std::path::Path; 22 + use std::sync::Mutex; 23 + use std::sync::RwLock; 24 + use std::time::SystemTime; 25 + 26 + const MAX_BATCHED_CLEANUP_SIZE: usize = 1024; // try to commit progress for longer feeds 27 + const MAX_BATCHED_ACCOUNT_DELETE_RECORDS: usize = 1024; 28 + const MAX_BATCHED_ROLLUP_COUNTS: usize = 256; 29 + 30 + /// 31 + /// new data format, roughly: 32 + /// 33 + /// Partion: 'global' 34 + /// 35 + /// - Global sequence counter (is the jetstream cursor -- monotonic with many gaps) 36 + /// - key: "js_cursor" (literal) 37 + /// - val: u64 38 + /// 39 + /// - Jetstream server endpoint (persisted because the cursor can't be used on another instance without data loss) 40 + /// - key: "js_endpoint" (literal) 41 + /// - val: string (URL of the instance) 42 + /// 43 + /// - Launch date 44 + /// - key: "takeoff" (literal) 45 + /// - val: u64 (micros timestamp, not from jetstream for now so not precise) 46 + /// 47 + /// - Rollup cursor 
(bg work: roll stats into hourlies, delete accounts, old record deletes) 48 + /// - key: "rollup_cursor" (literal) 49 + /// - val: u64 (tracks behind js_cursor) 50 + /// 51 + /// 52 + /// Partition: 'feed' 53 + /// 54 + /// - Per-collection list of record references ordered by jetstream cursor 55 + /// - key: nullstr || u64 (collection nsid null-terminated, jetstream cursor) 56 + /// - val: nullstr || nullstr || nullstr (did, rkey, rev. rev is mostly a sanity-check for now.) 57 + /// 58 + /// 59 + /// Partition: 'records' 60 + /// 61 + /// - Actual records by their atproto location 62 + /// - key: nullstr || nullstr || nullstr (did, collection, rkey) 63 + /// - val: u64 || bool || nullstr || rawval (js_cursor, is_update, rev, actual record) 64 + /// 65 + /// 66 + /// Partition: 'rollups' 67 + /// 68 + /// - Live (batched) records counts and dids estimate per collection 69 + /// - key: "live_counts" || u64 || nullstr (js_cursor, nsid) 70 + /// - val: u64 || HLL (count (not cursor), estimator) 71 + /// 72 + /// - Hourly total record counts and dids estimate per collection 73 + /// - key: "hourly_counts" || u64 || nullstr (hour, nsid) 74 + /// - val: u64 || HLL (count (not cursor), estimator) 75 + /// 76 + /// - Weekly total record counts and dids estimate per collection 77 + /// - key: "weekly_counts" || u64 || nullstr (hour, nsid) 78 + /// - val: u64 || HLL (count (not cursor), estimator) 79 + /// 80 + /// - All-time total record counts and dids estimate per collection 81 + /// - key: "ever_counts" || nullstr (nsid) 82 + /// - val: u64 || HLL (count (not cursor), estimator) 83 + /// 84 + /// - TODO: sorted indexes for all-times? 85 + /// 86 + /// 87 + /// Partition: 'queues' 88 + /// 89 + /// - Delete account queue 90 + /// - key: "delete_acount" || u64 (js_cursor) 91 + /// - val: nullstr (did) 92 + /// 93 + /// 94 + /// TODO: moderation actions 95 + /// TODO: account privacy preferences. Might wait for the protocol-level (PDS-level?) stuff to land. 
Will probably do lazy fetching + caching on read. 96 + #[derive(Debug)] 97 + pub struct MemStorage {} 98 + 99 + #[derive(Debug, Default)] 100 + pub struct MemConfig { 101 + /// drop the db when the storage is dropped 102 + /// 103 + /// this is only meant for tests 104 + #[cfg(test)] 105 + pub temp: bool, 106 + } 107 + 108 + //////////// 109 + //////////// 110 + //////////// 111 + //////////// 112 + //////////// 113 + //////////// 114 + 115 + struct BatchSentinel {} 116 + 117 + #[derive(Clone)] 118 + struct MemKeyspace { 119 + keyspace_guard: Arc<RwLock<BatchSentinel>>, 120 + } 121 + 122 + impl MemKeyspace { 123 + pub fn open() -> Self { 124 + Self { 125 + keyspace_guard: Arc::new(RwLock::new(BatchSentinel {})), 126 + } 127 + } 128 + pub fn open_partition(&self, _name: &str) -> StorageResult<MemPartion> { 129 + Ok(MemPartion { 130 + // name: name.to_string(), 131 + keyspace_guard: self.keyspace_guard.clone(), 132 + contents: Default::default(), 133 + }) 134 + } 135 + pub fn batch(&self) -> MemBatch { 136 + MemBatch { 137 + keyspace_guard: self.keyspace_guard.clone(), 138 + tasks: Vec::new(), 139 + } 140 + } 141 + pub fn instant(&self) -> u64 { 142 + 1 143 + } 144 + } 145 + 146 + enum BatchTask { 147 + Insert { 148 + p: MemPartion, 149 + key: Vec<u8>, 150 + val: Vec<u8>, 151 + }, 152 + Remove { 153 + p: MemPartion, 154 + key: Vec<u8>, 155 + }, 156 + } 157 + struct MemBatch { 158 + keyspace_guard: Arc<RwLock<BatchSentinel>>, 159 + tasks: Vec<BatchTask>, 160 + } 161 + impl MemBatch { 162 + pub fn insert(&mut self, p: &MemPartion, key: &[u8], val: &[u8]) { 163 + self.tasks.push(BatchTask::Insert { 164 + p: p.clone(), 165 + key: key.to_vec(), 166 + val: val.to_vec(), 167 + }); 168 + } 169 + pub fn remove(&mut self, p: &MemPartion, key: &[u8]) { 170 + self.tasks.push(BatchTask::Remove { 171 + p: p.clone(), 172 + key: key.to_vec(), 173 + }); 174 + } 175 + pub fn len(&self) -> usize { 176 + self.tasks.len() 177 + } 178 + pub fn commit(&mut self) -> StorageResult<()> { 179 
+ let _guard = self.keyspace_guard.write().unwrap(); 180 + for task in &mut self.tasks { 181 + match task { 182 + BatchTask::Insert { p, key, val } => p 183 + .contents 184 + .try_lock() 185 + .unwrap() 186 + .insert(key.to_vec(), val.to_vec()), 187 + BatchTask::Remove { p, key } => p.contents.try_lock().unwrap().remove(key), 188 + }; 189 + } 190 + Ok(()) 191 + } 192 + } 193 + 194 + #[derive(Clone)] 195 + struct MemPartion { 196 + // name: String, 197 + keyspace_guard: Arc<RwLock<BatchSentinel>>, 198 + contents: Arc<Mutex<BTreeMap<Vec<u8>, Vec<u8>>>>, 199 + } 200 + impl MemPartion { 201 + pub fn get(&self, key: &[u8]) -> StorageResult<Option<Vec<u8>>> { 202 + let _guard = self.keyspace_guard.read().unwrap(); 203 + Ok(self.contents.lock().unwrap().get(key).cloned()) 204 + } 205 + pub fn prefix(&self, pre: &[u8]) -> Vec<StorageResult<(Vec<u8>, Vec<u8>)>> { 206 + // let prefix_bytes = prefix.to_db_bytes()?; 207 + let (_, Bound::Excluded(range_end)) = prefix_to_range(pre) else { 208 + panic!("bad range thing"); 209 + }; 210 + 211 + return self.range(pre.to_vec()..range_end.to_vec()); 212 + } 213 + pub fn range(&self, r: std::ops::Range<Vec<u8>>) -> Vec<StorageResult<(Vec<u8>, Vec<u8>)>> { 214 + let _guard = self.keyspace_guard.read().unwrap(); 215 + self.contents 216 + .lock() 217 + .unwrap() 218 + .range(r) 219 + .map(|(k, v)| Ok((k.clone(), v.clone()))) 220 + .collect() 221 + } 222 + pub fn insert(&self, key: &[u8], val: &[u8]) -> StorageResult<()> { 223 + let _guard = self.keyspace_guard.read().unwrap(); 224 + self.contents 225 + .lock() 226 + .unwrap() 227 + .insert(key.to_vec(), val.to_vec()); 228 + Ok(()) 229 + } 230 + // pub fn remove(&self, key: &[u8]) -> StorageResult<()> { 231 + // let _guard = self.keyspace_guard.read().unwrap(); 232 + // self.contents 233 + // .lock() 234 + // .unwrap() 235 + // .remove(key); 236 + // Ok(()) 237 + // } 238 + pub fn snapshot_at(&self, _instant: u64) -> Self { 239 + self.clone() 240 + } 241 + pub fn snapshot(&self) -> Self { 
242 + self.clone() 243 + } 244 + } 245 + 246 + //////////// 247 + //////////// 248 + //////////// 249 + //////////// 250 + //////////// 251 + //////////// 252 + 253 + impl StorageWhatever<MemReader, MemWriter, MemConfig> for MemStorage { 254 + fn init( 255 + _path: impl AsRef<Path>, 256 + endpoint: String, 257 + force_endpoint: bool, 258 + _config: MemConfig, 259 + ) -> StorageResult<(MemReader, MemWriter, Option<Cursor>)> { 260 + let keyspace = MemKeyspace::open(); 261 + 262 + let global = keyspace.open_partition("global")?; 263 + let feeds = keyspace.open_partition("feeds")?; 264 + let records = keyspace.open_partition("records")?; 265 + let rollups = keyspace.open_partition("rollups")?; 266 + let queues = keyspace.open_partition("queues")?; 267 + 268 + let js_cursor = get_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)?; 269 + 270 + if js_cursor.is_some() { 271 + let stored_endpoint = 272 + get_static_neu::<JetstreamEndpointKey, JetstreamEndpointValue>(&global)?; 273 + 274 + let JetstreamEndpointValue(stored) = stored_endpoint.ok_or(StorageError::InitError( 275 + "found cursor but missing js_endpoint, refusing to start.".to_string(), 276 + ))?; 277 + 278 + if stored != endpoint { 279 + if force_endpoint { 280 + log::warn!("forcing a jetstream switch from {stored:?} to {endpoint:?}"); 281 + insert_static_neu::<JetstreamEndpointKey>( 282 + &global, 283 + JetstreamEndpointValue(endpoint.to_string()), 284 + )?; 285 + } else { 286 + return Err(StorageError::InitError(format!( 287 + "stored js_endpoint {stored:?} differs from provided {endpoint:?}, refusing to start."))); 288 + } 289 + } 290 + } else { 291 + insert_static_neu::<JetstreamEndpointKey>( 292 + &global, 293 + JetstreamEndpointValue(endpoint.to_string()), 294 + )?; 295 + insert_static_neu::<TakeoffKey>(&global, Cursor::at(SystemTime::now()))?; 296 + insert_static_neu::<NewRollupCursorKey>(&global, Cursor::from_start())?; 297 + } 298 + 299 + let reader = MemReader { 300 + keyspace: 
keyspace.clone(), 301 + global: global.clone(), 302 + feeds: feeds.clone(), 303 + records: records.clone(), 304 + rollups: rollups.clone(), 305 + }; 306 + let writer = MemWriter { 307 + keyspace, 308 + global, 309 + feeds, 310 + records, 311 + rollups, 312 + queues, 313 + }; 314 + Ok((reader, writer, js_cursor)) 315 + } 316 + } 317 + 318 + type MemRKV = StorageResult<(Vec<u8>, Vec<u8>)>; 319 + 320 + #[derive(Clone)] 321 + pub struct MemReader { 322 + keyspace: MemKeyspace, 323 + global: MemPartion, 324 + feeds: MemPartion, 325 + records: MemPartion, 326 + rollups: MemPartion, 327 + } 328 + 329 + /// An iterator that knows how to skip over deleted/invalidated records 330 + struct RecordIterator { 331 + db_iter: Box<dyn Iterator<Item = MemRKV>>, 332 + records: MemPartion, 333 + limit: usize, 334 + fetched: usize, 335 + } 336 + impl RecordIterator { 337 + pub fn new( 338 + feeds: &MemPartion, 339 + records: MemPartion, 340 + collection: &Nsid, 341 + limit: usize, 342 + ) -> StorageResult<Self> { 343 + let prefix = NsidRecordFeedKey::from_prefix_to_db_bytes(collection)?; 344 + let db_iter = feeds.prefix(&prefix).into_iter().rev(); 345 + Ok(Self { 346 + db_iter: Box::new(db_iter), 347 + records, 348 + limit, 349 + fetched: 0, 350 + }) 351 + } 352 + fn get_record(&self, db_next: MemRKV) -> StorageResult<Option<UFOsRecord>> { 353 + let (key_bytes, val_bytes) = db_next?; 354 + let feed_key = db_complete::<NsidRecordFeedKey>(&key_bytes)?; 355 + let feed_val = db_complete::<NsidRecordFeedVal>(&val_bytes)?; 356 + let location_key: RecordLocationKey = (&feed_key, &feed_val).into(); 357 + 358 + let Some(location_val_bytes) = self.records.get(&location_key.to_db_bytes()?)? 
else { 359 + // record was deleted (hopefully) 360 + return Ok(None); 361 + }; 362 + 363 + let (meta, n) = RecordLocationMeta::from_db_bytes(&location_val_bytes)?; 364 + 365 + if meta.cursor() != feed_key.cursor() { 366 + // older/different version 367 + return Ok(None); 368 + } 369 + if meta.rev != feed_val.rev() { 370 + // weird... 371 + log::warn!("record lookup: cursor match but rev did not...? excluding."); 372 + return Ok(None); 373 + } 374 + let Some(raw_value_bytes) = location_val_bytes.get(n..) else { 375 + log::warn!( 376 + "record lookup: found record but could not get bytes to decode the record??" 377 + ); 378 + return Ok(None); 379 + }; 380 + let rawval = db_complete::<RecordRawValue>(raw_value_bytes)?; 381 + Ok(Some(UFOsRecord { 382 + collection: feed_key.collection().clone(), 383 + cursor: feed_key.cursor(), 384 + did: feed_val.did().clone(), 385 + rkey: feed_val.rkey().clone(), 386 + rev: meta.rev.to_string(), 387 + record: rawval.try_into()?, 388 + is_update: meta.is_update, 389 + })) 390 + } 391 + } 392 + impl Iterator for RecordIterator { 393 + type Item = StorageResult<Option<UFOsRecord>>; 394 + fn next(&mut self) -> Option<Self::Item> { 395 + if self.fetched == self.limit { 396 + return Some(Ok(None)); 397 + } 398 + let record = loop { 399 + let db_next = self.db_iter.next()?; // None short-circuits here 400 + match self.get_record(db_next) { 401 + Err(e) => return Some(Err(e)), 402 + Ok(Some(record)) => break record, 403 + Ok(None) => continue, 404 + } 405 + }; 406 + self.fetched += 1; 407 + Some(Ok(Some(record))) 408 + } 409 + } 410 + 411 + impl MemReader { 412 + fn get_storage_stats(&self) -> StorageResult<serde_json::Value> { 413 + let rollup_cursor = 414 + get_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&self.global)? 
415 + .map(|c| c.to_raw_u64()); 416 + 417 + Ok(serde_json::json!({ 418 + "rollup_cursor": rollup_cursor, 419 + })) 420 + } 421 + 422 + fn get_consumer_info(&self) -> StorageResult<ConsumerInfo> { 423 + let global = self.global.snapshot(); 424 + 425 + let endpoint = 426 + get_snapshot_static_neu::<JetstreamEndpointKey, JetstreamEndpointValue>(&global)? 427 + .ok_or(StorageError::BadStateError( 428 + "Could not find jetstream endpoint".to_string(), 429 + ))? 430 + .0; 431 + 432 + let started_at = get_snapshot_static_neu::<TakeoffKey, TakeoffValue>(&global)? 433 + .ok_or(StorageError::BadStateError( 434 + "Could not find jetstream takeoff time".to_string(), 435 + ))? 436 + .to_raw_u64(); 437 + 438 + let latest_cursor = 439 + get_snapshot_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)? 440 + .map(|c| c.to_raw_u64()); 441 + 442 + Ok(ConsumerInfo::Jetstream { 443 + endpoint, 444 + started_at, 445 + latest_cursor, 446 + }) 447 + } 448 + 449 + fn get_top_collections(&self) -> Result<TopCollections, StorageError> { 450 + // TODO: limit nsid traversal depth 451 + // TODO: limit nsid traversal breadth 452 + // TODO: be serious about anything 453 + 454 + // TODO: probably use a stack of segments to reduce to ~log-n merges 455 + 456 + #[derive(Default)] 457 + struct Blah { 458 + counts: CountsValue, 459 + children: HashMap<String, Blah>, 460 + } 461 + impl From<&Blah> for TopCollections { 462 + fn from(bla: &Blah) -> Self { 463 + Self { 464 + total_records: bla.counts.records(), 465 + dids_estimate: bla.counts.dids().estimate() as u64, 466 + nsid_child_segments: HashMap::from_iter( 467 + bla.children.iter().map(|(k, v)| (k.to_string(), v.into())), 468 + ), 469 + } 470 + } 471 + } 472 + 473 + let mut b = Blah::default(); 474 + let prefix = AllTimeRollupKey::from_prefix_to_db_bytes(&Default::default())?; 475 + for kv in self.rollups.prefix(&prefix.to_db_bytes()?) 
{ 476 + let (key_bytes, val_bytes) = kv?; 477 + let key = db_complete::<AllTimeRollupKey>(&key_bytes)?; 478 + let val = db_complete::<CountsValue>(&val_bytes)?; 479 + 480 + let mut node = &mut b; 481 + node.counts.merge(&val); 482 + for segment in key.collection().split('.') { 483 + node = node.children.entry(segment.to_string()).or_default(); 484 + node.counts.merge(&val); 485 + } 486 + } 487 + 488 + Ok((&b).into()) 489 + } 490 + 491 + fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> { 492 + // 0. grab a snapshot in case rollups happen while we're working 493 + let instant = self.keyspace.instant(); 494 + let global = self.global.snapshot_at(instant); 495 + let rollups = self.rollups.snapshot_at(instant); 496 + 497 + // 1. all-time counts 498 + let all_time_key = AllTimeRollupKey::new(collection).to_db_bytes()?; 499 + let mut total_counts = rollups 500 + .get(&all_time_key)? 501 + .as_deref() 502 + .map(db_complete::<CountsValue>) 503 + .transpose()? 504 + .unwrap_or_default(); 505 + 506 + // 2. live counts that haven't been rolled into all-time yet. 
507 + let rollup_cursor = 508 + get_snapshot_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&global)?.ok_or( 509 + StorageError::BadStateError("Could not find current rollup cursor".to_string()), 510 + )?; 511 + 512 + let full_range = LiveCountsKey::range_from_cursor(rollup_cursor)?; 513 + for kv in rollups.range(full_range) { 514 + let (key_bytes, val_bytes) = kv?; 515 + let key = db_complete::<LiveCountsKey>(&key_bytes)?; 516 + if key.collection() == collection { 517 + let counts = db_complete::<CountsValue>(&val_bytes)?; 518 + total_counts.merge(&counts); 519 + } 520 + } 521 + Ok(( 522 + total_counts.records(), 523 + total_counts.dids().estimate() as u64, 524 + )) 525 + } 526 + 527 + fn get_records_by_collections( 528 + &self, 529 + collections: &[Nsid], 530 + limit: usize, 531 + _expand_each_collection: bool, 532 + ) -> StorageResult<Vec<UFOsRecord>> { 533 + if collections.is_empty() { 534 + return Ok(vec![]); 535 + } 536 + let mut record_iterators = Vec::new(); 537 + for collection in collections { 538 + let iter = RecordIterator::new(&self.feeds, self.records.clone(), collection, limit)?; 539 + record_iterators.push(iter.peekable()); 540 + } 541 + let mut merged = Vec::new(); 542 + loop { 543 + let mut latest: Option<(Cursor, usize)> = None; // ugh 544 + for (i, iter) in record_iterators.iter_mut().enumerate() { 545 + let Some(it) = iter.peek_mut() else { 546 + continue; 547 + }; 548 + let it = match it { 549 + Ok(v) => v, 550 + Err(e) => Err(std::mem::replace(e, StorageError::Stolen))?, 551 + }; 552 + let Some(rec) = it else { 553 + break; 554 + }; 555 + if let Some((cursor, _)) = latest { 556 + if rec.cursor > cursor { 557 + latest = Some((rec.cursor, i)) 558 + } 559 + } else { 560 + latest = Some((rec.cursor, i)); 561 + } 562 + } 563 + let Some((_, idx)) = latest else { 564 + break; 565 + }; 566 + // yeah yeah whateverrrrrrrrrrrrrrrr 567 + merged.push(record_iterators[idx].next().unwrap().unwrap().unwrap()); 568 + } 569 + Ok(merged) 570 + } 571 + } 
572 + 573 + #[async_trait] 574 + impl StoreReader for MemReader { 575 + async fn get_storage_stats(&self) -> StorageResult<serde_json::Value> { 576 + let s = self.clone(); 577 + tokio::task::spawn_blocking(move || MemReader::get_storage_stats(&s)).await? 578 + } 579 + async fn get_consumer_info(&self) -> StorageResult<ConsumerInfo> { 580 + let s = self.clone(); 581 + tokio::task::spawn_blocking(move || MemReader::get_consumer_info(&s)).await? 582 + } 583 + async fn get_top_collections(&self) -> Result<TopCollections, StorageError> { 584 + let s = self.clone(); 585 + tokio::task::spawn_blocking(move || MemReader::get_top_collections(&s)).await? 586 + } 587 + async fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> { 588 + let s = self.clone(); 589 + let collection = collection.clone(); 590 + tokio::task::spawn_blocking(move || MemReader::get_counts_by_collection(&s, &collection)) 591 + .await? 592 + } 593 + async fn get_records_by_collections( 594 + &self, 595 + collections: &[Nsid], 596 + limit: usize, 597 + expand_each_collection: bool, 598 + ) -> StorageResult<Vec<UFOsRecord>> { 599 + let s = self.clone(); 600 + let collections = collections.to_vec(); 601 + tokio::task::spawn_blocking(move || { 602 + MemReader::get_records_by_collections(&s, &collections, limit, expand_each_collection) 603 + }) 604 + .await? 
605 + } 606 + } 607 + 608 + pub struct MemWriter { 609 + keyspace: MemKeyspace, 610 + global: MemPartion, 611 + feeds: MemPartion, 612 + records: MemPartion, 613 + rollups: MemPartion, 614 + queues: MemPartion, 615 + } 616 + 617 + impl MemWriter { 618 + fn rollup_delete_account( 619 + &mut self, 620 + cursor: Cursor, 621 + key_bytes: &[u8], 622 + val_bytes: &[u8], 623 + ) -> StorageResult<usize> { 624 + let did = db_complete::<DeleteAccountQueueVal>(val_bytes)?; 625 + self.delete_account(&did)?; 626 + let mut batch = self.keyspace.batch(); 627 + batch.remove(&self.queues, key_bytes); 628 + insert_batch_static_neu::<NewRollupCursorKey>(&mut batch, &self.global, cursor)?; 629 + batch.commit()?; 630 + Ok(1) 631 + } 632 + 633 + fn rollup_live_counts( 634 + &mut self, 635 + timelies: impl Iterator<Item = Result<(Vec<u8>, Vec<u8>), StorageError>>, 636 + cursor_exclusive_limit: Option<Cursor>, 637 + rollup_limit: usize, 638 + ) -> StorageResult<usize> { 639 + // current strategy is to buffer counts in mem before writing the rollups 640 + // we *could* read+write every single batch to rollup.. but their merge is associative so 641 + // ...so save the db some work up front? is this worth it? who knows... 
642 + 643 + log::warn!("sup!!!"); 644 + 645 + #[derive(Eq, Hash, PartialEq)] 646 + enum Rollup { 647 + Hourly(HourTruncatedCursor), 648 + Weekly(WeekTruncatedCursor), 649 + AllTime, 650 + } 651 + 652 + let mut batch = self.keyspace.batch(); 653 + let mut cursors_advanced = 0; 654 + let mut last_cursor = Cursor::from_start(); 655 + let mut counts_by_rollup: HashMap<(Nsid, Rollup), CountsValue> = HashMap::new(); 656 + 657 + log::warn!("about to loop...."); 658 + for (i, kv) in timelies.enumerate() { 659 + log::warn!("loop {i} {kv:?}..."); 660 + if i >= rollup_limit { 661 + break; 662 + } 663 + 664 + let (key_bytes, val_bytes) = kv?; 665 + let key = db_complete::<LiveCountsKey>(&key_bytes) 666 + .inspect_err(|e| log::warn!("rlc: key: {e:?}"))?; 667 + 668 + if cursor_exclusive_limit 669 + .map(|limit| key.cursor() > limit) 670 + .unwrap_or(false) 671 + { 672 + break; 673 + } 674 + 675 + batch.remove(&self.rollups, &key_bytes); 676 + let val = db_complete::<CountsValue>(&val_bytes) 677 + .inspect_err(|e| log::warn!("rlc: val: {e:?}"))?; 678 + counts_by_rollup 679 + .entry(( 680 + key.collection().clone(), 681 + Rollup::Hourly(key.cursor().into()), 682 + )) 683 + .or_default() 684 + .merge(&val); 685 + counts_by_rollup 686 + .entry(( 687 + key.collection().clone(), 688 + Rollup::Weekly(key.cursor().into()), 689 + )) 690 + .or_default() 691 + .merge(&val); 692 + counts_by_rollup 693 + .entry((key.collection().clone(), Rollup::AllTime)) 694 + .or_default() 695 + .merge(&val); 696 + 697 + cursors_advanced += 1; 698 + last_cursor = key.cursor(); 699 + } 700 + log::warn!("done looping. looping cbr counts(?).."); 701 + 702 + for ((nsid, rollup), counts) in counts_by_rollup { 703 + log::warn!( 704 + "######################## cbr loop {nsid:?} {counts:?} ########################" 705 + ); 706 + let key_bytes = match rollup { 707 + Rollup::Hourly(hourly_cursor) => { 708 + let k = HourlyRollupKey::new(hourly_cursor, &nsid); 709 + log::info!("hrly k: {k:?}"); 710 + k.to_db_bytes()? 
711 + } 712 + Rollup::Weekly(weekly_cursor) => { 713 + let k = WeeklyRollupKey::new(weekly_cursor, &nsid); 714 + log::info!("weekly k: {k:?}"); 715 + k.to_db_bytes()? 716 + } 717 + Rollup::AllTime => { 718 + let k = AllTimeRollupKey::new(&nsid); 719 + log::info!("alltime k: {k:?}"); 720 + k.to_db_bytes()? 721 + } 722 + }; 723 + // log::info!("key bytes: {key_bytes:?}"); 724 + let mut rolled: CountsValue = self 725 + .rollups 726 + .get(&key_bytes)? 727 + .inspect(|v| { 728 + let lax = CountsValue::from_db_bytes(v); 729 + log::info!( 730 + "val: len={}, lax={lax:?} first32={:?}", 731 + v.len(), 732 + v.get(..32) 733 + ); 734 + }) 735 + .as_deref() 736 + .map(db_complete::<CountsValue>) 737 + .transpose() 738 + .inspect_err(|e| log::warn!("oooh did we break on the rolled thing? {e:?}"))? 739 + .unwrap_or_default(); 740 + 741 + // try to round-trip before inserting, for funsies 742 + let tripppin = counts.to_db_bytes()?; 743 + let (and_back, n) = CountsValue::from_db_bytes(&tripppin)?; 744 + assert_eq!(n, tripppin.len()); 745 + assert_eq!(counts.prefix, and_back.prefix); 746 + assert_eq!(counts.dids().estimate(), and_back.dids().estimate()); 747 + if counts.records() > 20000000 { 748 + panic!("COUNTS maybe wtf? {counts:?}") 749 + } 750 + // assert_eq!(rolled, and_back); 751 + 752 + rolled.merge(&counts); 753 + 754 + // try to round-trip before inserting, for funsies 755 + let tripppin = rolled.to_db_bytes()?; 756 + let (and_back, n) = CountsValue::from_db_bytes(&tripppin)?; 757 + assert_eq!(n, tripppin.len()); 758 + assert_eq!(rolled.prefix, and_back.prefix); 759 + assert_eq!(rolled.dids().estimate(), and_back.dids().estimate()); 760 + if rolled.records() > 20000000 { 761 + panic!("maybe wtf? 
{rolled:?}") 762 + } 763 + // assert_eq!(rolled, and_back); 764 + 765 + batch.insert(&self.rollups, &key_bytes, &rolled.to_db_bytes()?); 766 + } 767 + 768 + log::warn!("done cbr loop."); 769 + 770 + insert_batch_static_neu::<NewRollupCursorKey>(&mut batch, &self.global, last_cursor) 771 + .inspect_err(|e| log::warn!("insert neu: {e:?}"))?; 772 + 773 + batch.commit()?; 774 + 775 + log::warn!("ok finished rlc stuff. huh."); 776 + Ok(cursors_advanced) 777 + } 778 + } 779 + 780 + impl StoreWriter for MemWriter { 781 + fn insert_batch<const LIMIT: usize>( 782 + &mut self, 783 + event_batch: EventBatch<LIMIT>, 784 + ) -> StorageResult<()> { 785 + if event_batch.is_empty() { 786 + return Ok(()); 787 + } 788 + 789 + let mut batch = self.keyspace.batch(); 790 + 791 + // would be nice not to have to iterate everything at once here 792 + let latest = event_batch.latest_cursor().unwrap(); 793 + 794 + for (nsid, commits) in event_batch.commits_by_nsid { 795 + for commit in commits.commits { 796 + let location_key: RecordLocationKey = (&commit, &nsid).into(); 797 + 798 + match commit.action { 799 + CommitAction::Cut => { 800 + batch.remove(&self.records, &location_key.to_db_bytes()?); 801 + } 802 + CommitAction::Put(put_action) => { 803 + let feed_key = NsidRecordFeedKey::from_pair(nsid.clone(), commit.cursor); 804 + let feed_val: NsidRecordFeedVal = 805 + (&commit.did, &commit.rkey, commit.rev.as_str()).into(); 806 + batch.insert( 807 + &self.feeds, 808 + &feed_key.to_db_bytes()?, 809 + &feed_val.to_db_bytes()?, 810 + ); 811 + 812 + let location_val: RecordLocationVal = 813 + (commit.cursor, commit.rev.as_str(), put_action).into(); 814 + batch.insert( 815 + &self.records, 816 + &location_key.to_db_bytes()?, 817 + &location_val.to_db_bytes()?, 818 + ); 819 + } 820 + } 821 + } 822 + let live_counts_key: LiveCountsKey = (latest, &nsid).into(); 823 + let counts_value = CountsValue::new(commits.total_seen as u64, commits.dids_estimate); 824 + batch.insert( 825 + &self.rollups, 826 + 
&live_counts_key.to_db_bytes()?, 827 + &counts_value.to_db_bytes()?, 828 + ); 829 + } 830 + 831 + for remove in event_batch.account_removes { 832 + let queue_key = DeleteAccountQueueKey::new(remove.cursor); 833 + let queue_val: DeleteAccountQueueVal = remove.did; 834 + batch.insert( 835 + &self.queues, 836 + &queue_key.to_db_bytes()?, 837 + &queue_val.to_db_bytes()?, 838 + ); 839 + } 840 + 841 + batch.insert( 842 + &self.global, 843 + &DbStaticStr::<JetstreamCursorKey>::default().to_db_bytes()?, 844 + &latest.to_db_bytes()?, 845 + ); 846 + 847 + batch.commit()?; 848 + Ok(()) 849 + } 850 + 851 + fn step_rollup(&mut self) -> StorageResult<usize> { 852 + let rollup_cursor = 853 + get_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&self.global)? 854 + .ok_or(StorageError::BadStateError( 855 + "Could not find current rollup cursor".to_string(), 856 + )) 857 + .inspect_err(|e| log::warn!("failed getting rollup cursor: {e:?}"))?; 858 + 859 + // timelies 860 + let live_counts_range = LiveCountsKey::range_from_cursor(rollup_cursor) 861 + .inspect_err(|e| log::warn!("live counts range: {e:?}"))?; 862 + let mut timely_iter = self.rollups.range(live_counts_range).into_iter().peekable(); 863 + 864 + let timely_next_cursor = timely_iter 865 + .peek_mut() 866 + .map(|kv| -> StorageResult<Cursor> { 867 + match kv { 868 + Err(e) => Err(std::mem::replace(e, StorageError::Stolen))?, 869 + Ok((key_bytes, _)) => { 870 + let key = db_complete::<LiveCountsKey>(key_bytes).inspect_err(|e| { 871 + log::warn!("failed getting key for next timely: {e:?}") 872 + })?; 873 + Ok(key.cursor()) 874 + } 875 + } 876 + }) 877 + .transpose() 878 + .inspect_err(|e| log::warn!("something about timely: {e:?}"))?; 879 + 880 + // delete accounts 881 + let delete_accounts_range = 882 + DeleteAccountQueueKey::new(rollup_cursor).range_to_prefix_end()?; 883 + 884 + let next_delete = self 885 + .queues 886 + .range(delete_accounts_range) 887 + .into_iter() 888 + .next() 889 + .transpose() 890 + 
.inspect_err(|e| log::warn!("range for next delete: {e:?}"))? 891 + .map(|(key_bytes, val_bytes)| { 892 + db_complete::<DeleteAccountQueueKey>(&key_bytes) 893 + .inspect_err(|e| log::warn!("failed inside next delete thing????: {e:?}")) 894 + .map(|k| (k.suffix, key_bytes, val_bytes)) 895 + }) 896 + .transpose() 897 + .inspect_err(|e| log::warn!("failed getting next delete: {e:?}"))?; 898 + 899 + let cursors_stepped = match (timely_next_cursor, next_delete) { 900 + ( 901 + Some(timely_next_cursor), 902 + Some((delete_cursor, delete_key_bytes, delete_val_bytes)), 903 + ) => { 904 + if timely_next_cursor < delete_cursor { 905 + self.rollup_live_counts( 906 + timely_iter, 907 + Some(delete_cursor), 908 + MAX_BATCHED_ROLLUP_COUNTS, 909 + ) 910 + .inspect_err(|e| log::warn!("rolling up live counts: {e:?}"))? 911 + } else { 912 + self.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes) 913 + .inspect_err(|e| log::warn!("deleting acocunt: {e:?}"))? 914 + } 915 + } 916 + (Some(_), None) => self 917 + .rollup_live_counts(timely_iter, None, MAX_BATCHED_ROLLUP_COUNTS) 918 + .inspect_err(|e| log::warn!("rolling up (lasjdflkajs): {e:?}"))?, 919 + (None, Some((delete_cursor, delete_key_bytes, delete_val_bytes))) => self 920 + .rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes) 921 + .inspect_err(|e| log::warn!("deleting acocunt other branch: {e:?}"))?, 922 + (None, None) => 0, 923 + }; 924 + 925 + Ok(cursors_stepped) 926 + } 927 + 928 + fn trim_collection( 929 + &mut self, 930 + collection: &Nsid, 931 + limit: usize, 932 + // TODO: could add a start cursor limit to avoid iterating deleted stuff at the start (/end) 933 + ) -> StorageResult<()> { 934 + let mut dangling_feed_keys_cleaned = 0; 935 + let mut records_deleted = 0; 936 + 937 + let mut batch = self.keyspace.batch(); 938 + 939 + let prefix = NsidRecordFeedKey::from_prefix_to_db_bytes(collection)?; 940 + let mut found = 0; 941 + for kv in 
self.feeds.prefix(&prefix).into_iter().rev() { 942 + let (key_bytes, val_bytes) = kv?; 943 + let feed_key = db_complete::<NsidRecordFeedKey>(&key_bytes)?; 944 + let feed_val = db_complete::<NsidRecordFeedVal>(&val_bytes)?; 945 + let location_key: RecordLocationKey = (&feed_key, &feed_val).into(); 946 + let location_key_bytes = location_key.to_db_bytes()?; 947 + 948 + let Some(location_val_bytes) = self.records.get(&location_key_bytes)? else { 949 + // record was deleted (hopefully) 950 + batch.remove(&self.feeds, &location_key_bytes); 951 + dangling_feed_keys_cleaned += 1; 952 + continue; 953 + }; 954 + 955 + let (meta, _) = RecordLocationMeta::from_db_bytes(&location_val_bytes)?; 956 + 957 + if meta.cursor() != feed_key.cursor() { 958 + // older/different version 959 + batch.remove(&self.feeds, &location_key_bytes); 960 + dangling_feed_keys_cleaned += 1; 961 + continue; 962 + } 963 + if meta.rev != feed_val.rev() { 964 + // weird... 965 + log::warn!("record lookup: cursor match but rev did not...? 
removing."); 966 + batch.remove(&self.feeds, &location_key_bytes); 967 + dangling_feed_keys_cleaned += 1; 968 + continue; 969 + } 970 + 971 + if batch.len() >= MAX_BATCHED_CLEANUP_SIZE { 972 + batch.commit()?; 973 + batch = self.keyspace.batch(); 974 + } 975 + 976 + found += 1; 977 + if found <= limit { 978 + continue; 979 + } 980 + 981 + batch.remove(&self.feeds, &location_key_bytes); 982 + batch.remove(&self.records, &location_key_bytes); 983 + records_deleted += 1; 984 + } 985 + 986 + batch.commit()?; 987 + 988 + log::info!("trim_collection ({collection:?}) removed {dangling_feed_keys_cleaned} dangling feed entries and {records_deleted} records"); 989 + Ok(()) 990 + } 991 + 992 + fn delete_account(&mut self, did: &Did) -> Result<usize, StorageError> { 993 + let mut records_deleted = 0; 994 + let mut batch = self.keyspace.batch(); 995 + let prefix = RecordLocationKey::from_prefix_to_db_bytes(did)?; 996 + for kv in self.records.prefix(&prefix) { 997 + let (key_bytes, _) = kv?; 998 + batch.remove(&self.records, &key_bytes); 999 + records_deleted += 1; 1000 + if batch.len() >= MAX_BATCHED_ACCOUNT_DELETE_RECORDS { 1001 + batch.commit()?; 1002 + batch = self.keyspace.batch(); 1003 + } 1004 + } 1005 + batch.commit()?; 1006 + Ok(records_deleted) 1007 + } 1008 + } 1009 + 1010 + /// Get a value from a fixed key 1011 + fn get_static_neu<K: StaticStr, V: DbBytes>(global: &MemPartion) -> StorageResult<Option<V>> { 1012 + let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?; 1013 + let value = global 1014 + .get(&key_bytes)? 1015 + .map(|value_bytes| db_complete(&value_bytes)) 1016 + .transpose()?; 1017 + Ok(value) 1018 + } 1019 + 1020 + /// Get a value from a fixed key 1021 + fn get_snapshot_static_neu<K: StaticStr, V: DbBytes>( 1022 + global: &MemPartion, 1023 + ) -> StorageResult<Option<V>> { 1024 + let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?; 1025 + let value = global 1026 + .get(&key_bytes)? 
1027 + .map(|value_bytes| db_complete(&value_bytes)) 1028 + .transpose()?; 1029 + Ok(value) 1030 + } 1031 + 1032 + /// Set a value to a fixed key 1033 + fn insert_static_neu<K: StaticStr>(global: &MemPartion, value: impl DbBytes) -> StorageResult<()> { 1034 + let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?; 1035 + let value_bytes = value.to_db_bytes()?; 1036 + global.insert(&key_bytes, &value_bytes)?; 1037 + Ok(()) 1038 + } 1039 + 1040 + /// Set a value to a fixed key 1041 + fn insert_batch_static_neu<K: StaticStr>( 1042 + batch: &mut MemBatch, 1043 + global: &MemPartion, 1044 + value: impl DbBytes, 1045 + ) -> StorageResult<()> { 1046 + let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?; 1047 + let value_bytes = value.to_db_bytes()?; 1048 + batch.insert(global, &key_bytes, &value_bytes); 1049 + Ok(()) 1050 + } 1051 + 1052 + #[derive(Debug, serde::Serialize, schemars::JsonSchema)] 1053 + pub struct StorageInfo { 1054 + pub keyspace_disk_space: u64, 1055 + pub keyspace_journal_count: usize, 1056 + pub keyspace_sequence: u64, 1057 + pub global_approximate_len: usize, 1058 + } 1059 + 1060 + #[cfg(test)] 1061 + mod tests { 1062 + use super::*; 1063 + use crate::{DeleteAccount, RecordKey, UFOsCommit}; 1064 + use jetstream::events::{CommitEvent, CommitOp}; 1065 + use jetstream::exports::Cid; 1066 + use serde_json::value::RawValue; 1067 + 1068 + fn fjall_db() -> (MemReader, MemWriter) { 1069 + let (read, write, _) = MemStorage::init( 1070 + tempfile::tempdir().unwrap(), 1071 + "offline test (no real jetstream endpoint)".to_string(), 1072 + false, 1073 + MemConfig { temp: true }, 1074 + ) 1075 + .unwrap(); 1076 + (read, write) 1077 + } 1078 + 1079 + const TEST_BATCH_LIMIT: usize = 16; 1080 + 1081 + #[derive(Debug, Default)] 1082 + struct TestBatch { 1083 + pub batch: EventBatch<TEST_BATCH_LIMIT>, 1084 + } 1085 + 1086 + impl TestBatch { 1087 + #[allow(clippy::too_many_arguments)] 1088 + pub fn create( 1089 + &mut self, 1090 + did: &str, 1091 + collection: 
&str, 1092 + rkey: &str, 1093 + record: &str, 1094 + rev: Option<&str>, 1095 + cid: Option<Cid>, 1096 + cursor: u64, 1097 + ) -> Nsid { 1098 + let did = Did::new(did.to_string()).unwrap(); 1099 + let collection = Nsid::new(collection.to_string()).unwrap(); 1100 + let record = RawValue::from_string(record.to_string()).unwrap(); 1101 + let cid = cid.unwrap_or( 1102 + "bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy" 1103 + .parse() 1104 + .unwrap(), 1105 + ); 1106 + 1107 + let event = CommitEvent { 1108 + collection, 1109 + rkey: RecordKey::new(rkey.to_string()).unwrap(), 1110 + rev: rev.unwrap_or("asdf").to_string(), 1111 + operation: CommitOp::Create, 1112 + record: Some(record), 1113 + cid: Some(cid), 1114 + }; 1115 + 1116 + let (commit, collection) = 1117 + UFOsCommit::from_commit_info(event, did.clone(), Cursor::from_raw_u64(cursor)) 1118 + .unwrap(); 1119 + 1120 + self.batch 1121 + .commits_by_nsid 1122 + .entry(collection.clone()) 1123 + .or_default() 1124 + .truncating_insert(commit) 1125 + .unwrap(); 1126 + 1127 + collection 1128 + } 1129 + #[allow(clippy::too_many_arguments)] 1130 + pub fn update( 1131 + &mut self, 1132 + did: &str, 1133 + collection: &str, 1134 + rkey: &str, 1135 + record: &str, 1136 + rev: Option<&str>, 1137 + cid: Option<Cid>, 1138 + cursor: u64, 1139 + ) -> Nsid { 1140 + let did = Did::new(did.to_string()).unwrap(); 1141 + let collection = Nsid::new(collection.to_string()).unwrap(); 1142 + let record = RawValue::from_string(record.to_string()).unwrap(); 1143 + let cid = cid.unwrap_or( 1144 + "bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy" 1145 + .parse() 1146 + .unwrap(), 1147 + ); 1148 + 1149 + let event = CommitEvent { 1150 + collection, 1151 + rkey: RecordKey::new(rkey.to_string()).unwrap(), 1152 + rev: rev.unwrap_or("asdf").to_string(), 1153 + operation: CommitOp::Update, 1154 + record: Some(record), 1155 + cid: Some(cid), 1156 + }; 1157 + 1158 + let (commit, collection) = 1159 + 
UFOsCommit::from_commit_info(event, did.clone(), Cursor::from_raw_u64(cursor)) 1160 + .unwrap(); 1161 + 1162 + self.batch 1163 + .commits_by_nsid 1164 + .entry(collection.clone()) 1165 + .or_default() 1166 + .truncating_insert(commit) 1167 + .unwrap(); 1168 + 1169 + collection 1170 + } 1171 + #[allow(clippy::too_many_arguments)] 1172 + pub fn delete( 1173 + &mut self, 1174 + did: &str, 1175 + collection: &str, 1176 + rkey: &str, 1177 + rev: Option<&str>, 1178 + cursor: u64, 1179 + ) -> Nsid { 1180 + let did = Did::new(did.to_string()).unwrap(); 1181 + let collection = Nsid::new(collection.to_string()).unwrap(); 1182 + let event = CommitEvent { 1183 + collection, 1184 + rkey: RecordKey::new(rkey.to_string()).unwrap(), 1185 + rev: rev.unwrap_or("asdf").to_string(), 1186 + operation: CommitOp::Delete, 1187 + record: None, 1188 + cid: None, 1189 + }; 1190 + 1191 + let (commit, collection) = 1192 + UFOsCommit::from_commit_info(event, did, Cursor::from_raw_u64(cursor)).unwrap(); 1193 + 1194 + self.batch 1195 + .commits_by_nsid 1196 + .entry(collection.clone()) 1197 + .or_default() 1198 + .truncating_insert(commit) 1199 + .unwrap(); 1200 + 1201 + collection 1202 + } 1203 + pub fn delete_account(&mut self, did: &str, cursor: u64) -> Did { 1204 + let did = Did::new(did.to_string()).unwrap(); 1205 + self.batch.account_removes.push(DeleteAccount { 1206 + did: did.clone(), 1207 + cursor: Cursor::from_raw_u64(cursor), 1208 + }); 1209 + did 1210 + } 1211 + } 1212 + 1213 + #[test] 1214 + fn test_hello() -> anyhow::Result<()> { 1215 + let (read, mut write) = fjall_db(); 1216 + write.insert_batch::<TEST_BATCH_LIMIT>(EventBatch::default())?; 1217 + let (records, dids) = 1218 + read.get_counts_by_collection(&Nsid::new("a.b.c".to_string()).unwrap())?; 1219 + assert_eq!(records, 0); 1220 + assert_eq!(dids, 0); 1221 + Ok(()) 1222 + } 1223 + 1224 + #[test] 1225 + fn test_insert_one() -> anyhow::Result<()> { 1226 + let (read, mut write) = fjall_db(); 1227 + 1228 + let mut batch = 
TestBatch::default(); 1229 + let collection = batch.create( 1230 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1231 + "a.b.c", 1232 + "asdf", 1233 + "{}", 1234 + Some("rev-z"), 1235 + None, 1236 + 100, 1237 + ); 1238 + write.insert_batch(batch.batch)?; 1239 + 1240 + let (records, dids) = read.get_counts_by_collection(&collection)?; 1241 + assert_eq!(records, 1); 1242 + assert_eq!(dids, 1); 1243 + let (records, dids) = 1244 + read.get_counts_by_collection(&Nsid::new("d.e.f".to_string()).unwrap())?; 1245 + assert_eq!(records, 0); 1246 + assert_eq!(dids, 0); 1247 + 1248 + let records = read.get_records_by_collections(&[collection], 2, false)?; 1249 + assert_eq!(records.len(), 1); 1250 + let rec = &records[0]; 1251 + assert_eq!(rec.record.get(), "{}"); 1252 + assert!(!rec.is_update); 1253 + 1254 + let records = 1255 + read.get_records_by_collections(&[Nsid::new("d.e.f".to_string()).unwrap()], 2, false)?; 1256 + assert_eq!(records.len(), 0); 1257 + 1258 + Ok(()) 1259 + } 1260 + 1261 + #[test] 1262 + fn test_get_multi_collection() -> anyhow::Result<()> { 1263 + let (read, mut write) = fjall_db(); 1264 + 1265 + let mut batch = TestBatch::default(); 1266 + batch.create( 1267 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1268 + "a.a.a", 1269 + "aaa", 1270 + r#""earliest""#, 1271 + Some("rev-a"), 1272 + None, 1273 + 100, 1274 + ); 1275 + batch.create( 1276 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1277 + "a.a.b", 1278 + "aab", 1279 + r#""in between""#, 1280 + Some("rev-ab"), 1281 + None, 1282 + 101, 1283 + ); 1284 + batch.create( 1285 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1286 + "a.a.a", 1287 + "aaa-2", 1288 + r#""last""#, 1289 + Some("rev-a-2"), 1290 + None, 1291 + 102, 1292 + ); 1293 + write.insert_batch(batch.batch)?; 1294 + 1295 + let records = read.get_records_by_collections( 1296 + &[ 1297 + Nsid::new("a.a.a".to_string()).unwrap(), 1298 + Nsid::new("a.a.b".to_string()).unwrap(), 1299 + Nsid::new("a.a.c".to_string()).unwrap(), 1300 + ], 1301 + 100, 1302 + false, 1303 + )?; 1304 + 
assert_eq!(records.len(), 3); 1305 + assert_eq!(records[0].record.get(), r#""last""#); 1306 + assert_eq!( 1307 + records[0].collection, 1308 + Nsid::new("a.a.a".to_string()).unwrap() 1309 + ); 1310 + assert_eq!(records[1].record.get(), r#""in between""#); 1311 + assert_eq!( 1312 + records[1].collection, 1313 + Nsid::new("a.a.b".to_string()).unwrap() 1314 + ); 1315 + assert_eq!(records[2].record.get(), r#""earliest""#); 1316 + assert_eq!( 1317 + records[2].collection, 1318 + Nsid::new("a.a.a".to_string()).unwrap() 1319 + ); 1320 + 1321 + Ok(()) 1322 + } 1323 + 1324 + #[test] 1325 + fn test_update_one() -> anyhow::Result<()> { 1326 + let (read, mut write) = fjall_db(); 1327 + 1328 + let mut batch = TestBatch::default(); 1329 + let collection = batch.create( 1330 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1331 + "a.b.c", 1332 + "rkey-asdf", 1333 + "{}", 1334 + Some("rev-a"), 1335 + None, 1336 + 100, 1337 + ); 1338 + write.insert_batch(batch.batch)?; 1339 + 1340 + let mut batch = TestBatch::default(); 1341 + batch.update( 1342 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1343 + "a.b.c", 1344 + "rkey-asdf", 1345 + r#"{"ch": "ch-ch-ch-changes"}"#, 1346 + Some("rev-z"), 1347 + None, 1348 + 101, 1349 + ); 1350 + write.insert_batch(batch.batch)?; 1351 + 1352 + let (records, dids) = read.get_counts_by_collection(&collection)?; 1353 + assert_eq!(records, 1); 1354 + assert_eq!(dids, 1); 1355 + 1356 + let records = read.get_records_by_collections(&[collection], 2, false)?; 1357 + assert_eq!(records.len(), 1); 1358 + let rec = &records[0]; 1359 + assert_eq!(rec.record.get(), r#"{"ch": "ch-ch-ch-changes"}"#); 1360 + assert!(rec.is_update); 1361 + Ok(()) 1362 + } 1363 + 1364 + #[test] 1365 + fn test_delete_one() -> anyhow::Result<()> { 1366 + let (read, mut write) = fjall_db(); 1367 + 1368 + let mut batch = TestBatch::default(); 1369 + let collection = batch.create( 1370 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1371 + "a.b.c", 1372 + "rkey-asdf", 1373 + "{}", 1374 + Some("rev-a"), 1375 + None, 
1376 + 100, 1377 + ); 1378 + write.insert_batch(batch.batch)?; 1379 + 1380 + let mut batch = TestBatch::default(); 1381 + batch.delete( 1382 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1383 + "a.b.c", 1384 + "rkey-asdf", 1385 + Some("rev-z"), 1386 + 101, 1387 + ); 1388 + write.insert_batch(batch.batch)?; 1389 + 1390 + let (records, dids) = read.get_counts_by_collection(&collection)?; 1391 + assert_eq!(records, 1); 1392 + assert_eq!(dids, 1); 1393 + 1394 + let records = read.get_records_by_collections(&[collection], 2, false)?; 1395 + assert_eq!(records.len(), 0); 1396 + 1397 + Ok(()) 1398 + } 1399 + 1400 + #[test] 1401 + fn test_collection_trim() -> anyhow::Result<()> { 1402 + let (read, mut write) = fjall_db(); 1403 + 1404 + let mut batch = TestBatch::default(); 1405 + batch.create( 1406 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1407 + "a.a.a", 1408 + "rkey-aaa", 1409 + "{}", 1410 + Some("rev-aaa"), 1411 + None, 1412 + 10_000, 1413 + ); 1414 + let mut last_b_cursor; 1415 + for i in 1..=10 { 1416 + last_b_cursor = 11_000 + i; 1417 + batch.create( 1418 + &format!("did:plc:inze6wrmsm7pjl7yta3oig7{}", i % 3), 1419 + "a.a.b", 1420 + &format!("rkey-bbb-{i}"), 1421 + &format!(r#"{{"n": {i}}}"#), 1422 + Some(&format!("rev-bbb-{i}")), 1423 + None, 1424 + last_b_cursor, 1425 + ); 1426 + } 1427 + batch.create( 1428 + "did:plc:inze6wrmsm7pjl7yta3oig77", 1429 + "a.a.c", 1430 + "rkey-ccc", 1431 + "{}", 1432 + Some("rev-ccc"), 1433 + None, 1434 + 12_000, 1435 + ); 1436 + 1437 + write.insert_batch(batch.batch)?; 1438 + 1439 + let records = read.get_records_by_collections( 1440 + &[Nsid::new("a.a.a".to_string()).unwrap()], 1441 + 100, 1442 + false, 1443 + )?; 1444 + assert_eq!(records.len(), 1); 1445 + let records = read.get_records_by_collections( 1446 + &[Nsid::new("a.a.b".to_string()).unwrap()], 1447 + 100, 1448 + false, 1449 + )?; 1450 + assert_eq!(records.len(), 10); 1451 + let records = read.get_records_by_collections( 1452 + &[Nsid::new("a.a.c".to_string()).unwrap()], 1453 + 100, 
1454 + false, 1455 + )?; 1456 + assert_eq!(records.len(), 1); 1457 + let records = read.get_records_by_collections( 1458 + &[Nsid::new("a.a.d".to_string()).unwrap()], 1459 + 100, 1460 + false, 1461 + )?; 1462 + assert_eq!(records.len(), 0); 1463 + 1464 + write.trim_collection(&Nsid::new("a.a.a".to_string()).unwrap(), 6)?; 1465 + write.trim_collection(&Nsid::new("a.a.b".to_string()).unwrap(), 6)?; 1466 + write.trim_collection(&Nsid::new("a.a.c".to_string()).unwrap(), 6)?; 1467 + write.trim_collection(&Nsid::new("a.a.d".to_string()).unwrap(), 6)?; 1468 + 1469 + let records = read.get_records_by_collections( 1470 + &[Nsid::new("a.a.a".to_string()).unwrap()], 1471 + 100, 1472 + false, 1473 + )?; 1474 + assert_eq!(records.len(), 1); 1475 + let records = read.get_records_by_collections( 1476 + &[Nsid::new("a.a.b".to_string()).unwrap()], 1477 + 100, 1478 + false, 1479 + )?; 1480 + assert_eq!(records.len(), 6); 1481 + let records = read.get_records_by_collections( 1482 + &[Nsid::new("a.a.c".to_string()).unwrap()], 1483 + 100, 1484 + false, 1485 + )?; 1486 + assert_eq!(records.len(), 1); 1487 + let records = read.get_records_by_collections( 1488 + &[Nsid::new("a.a.d".to_string()).unwrap()], 1489 + 100, 1490 + false, 1491 + )?; 1492 + assert_eq!(records.len(), 0); 1493 + 1494 + Ok(()) 1495 + } 1496 + 1497 + #[test] 1498 + fn test_delete_account() -> anyhow::Result<()> { 1499 + let (read, mut write) = fjall_db(); 1500 + 1501 + let mut batch = TestBatch::default(); 1502 + batch.create( 1503 + "did:plc:person-a", 1504 + "a.a.a", 1505 + "rkey-aaa", 1506 + "{}", 1507 + Some("rev-aaa"), 1508 + None, 1509 + 10_000, 1510 + ); 1511 + for i in 1..=2 { 1512 + batch.create( 1513 + "did:plc:person-b", 1514 + "a.a.a", 1515 + &format!("rkey-bbb-{i}"), 1516 + &format!(r#"{{"n": {i}}}"#), 1517 + Some(&format!("rev-bbb-{i}")), 1518 + None, 1519 + 11_000 + i, 1520 + ); 1521 + } 1522 + write.insert_batch(batch.batch)?; 1523 + 1524 + let records = read.get_records_by_collections( 1525 + 
&[Nsid::new("a.a.a".to_string()).unwrap()], 1526 + 100, 1527 + false, 1528 + )?; 1529 + assert_eq!(records.len(), 3); 1530 + 1531 + let records_deleted = 1532 + write.delete_account(&Did::new("did:plc:person-b".to_string()).unwrap())?; 1533 + assert_eq!(records_deleted, 2); 1534 + 1535 + let records = read.get_records_by_collections( 1536 + &[Nsid::new("a.a.a".to_string()).unwrap()], 1537 + 100, 1538 + false, 1539 + )?; 1540 + assert_eq!(records.len(), 1); 1541 + 1542 + Ok(()) 1543 + } 1544 + 1545 + #[test] 1546 + fn rollup_delete_account_removes_record() -> anyhow::Result<()> { 1547 + let (read, mut write) = fjall_db(); 1548 + 1549 + let mut batch = TestBatch::default(); 1550 + batch.create( 1551 + "did:plc:person-a", 1552 + "a.a.a", 1553 + "rkey-aaa", 1554 + "{}", 1555 + Some("rev-aaa"), 1556 + None, 1557 + 10_000, 1558 + ); 1559 + write.insert_batch(batch.batch)?; 1560 + 1561 + let mut batch = TestBatch::default(); 1562 + batch.delete_account("did:plc:person-a", 9_999); // queue it before the rollup 1563 + write.insert_batch(batch.batch)?; 1564 + 1565 + write.step_rollup()?; 1566 + 1567 + let records = 1568 + read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?; 1569 + assert_eq!(records.len(), 0); 1570 + 1571 + Ok(()) 1572 + } 1573 + 1574 + #[test] 1575 + fn rollup_delete_live_count_step() -> anyhow::Result<()> { 1576 + let (read, mut write) = fjall_db(); 1577 + 1578 + let mut batch = TestBatch::default(); 1579 + batch.create( 1580 + "did:plc:person-a", 1581 + "a.a.a", 1582 + "rkey-aaa", 1583 + "{}", 1584 + Some("rev-aaa"), 1585 + None, 1586 + 10_000, 1587 + ); 1588 + write.insert_batch(batch.batch)?; 1589 + 1590 + let n = write.step_rollup()?; 1591 + assert_eq!(n, 1); 1592 + 1593 + let mut batch = TestBatch::default(); 1594 + batch.delete_account("did:plc:person-a", 10_001); 1595 + write.insert_batch(batch.batch)?; 1596 + 1597 + let records = 1598 + read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, 
false)?; 1599 + assert_eq!(records.len(), 1); 1600 + 1601 + let n = write.step_rollup()?; 1602 + assert_eq!(n, 1); 1603 + 1604 + let records = 1605 + read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?; 1606 + assert_eq!(records.len(), 0); 1607 + 1608 + let mut batch = TestBatch::default(); 1609 + batch.delete_account("did:plc:person-a", 9_999); 1610 + write.insert_batch(batch.batch)?; 1611 + 1612 + let n = write.step_rollup()?; 1613 + assert_eq!(n, 0); 1614 + 1615 + Ok(()) 1616 + } 1617 + 1618 + #[test] 1619 + fn rollup_multiple_count_batches() -> anyhow::Result<()> { 1620 + let (_read, mut write) = fjall_db(); 1621 + 1622 + let mut batch = TestBatch::default(); 1623 + batch.create( 1624 + "did:plc:person-a", 1625 + "a.a.a", 1626 + "rkey-aaa", 1627 + "{}", 1628 + Some("rev-aaa"), 1629 + None, 1630 + 10_000, 1631 + ); 1632 + write.insert_batch(batch.batch)?; 1633 + 1634 + let mut batch = TestBatch::default(); 1635 + batch.create( 1636 + "did:plc:person-a", 1637 + "a.a.a", 1638 + "rkey-aab", 1639 + "{}", 1640 + Some("rev-aab"), 1641 + None, 1642 + 10_001, 1643 + ); 1644 + write.insert_batch(batch.batch)?; 1645 + 1646 + let n = write.step_rollup()?; 1647 + assert_eq!(n, 2); 1648 + 1649 + let n = write.step_rollup()?; 1650 + assert_eq!(n, 0); 1651 + 1652 + Ok(()) 1653 + } 1654 + 1655 + #[test] 1656 + fn counts_before_and_after_rollup() -> anyhow::Result<()> { 1657 + let (read, mut write) = fjall_db(); 1658 + 1659 + let mut batch = TestBatch::default(); 1660 + batch.create( 1661 + "did:plc:person-a", 1662 + "a.a.a", 1663 + "rkey-aaa", 1664 + "{}", 1665 + Some("rev-aaa"), 1666 + None, 1667 + 10_000, 1668 + ); 1669 + batch.create( 1670 + "did:plc:person-b", 1671 + "a.a.a", 1672 + "rkey-bbb", 1673 + "{}", 1674 + Some("rev-bbb"), 1675 + None, 1676 + 10_001, 1677 + ); 1678 + write.insert_batch(batch.batch)?; 1679 + 1680 + let mut batch = TestBatch::default(); 1681 + batch.delete_account("did:plc:person-a", 11_000); 1682 + 
write.insert_batch(batch.batch)?; 1683 + 1684 + let mut batch = TestBatch::default(); 1685 + batch.create( 1686 + "did:plc:person-a", 1687 + "a.a.a", 1688 + "rkey-aac", 1689 + "{}", 1690 + Some("rev-aac"), 1691 + None, 1692 + 12_000, 1693 + ); 1694 + write.insert_batch(batch.batch)?; 1695 + 1696 + // before any rollup 1697 + let (records, dids) = 1698 + read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?; 1699 + assert_eq!(records, 3); 1700 + assert_eq!(dids, 2); 1701 + 1702 + // first batch rolled up 1703 + let n = write.step_rollup()?; 1704 + assert_eq!(n, 1); 1705 + 1706 + let (records, dids) = 1707 + read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?; 1708 + assert_eq!(records, 3); 1709 + assert_eq!(dids, 2); 1710 + 1711 + // delete account rolled up 1712 + let n = write.step_rollup()?; 1713 + assert_eq!(n, 1); 1714 + 1715 + let (records, dids) = 1716 + read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?; 1717 + assert_eq!(records, 3); 1718 + assert_eq!(dids, 2); 1719 + 1720 + // second batch rolled up 1721 + let n = write.step_rollup()?; 1722 + assert_eq!(n, 1); 1723 + 1724 + let (records, dids) = 1725 + read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?; 1726 + assert_eq!(records, 3); 1727 + assert_eq!(dids, 2); 1728 + 1729 + // no more rollups left 1730 + let n = write.step_rollup()?; 1731 + assert_eq!(n, 0); 1732 + 1733 + Ok(()) 1734 + } 1735 + 1736 + #[test] 1737 + fn get_top_collections() -> anyhow::Result<()> { 1738 + let (read, mut write) = fjall_db(); 1739 + 1740 + let mut batch = TestBatch::default(); 1741 + batch.create( 1742 + "did:plc:person-a", 1743 + "a.a.a", 1744 + "rkey-aaa", 1745 + "{}", 1746 + Some("rev-aaa"), 1747 + None, 1748 + 10_000, 1749 + ); 1750 + batch.create( 1751 + "did:plc:person-b", 1752 + "a.a.b", 1753 + "rkey-bbb", 1754 + "{}", 1755 + Some("rev-bbb"), 1756 + None, 1757 + 10_001, 1758 + ); 1759 + batch.create( 1760 + "did:plc:person-c", 1761 + 
"a.b.c", 1762 + "rkey-ccc", 1763 + "{}", 1764 + Some("rev-ccc"), 1765 + None, 1766 + 10_002, 1767 + ); 1768 + batch.create( 1769 + "did:plc:person-a", 1770 + "a.a.a", 1771 + "rkey-aaa-2", 1772 + "{}", 1773 + Some("rev-aaa-2"), 1774 + None, 1775 + 10_003, 1776 + ); 1777 + write.insert_batch(batch.batch)?; 1778 + 1779 + let n = write.step_rollup()?; 1780 + assert_eq!(n, 3); // 3 collections 1781 + 1782 + let tops = read.get_top_collections()?; 1783 + assert_eq!( 1784 + tops, 1785 + TopCollections { 1786 + total_records: 4, 1787 + dids_estimate: 3, 1788 + nsid_child_segments: HashMap::from([( 1789 + "a".to_string(), 1790 + TopCollections { 1791 + total_records: 4, 1792 + dids_estimate: 3, 1793 + nsid_child_segments: HashMap::from([ 1794 + ( 1795 + "a".to_string(), 1796 + TopCollections { 1797 + total_records: 3, 1798 + dids_estimate: 2, 1799 + nsid_child_segments: HashMap::from([ 1800 + ( 1801 + "a".to_string(), 1802 + TopCollections { 1803 + total_records: 2, 1804 + dids_estimate: 1, 1805 + nsid_child_segments: HashMap::from([]), 1806 + }, 1807 + ), 1808 + ( 1809 + "b".to_string(), 1810 + TopCollections { 1811 + total_records: 1, 1812 + dids_estimate: 1, 1813 + nsid_child_segments: HashMap::from([]), 1814 + } 1815 + ), 1816 + ]), 1817 + }, 1818 + ), 1819 + ( 1820 + "b".to_string(), 1821 + TopCollections { 1822 + total_records: 1, 1823 + dids_estimate: 1, 1824 + nsid_child_segments: HashMap::from([( 1825 + "c".to_string(), 1826 + TopCollections { 1827 + total_records: 1, 1828 + dids_estimate: 1, 1829 + nsid_child_segments: HashMap::from([]), 1830 + }, 1831 + ),]), 1832 + }, 1833 + ), 1834 + ]), 1835 + }, 1836 + ),]), 1837 + } 1838 + ); 1839 + Ok(()) 1840 + } 1841 + }
-802
ufos/src/store.rs
··· 1 - use crate::db_types::{db_complete, DbBytes, DbStaticStr, EncodingError, StaticStr}; 2 - use crate::store_types::{ 3 - ByCollectionKey, ByCollectionValue, ByCursorSeenKey, ByCursorSeenValue, ByIdKey, ByIdValue, 4 - JetstreamCursorKey, JetstreamCursorValue, JetstreamEndpointKey, JetstreamEndpointValue, 5 - ModCursorKey, ModCursorValue, ModQueueItemKey, ModQueueItemStringValue, ModQueueItemValue, 6 - RollupCursorKey, RollupCursorValue, SeenCounter, 7 - }; 8 - use crate::{ 9 - CollectionSamples, CreateRecord, DeleteAccount, Did, EventBatch, ModifyRecord, Nsid, RecordKey, 10 - }; 11 - use fjall::{ 12 - Batch as FjallBatch, CompressionType, Config, Keyspace, PartitionCreateOptions, PartitionHandle, 13 - }; 14 - use jetstream::events::Cursor; 15 - use std::collections::HashMap; 16 - use std::path::{Path, PathBuf}; 17 - use std::time::{Duration, Instant}; 18 - use tokio::sync::mpsc::Receiver; 19 - use tokio::time::{interval_at, sleep}; 20 - 21 - /// Commit the RW batch immediately if this number of events have been read off the mod queue 22 - const MAX_BATCHED_RW_EVENTS: usize = 18; 23 - 24 - /// Commit the RW batch immediately if this number of records is reached 25 - /// 26 - /// there are probably some efficiency gains for higher, at cost of more memory. 
27 - /// interestingly, this kind of sets a priority weight for the RW loop: 28 - /// - doing more work whenever scheduled means getting more CPU time in general 29 - /// 30 - /// this is higher than [MAX_BATCHED_RW_EVENTS] because account-deletes can have lots of items 31 - const MAX_BATCHED_RW_ITEMS: usize = 24; 32 - 33 - #[derive(Clone)] 34 - struct Db { 35 - keyspace: Keyspace, 36 - partition: PartitionHandle, 37 - } 38 - 39 - /** 40 - * data format, roughly: 41 - * 42 - * Global Meta: 43 - * ["js_cursor"] => js_cursor(u64), // used as global sequence 44 - * ["js_endpoint"] => &str, // checked on startup because jetstream instance cursors are not interchangeable 45 - * ["mod_cursor"] => js_cursor(u64); 46 - * ["rollup_cursor"] => [js_cursor|collection]; // how far the rollup helper has progressed 47 - * Mod queue 48 - * ["mod_queue"|js_cursor] => one of { 49 - * DeleteAccount(did) // delete all account content older than cursor 50 - * DeleteRecord(did, collection, rkey) // delete record older than cursor 51 - * UpdateRecord(did, collection, rkey, new_record) // delete + put, but don't delete if cursor is newer 52 - * } 53 - * Collection and rollup meta: 54 - * ["seen_by_js_cursor_collection"|js_cursor|collection] => u64 // batched total, gets cleaned up by rollup 55 - * ["total_by_collection"|collection] => [u64, js_cursor] // rollup; live total requires scanning seen_by_collection after js_cursor 56 - * ["hour_by_collection"|hour(u64)|collection] => u64 // rollup from seen_by_js_cursor_collection 57 - * Samples: 58 - * ["by_collection"|collection|js_cursor] => [did|rkey|record] 59 - * ["by_id"|did|collection|rkey|js_cursor] => [] // required to support deletes; did first prefix for account deletes. 60 - * 61 - * TODO: account privacy preferences. Might wait for the protocol-level (PDS-level?) stuff to land. Will probably do lazy 62 - * fetching + caching on read. 
63 - **/ 64 - #[derive(Clone)] 65 - pub struct Storage { 66 - /// horrible: gate all db access behind this to force global serialization to avoid deadlock 67 - db: Db, 68 - } 69 - 70 - impl Storage { 71 - fn init_self(path: impl AsRef<Path>) -> anyhow::Result<Self> { 72 - let keyspace = Config::new(path).fsync_ms(Some(4_000)).open()?; 73 - let partition = keyspace.open_partition( 74 - "default", 75 - PartitionCreateOptions::default().compression(CompressionType::None), 76 - )?; 77 - Ok(Self { 78 - db: Db { 79 - keyspace, 80 - partition, 81 - }, 82 - }) 83 - } 84 - 85 - pub async fn open( 86 - path: PathBuf, 87 - endpoint: &str, 88 - force_endpoint: bool, 89 - ) -> anyhow::Result<(Self, Option<Cursor>)> { 90 - let me = tokio::task::spawn_blocking(move || Storage::init_self(path)).await??; 91 - 92 - let js_cursor = me.get_jetstream_cursor().await?; 93 - 94 - if js_cursor.is_some() { 95 - let Some(JetstreamEndpointValue(stored)) = me.get_jetstream_endpoint().await? else { 96 - anyhow::bail!("found cursor but missing js_endpoint, refusing to start."); 97 - }; 98 - if stored != endpoint { 99 - if force_endpoint { 100 - log::warn!("forcing a jetstream switch from {stored:?} to {endpoint:?}"); 101 - me.set_jetstream_endpoint(endpoint).await?; 102 - } else { 103 - anyhow::bail!("stored js_endpoint {stored:?} differs from provided {endpoint:?}, refusing to start."); 104 - } 105 - } 106 - } else { 107 - me.set_jetstream_endpoint(endpoint).await?; 108 - } 109 - 110 - Ok((me, js_cursor)) 111 - } 112 - 113 - /// Jetstream event batch receiver: writes without any reads 114 - /// 115 - /// Events that require reads like record updates or delets are written to a queue 116 - pub async fn receive(&self, mut receiver: Receiver<EventBatch>) -> anyhow::Result<()> { 117 - // TODO: see rw_loop: enforce single-thread. 
118 - loop { 119 - let t_sleep = Instant::now(); 120 - sleep(Duration::from_secs_f64(0.8)).await; // TODO: minimize during replay 121 - let slept_for = t_sleep.elapsed(); 122 - let queue_size = receiver.len(); 123 - 124 - if let Some(event_batch) = receiver.recv().await { 125 - log::trace!("write: received write batch"); 126 - let batch_summary = summarize_batch(&event_batch); 127 - 128 - let last = event_batch.last_jetstream_cursor.clone(); // TODO: get this from the data. track last in consumer. compute or track first. 129 - 130 - let db = &self.db; 131 - let keyspace = db.keyspace.clone(); 132 - let partition = db.partition.clone(); 133 - 134 - let writer_t0 = Instant::now(); 135 - log::trace!("spawn_blocking for write batch"); 136 - tokio::task::spawn_blocking(move || { 137 - DBWriter { 138 - keyspace, 139 - partition, 140 - } 141 - .write_batch(event_batch, last) 142 - }) 143 - .await??; 144 - log::trace!("write: back from blocking task, successfully wrote batch"); 145 - let wrote_for = writer_t0.elapsed(); 146 - 147 - println!("{batch_summary}, slept {slept_for: <12?}, wrote {wrote_for: <11?}, queue: {queue_size}"); 148 - } else { 149 - log::error!("store consumer: receive channel failed (dropped/closed?)"); 150 - anyhow::bail!("receive channel closed"); 151 - } 152 - } 153 - } 154 - 155 - /// Read-write loop reads from the queue for record-modifying events and does rollups 156 - pub async fn rw_loop(&self) -> anyhow::Result<()> { 157 - // TODO: lock so that only one rw loop can possibly be run. or even better, take a mutable resource thing to enforce at compile time. 
158 - 159 - let now = tokio::time::Instant::now(); 160 - let mut time_to_update_events = interval_at(now, Duration::from_secs_f64(0.051)); 161 - let mut time_to_trim_surplus = interval_at( 162 - now + Duration::from_secs_f64(1.0), 163 - Duration::from_secs_f64(3.3), 164 - ); 165 - let mut time_to_roll_up = interval_at( 166 - now + Duration::from_secs_f64(0.4), 167 - Duration::from_secs_f64(0.9), 168 - ); 169 - 170 - time_to_update_events.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); 171 - time_to_trim_surplus.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); 172 - time_to_roll_up.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); 173 - 174 - loop { 175 - let keyspace = self.db.keyspace.clone(); 176 - let partition = self.db.partition.clone(); 177 - tokio::select! { 178 - _ = time_to_update_events.tick() => { 179 - log::debug!("beginning event update task"); 180 - tokio::task::spawn_blocking(move || Self::update_events(keyspace, partition)).await??; 181 - log::debug!("finished event update task"); 182 - } 183 - _ = time_to_trim_surplus.tick() => { 184 - log::debug!("beginning record trim task"); 185 - tokio::task::spawn_blocking(move || Self::trim_old_events(keyspace, partition)).await??; 186 - log::debug!("finished record trim task"); 187 - } 188 - _ = time_to_roll_up.tick() => { 189 - log::debug!("beginning rollup task"); 190 - tokio::task::spawn_blocking(move || Self::roll_up_counts(keyspace, partition)).await??; 191 - log::debug!("finished rollup task"); 192 - }, 193 - } 194 - } 195 - } 196 - 197 - fn update_events(keyspace: Keyspace, partition: PartitionHandle) -> anyhow::Result<()> { 198 - // TODO: lock this to prevent concurrent rw 199 - 200 - log::trace!("rw: getting rw cursor..."); 201 - let mod_cursor = 202 - get_static::<ModCursorKey, ModCursorValue>(&partition)?.unwrap_or(Cursor::from_start()); 203 - let range = ModQueueItemKey::new(mod_cursor.clone()).range_to_prefix_end()?; 204 - 205 - let mut db_batch = 
keyspace.batch(); 206 - let mut batched_rw_items = 0; 207 - let mut any_tasks_found = false; 208 - 209 - log::trace!("rw: iterating newer rw items..."); 210 - 211 - for (i, pair) in partition.range(range.clone()).enumerate() { 212 - log::trace!("rw: iterating {i}"); 213 - any_tasks_found = true; 214 - 215 - if i >= MAX_BATCHED_RW_EVENTS { 216 - break; 217 - } 218 - 219 - let (key_bytes, val_bytes) = pair?; 220 - let mod_key = match db_complete::<ModQueueItemKey>(&key_bytes) { 221 - Ok(k) => k, 222 - Err(EncodingError::WrongStaticPrefix(_, _)) => { 223 - panic!("wsp: mod queue empty."); 224 - } 225 - otherwise => otherwise?, 226 - }; 227 - 228 - let mod_value: ModQueueItemValue = 229 - db_complete::<ModQueueItemStringValue>(&val_bytes)?.try_into()?; 230 - 231 - log::trace!("rw: iterating {i}: sending to batcher {mod_key:?} => {mod_value:?}"); 232 - batched_rw_items += DBWriter { 233 - keyspace: keyspace.clone(), 234 - partition: partition.clone(), 235 - } 236 - .write_rw(&mut db_batch, mod_key, mod_value)?; 237 - log::trace!("rw: iterating {i}: back from batcher."); 238 - 239 - if batched_rw_items >= MAX_BATCHED_RW_ITEMS { 240 - log::trace!("rw: iterating {i}: batch big enough, breaking out."); 241 - break; 242 - } 243 - } 244 - 245 - if !any_tasks_found { 246 - log::trace!("rw: skipping batch commit since apparently no items were added (this is normal, skipping is new)"); 247 - // TODO: is this missing a chance to update the cursor? 248 - return Ok(()); 249 - } 250 - 251 - log::info!("rw: committing rw batch with {batched_rw_items} items (items != total inserts/deletes)..."); 252 - let r = db_batch.commit(); 253 - log::info!("rw: commit result: {r:?}"); 254 - r?; 255 - Ok(()) 256 - } 257 - 258 - fn trim_old_events(_keyspace: Keyspace, _partition: PartitionHandle) -> anyhow::Result<()> { 259 - // we *could* keep a collection dirty list in memory to reduce the amount of searching here 260 - // actually can we use seen_by_js_cursor_collection?? 
261 - // * ["seen_by_js_cursor_collection"|js_cursor|collection] => u64 262 - // -> the rollup cursor could handle trims. 263 - 264 - // key structure: 265 - // * ["by_collection"|collection|js_cursor] => [did|rkey|record] 266 - 267 - // *new* strategy: 268 - // 1. collect `collection`s seen during rollup 269 - // 2. for each collected collection: 270 - // 3. set up prefix iterator 271 - // 4. reverse and try to walk back MAX_RETAINED steps 272 - // 5. if we didn't end iteration yet, start deleting records (and their forward links) until we get to the end 273 - 274 - // ... we can probably do even better with cursor ranges too, since we'll have a cursor range from rollup and it's in the by_collection key 275 - 276 - Ok(()) 277 - } 278 - 279 - fn roll_up_counts(_keyspace: Keyspace, _partition: PartitionHandle) -> anyhow::Result<()> { 280 - Ok(()) 281 - } 282 - 283 - pub async fn get_collection_records( 284 - &self, 285 - collection: &Nsid, 286 - limit: usize, 287 - ) -> anyhow::Result<Vec<CreateRecord>> { 288 - let partition = self.db.partition.clone(); 289 - let prefix = ByCollectionKey::prefix_from_collection(collection.clone())?; 290 - tokio::task::spawn_blocking(move || { 291 - let mut output = Vec::new(); 292 - 293 - for pair in partition.prefix(&prefix).rev().take(limit) { 294 - let (k_bytes, v_bytes) = pair?; 295 - let (_, cursor) = db_complete::<ByCollectionKey>(&k_bytes)?.into(); 296 - let (did, rkey, record) = db_complete::<ByCollectionValue>(&v_bytes)?.into(); 297 - output.push(CreateRecord { 298 - did, 299 - rkey, 300 - record, 301 - cursor, 302 - }) 303 - } 304 - Ok(output) 305 - }) 306 - .await? 
307 - } 308 - 309 - pub async fn get_meta_info(&self) -> anyhow::Result<StorageInfo> { 310 - let db = &self.db; 311 - let keyspace = db.keyspace.clone(); 312 - let partition = db.partition.clone(); 313 - tokio::task::spawn_blocking(move || { 314 - Ok(StorageInfo { 315 - keyspace_disk_space: keyspace.disk_space(), 316 - keyspace_journal_count: keyspace.journal_count(), 317 - keyspace_sequence: keyspace.instant(), 318 - partition_approximate_len: partition.approximate_len(), 319 - }) 320 - }) 321 - .await? 322 - } 323 - 324 - pub async fn get_collection_total_seen(&self, collection: &Nsid) -> anyhow::Result<u64> { 325 - let partition = self.db.partition.clone(); 326 - let collection = collection.clone(); 327 - tokio::task::spawn_blocking(move || get_unrolled_collection_seen(&partition, collection)) 328 - .await? 329 - } 330 - 331 - pub async fn get_top_collections(&self) -> anyhow::Result<HashMap<String, u64>> { 332 - let partition = self.db.partition.clone(); 333 - tokio::task::spawn_blocking(move || get_unrolled_top_collections(&partition)).await? 334 - } 335 - 336 - pub async fn get_jetstream_endpoint(&self) -> anyhow::Result<Option<JetstreamEndpointValue>> { 337 - let partition = self.db.partition.clone(); 338 - tokio::task::spawn_blocking(move || { 339 - get_static::<JetstreamEndpointKey, JetstreamEndpointValue>(&partition) 340 - }) 341 - .await? 342 - } 343 - 344 - async fn set_jetstream_endpoint(&self, endpoint: &str) -> anyhow::Result<()> { 345 - let partition = self.db.partition.clone(); 346 - let endpoint = endpoint.to_string(); 347 - tokio::task::spawn_blocking(move || { 348 - insert_static::<JetstreamEndpointKey>(&partition, JetstreamEndpointValue(endpoint)) 349 - }) 350 - .await? 
351 - } 352 - 353 - pub async fn get_jetstream_cursor(&self) -> anyhow::Result<Option<Cursor>> { 354 - let partition = self.db.partition.clone(); 355 - tokio::task::spawn_blocking(move || { 356 - get_static::<JetstreamCursorKey, JetstreamCursorValue>(&partition) 357 - }) 358 - .await? 359 - } 360 - 361 - pub async fn get_mod_cursor(&self) -> anyhow::Result<Option<Cursor>> { 362 - let partition = self.db.partition.clone(); 363 - tokio::task::spawn_blocking(move || get_static::<ModCursorKey, ModCursorValue>(&partition)) 364 - .await? 365 - } 366 - } 367 - 368 - /// Get a value from a fixed key 369 - fn get_static<K: StaticStr, V: DbBytes>(partition: &PartitionHandle) -> anyhow::Result<Option<V>> { 370 - let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?; 371 - let value = partition 372 - .get(&key_bytes)? 373 - .map(|value_bytes| db_complete(&value_bytes)) 374 - .transpose()?; 375 - Ok(value) 376 - } 377 - 378 - /// Set a value to a fixed key 379 - fn insert_static<K: StaticStr>( 380 - partition: &PartitionHandle, 381 - value: impl DbBytes, 382 - ) -> anyhow::Result<()> { 383 - let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?; 384 - let value_bytes = value.to_db_bytes()?; 385 - partition.insert(&key_bytes, &value_bytes)?; 386 - Ok(()) 387 - } 388 - 389 - /// Set a value to a fixed key 390 - fn insert_batch_static<K: StaticStr>( 391 - batch: &mut FjallBatch, 392 - partition: &PartitionHandle, 393 - value: impl DbBytes, 394 - ) -> anyhow::Result<()> { 395 - let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?; 396 - let value_bytes = value.to_db_bytes()?; 397 - batch.insert(partition, &key_bytes, &value_bytes); 398 - Ok(()) 399 - } 400 - 401 - /// Remove a key 402 - fn remove_batch<K: DbBytes>( 403 - batch: &mut FjallBatch, 404 - partition: &PartitionHandle, 405 - key: K, 406 - ) -> Result<(), EncodingError> { 407 - let key_bytes = key.to_db_bytes()?; 408 - batch.remove(partition, &key_bytes); 409 - Ok(()) 410 - } 411 - 412 - /// Get stats that 
haven't been rolled up yet 413 - fn get_unrolled_collection_seen( 414 - partition: &PartitionHandle, 415 - collection: Nsid, 416 - ) -> anyhow::Result<u64> { 417 - let range = 418 - if let Some(cursor_value) = get_static::<RollupCursorKey, RollupCursorValue>(partition)? { 419 - eprintln!("found existing cursor"); 420 - let key: ByCursorSeenKey = cursor_value.into(); 421 - key.range_from()? 422 - } else { 423 - eprintln!("cursor from start."); 424 - ByCursorSeenKey::full_range()? 425 - }; 426 - 427 - let mut collection_total = 0; 428 - 429 - let mut scanned = 0; 430 - let mut rolled = 0; 431 - 432 - for pair in partition.range(range) { 433 - let (key_bytes, value_bytes) = pair?; 434 - let key = db_complete::<ByCursorSeenKey>(&key_bytes)?; 435 - let val = db_complete::<ByCursorSeenValue>(&value_bytes)?; 436 - 437 - if *key.collection() == collection { 438 - let SeenCounter(n) = val; 439 - collection_total += n; 440 - rolled += 1; 441 - } 442 - scanned += 1; 443 - } 444 - 445 - eprintln!("scanned: {scanned}, rolled: {rolled}"); 446 - 447 - Ok(collection_total) 448 - } 449 - 450 - fn get_unrolled_top_collections( 451 - partition: &PartitionHandle, 452 - ) -> anyhow::Result<HashMap<String, u64>> { 453 - let range = 454 - if let Some(cursor_value) = get_static::<RollupCursorKey, RollupCursorValue>(partition)? { 455 - eprintln!("found existing cursor"); 456 - let key: ByCursorSeenKey = cursor_value.into(); 457 - key.range_from()? 458 - } else { 459 - eprintln!("cursor from start."); 460 - ByCursorSeenKey::full_range()? 
461 - }; 462 - 463 - let mut res = HashMap::new(); 464 - let mut scanned = 0; 465 - 466 - for pair in partition.range(range) { 467 - let (key_bytes, value_bytes) = pair?; 468 - let key = db_complete::<ByCursorSeenKey>(&key_bytes)?; 469 - let SeenCounter(n) = db_complete(&value_bytes)?; 470 - 471 - *res.entry(key.collection().to_string()).or_default() += n; 472 - 473 - scanned += 1; 474 - } 475 - 476 - eprintln!("scanned: {scanned} seen-counts."); 477 - 478 - Ok(res) 479 - } 480 - 481 - impl DBWriter { 482 - fn write_batch(self, event_batch: EventBatch, last: Option<Cursor>) -> anyhow::Result<()> { 483 - let mut db_batch = self.keyspace.batch(); 484 - self.add_record_creates(&mut db_batch, event_batch.record_creates)?; 485 - self.add_record_modifies(&mut db_batch, event_batch.record_modifies)?; 486 - self.add_account_removes(&mut db_batch, event_batch.account_removes)?; 487 - if let Some(cursor) = last { 488 - insert_batch_static::<JetstreamCursorKey>(&mut db_batch, &self.partition, cursor)?; 489 - } 490 - log::info!("write: committing write batch..."); 491 - let r = db_batch.commit(); 492 - log::info!("write: commit result: {r:?}"); 493 - r?; 494 - Ok(()) 495 - } 496 - 497 - fn write_rw( 498 - self, 499 - db_batch: &mut FjallBatch, 500 - mod_key: ModQueueItemKey, 501 - mod_value: ModQueueItemValue, 502 - ) -> anyhow::Result<usize> { 503 - // update the current rw cursor to this item (atomically with the batch if it succeeds) 504 - let mod_cursor: Cursor = (&mod_key).into(); 505 - insert_batch_static::<ModCursorKey>(db_batch, &self.partition, mod_cursor.clone())?; 506 - 507 - let items_modified = match mod_value { 508 - ModQueueItemValue::DeleteAccount(did) => { 509 - log::trace!("rw: batcher: delete account..."); 510 - let (items, finished) = self.delete_account(db_batch, mod_cursor, did)?; 511 - log::trace!("rw: batcher: back from delete account (finished? 
{finished})"); 512 - if finished { 513 - // only remove the queued rw task if we have actually completed its account removal work 514 - remove_batch::<ModQueueItemKey>(db_batch, &self.partition, mod_key)?; 515 - items + 1 516 - } else { 517 - items 518 - } 519 - } 520 - ModQueueItemValue::DeleteRecord(did, collection, rkey) => { 521 - log::trace!("rw: batcher: delete record..."); 522 - let items = self.delete_record(db_batch, mod_cursor, did, collection, rkey)?; 523 - log::trace!("rw: batcher: back from delete record"); 524 - remove_batch::<ModQueueItemKey>(db_batch, &self.partition, mod_key)?; 525 - items + 1 526 - } 527 - ModQueueItemValue::UpdateRecord(did, collection, rkey, record) => { 528 - let items = 529 - self.update_record(db_batch, mod_cursor, did, collection, rkey, record)?; 530 - remove_batch::<ModQueueItemKey>(db_batch, &self.partition, mod_key)?; 531 - items + 1 532 - } 533 - }; 534 - Ok(items_modified) 535 - } 536 - 537 - fn update_record( 538 - &self, 539 - db_batch: &mut FjallBatch, 540 - cursor: Cursor, 541 - did: Did, 542 - collection: Nsid, 543 - rkey: RecordKey, 544 - record: serde_json::Value, 545 - ) -> anyhow::Result<usize> { 546 - // 1. delete any existing versions older than us 547 - let items_deleted = self.delete_record( 548 - db_batch, 549 - cursor.clone(), 550 - did.clone(), 551 - collection.clone(), 552 - rkey.clone(), 553 - )?; 554 - 555 - // 2. 
insert the updated version, at our new cursor 556 - self.add_record(db_batch, cursor, did, collection, rkey, record)?; 557 - 558 - let items_total = items_deleted + 1; 559 - Ok(items_total) 560 - } 561 - 562 - fn delete_record( 563 - &self, 564 - db_batch: &mut FjallBatch, 565 - cursor: Cursor, 566 - did: Did, 567 - collection: Nsid, 568 - rkey: RecordKey, 569 - ) -> anyhow::Result<usize> { 570 - let key_prefix_bytes = 571 - ByIdKey::record_prefix(did.clone(), collection.clone(), rkey.clone()).to_db_bytes()?; 572 - 573 - // put the cursor of the actual deletion event in to prevent prefix iter from touching newer docs 574 - let key_limit = 575 - ByIdKey::new(did, collection.clone(), rkey, cursor.clone()).to_db_bytes()?; 576 - 577 - let mut items_removed = 0; 578 - 579 - log::trace!("delete_record: iterate over up to current cursor..."); 580 - 581 - for (i, pair) in self 582 - .partition 583 - .range(key_prefix_bytes..key_limit) 584 - .enumerate() 585 - { 586 - log::trace!("delete_record iter {i}: found"); 587 - // find all (hopefully 1) 588 - let (key_bytes, _) = pair?; 589 - let key = db_complete::<ByIdKey>(&key_bytes)?; 590 - let found_cursor = key.cursor(); 591 - if found_cursor > cursor { 592 - // we are *only* allowed to delete records that came before the record delete event 593 - // log::trace!("delete_record: found (and ignoring) newer version(s). 
key: {key:?}"); 594 - panic!("wtf, found newer version than cursor limit we tried to set."); 595 - // break; 596 - } 597 - 598 - // remove the by_id entry 599 - db_batch.remove(&self.partition, key_bytes); 600 - 601 - // remove its record sample 602 - let by_collection_key_bytes = 603 - ByCollectionKey::new(collection.clone(), found_cursor).to_db_bytes()?; 604 - db_batch.remove(&self.partition, by_collection_key_bytes); 605 - 606 - items_removed += 1; 607 - } 608 - 609 - // if items_removed > 1 { 610 - // log::trace!("odd, removed {items_removed} records for one record removal:"); 611 - // for (i, pair) in self.partition.prefix(&key_prefix_bytes).enumerate() { 612 - // // find all (hopefully 1) 613 - // let (key_bytes, _) = pair?; 614 - // let found_cursor = db_complete::<ByIdKey>(&key_bytes)?.cursor(); 615 - // if found_cursor > cursor { 616 - // break; 617 - // } 618 - 619 - // let key = db_complete::<ByIdKey>(&key_bytes)?; 620 - // log::trace!(" {i}: key {key:?}"); 621 - // } 622 - // } 623 - Ok(items_removed) 624 - } 625 - 626 - fn delete_account( 627 - &self, 628 - db_batch: &mut FjallBatch, 629 - cursor: Cursor, 630 - did: Did, 631 - ) -> anyhow::Result<(usize, bool)> { 632 - let key_prefix_bytes = ByIdKey::did_prefix(did).to_db_bytes()?; 633 - 634 - let mut items_added = 0; 635 - 636 - for pair in self.partition.prefix(&key_prefix_bytes) { 637 - let (key_bytes, _) = pair?; 638 - 639 - let (_, collection, _rkey, found_cursor) = db_complete::<ByIdKey>(&key_bytes)?.into(); 640 - if found_cursor > cursor { 641 - log::trace!( 642 - "delete account: found (and ignoring) newer records than the delete event??" 
643 - ); 644 - continue; 645 - } 646 - 647 - // remove the by_id entry 648 - db_batch.remove(&self.partition, key_bytes); 649 - 650 - // remove its record sample 651 - let by_collection_key_bytes = 652 - ByCollectionKey::new(collection, found_cursor).to_db_bytes()?; 653 - db_batch.remove(&self.partition, by_collection_key_bytes); 654 - 655 - items_added += 1; 656 - if items_added >= MAX_BATCHED_RW_ITEMS { 657 - return Ok((items_added, false)); // there might be more records but we've done enough for this batch 658 - } 659 - } 660 - 661 - Ok((items_added, true)) 662 - } 663 - 664 - fn add_record_creates( 665 - &self, 666 - db_batch: &mut FjallBatch, 667 - record_creates: HashMap<Nsid, CollectionSamples>, 668 - ) -> anyhow::Result<()> { 669 - for ( 670 - collection, 671 - CollectionSamples { 672 - total_seen, 673 - samples, 674 - }, 675 - ) in record_creates.into_iter() 676 - { 677 - if let Some(last_record) = &samples.back() { 678 - db_batch.insert( 679 - &self.partition, 680 - ByCursorSeenKey::new(last_record.cursor.clone(), collection.clone()) 681 - .to_db_bytes()?, 682 - ByCursorSeenValue::new(total_seen as u64).to_db_bytes()?, 683 - ); 684 - } else { 685 - log::error!( 686 - "collection samples should only exist when at least one sample has been added" 687 - ); 688 - } 689 - 690 - for CreateRecord { 691 - did, 692 - rkey, 693 - cursor, 694 - record, 695 - } in samples.into_iter().rev() 696 - { 697 - self.add_record(db_batch, cursor, did, collection.clone(), rkey, record)?; 698 - } 699 - } 700 - Ok(()) 701 - } 702 - 703 - fn add_record( 704 - &self, 705 - db_batch: &mut FjallBatch, 706 - cursor: Cursor, 707 - did: Did, 708 - collection: Nsid, 709 - rkey: RecordKey, 710 - record: serde_json::Value, 711 - ) -> anyhow::Result<()> { 712 - // ["by_collection"|collection|js_cursor] => [did|rkey|record] 713 - db_batch.insert( 714 - &self.partition, 715 - ByCollectionKey::new(collection.clone(), cursor.clone()).to_db_bytes()?, 716 - ByCollectionValue::new(did.clone(), 
rkey.clone(), record).to_db_bytes()?, 717 - ); 718 - 719 - // ["by_id"|did|collection|rkey|js_cursor] => [] // required to support deletes; did first prefix for account deletes. 720 - db_batch.insert( 721 - &self.partition, 722 - ByIdKey::new(did, collection.clone(), rkey, cursor).to_db_bytes()?, 723 - ByIdValue::default().to_db_bytes()?, 724 - ); 725 - 726 - Ok(()) 727 - } 728 - 729 - fn add_record_modifies( 730 - &self, 731 - db_batch: &mut FjallBatch, 732 - record_modifies: Vec<ModifyRecord>, 733 - ) -> anyhow::Result<()> { 734 - for modification in record_modifies { 735 - let (cursor, db_val) = match modification { 736 - ModifyRecord::Update(u) => ( 737 - u.cursor, 738 - ModQueueItemValue::UpdateRecord(u.did, u.collection, u.rkey, u.record), 739 - ), 740 - ModifyRecord::Delete(d) => ( 741 - d.cursor, 742 - ModQueueItemValue::DeleteRecord(d.did, d.collection, d.rkey), 743 - ), 744 - }; 745 - db_batch.insert( 746 - &self.partition, 747 - ModQueueItemKey::new(cursor).to_db_bytes()?, 748 - db_val.to_db_bytes()?, 749 - ); 750 - } 751 - Ok(()) 752 - } 753 - 754 - fn add_account_removes( 755 - &self, 756 - db_batch: &mut FjallBatch, 757 - account_removes: Vec<DeleteAccount>, 758 - ) -> anyhow::Result<()> { 759 - for deletion in account_removes { 760 - db_batch.insert( 761 - &self.partition, 762 - ModQueueItemKey::new(deletion.cursor).to_db_bytes()?, 763 - ModQueueItemValue::DeleteAccount(deletion.did).to_db_bytes()?, 764 - ); 765 - } 766 - Ok(()) 767 - } 768 - } 769 - 770 - #[derive(Debug, serde::Serialize, schemars::JsonSchema)] 771 - pub struct StorageInfo { 772 - pub keyspace_disk_space: u64, 773 - pub keyspace_journal_count: usize, 774 - pub keyspace_sequence: u64, 775 - pub partition_approximate_len: usize, 776 - } 777 - 778 - struct DBWriter { 779 - keyspace: Keyspace, 780 - partition: PartitionHandle, 781 - } 782 - 783 - ////////// temp stuff to remove: 784 - 785 - fn summarize_batch(batch: &EventBatch) -> String { 786 - let EventBatch { 787 - record_creates, 
788 - record_modifies, 789 - account_removes, 790 - last_jetstream_cursor, 791 - .. 792 - } = batch; 793 - let total_records: usize = record_creates.values().map(|v| v.total_seen).sum(); 794 - let total_samples: usize = record_creates.values().map(|v| v.samples.len()).sum(); 795 - format!( 796 - "batch of {total_samples: >3} samples from {total_records: >4} records in {: >2} collections, {: >3} modifies, {} acct removes, cursor {: <12?}", 797 - record_creates.len(), 798 - record_modifies.len(), 799 - account_removes.len(), 800 - last_jetstream_cursor.clone().map(|c| c.elapsed()) 801 - ) 802 - }
+312 -212
ufos/src/store_types.rs
··· 1 1 use crate::db_types::{ 2 - DbBytes, DbConcat, DbEmpty, DbStaticStr, EncodingError, StaticStr, UseBincodePlz, 2 + DbBytes, DbConcat, DbStaticStr, EncodingError, SerdeBytes, StaticStr, UseBincodePlz, 3 3 }; 4 - use crate::{Cursor, Did, Nsid, RecordKey}; 4 + use crate::{Cursor, Did, Nsid, PutAction, RecordKey, UFOsCommit}; 5 5 use bincode::{Decode, Encode}; 6 + use cardinality_estimator::CardinalityEstimator; 6 7 use std::ops::Range; 7 8 8 9 /// key format: ["js_cursor"] ··· 15 16 } 16 17 pub type JetstreamCursorValue = Cursor; 17 18 18 - /// key format: ["mod_cursor"] 19 + /// key format: ["rollup_cursor"] 19 20 #[derive(Debug, PartialEq)] 20 - pub struct ModCursorKey {} 21 - impl StaticStr for ModCursorKey { 21 + pub struct NewRollupCursorKey {} 22 + impl StaticStr for NewRollupCursorKey { 22 23 fn static_str() -> &'static str { 23 - "mod_cursor" 24 + "rollup_cursor" 24 25 } 25 26 } 26 - pub type ModCursorValue = Cursor; 27 + // pub type NewRollupCursorKey = DbStaticStr<_NewRollupCursorKey>; 28 + /// value format: [rollup_cursor(Cursor)|collection(Nsid)] 29 + pub type NewRollupCursorValue = Cursor; 27 30 28 - /// key format: ["rollup_cursor"] 31 + /// key format: ["js_endpoint"] 29 32 #[derive(Debug, PartialEq)] 30 - pub struct RollupCursorKey {} 31 - impl StaticStr for RollupCursorKey { 33 + pub struct TakeoffKey {} 34 + impl StaticStr for TakeoffKey { 32 35 fn static_str() -> &'static str { 33 - "rollup_cursor" 36 + "takeoff" 34 37 } 35 38 } 36 - /// value format: [rollup_cursor(Cursor)|collection(Nsid)] 37 - pub type RollupCursorValue = DbConcat<Cursor, Nsid>; 39 + pub type TakeoffValue = Cursor; 38 40 39 41 /// key format: ["js_endpoint"] 40 42 #[derive(Debug, PartialEq)] ··· 60 62 } 61 63 } 62 64 63 - #[derive(Debug, Clone, Encode, Decode)] 64 - pub struct SeenCounter(pub u64); 65 - impl SeenCounter { 66 - pub fn new(n: u64) -> Self { 67 - Self(n) 65 + pub type NsidRecordFeedKey = DbConcat<Nsid, Cursor>; 66 + impl NsidRecordFeedKey { 67 + pub fn 
collection(&self) -> &Nsid { 68 + &self.prefix 69 + } 70 + pub fn cursor(&self) -> Cursor { 71 + self.suffix 68 72 } 69 73 } 70 - impl UseBincodePlz for SeenCounter {} 74 + pub type NsidRecordFeedVal = DbConcat<Did, DbConcat<RecordKey, String>>; 75 + impl NsidRecordFeedVal { 76 + pub fn did(&self) -> &Did { 77 + &self.prefix 78 + } 79 + pub fn rkey(&self) -> &RecordKey { 80 + &self.suffix.prefix 81 + } 82 + pub fn rev(&self) -> &str { 83 + &self.suffix.suffix 84 + } 85 + } 86 + impl From<(&Did, &RecordKey, &str)> for NsidRecordFeedVal { 87 + fn from((did, rkey, rev): (&Did, &RecordKey, &str)) -> Self { 88 + Self::from_pair( 89 + did.clone(), 90 + DbConcat::from_pair(rkey.clone(), rev.to_string()), 91 + ) 92 + } 93 + } 71 94 72 - #[derive(Debug, PartialEq)] 73 - pub struct _ByCollectionStaticStr {} 74 - impl StaticStr for _ByCollectionStaticStr { 75 - fn static_str() -> &'static str { 76 - "by_collection" 95 + pub type RecordLocationKey = DbConcat<Did, DbConcat<Nsid, RecordKey>>; 96 + impl RecordLocationKey { 97 + pub fn did(&self) -> &Did { 98 + &self.prefix 99 + } 100 + pub fn collection(&self) -> &Nsid { 101 + &self.suffix.prefix 102 + } 103 + pub fn rkey(&self) -> &RecordKey { 104 + &self.suffix.suffix 77 105 } 78 106 } 79 - type ByCollectionPrefix = DbStaticStr<_ByCollectionStaticStr>; 80 - /// key format: ["by_collection"|collection|js_cursor] 81 - pub type ByCollectionKey = DbConcat<DbConcat<ByCollectionPrefix, Nsid>, Cursor>; 82 - impl ByCollectionKey { 83 - pub fn new(collection: Nsid, cursor: Cursor) -> Self { 84 - Self { 85 - prefix: DbConcat::from_pair(Default::default(), collection), 86 - suffix: cursor, 87 - } 88 - } 89 - pub fn prefix_from_collection(collection: Nsid) -> Result<Vec<u8>, EncodingError> { 90 - DbConcat::from_pair(ByCollectionPrefix::default(), collection).to_db_bytes() 107 + impl From<(&UFOsCommit, &Nsid)> for RecordLocationKey { 108 + fn from((commit, collection): (&UFOsCommit, &Nsid)) -> Self { 109 + Self::from_pair( 110 + 
commit.did.clone(), 111 + DbConcat::from_pair(collection.clone(), commit.rkey.clone()), 112 + ) 91 113 } 92 114 } 93 - impl From<ByCollectionKey> for (Nsid, Cursor) { 94 - fn from(k: ByCollectionKey) -> Self { 95 - (k.prefix.suffix, k.suffix) 115 + impl From<(&NsidRecordFeedKey, &NsidRecordFeedVal)> for RecordLocationKey { 116 + fn from((key, val): (&NsidRecordFeedKey, &NsidRecordFeedVal)) -> Self { 117 + Self::from_pair( 118 + val.did().clone(), 119 + DbConcat::from_pair(key.collection().clone(), val.rkey().clone()), 120 + ) 96 121 } 97 122 } 98 123 99 124 #[derive(Debug, PartialEq, Encode, Decode)] 100 - pub struct ByCollectionValueInfo { 101 - #[bincode(with_serde)] 102 - pub did: Did, 103 - #[bincode(with_serde)] 104 - pub rkey: RecordKey, 125 + pub struct RecordLocationMeta { 126 + cursor: u64, // ugh no bincode impl 127 + pub is_update: bool, 128 + pub rev: String, 105 129 } 106 - impl UseBincodePlz for ByCollectionValueInfo {} 107 - /// value format: contains did, rkey, record 108 - pub type ByCollectionValue = DbConcat<ByCollectionValueInfo, serde_json::Value>; 109 - impl ByCollectionValue { 110 - pub fn new(did: Did, rkey: RecordKey, record: serde_json::Value) -> Self { 111 - Self { 112 - prefix: ByCollectionValueInfo { did, rkey }, 113 - suffix: record, 114 - } 130 + impl RecordLocationMeta { 131 + pub fn cursor(&self) -> Cursor { 132 + Cursor::from_raw_u64(self.cursor) 115 133 } 116 134 } 117 - impl From<ByCollectionValue> for (Did, RecordKey, serde_json::Value) { 118 - fn from(v: ByCollectionValue) -> Self { 119 - (v.prefix.did, v.prefix.rkey, v.suffix) 135 + impl UseBincodePlz for RecordLocationMeta {} 136 + 137 + #[derive(Debug, Clone, PartialEq)] 138 + pub struct RecordRawValue(Vec<u8>); 139 + impl DbBytes for RecordRawValue { 140 + fn to_db_bytes(&self) -> Result<std::vec::Vec<u8>, EncodingError> { 141 + self.0.to_db_bytes() 142 + } 143 + fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> { 144 + let (v, n) = 
DbBytes::from_db_bytes(bytes)?; 145 + Ok((Self(v), n)) 146 + } 147 + } 148 + impl From<Box<serde_json::value::RawValue>> for RecordRawValue { 149 + fn from(v: Box<serde_json::value::RawValue>) -> Self { 150 + Self(v.get().into()) 151 + } 152 + } 153 + impl TryFrom<RecordRawValue> for Box<serde_json::value::RawValue> { 154 + type Error = EncodingError; 155 + fn try_from(rrv: RecordRawValue) -> Result<Self, Self::Error> { 156 + let s = String::from_utf8(rrv.0)?; 157 + let rv = serde_json::value::RawValue::from_string(s)?; 158 + Ok(rv) 159 + } 160 + } 161 + 162 + pub type RecordLocationVal = DbConcat<RecordLocationMeta, RecordRawValue>; 163 + impl From<(Cursor, &str, PutAction)> for RecordLocationVal { 164 + fn from((cursor, rev, put): (Cursor, &str, PutAction)) -> Self { 165 + let meta = RecordLocationMeta { 166 + cursor: cursor.to_raw_u64(), 167 + is_update: put.is_update, 168 + rev: rev.to_string(), 169 + }; 170 + Self::from_pair(meta, put.record.into()) 120 171 } 121 172 } 122 173 123 174 #[derive(Debug, PartialEq)] 124 - pub struct _ByIdStaticStr {} 125 - impl StaticStr for _ByIdStaticStr { 175 + pub struct _LiveRecordsStaticStr {} 176 + impl StaticStr for _LiveRecordsStaticStr { 126 177 fn static_str() -> &'static str { 127 - "by_id" 178 + "live_counts" 128 179 } 129 180 } 130 - type ByIdStaticPrefix = DbStaticStr<_ByIdStaticStr>; 131 - pub type ByIdDidPrefix = DbConcat<ByIdStaticPrefix, Did>; 132 - pub type ByIdCollectionPrefix = DbConcat<ByIdDidPrefix, Nsid>; 133 - pub type ByIdRecordPrefix = DbConcat<ByIdCollectionPrefix, RecordKey>; 134 - /// look up records by user or directly, instead of by collections 135 - /// 136 - /// required to support deletes; did first prefix for account deletes. 
137 - /// key format: ["by_id"|did|collection|rkey|js_cursor] 138 - pub type ByIdKey = DbConcat<ByIdRecordPrefix, Cursor>; 139 - impl ByIdKey { 140 - pub fn new(did: Did, collection: Nsid, rkey: RecordKey, cursor: Cursor) -> Self { 141 - Self::from_pair(Self::record_prefix(did, collection, rkey), cursor) 181 + 182 + type LiveCountsStaticPrefix = DbStaticStr<_LiveRecordsStaticStr>; 183 + type LiveCountsCursorPrefix = DbConcat<LiveCountsStaticPrefix, Cursor>; 184 + pub type LiveCountsKey = DbConcat<LiveCountsCursorPrefix, Nsid>; 185 + impl LiveCountsKey { 186 + pub fn range_from_cursor(cursor: Cursor) -> Result<Range<Vec<u8>>, EncodingError> { 187 + let prefix = LiveCountsCursorPrefix::from_pair(Default::default(), cursor); 188 + prefix.range_to_prefix_end() 142 189 } 143 - pub fn record_prefix(did: Did, collection: Nsid, rkey: RecordKey) -> ByIdRecordPrefix { 144 - ByIdRecordPrefix { 145 - prefix: ByIdCollectionPrefix { 146 - prefix: Self::did_prefix(did), 147 - suffix: collection, 148 - }, 149 - suffix: rkey, 150 - } 190 + pub fn cursor(&self) -> Cursor { 191 + self.prefix.suffix 151 192 } 152 - pub fn did_prefix(did: Did) -> ByIdDidPrefix { 153 - ByIdDidPrefix::from_pair(Default::default(), did) 154 - } 155 - pub fn cursor(&self) -> Cursor { 156 - self.suffix.clone() 193 + pub fn collection(&self) -> &Nsid { 194 + &self.suffix 157 195 } 158 196 } 159 - impl From<ByIdKey> for (Did, Nsid, RecordKey, Cursor) { 160 - fn from(k: ByIdKey) -> Self { 161 - ( 162 - k.prefix.prefix.prefix.suffix, 163 - k.prefix.prefix.suffix, 164 - k.prefix.suffix, 165 - k.suffix, 197 + impl From<(Cursor, &Nsid)> for LiveCountsKey { 198 + fn from((cursor, collection): (Cursor, &Nsid)) -> Self { 199 + Self::from_pair( 200 + LiveCountsCursorPrefix::from_pair(Default::default(), cursor), 201 + collection.clone(), 166 202 ) 167 203 } 168 204 } 205 + #[derive(Debug, PartialEq, Decode, Encode)] 206 + pub struct TotalRecordsValue(pub u64); 207 + impl UseBincodePlz for TotalRecordsValue {} 169 208 
170 - pub type ByIdValue = DbEmpty; 209 + #[derive(Debug, PartialEq, serde::Serialize, serde::Deserialize)] 210 + pub struct EstimatedDidsValue(pub CardinalityEstimator<Did>); 211 + impl SerdeBytes for EstimatedDidsValue {} 212 + impl DbBytes for EstimatedDidsValue { 213 + #[cfg(test)] 214 + fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> { 215 + SerdeBytes::to_bytes(self) 216 + } 217 + #[cfg(test)] 218 + fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> { 219 + SerdeBytes::from_bytes(bytes) 220 + } 171 221 172 - #[derive(Debug, PartialEq)] 173 - pub struct _ByCursorSeenStaticStr {} 174 - impl StaticStr for _ByCursorSeenStaticStr { 175 - fn static_str() -> &'static str { 176 - "seen_by_js_cursor" 222 + #[cfg(not(test))] 223 + fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> { 224 + Ok(vec![1, 2, 3]) // TODO: un-stub when their heap overflow is fixed 225 + } 226 + #[cfg(not(test))] 227 + fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> { 228 + if bytes.len() < 3 { 229 + return Err(EncodingError::DecodeNotEnoughBytes); 230 + } 231 + Ok((Self(CardinalityEstimator::new()), 3)) // TODO: un-stub when their heap overflow is fixed 177 232 } 178 233 } 179 - type ByCursorSeenPrefix = DbStaticStr<_ByCursorSeenStaticStr>; 180 - type ByCursorSeenCursorPrefix = DbConcat<ByCursorSeenPrefix, Cursor>; 181 - /// key format: ["seen_by_js_cursor"|js_cursor|collection] 182 - pub type ByCursorSeenKey = DbConcat<ByCursorSeenCursorPrefix, Nsid>; 183 - impl ByCursorSeenKey { 184 - pub fn new(cursor: Cursor, nsid: Nsid) -> Self { 234 + 235 + pub type CountsValue = DbConcat<TotalRecordsValue, EstimatedDidsValue>; 236 + impl CountsValue { 237 + pub fn new(total: u64, dids: CardinalityEstimator<Did>) -> Self { 185 238 Self { 186 - prefix: DbConcat::from_pair(Default::default(), cursor), 187 - suffix: nsid, 239 + prefix: TotalRecordsValue(total), 240 + suffix: EstimatedDidsValue(dids), 188 241 } 189 242 } 190 - pub fn full_range() -> 
Result<Range<Vec<u8>>, EncodingError> { 191 - let prefix = ByCursorSeenCursorPrefix::from_pair(Default::default(), Cursor::from_start()); 192 - prefix.range() 243 + pub fn records(&self) -> u64 { 244 + self.prefix.0 193 245 } 194 - pub fn range_from(&self) -> Result<Range<Vec<u8>>, EncodingError> { 195 - let start = self.to_db_bytes()?; 196 - let end = self.prefix.range_end()?; 197 - Ok(start..end) 246 + pub fn dids(&self) -> &CardinalityEstimator<Did> { 247 + &self.suffix.0 198 248 } 199 - pub fn collection(&self) -> &Nsid { 200 - &self.suffix 249 + pub fn merge(&mut self, other: &Self) { 250 + self.prefix.0 += other.records(); 251 + self.suffix.0.merge(other.dids()); 201 252 } 202 253 } 203 - impl From<RollupCursorValue> for ByCursorSeenKey { 204 - fn from(v: RollupCursorValue) -> Self { 205 - Self::new(v.prefix, v.suffix) 206 - } 207 - } 208 - impl From<ByCursorSeenKey> for (Cursor, Nsid) { 209 - fn from(k: ByCursorSeenKey) -> Self { 210 - (k.prefix.suffix, k.suffix) 254 + impl Default for CountsValue { 255 + fn default() -> Self { 256 + Self { 257 + prefix: TotalRecordsValue(0), 258 + suffix: EstimatedDidsValue(CardinalityEstimator::new()), 259 + } 211 260 } 212 261 } 213 - 214 - pub type ByCursorSeenValue = SeenCounter; 215 262 216 263 #[derive(Debug, PartialEq)] 217 - pub struct _ModQueueItemStaticStr {} 218 - impl StaticStr for _ModQueueItemStaticStr { 264 + pub struct _DeleteAccountStaticStr {} 265 + impl StaticStr for _DeleteAccountStaticStr { 219 266 fn static_str() -> &'static str { 220 - "mod_queue" 267 + "delete_acount" 221 268 } 222 269 } 223 - pub type ModQueueItemPrefix = DbStaticStr<_ModQueueItemStaticStr>; 224 - /// key format: ["mod_queue"|js_cursor] 225 - pub type ModQueueItemKey = DbConcat<ModQueueItemPrefix, Cursor>; 226 - impl ModQueueItemKey { 270 + pub type DeleteAccountStaticPrefix = DbStaticStr<_DeleteAccountStaticStr>; 271 + pub type DeleteAccountQueueKey = DbConcat<DeleteAccountStaticPrefix, Cursor>; 272 + impl DeleteAccountQueueKey { 
227 273 pub fn new(cursor: Cursor) -> Self { 228 274 Self::from_pair(Default::default(), cursor) 229 275 } 230 276 } 231 - // todo: remove this? all we need is the ModCursorValue version? 232 - impl From<ModQueueItemKey> for Cursor { 233 - fn from(k: ModQueueItemKey) -> Self { 234 - k.suffix 277 + pub type DeleteAccountQueueVal = Did; 278 + 279 + #[derive(Debug, PartialEq)] 280 + pub struct _HourlyRollupStaticStr {} 281 + impl StaticStr for _HourlyRollupStaticStr { 282 + fn static_str() -> &'static str { 283 + "hourly_counts" 235 284 } 236 285 } 237 - impl From<&ModQueueItemKey> for ModCursorValue { 238 - fn from(k: &ModQueueItemKey) -> Self { 239 - k.suffix.clone() 286 + pub type HourlyRollupStaticPrefix = DbStaticStr<_HourlyRollupStaticStr>; 287 + pub type HourlyRollupKey = DbConcat<DbConcat<HourlyRollupStaticPrefix, HourTruncatedCursor>, Nsid>; 288 + impl HourlyRollupKey { 289 + pub fn new(hourly_cursor: HourTruncatedCursor, nsid: &Nsid) -> Self { 290 + Self::from_pair( 291 + DbConcat::from_pair(Default::default(), hourly_cursor), 292 + nsid.clone(), 293 + ) 240 294 } 241 295 } 296 + pub type HourlyRollupVal = CountsValue; 242 297 243 - #[derive(Debug, Encode, Decode)] 244 - pub enum ModQueueItemStringValue { 245 - DeleteAccount(String), // did 246 - DeleteRecord(String, String, String), // did, collection, rkey 247 - UpdateRecord(String, String, String, String), // did, collection, rkey, json record 298 + #[derive(Debug, PartialEq)] 299 + pub struct _WeeklyRollupStaticStr {} 300 + impl StaticStr for _WeeklyRollupStaticStr { 301 + fn static_str() -> &'static str { 302 + "weekly_counts" 303 + } 248 304 } 249 - impl UseBincodePlz for ModQueueItemStringValue {} 250 - #[derive(Debug, Clone, PartialEq)] 251 - pub enum ModQueueItemValue { 252 - DeleteAccount(Did), 253 - DeleteRecord(Did, Nsid, RecordKey), 254 - UpdateRecord(Did, Nsid, RecordKey, serde_json::Value), 305 + pub type WeeklyRollupStaticPrefix = DbStaticStr<_WeeklyRollupStaticStr>; 306 + pub type 
WeeklyRollupKey = DbConcat<DbConcat<WeeklyRollupStaticPrefix, WeekTruncatedCursor>, Nsid>; 307 + impl WeeklyRollupKey { 308 + pub fn new(weekly_cursor: WeekTruncatedCursor, nsid: &Nsid) -> Self { 309 + Self::from_pair( 310 + DbConcat::from_pair(Default::default(), weekly_cursor), 311 + nsid.clone(), 312 + ) 313 + } 255 314 } 256 - impl From<ModQueueItemValue> for ModQueueItemStringValue { 257 - fn from(v: ModQueueItemValue) -> Self { 258 - match v { 259 - ModQueueItemValue::DeleteAccount(did) => { 260 - ModQueueItemStringValue::DeleteAccount(did.to_string()) 261 - } 262 - ModQueueItemValue::DeleteRecord(did, collection, rkey) => { 263 - ModQueueItemStringValue::DeleteRecord( 264 - did.to_string(), 265 - collection.to_string(), 266 - rkey.to_string(), 267 - ) 268 - } 269 - ModQueueItemValue::UpdateRecord(did, collection, rkey, record) => { 270 - ModQueueItemStringValue::UpdateRecord( 271 - did.to_string(), 272 - collection.to_string(), 273 - rkey.to_string(), 274 - record.to_string(), 275 - ) 276 - } 277 - } 315 + pub type WeeklyRollupVal = CountsValue; 316 + 317 + #[derive(Debug, PartialEq)] 318 + pub struct _AllTimeRollupStaticStr {} 319 + impl StaticStr for _AllTimeRollupStaticStr { 320 + fn static_str() -> &'static str { 321 + "ever_counts" 278 322 } 279 323 } 280 - impl TryFrom<ModQueueItemStringValue> for ModQueueItemValue { 281 - type Error = EncodingError; 282 - fn try_from(v: ModQueueItemStringValue) -> Result<Self, Self::Error> { 283 - match v { 284 - ModQueueItemStringValue::DeleteAccount(did) => Ok(ModQueueItemValue::DeleteAccount( 285 - Did::new(did).map_err(EncodingError::BadAtriumStringType)?, 286 - )), 287 - ModQueueItemStringValue::DeleteRecord(did, collection, rkey) => { 288 - Ok(ModQueueItemValue::DeleteRecord( 289 - Did::new(did).map_err(EncodingError::BadAtriumStringType)?, 290 - Nsid::new(collection).map_err(EncodingError::BadAtriumStringType)?, 291 - RecordKey::new(rkey).map_err(EncodingError::BadAtriumStringType)?, 292 - )) 293 - } 294 - 
ModQueueItemStringValue::UpdateRecord(did, collection, rkey, record) => { 295 - Ok(ModQueueItemValue::UpdateRecord( 296 - Did::new(did).map_err(EncodingError::BadAtriumStringType)?, 297 - Nsid::new(collection).map_err(EncodingError::BadAtriumStringType)?, 298 - RecordKey::new(rkey).map_err(EncodingError::BadAtriumStringType)?, 299 - record.parse()?, 300 - )) 301 - } 324 + pub type AllTimeRollupStaticPrefix = DbStaticStr<_AllTimeRollupStaticStr>; 325 + pub type AllTimeRollupKey = DbConcat<AllTimeRollupStaticPrefix, Nsid>; 326 + impl AllTimeRollupKey { 327 + pub fn new(nsid: &Nsid) -> Self { 328 + Self::from_pair(Default::default(), nsid.clone()) 329 + } 330 + pub fn collection(&self) -> &Nsid { 331 + &self.suffix 332 + } 333 + } 334 + pub type AllTimeRollupVal = CountsValue; 335 + 336 + #[derive(Debug, Copy, Clone, PartialEq, Hash, PartialOrd, Eq)] 337 + pub struct TruncatedCursor<const MOD: u64>(u64); 338 + impl<const MOD: u64> TruncatedCursor<MOD> { 339 + fn truncate(raw: u64) -> u64 { 340 + (raw / MOD) * MOD 341 + } 342 + pub fn try_from_raw_u64(time_us: u64) -> Result<Self, EncodingError> { 343 + let rem = time_us % MOD; 344 + if rem != 0 { 345 + return Err(EncodingError::InvalidTruncated(MOD, rem)); 302 346 } 347 + Ok(Self(time_us)) 348 + } 349 + pub fn try_from_cursor(cursor: Cursor) -> Result<Self, EncodingError> { 350 + Self::try_from_raw_u64(cursor.to_raw_u64()) 351 + } 352 + pub fn truncate_cursor(cursor: Cursor) -> Self { 353 + let raw = cursor.to_raw_u64(); 354 + let truncated = Self::truncate(raw); 355 + Self(truncated) 303 356 } 304 357 } 305 - impl DbBytes for ModQueueItemValue { 306 - fn to_db_bytes(&self) -> Result<std::vec::Vec<u8>, EncodingError> { 307 - Into::<ModQueueItemStringValue>::into(self.clone()).to_db_bytes() 358 + impl<const MOD: u64> From<TruncatedCursor<MOD>> for Cursor { 359 + fn from(truncated: TruncatedCursor<MOD>) -> Self { 360 + Cursor::from_raw_u64(truncated.0) 361 + } 362 + } 363 + impl<const MOD: u64> From<Cursor> for 
impl<const MOD: u64> DbBytes for TruncatedCursor<MOD> {
    /// Serialize via the plain `Cursor` encoding of the raw value.
    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
        let as_cursor: Cursor = (*self).into();
        as_cursor.to_db_bytes()
    }
    /// Deserialize and verify the stored value is actually `MOD`-aligned;
    /// a misaligned value in the DB is corrupt and must not round silently.
    fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
        let (cursor, n) = Cursor::from_db_bytes(bytes)?;
        let me = Self::try_from_cursor(cursor)?;
        Ok((me, n))
    }
}

const HOUR_IN_MICROS: u64 = 1_000_000 * 3600;
pub type HourTruncatedCursor = TruncatedCursor<HOUR_IN_MICROS>;

const WEEK_IN_MICROS: u64 = HOUR_IN_MICROS * 24 * 7;
pub type WeekTruncatedCursor = TruncatedCursor<WEEK_IN_MICROS>;

#[cfg(test)]
mod test {
    use super::{
        CardinalityEstimator, CountsValue, Cursor, Did, EncodingError, HourTruncatedCursor,
        HourlyRollupKey, Nsid, HOUR_IN_MICROS,
    };
    use crate::db_types::DbBytes;

    /// Round-trip an hourly rollup key and check its prefix layout.
    #[test]
    fn test_by_hourly_rollup_key() -> Result<(), EncodingError> {
        let nsid = Nsid::new("ab.cd.efg".to_string()).unwrap();
        let original = HourlyRollupKey::new(Cursor::from_raw_u64(4567890).into(), &nsid);
        let serialized = original.to_db_bytes()?;
        let (restored, bytes_consumed) = HourlyRollupKey::from_db_bytes(&serialized)?;
        assert_eq!(restored, original);
        assert_eq!(bytes_consumed, serialized.len());

        let serialized_prefix = original.to_prefix_db_bytes()?;
        assert!(serialized_prefix.starts_with("hourly_counts".as_bytes()));
        // BUGFIX: was `serialized_prefix.starts_with(&serialized_prefix)`, a
        // tautology that always passes. The intent (as in the pre-merge test)
        // is that the full serialized key begins with its prefix encoding.
        assert!(serialized.starts_with(&serialized_prefix));

        Ok(())
    }
serialized.len()); 329 402 330 403 let serialized_prefix = original.to_prefix_db_bytes()?; 331 - assert!(serialized.starts_with(&serialized_prefix)); 332 - let just_prefix = ByCollectionKey::prefix_from_collection(nsid)?; 333 - assert_eq!(just_prefix, serialized_prefix); 334 - assert!(just_prefix.starts_with("by_collection".as_bytes())); 404 + assert!(serialized_prefix.starts_with("hourly_counts".as_bytes())); 405 + assert!(serialized_prefix.starts_with(&serialized_prefix)); 335 406 336 407 Ok(()) 337 408 } 338 409 339 410 #[test] 340 - fn test_by_collection_value() -> Result<(), EncodingError> { 341 - let did = Did::new("did:plc:inze6wrmsm7pjl7yta3oig77".to_string()).unwrap(); 342 - let rkey = RecordKey::new("asdfasdf".to_string()).unwrap(); 343 - let record = serde_json::Value::String("hellooooo".into()); 411 + fn test_by_hourly_rollup_value() -> Result<(), EncodingError> { 412 + let mut estimator = CardinalityEstimator::new(); 413 + for i in 0..10 { 414 + estimator.insert(&Did::new(format!("did:plc:inze6wrmsm7pjl7yta3oig7{i}")).unwrap()); 415 + } 416 + let original = CountsValue::new(123, estimator.clone()); 417 + let serialized = original.to_db_bytes()?; 418 + let (restored, bytes_consumed) = CountsValue::from_db_bytes(&serialized)?; 419 + assert_eq!(restored, original); 420 + assert_eq!(bytes_consumed, serialized.len()); 344 421 345 - let original = ByCollectionValue::new(did, rkey, record); 422 + for i in 10..1_000 { 423 + estimator.insert(&Did::new(format!("did:plc:inze6wrmsm7pjl7yta3oig{i}")).unwrap()); 424 + } 425 + let original = CountsValue::new(123, estimator); 346 426 let serialized = original.to_db_bytes()?; 347 - let (restored, bytes_consumed) = ByCollectionValue::from_db_bytes(&serialized)?; 427 + let (restored, bytes_consumed) = CountsValue::from_db_bytes(&serialized)?; 348 428 assert_eq!(restored, original); 349 429 assert_eq!(bytes_consumed, serialized.len()); 350 430 351 431 Ok(()) 432 + } 433 + 434 + #[test] 435 + fn 
test_hour_truncated_cursor() { 436 + let us = Cursor::from_raw_u64(1_743_778_483_483_895); 437 + let hr = HourTruncatedCursor::truncate_cursor(us); 438 + let back: Cursor = hr.into(); 439 + assert!(back < us); 440 + let diff = us.to_raw_u64() - back.to_raw_u64(); 441 + assert!(diff < HOUR_IN_MICROS); 442 + } 443 + 444 + #[test] 445 + fn test_hour_truncated_cursor_already_truncated() { 446 + let us = Cursor::from_raw_u64(1_743_775_200_000_000); 447 + let hr = HourTruncatedCursor::truncate_cursor(us); 448 + let back: Cursor = hr.into(); 449 + assert_eq!(back, us); 450 + let diff = us.to_raw_u64() - back.to_raw_u64(); 451 + assert_eq!(diff, 0); 352 452 } 353 453 }