Coffee journaling on ATProto (alpha) — alpha.arabica.social
coffee

refactor: swap feed over to sqlite (#18)

* refactor: move suggestions to their own package

* feat: initial fuzzy matching on name for suggestions

* refactor: sqlite feed

authored by Patrick Dewey and committed by GitHub
commits: 452b9197 48cf573a

+1836 -2889
+15 -10
cmd/server/main.go
··· 16 16 17 17 "arabica/internal/atproto" 18 18 "arabica/internal/database/boltstore" 19 + "arabica/internal/database/sqlitestore" 19 20 "arabica/internal/email" 20 21 "arabica/internal/feed" 21 22 "arabica/internal/firehose" ··· 103 104 104 105 // Get specialized stores 105 106 sessionStore := store.SessionStore() 106 - feedStore := store.FeedStore() 107 107 108 108 // Initialize OAuth manager with persistent session store 109 109 // For local development, localhost URLs trigger special localhost mode in indigo ··· 130 130 log.Fatal().Err(err).Msg("Failed to initialize OAuth") 131 131 } 132 132 133 - // Initialize feed registry with persistent store 134 - // This loads existing registered DIDs from the database 135 - feedRegistry := feed.NewPersistentRegistry(feedStore) 133 + // Initialize feed registry (in-memory; populated from SQLite after feedIndex opens) 134 + feedRegistry := feed.NewRegistry() 136 135 feedService := feed.NewService(feedRegistry) 137 136 138 - log.Info(). 139 - Int("registered_users", feedRegistry.Count()). 
140 - Msg("Feed service initialized with persistent registry") 141 - 142 137 // Setup context for graceful shutdown 143 138 ctx, cancel := context.WithCancel(context.Background()) 144 139 defer cancel() ··· 159 154 } 160 155 dataDir = filepath.Join(home, ".local", "share") 161 156 } 162 - feedIndexPath = filepath.Join(dataDir, "arabica", "feed-index.db") 157 + feedIndexPath = filepath.Join(dataDir, "arabica", "feed-index.sqlite") 163 158 } 164 159 165 160 // Create firehose config ··· 181 176 182 177 log.Info().Str("path", feedIndexPath).Msg("Feed index opened") 183 178 179 + // Populate feed registry from SQLite known_dids (replaces BoltDB feed registry) 180 + if knownDIDs, err := feedIndex.GetKnownDIDs(); err == nil { 181 + for _, did := range knownDIDs { 182 + feedRegistry.Register(did) 183 + } 184 + log.Info().Int("registered_users", feedRegistry.Count()).Msg("Feed registry populated from index") 185 + } else { 186 + log.Warn().Err(err).Msg("Failed to load known DIDs into feed registry") 187 + } 188 + 184 189 // Create and start consumer 185 190 firehoseConsumer := firehose.NewConsumer(firehoseConfig, feedIndex) 186 191 firehoseConsumer.Start(ctx) ··· 190 195 feedService.SetFirehoseIndex(adapter) 191 196 192 197 // Wire up moderation filtering for the feed 193 - moderationStore := store.ModerationStore() 198 + moderationStore := sqlitestore.NewModerationStore(feedIndex.DB()) 194 199 feedService.SetModerationFilter(moderationStore) 195 200 196 201 log.Info().Msg("Firehose consumer started")
+10 -2
go.mod
··· 14 14 github.com/stretchr/testify v1.11.1 15 15 go.etcd.io/bbolt v1.3.8 16 16 golang.org/x/sync v0.19.0 17 + modernc.org/sqlite v1.46.1 17 18 ) 18 19 19 20 require ( 20 21 github.com/beorn7/perks v1.0.1 // indirect 21 22 github.com/cespare/xxhash/v2 v2.3.0 // indirect 22 23 github.com/davecgh/go-spew v1.1.1 // indirect 24 + github.com/dustin/go-humanize v1.0.1 // indirect 23 25 github.com/earthboundkid/versioninfo/v2 v2.24.1 // indirect 24 26 github.com/felixge/httpsnoop v1.0.4 // indirect 25 27 github.com/go-logr/logr v1.4.1 // indirect 26 28 github.com/go-logr/stdr v1.2.2 // indirect 27 29 github.com/gogo/protobuf v1.3.2 // indirect 28 30 github.com/golang-jwt/jwt/v5 v5.2.2 // indirect 29 - github.com/google/uuid v1.4.0 // indirect 31 + github.com/google/uuid v1.6.0 // indirect 30 32 github.com/hashicorp/go-cleanhttp v0.5.2 // indirect 31 33 github.com/hashicorp/go-retryablehttp v0.7.5 // indirect 32 34 github.com/hashicorp/golang-lru v1.0.2 // indirect ··· 55 57 github.com/multiformats/go-multihash v0.2.3 // indirect 56 58 github.com/multiformats/go-varint v0.0.7 // indirect 57 59 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 60 + github.com/ncruces/go-strftime v1.0.0 // indirect 58 61 github.com/opentracing/opentracing-go v1.2.0 // indirect 59 62 github.com/pmezard/go-difflib v1.0.0 // indirect 60 63 github.com/polydawn/refmt v0.89.1-0.20221221234430-40501e09de1f // indirect 61 64 github.com/prometheus/common v0.66.1 // indirect 62 65 github.com/prometheus/procfs v0.16.1 // indirect 66 + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect 63 67 github.com/spaolacci/murmur3 v1.1.0 // indirect 64 68 github.com/whyrusleeping/cbor-gen v0.2.1-0.20241030202151-b7a6831be65e // indirect 65 69 gitlab.com/yawning/secp256k1-voi v0.0.0-20230925100816-f2616030848b // indirect ··· 73 77 go.uber.org/zap v1.26.0 // indirect 74 78 go.yaml.in/yaml/v2 v2.4.2 // indirect 75 79 golang.org/x/crypto v0.40.0 // indirect 76 
- golang.org/x/sys v0.36.0 // indirect 80 + golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect 81 + golang.org/x/sys v0.37.0 // indirect 77 82 golang.org/x/time v0.3.0 // indirect 78 83 golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect 79 84 google.golang.org/protobuf v1.36.8 // indirect 80 85 gopkg.in/yaml.v3 v3.0.1 // indirect 81 86 lukechampine.com/blake3 v1.2.1 // indirect 87 + modernc.org/libc v1.67.6 // indirect 88 + modernc.org/mathutil v1.7.1 // indirect 89 + modernc.org/memory v1.11.0 // indirect 82 90 )
+46 -4
go.sum
··· 13 13 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 14 14 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 15 15 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 16 + github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= 17 + github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= 16 18 github.com/earthboundkid/versioninfo/v2 v2.24.1 h1:SJTMHaoUx3GzjjnUO1QzP3ZXK6Ee/nbWyCm58eY3oUg= 17 19 github.com/earthboundkid/versioninfo/v2 v2.24.1/go.mod h1:VcWEooDEuyUJnMfbdTh0uFN4cfEIg+kHMuWB2CDCLjw= 18 20 github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= ··· 33 35 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= 34 36 github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= 35 37 github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= 38 + github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs= 39 + github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= 36 40 github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= 37 - github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4= 38 - github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 41 + github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 42 + github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 39 43 github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= 40 44 github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= 41 45 
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= ··· 120 124 github.com/multiformats/go-varint v0.0.7/go.mod h1:r8PUYw/fD/SjBCiKOoDlGF6QawOELpZAu9eioSos/OU= 121 125 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= 122 126 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= 127 + github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= 128 + github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= 123 129 github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= 124 130 github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= 125 131 github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= ··· 136 142 github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= 137 143 github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= 138 144 github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= 145 + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= 146 + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= 139 147 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= 140 148 github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= 141 149 github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= ··· 203 211 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 204 212 golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM= 205 213 
golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY= 214 + golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY= 215 + golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70= 206 216 golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 207 217 golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= 208 218 golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 209 219 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 210 220 golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 221 + golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= 222 + golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= 211 223 golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 212 224 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 213 225 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= ··· 231 243 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 232 244 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 233 245 golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 234 - golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= 235 - golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= 246 + golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= 247 + golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= 236 248 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod 
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 237 249 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 238 250 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= ··· 248 260 golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= 249 261 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 250 262 golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= 263 + golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= 264 + golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= 251 265 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 252 266 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 253 267 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= ··· 270 284 honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= 271 285 lukechampine.com/blake3 v1.2.1 h1:YuqqRuaqsGV71BV/nm9xlI0MKUv4QC54jQnBChWbGnI= 272 286 lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1LM6k= 287 + modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis= 288 + modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0= 289 + modernc.org/ccgo/v4 v4.30.1 h1:4r4U1J6Fhj98NKfSjnPUN7Ze2c6MnAdL0hWw6+LrJpc= 290 + modernc.org/ccgo/v4 v4.30.1/go.mod h1:bIOeI1JL54Utlxn+LwrFyjCx2n2RDiYEaJVSrgdrRfM= 291 + modernc.org/fileutil v1.3.40 h1:ZGMswMNc9JOCrcrakF1HrvmergNLAmxOPjizirpfqBA= 292 + modernc.org/fileutil v1.3.40/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc= 293 + modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI= 294 + modernc.org/gc/v2 v2.6.5/go.mod 
h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito= 295 + modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE= 296 + modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY= 297 + modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks= 298 + modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI= 299 + modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI= 300 + modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE= 301 + modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= 302 + modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= 303 + modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= 304 + modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= 305 + modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8= 306 + modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns= 307 + modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w= 308 + modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE= 309 + modernc.org/sqlite v1.46.1 h1:eFJ2ShBLIEnUWlLy12raN0Z1plqmFX9Qe3rjQTKt6sU= 310 + modernc.org/sqlite v1.46.1/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA= 311 + modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0= 312 + modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A= 313 + modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= 314 + modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
-157
internal/database/boltstore/feed_store.go
··· 1 - package boltstore 2 - 3 - import ( 4 - "encoding/json" 5 - "time" 6 - 7 - bolt "go.etcd.io/bbolt" 8 - ) 9 - 10 - // FeedUser represents a registered user in the feed registry. 11 - type FeedUser struct { 12 - DID string `json:"did"` 13 - RegisteredAt time.Time `json:"registered_at"` 14 - } 15 - 16 - // FeedStore provides persistent storage for the feed registry. 17 - // It stores DIDs of users who have logged in and should appear in the community feed. 18 - type FeedStore struct { 19 - db *bolt.DB 20 - } 21 - 22 - // Register adds a DID to the feed registry. 23 - // If the DID already exists, this is a no-op. 24 - func (s *FeedStore) Register(did string) error { 25 - return s.db.Update(func(tx *bolt.Tx) error { 26 - bucket := tx.Bucket(BucketFeedRegistry) 27 - if bucket == nil { 28 - return nil 29 - } 30 - 31 - // Check if already registered 32 - existing := bucket.Get([]byte(did)) 33 - if existing != nil { 34 - // Already registered, no-op 35 - return nil 36 - } 37 - 38 - // Create new registration 39 - user := FeedUser{ 40 - DID: did, 41 - RegisteredAt: time.Now(), 42 - } 43 - 44 - data, err := json.Marshal(user) 45 - if err != nil { 46 - return err 47 - } 48 - 49 - return bucket.Put([]byte(did), data) 50 - }) 51 - } 52 - 53 - // Unregister removes a DID from the feed registry. 54 - func (s *FeedStore) Unregister(did string) error { 55 - return s.db.Update(func(tx *bolt.Tx) error { 56 - bucket := tx.Bucket(BucketFeedRegistry) 57 - if bucket == nil { 58 - return nil 59 - } 60 - 61 - return bucket.Delete([]byte(did)) 62 - }) 63 - } 64 - 65 - // IsRegistered checks if a DID is in the feed registry. 
66 - func (s *FeedStore) IsRegistered(did string) bool { 67 - var registered bool 68 - 69 - s.db.View(func(tx *bolt.Tx) error { 70 - bucket := tx.Bucket(BucketFeedRegistry) 71 - if bucket == nil { 72 - return nil 73 - } 74 - 75 - registered = bucket.Get([]byte(did)) != nil 76 - return nil 77 - }) 78 - 79 - return registered 80 - } 81 - 82 - // List returns all registered DIDs. 83 - func (s *FeedStore) List() []string { 84 - var dids []string 85 - 86 - s.db.View(func(tx *bolt.Tx) error { 87 - bucket := tx.Bucket(BucketFeedRegistry) 88 - if bucket == nil { 89 - return nil 90 - } 91 - 92 - return bucket.ForEach(func(k, v []byte) error { 93 - dids = append(dids, string(k)) 94 - return nil 95 - }) 96 - }) 97 - 98 - return dids 99 - } 100 - 101 - // ListWithMetadata returns all registered users with their metadata. 102 - func (s *FeedStore) ListWithMetadata() []FeedUser { 103 - var users []FeedUser 104 - 105 - s.db.View(func(tx *bolt.Tx) error { 106 - bucket := tx.Bucket(BucketFeedRegistry) 107 - if bucket == nil { 108 - return nil 109 - } 110 - 111 - return bucket.ForEach(func(k, v []byte) error { 112 - var user FeedUser 113 - if err := json.Unmarshal(v, &user); err != nil { 114 - // Fallback for simple keys without metadata 115 - user = FeedUser{DID: string(k)} 116 - } 117 - users = append(users, user) 118 - return nil 119 - }) 120 - }) 121 - 122 - return users 123 - } 124 - 125 - // Count returns the number of registered users. 126 - func (s *FeedStore) Count() int { 127 - var count int 128 - 129 - s.db.View(func(tx *bolt.Tx) error { 130 - bucket := tx.Bucket(BucketFeedRegistry) 131 - if bucket == nil { 132 - return nil 133 - } 134 - 135 - count = bucket.Stats().KeyN 136 - return nil 137 - }) 138 - 139 - return count 140 - } 141 - 142 - // Clear removes all entries from the feed registry. 143 - // Use with caution - primarily for testing. 
144 - func (s *FeedStore) Clear() error { 145 - return s.db.Update(func(tx *bolt.Tx) error { 146 - // Delete and recreate the bucket 147 - if err := tx.DeleteBucket(BucketFeedRegistry); err != nil { 148 - // Bucket might not exist, that's ok 149 - if err != bolt.ErrBucketNotFound { 150 - return err 151 - } 152 - } 153 - 154 - _, err := tx.CreateBucket(BucketFeedRegistry) 155 - return err 156 - }) 157 - }
-139
internal/database/boltstore/feed_store_test.go
··· 1 - package boltstore 2 - 3 - import ( 4 - "path/filepath" 5 - "testing" 6 - 7 - "github.com/stretchr/testify/assert" 8 - "github.com/stretchr/testify/require" 9 - ) 10 - 11 - func setupTestFeedStore(t *testing.T) *FeedStore { 12 - tmpDir := t.TempDir() 13 - dbPath := filepath.Join(tmpDir, "test.db") 14 - 15 - store, err := Open(Options{Path: dbPath}) 16 - require.NoError(t, err) 17 - 18 - t.Cleanup(func() { 19 - store.Close() 20 - }) 21 - 22 - return store.FeedStore() 23 - } 24 - 25 - func TestFeedStore_Register(t *testing.T) { 26 - store := setupTestFeedStore(t) 27 - 28 - t.Run("register new DID", func(t *testing.T) { 29 - err := store.Register("did:plc:user1") 30 - require.NoError(t, err) 31 - assert.True(t, store.IsRegistered("did:plc:user1")) 32 - }) 33 - 34 - t.Run("register is idempotent", func(t *testing.T) { 35 - err := store.Register("did:plc:user2") 36 - require.NoError(t, err) 37 - 38 - err = store.Register("did:plc:user2") 39 - require.NoError(t, err) 40 - 41 - assert.Equal(t, 1, countDID(store, "did:plc:user2")) 42 - }) 43 - } 44 - 45 - // countDID counts how many times a DID appears in the list (should be 0 or 1). 
46 - func countDID(store *FeedStore, did string) int { 47 - count := 0 48 - for _, d := range store.List() { 49 - if d == did { 50 - count++ 51 - } 52 - } 53 - return count 54 - } 55 - 56 - func TestFeedStore_Unregister(t *testing.T) { 57 - store := setupTestFeedStore(t) 58 - 59 - err := store.Register("did:plc:unreg") 60 - require.NoError(t, err) 61 - assert.True(t, store.IsRegistered("did:plc:unreg")) 62 - 63 - err = store.Unregister("did:plc:unreg") 64 - require.NoError(t, err) 65 - assert.False(t, store.IsRegistered("did:plc:unreg")) 66 - } 67 - 68 - func TestFeedStore_IsRegistered(t *testing.T) { 69 - store := setupTestFeedStore(t) 70 - 71 - assert.False(t, store.IsRegistered("did:plc:nobody")) 72 - 73 - store.Register("did:plc:somebody") 74 - assert.True(t, store.IsRegistered("did:plc:somebody")) 75 - } 76 - 77 - func TestFeedStore_List(t *testing.T) { 78 - store := setupTestFeedStore(t) 79 - 80 - t.Run("empty store", func(t *testing.T) { 81 - dids := store.List() 82 - assert.Empty(t, dids) 83 - }) 84 - 85 - t.Run("multiple registrations", func(t *testing.T) { 86 - store.Register("did:plc:a") 87 - store.Register("did:plc:b") 88 - store.Register("did:plc:c") 89 - 90 - dids := store.List() 91 - assert.Len(t, dids, 3) 92 - assert.Contains(t, dids, "did:plc:a") 93 - assert.Contains(t, dids, "did:plc:b") 94 - assert.Contains(t, dids, "did:plc:c") 95 - }) 96 - } 97 - 98 - func TestFeedStore_ListWithMetadata(t *testing.T) { 99 - store := setupTestFeedStore(t) 100 - 101 - store.Register("did:plc:meta1") 102 - store.Register("did:plc:meta2") 103 - 104 - users := store.ListWithMetadata() 105 - assert.Len(t, users, 2) 106 - 107 - for _, u := range users { 108 - assert.NotEmpty(t, u.DID) 109 - assert.False(t, u.RegisteredAt.IsZero()) 110 - } 111 - } 112 - 113 - func TestFeedStore_Count(t *testing.T) { 114 - store := setupTestFeedStore(t) 115 - 116 - assert.Equal(t, 0, store.Count()) 117 - 118 - store.Register("did:plc:c1") 119 - assert.Equal(t, 1, store.Count()) 120 - 
121 - store.Register("did:plc:c2") 122 - assert.Equal(t, 2, store.Count()) 123 - 124 - store.Unregister("did:plc:c1") 125 - assert.Equal(t, 1, store.Count()) 126 - } 127 - 128 - func TestFeedStore_Clear(t *testing.T) { 129 - store := setupTestFeedStore(t) 130 - 131 - store.Register("did:plc:clear1") 132 - store.Register("did:plc:clear2") 133 - assert.Equal(t, 2, store.Count()) 134 - 135 - err := store.Clear() 136 - require.NoError(t, err) 137 - assert.Equal(t, 0, store.Count()) 138 - assert.False(t, store.IsRegistered("did:plc:clear1")) 139 - }
-591
internal/database/boltstore/moderation_store.go
··· 1 - package boltstore 2 - 3 - import ( 4 - "context" 5 - "encoding/json" 6 - "fmt" 7 - "time" 8 - 9 - "arabica/internal/moderation" 10 - 11 - bolt "go.etcd.io/bbolt" 12 - ) 13 - 14 - // ModerationStore provides persistent storage for moderation data. 15 - type ModerationStore struct { 16 - db *bolt.DB 17 - } 18 - 19 - // HideRecord stores a hidden record entry. 20 - func (s *ModerationStore) HideRecord(ctx context.Context, entry moderation.HiddenRecord) error { 21 - return s.db.Update(func(tx *bolt.Tx) error { 22 - bucket := tx.Bucket(BucketModerationHiddenRecords) 23 - if bucket == nil { 24 - return fmt.Errorf("bucket not found: %s", BucketModerationHiddenRecords) 25 - } 26 - 27 - data, err := json.Marshal(entry) 28 - if err != nil { 29 - return fmt.Errorf("failed to marshal hidden record: %w", err) 30 - } 31 - 32 - return bucket.Put([]byte(entry.ATURI), data) 33 - }) 34 - } 35 - 36 - // UnhideRecord removes a record from the hidden list. 37 - func (s *ModerationStore) UnhideRecord(ctx context.Context, atURI string) error { 38 - return s.db.Update(func(tx *bolt.Tx) error { 39 - bucket := tx.Bucket(BucketModerationHiddenRecords) 40 - if bucket == nil { 41 - return nil 42 - } 43 - 44 - return bucket.Delete([]byte(atURI)) 45 - }) 46 - } 47 - 48 - // IsRecordHidden checks if a record is hidden. 49 - func (s *ModerationStore) IsRecordHidden(ctx context.Context, atURI string) bool { 50 - var hidden bool 51 - 52 - s.db.View(func(tx *bolt.Tx) error { 53 - bucket := tx.Bucket(BucketModerationHiddenRecords) 54 - if bucket == nil { 55 - return nil 56 - } 57 - 58 - hidden = bucket.Get([]byte(atURI)) != nil 59 - return nil 60 - }) 61 - 62 - return hidden 63 - } 64 - 65 - // GetHiddenRecord retrieves a hidden record entry by AT-URI. 
66 - func (s *ModerationStore) GetHiddenRecord(ctx context.Context, atURI string) (*moderation.HiddenRecord, error) { 67 - var record *moderation.HiddenRecord 68 - 69 - err := s.db.View(func(tx *bolt.Tx) error { 70 - bucket := tx.Bucket(BucketModerationHiddenRecords) 71 - if bucket == nil { 72 - return nil 73 - } 74 - 75 - data := bucket.Get([]byte(atURI)) 76 - if data == nil { 77 - return nil 78 - } 79 - 80 - record = &moderation.HiddenRecord{} 81 - return json.Unmarshal(data, record) 82 - }) 83 - 84 - return record, err 85 - } 86 - 87 - // ListHiddenRecords returns all hidden records. 88 - func (s *ModerationStore) ListHiddenRecords(ctx context.Context) ([]moderation.HiddenRecord, error) { 89 - var records []moderation.HiddenRecord 90 - 91 - err := s.db.View(func(tx *bolt.Tx) error { 92 - bucket := tx.Bucket(BucketModerationHiddenRecords) 93 - if bucket == nil { 94 - return nil 95 - } 96 - 97 - return bucket.ForEach(func(k, v []byte) error { 98 - var record moderation.HiddenRecord 99 - if err := json.Unmarshal(v, &record); err != nil { 100 - return err 101 - } 102 - records = append(records, record) 103 - return nil 104 - }) 105 - }) 106 - 107 - return records, err 108 - } 109 - 110 - // BlacklistUser adds a user to the blacklist. 111 - func (s *ModerationStore) BlacklistUser(ctx context.Context, entry moderation.BlacklistedUser) error { 112 - return s.db.Update(func(tx *bolt.Tx) error { 113 - bucket := tx.Bucket(BucketModerationBlacklist) 114 - if bucket == nil { 115 - return fmt.Errorf("bucket not found: %s", BucketModerationBlacklist) 116 - } 117 - 118 - data, err := json.Marshal(entry) 119 - if err != nil { 120 - return fmt.Errorf("failed to marshal blacklisted user: %w", err) 121 - } 122 - 123 - return bucket.Put([]byte(entry.DID), data) 124 - }) 125 - } 126 - 127 - // UnblacklistUser removes a user from the blacklist. 
128 - func (s *ModerationStore) UnblacklistUser(ctx context.Context, did string) error { 129 - return s.db.Update(func(tx *bolt.Tx) error { 130 - bucket := tx.Bucket(BucketModerationBlacklist) 131 - if bucket == nil { 132 - return nil 133 - } 134 - 135 - return bucket.Delete([]byte(did)) 136 - }) 137 - } 138 - 139 - // IsBlacklisted checks if a user is blacklisted. 140 - func (s *ModerationStore) IsBlacklisted(ctx context.Context, did string) bool { 141 - var blacklisted bool 142 - 143 - s.db.View(func(tx *bolt.Tx) error { 144 - bucket := tx.Bucket(BucketModerationBlacklist) 145 - if bucket == nil { 146 - return nil 147 - } 148 - 149 - blacklisted = bucket.Get([]byte(did)) != nil 150 - return nil 151 - }) 152 - 153 - return blacklisted 154 - } 155 - 156 - // GetBlacklistedUser retrieves a blacklisted user entry by DID. 157 - func (s *ModerationStore) GetBlacklistedUser(ctx context.Context, did string) (*moderation.BlacklistedUser, error) { 158 - var user *moderation.BlacklistedUser 159 - 160 - err := s.db.View(func(tx *bolt.Tx) error { 161 - bucket := tx.Bucket(BucketModerationBlacklist) 162 - if bucket == nil { 163 - return nil 164 - } 165 - 166 - data := bucket.Get([]byte(did)) 167 - if data == nil { 168 - return nil 169 - } 170 - 171 - user = &moderation.BlacklistedUser{} 172 - return json.Unmarshal(data, user) 173 - }) 174 - 175 - return user, err 176 - } 177 - 178 - // ListBlacklistedUsers returns all blacklisted users. 
179 - func (s *ModerationStore) ListBlacklistedUsers(ctx context.Context) ([]moderation.BlacklistedUser, error) { 180 - var users []moderation.BlacklistedUser 181 - 182 - err := s.db.View(func(tx *bolt.Tx) error { 183 - bucket := tx.Bucket(BucketModerationBlacklist) 184 - if bucket == nil { 185 - return nil 186 - } 187 - 188 - return bucket.ForEach(func(k, v []byte) error { 189 - var user moderation.BlacklistedUser 190 - if err := json.Unmarshal(v, &user); err != nil { 191 - return err 192 - } 193 - users = append(users, user) 194 - return nil 195 - }) 196 - }) 197 - 198 - return users, err 199 - } 200 - 201 - // CreateReport stores a new report. 202 - func (s *ModerationStore) CreateReport(ctx context.Context, report moderation.Report) error { 203 - return s.db.Update(func(tx *bolt.Tx) error { 204 - // Store the report 205 - bucket := tx.Bucket(BucketModerationReports) 206 - if bucket == nil { 207 - return fmt.Errorf("bucket not found: %s", BucketModerationReports) 208 - } 209 - 210 - data, err := json.Marshal(report) 211 - if err != nil { 212 - return fmt.Errorf("failed to marshal report: %w", err) 213 - } 214 - 215 - if err := bucket.Put([]byte(report.ID), data); err != nil { 216 - return err 217 - } 218 - 219 - // Index by subject URI 220 - uriIndex := tx.Bucket(BucketModerationReportsByURI) 221 - if uriIndex != nil { 222 - // Store report ID in a list for this URI 223 - key := []byte(report.SubjectURI + ":" + report.ID) 224 - if err := uriIndex.Put(key, []byte(report.ID)); err != nil { 225 - return err 226 - } 227 - } 228 - 229 - // Index by subject DID 230 - didIndex := tx.Bucket(BucketModerationReportsByDID) 231 - if didIndex != nil { 232 - // Store report ID in a list for this DID 233 - key := []byte(report.SubjectDID + ":" + report.ID) 234 - if err := didIndex.Put(key, []byte(report.ID)); err != nil { 235 - return err 236 - } 237 - } 238 - 239 - return nil 240 - }) 241 - } 242 - 243 - // GetReport retrieves a report by ID. 
244 - func (s *ModerationStore) GetReport(ctx context.Context, id string) (*moderation.Report, error) { 245 - var report *moderation.Report 246 - 247 - err := s.db.View(func(tx *bolt.Tx) error { 248 - bucket := tx.Bucket(BucketModerationReports) 249 - if bucket == nil { 250 - return nil 251 - } 252 - 253 - data := bucket.Get([]byte(id)) 254 - if data == nil { 255 - return nil 256 - } 257 - 258 - report = &moderation.Report{} 259 - return json.Unmarshal(data, report) 260 - }) 261 - 262 - return report, err 263 - } 264 - 265 - // ListPendingReports returns all reports with pending status. 266 - func (s *ModerationStore) ListPendingReports(ctx context.Context) ([]moderation.Report, error) { 267 - var reports []moderation.Report 268 - 269 - err := s.db.View(func(tx *bolt.Tx) error { 270 - bucket := tx.Bucket(BucketModerationReports) 271 - if bucket == nil { 272 - return nil 273 - } 274 - 275 - return bucket.ForEach(func(k, v []byte) error { 276 - var report moderation.Report 277 - if err := json.Unmarshal(v, &report); err != nil { 278 - return err 279 - } 280 - if report.Status == moderation.ReportStatusPending { 281 - reports = append(reports, report) 282 - } 283 - return nil 284 - }) 285 - }) 286 - 287 - return reports, err 288 - } 289 - 290 - // ListAllReports returns all reports regardless of status. 291 - func (s *ModerationStore) ListAllReports(ctx context.Context) ([]moderation.Report, error) { 292 - var reports []moderation.Report 293 - 294 - err := s.db.View(func(tx *bolt.Tx) error { 295 - bucket := tx.Bucket(BucketModerationReports) 296 - if bucket == nil { 297 - return nil 298 - } 299 - 300 - return bucket.ForEach(func(k, v []byte) error { 301 - var report moderation.Report 302 - if err := json.Unmarshal(v, &report); err != nil { 303 - return err 304 - } 305 - reports = append(reports, report) 306 - return nil 307 - }) 308 - }) 309 - 310 - return reports, err 311 - } 312 - 313 - // ResolveReport updates a report's status and resolution info. 
314 - func (s *ModerationStore) ResolveReport(ctx context.Context, id string, status moderation.ReportStatus, resolvedBy string) error { 315 - return s.db.Update(func(tx *bolt.Tx) error { 316 - bucket := tx.Bucket(BucketModerationReports) 317 - if bucket == nil { 318 - return fmt.Errorf("bucket not found: %s", BucketModerationReports) 319 - } 320 - 321 - data := bucket.Get([]byte(id)) 322 - if data == nil { 323 - return fmt.Errorf("report not found: %s", id) 324 - } 325 - 326 - var report moderation.Report 327 - if err := json.Unmarshal(data, &report); err != nil { 328 - return err 329 - } 330 - 331 - report.Status = status 332 - report.ResolvedBy = resolvedBy 333 - now := time.Now() 334 - report.ResolvedAt = &now 335 - 336 - newData, err := json.Marshal(report) 337 - if err != nil { 338 - return err 339 - } 340 - 341 - return bucket.Put([]byte(id), newData) 342 - }) 343 - } 344 - 345 - // CountReportsForURI returns the number of reports for a given AT-URI. 346 - func (s *ModerationStore) CountReportsForURI(ctx context.Context, atURI string) (int, error) { 347 - var count int 348 - 349 - err := s.db.View(func(tx *bolt.Tx) error { 350 - bucket := tx.Bucket(BucketModerationReportsByURI) 351 - if bucket == nil { 352 - return nil 353 - } 354 - 355 - cursor := bucket.Cursor() 356 - prefix := []byte(atURI + ":") 357 - 358 - for k, _ := cursor.Seek(prefix); k != nil && hasPrefix(k, prefix); k, _ = cursor.Next() { 359 - count++ 360 - } 361 - 362 - return nil 363 - }) 364 - 365 - return count, err 366 - } 367 - 368 - // CountReportsForDID returns the number of reports for content by a given DID. 
369 - func (s *ModerationStore) CountReportsForDID(ctx context.Context, did string) (int, error) { 370 - var count int 371 - 372 - err := s.db.View(func(tx *bolt.Tx) error { 373 - bucket := tx.Bucket(BucketModerationReportsByDID) 374 - if bucket == nil { 375 - return nil 376 - } 377 - 378 - cursor := bucket.Cursor() 379 - prefix := []byte(did + ":") 380 - 381 - for k, _ := cursor.Seek(prefix); k != nil && hasPrefix(k, prefix); k, _ = cursor.Next() { 382 - count++ 383 - } 384 - 385 - return nil 386 - }) 387 - 388 - return count, err 389 - } 390 - 391 - // HasReportedURI checks if a user has already reported a specific URI. 392 - func (s *ModerationStore) HasReportedURI(ctx context.Context, reporterDID, subjectURI string) (bool, error) { 393 - var found bool 394 - 395 - err := s.db.View(func(tx *bolt.Tx) error { 396 - bucket := tx.Bucket(BucketModerationReports) 397 - if bucket == nil { 398 - return nil 399 - } 400 - 401 - return bucket.ForEach(func(k, v []byte) error { 402 - var report moderation.Report 403 - if err := json.Unmarshal(v, &report); err != nil { 404 - return nil // Skip malformed entries 405 - } 406 - if report.ReporterDID == reporterDID && report.SubjectURI == subjectURI { 407 - found = true 408 - } 409 - return nil 410 - }) 411 - }) 412 - 413 - return found, err 414 - } 415 - 416 - // LogAction stores a moderation action in the audit log. 
417 - func (s *ModerationStore) LogAction(ctx context.Context, entry moderation.AuditEntry) error { 418 - return s.db.Update(func(tx *bolt.Tx) error { 419 - bucket := tx.Bucket(BucketModerationAuditLog) 420 - if bucket == nil { 421 - return fmt.Errorf("bucket not found: %s", BucketModerationAuditLog) 422 - } 423 - 424 - data, err := json.Marshal(entry) 425 - if err != nil { 426 - return fmt.Errorf("failed to marshal audit entry: %w", err) 427 - } 428 - 429 - // Use timestamp-based key for chronological ordering 430 - // Format: timestamp:id for uniqueness 431 - key := fmt.Sprintf("%d:%s", entry.Timestamp.UnixNano(), entry.ID) 432 - 433 - return bucket.Put([]byte(key), data) 434 - }) 435 - } 436 - 437 - // ListAuditLog returns the most recent audit log entries. 438 - // Entries are returned in reverse chronological order (newest first). 439 - func (s *ModerationStore) ListAuditLog(ctx context.Context, limit int) ([]moderation.AuditEntry, error) { 440 - var entries []moderation.AuditEntry 441 - 442 - err := s.db.View(func(tx *bolt.Tx) error { 443 - bucket := tx.Bucket(BucketModerationAuditLog) 444 - if bucket == nil { 445 - return nil 446 - } 447 - 448 - // Collect all entries first (BoltDB cursors iterate in key order) 449 - var all []moderation.AuditEntry 450 - err := bucket.ForEach(func(k, v []byte) error { 451 - var entry moderation.AuditEntry 452 - if err := json.Unmarshal(v, &entry); err != nil { 453 - return nil // Skip malformed entries 454 - } 455 - all = append(all, entry) 456 - return nil 457 - }) 458 - if err != nil { 459 - return err 460 - } 461 - 462 - // Reverse to get newest first 463 - for i := len(all) - 1; i >= 0 && len(entries) < limit; i-- { 464 - entries = append(entries, all[i]) 465 - } 466 - 467 - return nil 468 - }) 469 - 470 - return entries, err 471 - } 472 - 473 - // CountReportsFromUserSince counts reports submitted by a user since a given time. 474 - // Used for rate limiting report submissions. 
475 - func (s *ModerationStore) CountReportsFromUserSince(ctx context.Context, reporterDID string, since time.Time) (int, error) { 476 - var count int 477 - 478 - err := s.db.View(func(tx *bolt.Tx) error { 479 - bucket := tx.Bucket(BucketModerationReports) 480 - if bucket == nil { 481 - return nil 482 - } 483 - 484 - return bucket.ForEach(func(k, v []byte) error { 485 - var report moderation.Report 486 - if err := json.Unmarshal(v, &report); err != nil { 487 - return nil // Skip malformed entries 488 - } 489 - if report.ReporterDID == reporterDID && report.CreatedAt.After(since) { 490 - count++ 491 - } 492 - return nil 493 - }) 494 - }) 495 - 496 - return count, err 497 - } 498 - 499 - // SetAutoHideReset stores a reset timestamp for a user's auto-hide counter. 500 - // Reports created before this timestamp are ignored when checking the per-user auto-hide threshold. 501 - func (s *ModerationStore) SetAutoHideReset(ctx context.Context, did string, resetAt time.Time) error { 502 - return s.db.Update(func(tx *bolt.Tx) error { 503 - bucket := tx.Bucket(BucketModerationAutoHideResets) 504 - if bucket == nil { 505 - return fmt.Errorf("bucket not found: %s", BucketModerationAutoHideResets) 506 - } 507 - 508 - data, err := resetAt.MarshalBinary() 509 - if err != nil { 510 - return fmt.Errorf("failed to marshal reset time: %w", err) 511 - } 512 - 513 - return bucket.Put([]byte(did), data) 514 - }) 515 - } 516 - 517 - // GetAutoHideReset returns the auto-hide reset timestamp for a user, or zero time if none set. 
518 - func (s *ModerationStore) GetAutoHideReset(ctx context.Context, did string) (time.Time, error) { 519 - var resetAt time.Time 520 - 521 - err := s.db.View(func(tx *bolt.Tx) error { 522 - bucket := tx.Bucket(BucketModerationAutoHideResets) 523 - if bucket == nil { 524 - return nil 525 - } 526 - 527 - data := bucket.Get([]byte(did)) 528 - if data == nil { 529 - return nil 530 - } 531 - 532 - return resetAt.UnmarshalBinary(data) 533 - }) 534 - 535 - return resetAt, err 536 - } 537 - 538 - // CountReportsForDIDSince returns the number of reports for content by a given DID 539 - // created after the specified time. 540 - func (s *ModerationStore) CountReportsForDIDSince(ctx context.Context, did string, since time.Time) (int, error) { 541 - var count int 542 - 543 - err := s.db.View(func(tx *bolt.Tx) error { 544 - didIndex := tx.Bucket(BucketModerationReportsByDID) 545 - if didIndex == nil { 546 - return nil 547 - } 548 - 549 - reportsBucket := tx.Bucket(BucketModerationReports) 550 - if reportsBucket == nil { 551 - return nil 552 - } 553 - 554 - cursor := didIndex.Cursor() 555 - prefix := []byte(did + ":") 556 - 557 - for k, v := cursor.Seek(prefix); k != nil && hasPrefix(k, prefix); k, v = cursor.Next() { 558 - // v is the report ID 559 - reportData := reportsBucket.Get(v) 560 - if reportData == nil { 561 - continue 562 - } 563 - 564 - var report moderation.Report 565 - if err := json.Unmarshal(reportData, &report); err != nil { 566 - continue 567 - } 568 - 569 - if report.CreatedAt.After(since) { 570 - count++ 571 - } 572 - } 573 - 574 - return nil 575 - }) 576 - 577 - return count, err 578 - } 579 - 580 - // hasPrefix checks if a byte slice has a given prefix. 581 - func hasPrefix(s, prefix []byte) bool { 582 - if len(s) < len(prefix) { 583 - return false 584 - } 585 - for i, b := range prefix { 586 - if s[i] != b { 587 - return false 588 - } 589 - } 590 - return true 591 - }
-453
internal/database/boltstore/moderation_store_test.go
package boltstore

import (
	"context"
	"path/filepath"
	"testing"
	"time"

	"arabica/internal/moderation"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// setupTestModerationStore opens a fresh BoltDB in a per-test temp dir and
// returns its ModerationStore. The database is closed via t.Cleanup when the
// test finishes.
func setupTestModerationStore(t *testing.T) *ModerationStore {
	tmpDir := t.TempDir()
	dbPath := filepath.Join(tmpDir, "test.db")

	store, err := Open(Options{Path: dbPath})
	require.NoError(t, err)

	t.Cleanup(func() {
		store.Close()
	})

	return store.ModerationStore()
}

// TestHiddenRecords exercises hide/unhide/lookup/list for hidden records.
// Subtests share one store, so later subtests can see earlier writes.
func TestHiddenRecords(t *testing.T) {
	ctx := context.Background()
	store := setupTestModerationStore(t)

	t.Run("hide and check record", func(t *testing.T) {
		entry := moderation.HiddenRecord{
			ATURI:      "at://did:plc:test/app.bsky.feed.post/abc123",
			HiddenAt:   time.Now(),
			HiddenBy:   "did:plc:admin",
			Reason:     "Spam content",
			AutoHidden: false,
		}

		err := store.HideRecord(ctx, entry)
		require.NoError(t, err)

		assert.True(t, store.IsRecordHidden(ctx, entry.ATURI))
		assert.False(t, store.IsRecordHidden(ctx, "at://did:plc:other/app.bsky.feed.post/xyz"))
	})

	t.Run("get hidden record", func(t *testing.T) {
		uri := "at://did:plc:test/social.arabica.alpha.brew/get123"
		entry := moderation.HiddenRecord{
			ATURI:      uri,
			HiddenAt:   time.Now(),
			HiddenBy:   "did:plc:mod",
			Reason:     "Inappropriate",
			AutoHidden: true,
		}

		err := store.HideRecord(ctx, entry)
		require.NoError(t, err)

		retrieved, err := store.GetHiddenRecord(ctx, uri)
		require.NoError(t, err)
		require.NotNil(t, retrieved)

		assert.Equal(t, uri, retrieved.ATURI)
		assert.Equal(t, "did:plc:mod", retrieved.HiddenBy)
		assert.Equal(t, "Inappropriate", retrieved.Reason)
		assert.True(t, retrieved.AutoHidden)
	})

	t.Run("unhide record", func(t *testing.T) {
		uri := "at://did:plc:test/social.arabica.alpha.brew/unhide123"
		entry := moderation.HiddenRecord{
			ATURI:    uri,
			HiddenAt: time.Now(),
			HiddenBy: "did:plc:admin",
		}

		err := store.HideRecord(ctx, entry)
		require.NoError(t, err)
		assert.True(t, store.IsRecordHidden(ctx, uri))

		err = store.UnhideRecord(ctx, uri)
		require.NoError(t, err)
		assert.False(t, store.IsRecordHidden(ctx, uri))
	})

	t.Run("list hidden records", func(t *testing.T) {
		// Clear by unhiding previous test records
		store.UnhideRecord(ctx, "at://did:plc:test/app.bsky.feed.post/abc123")
		store.UnhideRecord(ctx, "at://did:plc:test/social.arabica.alpha.brew/get123")

		// Add fresh records
		for i := 0; i < 3; i++ {
			entry := moderation.HiddenRecord{
				ATURI:    "at://did:plc:list/social.arabica.alpha.brew/list" + string(rune('0'+i)),
				HiddenAt: time.Now(),
				HiddenBy: "did:plc:admin",
			}
			require.NoError(t, store.HideRecord(ctx, entry))
		}

		records, err := store.ListHiddenRecords(ctx)
		require.NoError(t, err)
		assert.GreaterOrEqual(t, len(records), 3)
	})
}

// TestBlacklist exercises blacklist/unblacklist/lookup/list for user DIDs.
func TestBlacklist(t *testing.T) {
	ctx := context.Background()
	store := setupTestModerationStore(t)

	t.Run("blacklist and check user", func(t *testing.T) {
		entry := moderation.BlacklistedUser{
			DID:           "did:plc:baduser",
			BlacklistedAt: time.Now(),
			BlacklistedBy: "did:plc:admin",
			Reason:        "Repeated violations",
		}

		err := store.BlacklistUser(ctx, entry)
		require.NoError(t, err)

		assert.True(t, store.IsBlacklisted(ctx, "did:plc:baduser"))
		assert.False(t, store.IsBlacklisted(ctx, "did:plc:gooduser"))
	})

	t.Run("get blacklisted user", func(t *testing.T) {
		did := "did:plc:getblacklist"
		entry := moderation.BlacklistedUser{
			DID:           did,
			BlacklistedAt: time.Now(),
			BlacklistedBy: "did:plc:admin",
			Reason:        "Test reason",
		}

		err := store.BlacklistUser(ctx, entry)
		require.NoError(t, err)

		retrieved, err := store.GetBlacklistedUser(ctx, did)
		require.NoError(t, err)
		require.NotNil(t, retrieved)

		assert.Equal(t, did, retrieved.DID)
		assert.Equal(t, "did:plc:admin", retrieved.BlacklistedBy)
		assert.Equal(t, "Test reason", retrieved.Reason)
	})

	t.Run("unblacklist user", func(t *testing.T) {
		did := "did:plc:unblacklist"
		entry := moderation.BlacklistedUser{
			DID:           did,
			BlacklistedAt: time.Now(),
			BlacklistedBy: "did:plc:admin",
		}

		err := store.BlacklistUser(ctx, entry)
		require.NoError(t, err)
		assert.True(t, store.IsBlacklisted(ctx, did))

		err = store.UnblacklistUser(ctx, did)
		require.NoError(t, err)
		assert.False(t, store.IsBlacklisted(ctx, did))
	})

	t.Run("list blacklisted users", func(t *testing.T) {
		// Relies on the earlier subtests having left at least one entry behind.
		users, err := store.ListBlacklistedUsers(ctx)
		require.NoError(t, err)
		assert.GreaterOrEqual(t, len(users), 1)
	})
}

// TestReports exercises report creation, lookup, resolution, counting,
// duplicate detection, and per-reporter rate-limit counting.
func TestReports(t *testing.T) {
	ctx := context.Background()
	store := setupTestModerationStore(t)

	t.Run("create and get report", func(t *testing.T) {
		report := moderation.Report{
			ID:          "report001",
			SubjectURI:  "at://did:plc:subject/social.arabica.alpha.brew/abc",
			SubjectDID:  "did:plc:subject",
			ReporterDID: "did:plc:reporter",
			Reason:      "This is spam",
			CreatedAt:   time.Now(),
			Status:      moderation.ReportStatusPending,
		}

		err := store.CreateReport(ctx, report)
		require.NoError(t, err)

		retrieved, err := store.GetReport(ctx, "report001")
		require.NoError(t, err)
		require.NotNil(t, retrieved)

		assert.Equal(t, "report001", retrieved.ID)
		assert.Equal(t, "did:plc:reporter", retrieved.ReporterDID)
		assert.Equal(t, moderation.ReportStatusPending, retrieved.Status)
	})

	t.Run("list pending reports", func(t *testing.T) {
		// Create a mix of pending and resolved reports
		pending := moderation.Report{
			ID:          "report_pending",
			SubjectURI:  "at://did:plc:sub/social.arabica.alpha.brew/p1",
			SubjectDID:  "did:plc:sub",
			ReporterDID: "did:plc:rep1",
			Status:      moderation.ReportStatusPending,
			CreatedAt:   time.Now(),
		}
		require.NoError(t, store.CreateReport(ctx, pending))

		dismissed := moderation.Report{
			ID:          "report_dismissed",
			SubjectURI:  "at://did:plc:sub/social.arabica.alpha.brew/p2",
			SubjectDID:  "did:plc:sub",
			ReporterDID: "did:plc:rep2",
			Status:      moderation.ReportStatusDismissed,
			CreatedAt:   time.Now(),
		}
		require.NoError(t, store.CreateReport(ctx, dismissed))

		reports, err := store.ListPendingReports(ctx)
		require.NoError(t, err)

		// Should only include pending reports
		for _, r := range reports {
			assert.Equal(t, moderation.ReportStatusPending, r.Status)
		}
	})

	t.Run("resolve report", func(t *testing.T) {
		report := moderation.Report{
			ID:          "report_to_resolve",
			SubjectURI:  "at://did:plc:sub/social.arabica.alpha.brew/resolve",
			SubjectDID:  "did:plc:sub",
			ReporterDID: "did:plc:rep",
			Status:      moderation.ReportStatusPending,
			CreatedAt:   time.Now(),
		}
		require.NoError(t, store.CreateReport(ctx, report))

		err := store.ResolveReport(ctx, "report_to_resolve", moderation.ReportStatusActioned, "did:plc:mod")
		require.NoError(t, err)

		retrieved, err := store.GetReport(ctx, "report_to_resolve")
		require.NoError(t, err)

		assert.Equal(t, moderation.ReportStatusActioned, retrieved.Status)
		assert.Equal(t, "did:plc:mod", retrieved.ResolvedBy)
		assert.NotNil(t, retrieved.ResolvedAt)
	})

	t.Run("count reports for URI", func(t *testing.T) {
		uri := "at://did:plc:counted/social.arabica.alpha.brew/count"

		// Three distinct reporters on the same URI.
		for i := 0; i < 3; i++ {
			report := moderation.Report{
				ID:          "count_uri_" + string(rune('0'+i)),
				SubjectURI:  uri,
				SubjectDID:  "did:plc:counted",
				ReporterDID: "did:plc:reporter" + string(rune('0'+i)),
				Status:      moderation.ReportStatusPending,
				CreatedAt:   time.Now(),
			}
			require.NoError(t, store.CreateReport(ctx, report))
		}

		count, err := store.CountReportsForURI(ctx, uri)
		require.NoError(t, err)
		assert.Equal(t, 3, count)
	})

	t.Run("count reports for DID", func(t *testing.T) {
		did := "did:plc:counteddid"

		// Two reports against different URIs owned by the same DID.
		for i := 0; i < 2; i++ {
			report := moderation.Report{
				ID:          "count_did_" + string(rune('0'+i)),
				SubjectURI:  "at://" + did + "/social.arabica.alpha.brew/post" + string(rune('0'+i)),
				SubjectDID:  did,
				ReporterDID: "did:plc:reporter",
				Status:      moderation.ReportStatusPending,
				CreatedAt:   time.Now(),
			}
			require.NoError(t, store.CreateReport(ctx, report))
		}

		count, err := store.CountReportsForDID(ctx, did)
		require.NoError(t, err)
		assert.Equal(t, 2, count)
	})

	t.Run("has reported URI", func(t *testing.T) {
		uri := "at://did:plc:hasreported/social.arabica.alpha.brew/check"
		reporter := "did:plc:checker"

		report := moderation.Report{
			ID:          "has_reported_check",
			SubjectURI:  uri,
			SubjectDID:  "did:plc:hasreported",
			ReporterDID: reporter,
			Status:      moderation.ReportStatusPending,
			CreatedAt:   time.Now(),
		}
		require.NoError(t, store.CreateReport(ctx, report))

		has, err := store.HasReportedURI(ctx, reporter, uri)
		require.NoError(t, err)
		assert.True(t, has)

		// A different reporter on the same URI has not reported it.
		has, err = store.HasReportedURI(ctx, "did:plc:other", uri)
		require.NoError(t, err)
		assert.False(t, has)
	})

	t.Run("count reports from user since", func(t *testing.T) {
		reporter := "did:plc:ratelimituser"
		now := time.Now()

		// Create reports at different times
		for i := 0; i < 5; i++ {
			report := moderation.Report{
				ID:          "ratelimit_" + string(rune('a'+i)),
				SubjectURI:  "at://did:plc:target/social.arabica.alpha.brew/rl" + string(rune('0'+i)),
				SubjectDID:  "did:plc:target",
				ReporterDID: reporter,
				Status:      moderation.ReportStatusPending,
				CreatedAt:   now.Add(-time.Duration(i*30) * time.Minute), // 0, -30, -60, -90, -120 mins
			}
			require.NoError(t, store.CreateReport(ctx, report))
		}

		// Count reports in the last hour (should be 2: 0min and -30min)
		oneHourAgo := now.Add(-1 * time.Hour)
		count, err := store.CountReportsFromUserSince(ctx, reporter, oneHourAgo)
		require.NoError(t, err)
		assert.Equal(t, 2, count)

		// Count reports in the last 2 hours (should be 4: 0, -30, -60, -90 mins)
		twoHoursAgo := now.Add(-2 * time.Hour)
		count, err = store.CountReportsFromUserSince(ctx, reporter, twoHoursAgo)
		require.NoError(t, err)
		assert.Equal(t, 4, count)

		// Count reports from a different user (should be 0)
		count, err = store.CountReportsFromUserSince(ctx, "did:plc:otheruser", oneHourAgo)
		require.NoError(t, err)
		assert.Equal(t, 0, count)
	})
}

// TestAuditLog exercises audit log writes, limited newest-first listing,
// and the AutoMod flag round-trip.
func TestAuditLog(t *testing.T) {
	ctx := context.Background()
	store := setupTestModerationStore(t)

	t.Run("log action", func(t *testing.T) {
		entry := moderation.AuditEntry{
			ID:        "audit001",
			Action:    moderation.AuditActionHideRecord,
			ActorDID:  "did:plc:mod",
			TargetURI: "at://did:plc:target/social.arabica.alpha.brew/abc",
			Reason:    "Spam",
			Timestamp: time.Now(),
			AutoMod:   false,
		}

		err := store.LogAction(ctx, entry)
		require.NoError(t, err)
	})

	t.Run("list audit log", func(t *testing.T) {
		// Add several entries with different timestamps
		now := time.Now()
		for i := 0; i < 5; i++ {
			entry := moderation.AuditEntry{
				ID:        "audit_list_" + string(rune('0'+i)),
				Action:    moderation.AuditActionHideRecord,
				ActorDID:  "did:plc:mod",
				TargetURI: "at://did:plc:target/social.arabica.alpha.brew/" + string(rune('0'+i)),
				Timestamp: now.Add(time.Duration(i) * time.Second),
			}
			require.NoError(t, store.LogAction(ctx, entry))
		}

		entries, err := store.ListAuditLog(ctx, 3)
		require.NoError(t, err)
		assert.Len(t, entries, 3)

		// Should be in reverse chronological order (newest first)
		for i := 1; i < len(entries); i++ {
			assert.True(t, entries[i-1].Timestamp.After(entries[i].Timestamp) ||
				entries[i-1].Timestamp.Equal(entries[i].Timestamp))
		}
	})

	t.Run("automod entry", func(t *testing.T) {
		entry := moderation.AuditEntry{
			ID:        "audit_automod",
			Action:    moderation.AuditActionHideRecord,
			ActorDID:  "automod",
			TargetURI: "at://did:plc:auto/social.arabica.alpha.brew/auto",
			Reason:    "Exceeded report threshold",
			Timestamp: time.Now(),
			AutoMod:   true,
		}

		err := store.LogAction(ctx, entry)
		require.NoError(t, err)

		entries, err := store.ListAuditLog(ctx, 100)
		require.NoError(t, err)

		var found bool
		for _, e := range entries {
			if e.ID == "audit_automod" {
				assert.True(t, e.AutoMod)
				found = true
				break
			}
		}
		assert.True(t, found, "automod entry not found")
	})
}

// TestNonExistentRecords verifies that lookups for missing keys return
// (nil, nil) rather than errors, and that resolving a missing report fails.
func TestNonExistentRecords(t *testing.T) {
	ctx := context.Background()
	store := setupTestModerationStore(t)

	t.Run("get nonexistent hidden record", func(t *testing.T) {
		record, err := store.GetHiddenRecord(ctx, "at://nonexistent")
		require.NoError(t, err)
		assert.Nil(t, record)
	})

	t.Run("get nonexistent blacklisted user", func(t *testing.T) {
		user, err := store.GetBlacklistedUser(ctx, "did:plc:nonexistent")
		require.NoError(t, err)
		assert.Nil(t, user)
	})

	t.Run("get nonexistent report", func(t *testing.T) {
		report, err := store.GetReport(ctx, "nonexistent")
		require.NoError(t, err)
		assert.Nil(t, report)
	})

	t.Run("resolve nonexistent report", func(t *testing.T) {
		err := store.ResolveReport(ctx, "nonexistent", moderation.ReportStatusDismissed, "did:plc:mod")
		assert.Error(t, err)
		assert.Contains(t, err.Error(), "not found")
	})
}
+1 -43
internal/database/boltstore/store.go
··· 1 1 // Package boltstore provides persistent storage using BoltDB (bbolt). 2 2 // It implements the oauth.ClientAuthStore interface for session persistence 3 - // and provides storage for the feed registry. 3 + // and provides storage for join requests. 4 4 package boltstore 5 5 6 6 import ( ··· 20 20 // BucketAuthRequests stores pending OAuth auth requests keyed by state 21 21 BucketAuthRequests = []byte("oauth_auth_requests") 22 22 23 - // BucketFeedRegistry stores registered user DIDs for the community feed 24 - BucketFeedRegistry = []byte("feed_registry") 25 - 26 - // BucketModerationHiddenRecords stores AT-URIs of hidden records 27 - BucketModerationHiddenRecords = []byte("moderation_hidden_records") 28 - 29 - // BucketModerationBlacklist stores blacklisted user DIDs 30 - BucketModerationBlacklist = []byte("moderation_blacklist") 31 - 32 - // BucketModerationReports stores user reports on content 33 - BucketModerationReports = []byte("moderation_reports") 34 - 35 - // BucketModerationReportsByURI indexes reports by subject AT-URI 36 - BucketModerationReportsByURI = []byte("moderation_reports_by_uri") 37 - 38 - // BucketModerationReportsByDID indexes reports by subject DID 39 - BucketModerationReportsByDID = []byte("moderation_reports_by_did") 40 - 41 - // BucketModerationAuditLog stores moderation action audit trail 42 - BucketModerationAuditLog = []byte("moderation_audit_log") 43 - 44 - // BucketModerationAutoHideResets stores DID -> timestamp for auto-hide counter resets 45 - BucketModerationAutoHideResets = []byte("moderation_autohide_resets") 46 - 47 23 // BucketJoinRequests stores PDS account join requests 48 24 BucketJoinRequests = []byte("join_requests") 49 25 ) ··· 110 86 buckets := [][]byte{ 111 87 BucketSessions, 112 88 BucketAuthRequests, 113 - BucketFeedRegistry, 114 - BucketModerationHiddenRecords, 115 - BucketModerationBlacklist, 116 - BucketModerationReports, 117 - BucketModerationReportsByURI, 118 - BucketModerationReportsByDID, 119 - 
BucketModerationAuditLog, 120 - BucketModerationAutoHideResets, 121 89 BucketJoinRequests, 122 90 } 123 91 ··· 154 122 // SessionStore returns an OAuth session store backed by this database. 155 123 func (s *Store) SessionStore() *SessionStore { 156 124 return &SessionStore{db: s.db} 157 - } 158 - 159 - // FeedStore returns a feed registry store backed by this database. 160 - func (s *Store) FeedStore() *FeedStore { 161 - return &FeedStore{db: s.db} 162 - } 163 - 164 - // ModerationStore returns a moderation store backed by this database. 165 - func (s *Store) ModerationStore() *ModerationStore { 166 - return &ModerationStore{db: s.db} 167 125 } 168 126 169 127 // JoinStore returns a join request store backed by this database.
+378
internal/database/sqlitestore/moderation.go
··· 1 + // Package sqlitestore provides SQLite-backed store implementations. 2 + package sqlitestore 3 + 4 + import ( 5 + "context" 6 + "database/sql" 7 + "encoding/json" 8 + "fmt" 9 + "time" 10 + 11 + "arabica/internal/moderation" 12 + ) 13 + 14 + // ModerationStore implements moderation.Store using SQLite. 15 + // It shares the database connection with the firehose FeedIndex. 16 + type ModerationStore struct { 17 + db *sql.DB 18 + } 19 + 20 + // NewModerationStore creates a ModerationStore backed by the given database. 21 + // The database must already have the moderation schema applied. 22 + func NewModerationStore(db *sql.DB) *ModerationStore { 23 + return &ModerationStore{db: db} 24 + } 25 + 26 + // Ensure ModerationStore implements the interface at compile time. 27 + var _ moderation.Store = (*ModerationStore)(nil) 28 + 29 + // ========== Hidden Records ========== 30 + 31 + func (s *ModerationStore) HideRecord(ctx context.Context, entry moderation.HiddenRecord) error { 32 + autoHidden := 0 33 + if entry.AutoHidden { 34 + autoHidden = 1 35 + } 36 + _, err := s.db.ExecContext(ctx, ` 37 + INSERT INTO moderation_hidden_records (uri, hidden_at, hidden_by, reason, auto_hidden) 38 + VALUES (?, ?, ?, ?, ?) 
39 + ON CONFLICT(uri) DO UPDATE SET 40 + hidden_at = excluded.hidden_at, 41 + hidden_by = excluded.hidden_by, 42 + reason = excluded.reason, 43 + auto_hidden = excluded.auto_hidden 44 + `, entry.ATURI, entry.HiddenAt.Format(time.RFC3339Nano), entry.HiddenBy, entry.Reason, autoHidden) 45 + if err != nil { 46 + return fmt.Errorf("hide record: %w", err) 47 + } 48 + return nil 49 + } 50 + 51 + func (s *ModerationStore) UnhideRecord(ctx context.Context, atURI string) error { 52 + _, err := s.db.ExecContext(ctx, `DELETE FROM moderation_hidden_records WHERE uri = ?`, atURI) 53 + return err 54 + } 55 + 56 + func (s *ModerationStore) IsRecordHidden(ctx context.Context, atURI string) bool { 57 + var exists int 58 + _ = s.db.QueryRowContext(ctx, `SELECT 1 FROM moderation_hidden_records WHERE uri = ?`, atURI).Scan(&exists) 59 + return exists == 1 60 + } 61 + 62 + func (s *ModerationStore) GetHiddenRecord(ctx context.Context, atURI string) (*moderation.HiddenRecord, error) { 63 + var r moderation.HiddenRecord 64 + var hiddenAtStr string 65 + var autoHidden int 66 + err := s.db.QueryRowContext(ctx, ` 67 + SELECT uri, hidden_at, hidden_by, reason, auto_hidden 68 + FROM moderation_hidden_records WHERE uri = ? 
69 + `, atURI).Scan(&r.ATURI, &hiddenAtStr, &r.HiddenBy, &r.Reason, &autoHidden) 70 + if err == sql.ErrNoRows { 71 + return nil, nil 72 + } 73 + if err != nil { 74 + return nil, err 75 + } 76 + r.HiddenAt, _ = time.Parse(time.RFC3339Nano, hiddenAtStr) 77 + r.AutoHidden = autoHidden == 1 78 + return &r, nil 79 + } 80 + 81 + func (s *ModerationStore) ListHiddenRecords(ctx context.Context) ([]moderation.HiddenRecord, error) { 82 + rows, err := s.db.QueryContext(ctx, ` 83 + SELECT uri, hidden_at, hidden_by, reason, auto_hidden 84 + FROM moderation_hidden_records ORDER BY hidden_at DESC 85 + `) 86 + if err != nil { 87 + return nil, err 88 + } 89 + defer rows.Close() 90 + 91 + var records []moderation.HiddenRecord 92 + for rows.Next() { 93 + var r moderation.HiddenRecord 94 + var hiddenAtStr string 95 + var autoHidden int 96 + if err := rows.Scan(&r.ATURI, &hiddenAtStr, &r.HiddenBy, &r.Reason, &autoHidden); err != nil { 97 + continue 98 + } 99 + r.HiddenAt, _ = time.Parse(time.RFC3339Nano, hiddenAtStr) 100 + r.AutoHidden = autoHidden == 1 101 + records = append(records, r) 102 + } 103 + return records, rows.Err() 104 + } 105 + 106 + // ========== Blacklist ========== 107 + 108 + func (s *ModerationStore) BlacklistUser(ctx context.Context, entry moderation.BlacklistedUser) error { 109 + _, err := s.db.ExecContext(ctx, ` 110 + INSERT INTO moderation_blacklist (did, blacklisted_at, blacklisted_by, reason) 111 + VALUES (?, ?, ?, ?) 
112 + ON CONFLICT(did) DO UPDATE SET 113 + blacklisted_at = excluded.blacklisted_at, 114 + blacklisted_by = excluded.blacklisted_by, 115 + reason = excluded.reason 116 + `, entry.DID, entry.BlacklistedAt.Format(time.RFC3339Nano), entry.BlacklistedBy, entry.Reason) 117 + if err != nil { 118 + return fmt.Errorf("blacklist user: %w", err) 119 + } 120 + return nil 121 + } 122 + 123 + func (s *ModerationStore) UnblacklistUser(ctx context.Context, did string) error { 124 + _, err := s.db.ExecContext(ctx, `DELETE FROM moderation_blacklist WHERE did = ?`, did) 125 + return err 126 + } 127 + 128 + func (s *ModerationStore) IsBlacklisted(ctx context.Context, did string) bool { 129 + var exists int 130 + _ = s.db.QueryRowContext(ctx, `SELECT 1 FROM moderation_blacklist WHERE did = ?`, did).Scan(&exists) 131 + return exists == 1 132 + } 133 + 134 + func (s *ModerationStore) GetBlacklistedUser(ctx context.Context, did string) (*moderation.BlacklistedUser, error) { 135 + var u moderation.BlacklistedUser 136 + var blacklistedAtStr string 137 + err := s.db.QueryRowContext(ctx, ` 138 + SELECT did, blacklisted_at, blacklisted_by, reason 139 + FROM moderation_blacklist WHERE did = ? 
140 + `, did).Scan(&u.DID, &blacklistedAtStr, &u.BlacklistedBy, &u.Reason) 141 + if err == sql.ErrNoRows { 142 + return nil, nil 143 + } 144 + if err != nil { 145 + return nil, err 146 + } 147 + u.BlacklistedAt, _ = time.Parse(time.RFC3339Nano, blacklistedAtStr) 148 + return &u, nil 149 + } 150 + 151 + func (s *ModerationStore) ListBlacklistedUsers(ctx context.Context) ([]moderation.BlacklistedUser, error) { 152 + rows, err := s.db.QueryContext(ctx, ` 153 + SELECT did, blacklisted_at, blacklisted_by, reason 154 + FROM moderation_blacklist ORDER BY blacklisted_at DESC 155 + `) 156 + if err != nil { 157 + return nil, err 158 + } 159 + defer rows.Close() 160 + 161 + var users []moderation.BlacklistedUser 162 + for rows.Next() { 163 + var u moderation.BlacklistedUser 164 + var blacklistedAtStr string 165 + if err := rows.Scan(&u.DID, &blacklistedAtStr, &u.BlacklistedBy, &u.Reason); err != nil { 166 + continue 167 + } 168 + u.BlacklistedAt, _ = time.Parse(time.RFC3339Nano, blacklistedAtStr) 169 + users = append(users, u) 170 + } 171 + return users, rows.Err() 172 + } 173 + 174 + // ========== Reports ========== 175 + 176 + func (s *ModerationStore) CreateReport(ctx context.Context, report moderation.Report) error { 177 + _, err := s.db.ExecContext(ctx, ` 178 + INSERT INTO moderation_reports 179 + (id, subject_uri, subject_did, reporter_did, reason, created_at, status, resolved_by, resolved_at) 180 + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
181 + `, report.ID, report.SubjectURI, report.SubjectDID, report.ReporterDID, report.Reason, 182 + report.CreatedAt.Format(time.RFC3339Nano), string(report.Status), report.ResolvedBy, nil) 183 + if err != nil { 184 + return fmt.Errorf("create report: %w", err) 185 + } 186 + return nil 187 + } 188 + 189 + func (s *ModerationStore) GetReport(ctx context.Context, id string) (*moderation.Report, error) { 190 + var r moderation.Report 191 + var createdAtStr string 192 + var resolvedAtStr sql.NullString 193 + err := s.db.QueryRowContext(ctx, ` 194 + SELECT id, subject_uri, subject_did, reporter_did, reason, created_at, status, resolved_by, resolved_at 195 + FROM moderation_reports WHERE id = ? 196 + `, id).Scan(&r.ID, &r.SubjectURI, &r.SubjectDID, &r.ReporterDID, &r.Reason, 197 + &createdAtStr, &r.Status, &r.ResolvedBy, &resolvedAtStr) 198 + if err == sql.ErrNoRows { 199 + return nil, nil 200 + } 201 + if err != nil { 202 + return nil, err 203 + } 204 + r.CreatedAt, _ = time.Parse(time.RFC3339Nano, createdAtStr) 205 + if resolvedAtStr.Valid { 206 + t, _ := time.Parse(time.RFC3339Nano, resolvedAtStr.String) 207 + r.ResolvedAt = &t 208 + } 209 + return &r, nil 210 + } 211 + 212 + func (s *ModerationStore) ListPendingReports(ctx context.Context) ([]moderation.Report, error) { 213 + return s.listReports(ctx, `WHERE status = 'pending' ORDER BY created_at DESC`) 214 + } 215 + 216 + func (s *ModerationStore) ListAllReports(ctx context.Context) ([]moderation.Report, error) { 217 + return s.listReports(ctx, `ORDER BY created_at DESC`) 218 + } 219 + 220 + func (s *ModerationStore) listReports(ctx context.Context, clause string) ([]moderation.Report, error) { 221 + rows, err := s.db.QueryContext(ctx, ` 222 + SELECT id, subject_uri, subject_did, reporter_did, reason, created_at, status, resolved_by, resolved_at 223 + FROM moderation_reports `+clause) 224 + if err != nil { 225 + return nil, err 226 + } 227 + defer rows.Close() 228 + return scanReports(rows) 229 + } 230 + 231 + func 
scanReports(rows *sql.Rows) ([]moderation.Report, error) { 232 + var reports []moderation.Report 233 + for rows.Next() { 234 + var r moderation.Report 235 + var createdAtStr string 236 + var resolvedAtStr sql.NullString 237 + if err := rows.Scan(&r.ID, &r.SubjectURI, &r.SubjectDID, &r.ReporterDID, &r.Reason, 238 + &createdAtStr, &r.Status, &r.ResolvedBy, &resolvedAtStr); err != nil { 239 + continue 240 + } 241 + r.CreatedAt, _ = time.Parse(time.RFC3339Nano, createdAtStr) 242 + if resolvedAtStr.Valid { 243 + t, _ := time.Parse(time.RFC3339Nano, resolvedAtStr.String) 244 + r.ResolvedAt = &t 245 + } 246 + reports = append(reports, r) 247 + } 248 + return reports, rows.Err() 249 + } 250 + 251 + func (s *ModerationStore) ResolveReport(ctx context.Context, id string, status moderation.ReportStatus, resolvedBy string) error { 252 + now := time.Now().Format(time.RFC3339Nano) 253 + res, err := s.db.ExecContext(ctx, ` 254 + UPDATE moderation_reports SET status = ?, resolved_by = ?, resolved_at = ? WHERE id = ? 
255 + `, string(status), resolvedBy, now, id) 256 + if err != nil { 257 + return fmt.Errorf("resolve report: %w", err) 258 + } 259 + n, _ := res.RowsAffected() 260 + if n == 0 { 261 + return fmt.Errorf("report not found: %s", id) 262 + } 263 + return nil 264 + } 265 + 266 + func (s *ModerationStore) CountReportsForURI(ctx context.Context, atURI string) (int, error) { 267 + var count int 268 + err := s.db.QueryRowContext(ctx, `SELECT COUNT(*) FROM moderation_reports WHERE subject_uri = ?`, atURI).Scan(&count) 269 + return count, err 270 + } 271 + 272 + func (s *ModerationStore) CountReportsForDID(ctx context.Context, did string) (int, error) { 273 + var count int 274 + err := s.db.QueryRowContext(ctx, `SELECT COUNT(*) FROM moderation_reports WHERE subject_did = ?`, did).Scan(&count) 275 + return count, err 276 + } 277 + 278 + func (s *ModerationStore) CountReportsForDIDSince(ctx context.Context, did string, since time.Time) (int, error) { 279 + var count int 280 + err := s.db.QueryRowContext(ctx, ` 281 + SELECT COUNT(*) FROM moderation_reports WHERE subject_did = ? AND created_at > ? 282 + `, did, since.Format(time.RFC3339Nano)).Scan(&count) 283 + return count, err 284 + } 285 + 286 + func (s *ModerationStore) HasReportedURI(ctx context.Context, reporterDID, subjectURI string) (bool, error) { 287 + var exists int 288 + err := s.db.QueryRowContext(ctx, ` 289 + SELECT 1 FROM moderation_reports WHERE reporter_did = ? AND subject_uri = ? LIMIT 1 290 + `, reporterDID, subjectURI).Scan(&exists) 291 + if err == sql.ErrNoRows { 292 + return false, nil 293 + } 294 + return exists == 1, err 295 + } 296 + 297 + func (s *ModerationStore) CountReportsFromUserSince(ctx context.Context, reporterDID string, since time.Time) (int, error) { 298 + var count int 299 + err := s.db.QueryRowContext(ctx, ` 300 + SELECT COUNT(*) FROM moderation_reports WHERE reporter_did = ? AND created_at > ? 
301 + `, reporterDID, since.Format(time.RFC3339Nano)).Scan(&count) 302 + return count, err 303 + } 304 + 305 + // ========== Audit Log ========== 306 + 307 + func (s *ModerationStore) LogAction(ctx context.Context, entry moderation.AuditEntry) error { 308 + details, err := json.Marshal(entry.Details) 309 + if err != nil { 310 + details = []byte("{}") 311 + } 312 + autoMod := 0 313 + if entry.AutoMod { 314 + autoMod = 1 315 + } 316 + _, err = s.db.ExecContext(ctx, ` 317 + INSERT INTO moderation_audit_log (id, action, actor_did, target_uri, reason, details, timestamp, auto_mod) 318 + VALUES (?, ?, ?, ?, ?, ?, ?, ?) 319 + `, entry.ID, string(entry.Action), entry.ActorDID, entry.TargetURI, entry.Reason, 320 + string(details), entry.Timestamp.Format(time.RFC3339Nano), autoMod) 321 + if err != nil { 322 + return fmt.Errorf("log action: %w", err) 323 + } 324 + return nil 325 + } 326 + 327 + func (s *ModerationStore) ListAuditLog(ctx context.Context, limit int) ([]moderation.AuditEntry, error) { 328 + rows, err := s.db.QueryContext(ctx, ` 329 + SELECT id, action, actor_did, target_uri, reason, details, timestamp, auto_mod 330 + FROM moderation_audit_log ORDER BY timestamp DESC LIMIT ? 
331 + `, limit) 332 + if err != nil { 333 + return nil, err 334 + } 335 + defer rows.Close() 336 + 337 + var entries []moderation.AuditEntry 338 + for rows.Next() { 339 + var e moderation.AuditEntry 340 + var timestampStr, detailsStr string 341 + var autoMod int 342 + if err := rows.Scan(&e.ID, &e.Action, &e.ActorDID, &e.TargetURI, &e.Reason, 343 + &detailsStr, &timestampStr, &autoMod); err != nil { 344 + continue 345 + } 346 + e.Timestamp, _ = time.Parse(time.RFC3339Nano, timestampStr) 347 + e.AutoMod = autoMod == 1 348 + _ = json.Unmarshal([]byte(detailsStr), &e.Details) 349 + entries = append(entries, e) 350 + } 351 + return entries, rows.Err() 352 + } 353 + 354 + // ========== Auto-hide Resets ========== 355 + 356 + func (s *ModerationStore) SetAutoHideReset(ctx context.Context, did string, resetAt time.Time) error { 357 + _, err := s.db.ExecContext(ctx, ` 358 + INSERT INTO moderation_autohide_resets (did, reset_at) VALUES (?, ?) 359 + ON CONFLICT(did) DO UPDATE SET reset_at = excluded.reset_at 360 + `, did, resetAt.Format(time.RFC3339Nano)) 361 + if err != nil { 362 + return fmt.Errorf("set autohide reset: %w", err) 363 + } 364 + return nil 365 + } 366 + 367 + func (s *ModerationStore) GetAutoHideReset(ctx context.Context, did string) (time.Time, error) { 368 + var resetAtStr string 369 + err := s.db.QueryRowContext(ctx, `SELECT reset_at FROM moderation_autohide_resets WHERE did = ?`, did).Scan(&resetAtStr) 370 + if err == sql.ErrNoRows { 371 + return time.Time{}, nil 372 + } 373 + if err != nil { 374 + return time.Time{}, err 375 + } 376 + t, _ := time.Parse(time.RFC3339Nano, resetAtStr) 377 + return t, nil 378 + }
+2 -2
internal/firehose/config.go
··· 1 1 // Package firehose provides real-time AT Protocol event consumption via Jetstream. 2 - // It indexes Arabica records into a local BoltDB database for fast feed queries. 2 + // It indexes Arabica records into a local SQLite database for fast feed queries. 3 3 package firehose 4 4 5 5 import ( ··· 36 36 // Compress enables zstd compression (~56% bandwidth reduction) 37 37 Compress bool 38 38 39 - // IndexPath is the path to the BoltDB feed index database 39 + // IndexPath is the path to the SQLite feed index database 40 40 IndexPath string 41 41 42 42 // ProfileCacheTTL is how long to cache profile data
+479 -890
internal/firehose/index.go
··· 1 1 package firehose 2 2 3 3 import ( 4 - "bytes" 5 4 "context" 6 - "encoding/binary" 7 - "encoding/hex" 5 + "database/sql" 8 6 "encoding/json" 9 7 "fmt" 10 8 "os" ··· 19 17 "arabica/internal/models" 20 18 21 19 "github.com/rs/zerolog/log" 22 - bolt "go.etcd.io/bbolt" 23 - ) 24 - 25 - // Bucket names for the feed index 26 - var ( 27 - // BucketRecords stores full record data: {at-uri} -> {IndexedRecord JSON} 28 - BucketRecords = []byte("records") 29 - 30 - // BucketByTime stores records by timestamp for chronological queries: {timestamp:at-uri} -> {} 31 - BucketByTime = []byte("by_time") 32 - 33 - // BucketByDID stores records by DID for user-specific queries: {did:at-uri} -> {} 34 - BucketByDID = []byte("by_did") 35 - 36 - // BucketByCollection stores records by type: {collection:timestamp:at-uri} -> {} 37 - BucketByCollection = []byte("by_collection") 38 - 39 - // BucketProfiles stores cached profile data: {did} -> {CachedProfile JSON} 40 - BucketProfiles = []byte("profiles") 41 - 42 - // BucketMeta stores metadata like cursor position: {key} -> {value} 43 - BucketMeta = []byte("meta") 44 - 45 - // BucketKnownDIDs stores all DIDs we've seen with Arabica records 46 - BucketKnownDIDs = []byte("known_dids") 47 - 48 - // BucketBackfilled stores DIDs that have been backfilled: {did} -> {timestamp} 49 - BucketBackfilled = []byte("backfilled") 50 - 51 - // BucketLikes stores like mappings: {subject_uri:actor_did} -> {rkey} 52 - BucketLikes = []byte("likes") 53 - 54 - // BucketLikeCounts stores aggregated like counts: {subject_uri} -> {uint64 count} 55 - BucketLikeCounts = []byte("like_counts") 56 - 57 - // BucketLikesByActor stores likes by actor for lookup: {actor_did:subject_uri} -> {rkey} 58 - BucketLikesByActor = []byte("likes_by_actor") 59 - 60 - // BucketComments stores comment data: {subject_uri:timestamp:actor_did} -> {comment JSON} 61 - BucketComments = []byte("comments") 62 - 63 - // BucketCommentCounts stores aggregated comment counts: {subject_uri} -> 
{uint64 count} 64 - BucketCommentCounts = []byte("comment_counts") 65 - 66 - // BucketCommentsByActor stores comments by actor for lookup: {actor_did:rkey} -> {subject_uri} 67 - BucketCommentsByActor = []byte("comments_by_actor") 68 - 69 - // BucketCommentChildren stores parent-child relationships: {parent_uri:child_rkey} -> {child_actor_did} 70 - BucketCommentChildren = []byte("comment_children") 20 + _ "modernc.org/sqlite" 71 21 ) 72 22 73 23 // FeedableRecordTypes are the record types that should appear as feed items. ··· 89 39 Record json.RawMessage `json:"record"` 90 40 CID string `json:"cid"` 91 41 IndexedAt time.Time `json:"indexed_at"` 92 - CreatedAt time.Time `json:"created_at"` // Parsed from record 42 + CreatedAt time.Time `json:"created_at"` 93 43 } 94 44 95 45 // CachedProfile stores profile data with TTL ··· 101 51 102 52 // FeedIndex provides persistent storage for firehose events 103 53 type FeedIndex struct { 104 - db *bolt.DB 54 + db *sql.DB 105 55 publicClient *atproto.PublicClient 106 56 profileTTL time.Duration 107 57 ··· 123 73 124 74 // FeedQuery specifies filtering, sorting, and pagination for feed queries 125 75 type FeedQuery struct { 126 - Limit int // Max items to return 127 - Cursor string // Opaque cursor for pagination (base64-encoded time key) 128 - TypeFilter lexicons.RecordType // Filter to a specific record type (empty = all) 129 - Sort FeedSort // Sort order (default: recent) 76 + Limit int // Max items to return 77 + Cursor string // Opaque cursor for pagination (created_at|uri) 78 + TypeFilter lexicons.RecordType // Filter to a specific record type (empty = all) 79 + Sort FeedSort // Sort order (default: recent) 130 80 } 131 81 132 82 // FeedResult contains feed items plus pagination info ··· 135 85 NextCursor string // Empty if no more results 136 86 } 137 87 138 - // NewFeedIndex creates a new feed index backed by BoltDB 88 + const schemaNoTrailingPragma = ` 89 + CREATE TABLE IF NOT EXISTS records ( 90 + uri TEXT PRIMARY KEY, 
91 + did TEXT NOT NULL, 92 + collection TEXT NOT NULL, 93 + rkey TEXT NOT NULL, 94 + record TEXT NOT NULL, 95 + cid TEXT NOT NULL DEFAULT '', 96 + indexed_at TEXT NOT NULL, 97 + created_at TEXT NOT NULL 98 + ); 99 + CREATE INDEX IF NOT EXISTS idx_records_created ON records(created_at DESC); 100 + CREATE INDEX IF NOT EXISTS idx_records_did ON records(did); 101 + CREATE INDEX IF NOT EXISTS idx_records_coll_created ON records(collection, created_at DESC); 102 + 103 + CREATE TABLE IF NOT EXISTS meta ( 104 + key TEXT PRIMARY KEY, 105 + value BLOB 106 + ); 107 + 108 + CREATE TABLE IF NOT EXISTS known_dids (did TEXT PRIMARY KEY); 109 + CREATE TABLE IF NOT EXISTS backfilled (did TEXT PRIMARY KEY, backfilled_at TEXT NOT NULL); 110 + 111 + CREATE TABLE IF NOT EXISTS profiles ( 112 + did TEXT PRIMARY KEY, 113 + data TEXT NOT NULL, 114 + expires_at TEXT NOT NULL 115 + ); 116 + 117 + CREATE TABLE IF NOT EXISTS likes ( 118 + subject_uri TEXT NOT NULL, 119 + actor_did TEXT NOT NULL, 120 + rkey TEXT NOT NULL, 121 + PRIMARY KEY (subject_uri, actor_did) 122 + ); 123 + CREATE INDEX IF NOT EXISTS idx_likes_actor ON likes(actor_did, subject_uri); 124 + 125 + CREATE TABLE IF NOT EXISTS comments ( 126 + actor_did TEXT NOT NULL, 127 + rkey TEXT NOT NULL, 128 + subject_uri TEXT NOT NULL, 129 + parent_uri TEXT NOT NULL DEFAULT '', 130 + parent_rkey TEXT NOT NULL DEFAULT '', 131 + cid TEXT NOT NULL DEFAULT '', 132 + text TEXT NOT NULL, 133 + created_at TEXT NOT NULL, 134 + PRIMARY KEY (actor_did, rkey) 135 + ); 136 + CREATE INDEX IF NOT EXISTS idx_comments_subject ON comments(subject_uri, created_at); 137 + 138 + CREATE TABLE IF NOT EXISTS notifications ( 139 + id TEXT NOT NULL, 140 + target_did TEXT NOT NULL, 141 + type TEXT NOT NULL, 142 + actor_did TEXT NOT NULL, 143 + subject_uri TEXT NOT NULL, 144 + created_at TEXT NOT NULL 145 + ); 146 + CREATE INDEX IF NOT EXISTS idx_notif_target ON notifications(target_did, created_at DESC); 147 + CREATE UNIQUE INDEX IF NOT EXISTS idx_notif_dedup ON 
notifications(target_did, type, actor_did, subject_uri); 148 + 149 + CREATE TABLE IF NOT EXISTS notifications_meta ( 150 + target_did TEXT PRIMARY KEY, 151 + last_read TEXT NOT NULL 152 + ); 153 + 154 + CREATE TABLE IF NOT EXISTS moderation_hidden_records ( 155 + uri TEXT PRIMARY KEY, 156 + hidden_at TEXT NOT NULL, 157 + hidden_by TEXT NOT NULL, 158 + reason TEXT NOT NULL DEFAULT '', 159 + auto_hidden INTEGER NOT NULL DEFAULT 0 160 + ); 161 + 162 + CREATE TABLE IF NOT EXISTS moderation_blacklist ( 163 + did TEXT PRIMARY KEY, 164 + blacklisted_at TEXT NOT NULL, 165 + blacklisted_by TEXT NOT NULL, 166 + reason TEXT NOT NULL DEFAULT '' 167 + ); 168 + 169 + CREATE TABLE IF NOT EXISTS moderation_reports ( 170 + id TEXT PRIMARY KEY, 171 + subject_uri TEXT NOT NULL DEFAULT '', 172 + subject_did TEXT NOT NULL DEFAULT '', 173 + reporter_did TEXT NOT NULL, 174 + reason TEXT NOT NULL, 175 + created_at TEXT NOT NULL, 176 + status TEXT NOT NULL DEFAULT 'pending', 177 + resolved_by TEXT NOT NULL DEFAULT '', 178 + resolved_at TEXT 179 + ); 180 + CREATE INDEX IF NOT EXISTS idx_modreports_uri ON moderation_reports(subject_uri); 181 + CREATE INDEX IF NOT EXISTS idx_modreports_did ON moderation_reports(subject_did); 182 + CREATE INDEX IF NOT EXISTS idx_modreports_reporter ON moderation_reports(reporter_did, created_at); 183 + CREATE INDEX IF NOT EXISTS idx_modreports_status ON moderation_reports(status); 184 + 185 + CREATE TABLE IF NOT EXISTS moderation_audit_log ( 186 + id TEXT PRIMARY KEY, 187 + action TEXT NOT NULL, 188 + actor_did TEXT NOT NULL, 189 + target_uri TEXT NOT NULL DEFAULT '', 190 + reason TEXT NOT NULL DEFAULT '', 191 + details TEXT NOT NULL DEFAULT '{}', 192 + timestamp TEXT NOT NULL, 193 + auto_mod INTEGER NOT NULL DEFAULT 0 194 + ); 195 + CREATE INDEX IF NOT EXISTS idx_modaudit_ts ON moderation_audit_log(timestamp DESC); 196 + 197 + CREATE TABLE IF NOT EXISTS moderation_autohide_resets ( 198 + did TEXT PRIMARY KEY, 199 + reset_at TEXT NOT NULL 200 + ); 201 + ` 202 
+ 203 + // NewFeedIndex creates a new feed index backed by SQLite 139 204 func NewFeedIndex(path string, profileTTL time.Duration) (*FeedIndex, error) { 140 205 if path == "" { 141 206 return nil, fmt.Errorf("index path is required") ··· 149 214 } 150 215 } 151 216 152 - db, err := bolt.Open(path, 0600, &bolt.Options{ 153 - Timeout: 5 * time.Second, 154 - }) 217 + db, err := sql.Open("sqlite", "file:"+path+"?_pragma=busy_timeout(5000)&_pragma=journal_mode(WAL)&_pragma=synchronous(NORMAL)&_pragma=foreign_keys(ON)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(134217728)&_pragma=cache_size(-65536)") 155 218 if err != nil { 156 219 return nil, fmt.Errorf("failed to open index database: %w", err) 157 220 } 158 221 159 - // Create buckets 160 - err = db.Update(func(tx *bolt.Tx) error { 161 - buckets := [][]byte{ 162 - BucketRecords, 163 - BucketByTime, 164 - BucketByDID, 165 - BucketByCollection, 166 - BucketProfiles, 167 - BucketMeta, 168 - BucketKnownDIDs, 169 - BucketBackfilled, 170 - BucketLikes, 171 - BucketLikeCounts, 172 - BucketLikesByActor, 173 - BucketComments, 174 - BucketCommentCounts, 175 - BucketCommentsByActor, 176 - BucketCommentChildren, 177 - BucketNotifications, 178 - BucketNotificationsMeta, 179 - } 180 - for _, bucket := range buckets { 181 - if _, err := tx.CreateBucketIfNotExists(bucket); err != nil { 182 - return fmt.Errorf("failed to create bucket %s: %w", bucket, err) 183 - } 184 - } 185 - return nil 186 - }) 187 - if err != nil { 222 + // WAL mode allows concurrent reads with a single writer. 223 + // Allow multiple reader connections but limit to avoid file descriptor exhaustion. 
224 + db.SetMaxOpenConns(4) 225 + db.SetMaxIdleConns(4) 226 + 227 + // Execute schema (skip PRAGMAs — already set via DSN) 228 + if _, err := db.Exec(schemaNoTrailingPragma); err != nil { 188 229 _ = db.Close() 189 - return nil, err 230 + return nil, fmt.Errorf("failed to initialize schema: %w", err) 190 231 } 191 232 192 233 idx := &FeedIndex{ ··· 199 240 return idx, nil 200 241 } 201 242 243 + // DB returns the underlying database connection for shared use by other stores. 244 + func (idx *FeedIndex) DB() *sql.DB { 245 + return idx.db 246 + } 247 + 202 248 // Close closes the index database 203 249 func (idx *FeedIndex) Close() error { 204 250 if idx.db != nil { ··· 224 270 // GetCursor returns the last processed cursor (microseconds timestamp) 225 271 func (idx *FeedIndex) GetCursor() (int64, error) { 226 272 var cursor int64 227 - err := idx.db.View(func(tx *bolt.Tx) error { 228 - b := tx.Bucket(BucketMeta) 229 - v := b.Get([]byte("cursor")) 230 - if len(v) == 8 { 231 - cursor = int64(binary.BigEndian.Uint64(v)) 232 - } 233 - return nil 234 - }) 273 + err := idx.db.QueryRow(`SELECT value FROM meta WHERE key = 'cursor'`).Scan(&cursor) 274 + if err == sql.ErrNoRows { 275 + return 0, nil 276 + } 235 277 return cursor, err 236 278 } 237 279 238 280 // SetCursor stores the cursor position 239 281 func (idx *FeedIndex) SetCursor(cursor int64) error { 240 - return idx.db.Update(func(tx *bolt.Tx) error { 241 - b := tx.Bucket(BucketMeta) 242 - buf := make([]byte, 8) 243 - binary.BigEndian.PutUint64(buf, uint64(cursor)) 244 - return b.Put([]byte("cursor"), buf) 245 - }) 282 + _, err := idx.db.Exec(`INSERT OR REPLACE INTO meta (key, value) VALUES ('cursor', ?)`, cursor) 283 + return err 246 284 } 247 285 248 286 // UpsertRecord adds or updates a record in the index ··· 260 298 } 261 299 } 262 300 263 - indexed := &IndexedRecord{ 264 - URI: uri, 265 - DID: did, 266 - Collection: collection, 267 - RKey: rkey, 268 - Record: record, 269 - CID: cid, 270 - IndexedAt: 
time.Now(), 271 - CreatedAt: createdAt, 301 + now := time.Now() 302 + 303 + _, err := idx.db.Exec(` 304 + INSERT INTO records (uri, did, collection, rkey, record, cid, indexed_at, created_at) 305 + VALUES (?, ?, ?, ?, ?, ?, ?, ?) 306 + ON CONFLICT(uri) DO UPDATE SET 307 + record = excluded.record, 308 + cid = excluded.cid, 309 + indexed_at = excluded.indexed_at, 310 + created_at = excluded.created_at 311 + `, uri, did, collection, rkey, string(record), cid, 312 + now.Format(time.RFC3339Nano), createdAt.Format(time.RFC3339Nano)) 313 + if err != nil { 314 + return fmt.Errorf("failed to upsert record: %w", err) 272 315 } 273 316 274 - data, err := json.Marshal(indexed) 317 + // Track known DID 318 + _, err = idx.db.Exec(`INSERT OR IGNORE INTO known_dids (did) VALUES (?)`, did) 275 319 if err != nil { 276 - return fmt.Errorf("failed to marshal record: %w", err) 320 + return fmt.Errorf("failed to track known DID: %w", err) 277 321 } 278 322 279 - return idx.db.Update(func(tx *bolt.Tx) error { 280 - // Store the record 281 - records := tx.Bucket(BucketRecords) 282 - if err := records.Put([]byte(uri), data); err != nil { 283 - return err 284 - } 285 - 286 - // Index by time (use createdAt for sorting, not event time) 287 - byTime := tx.Bucket(BucketByTime) 288 - timeKey := makeTimeKey(createdAt, uri) 289 - if err := byTime.Put(timeKey, nil); err != nil { 290 - return err 291 - } 292 - 293 - // Index by DID 294 - byDID := tx.Bucket(BucketByDID) 295 - didKey := []byte(did + ":" + uri) 296 - if err := byDID.Put(didKey, nil); err != nil { 297 - return err 298 - } 299 - 300 - // Index by collection 301 - byCollection := tx.Bucket(BucketByCollection) 302 - collKey := []byte(collection + ":" + string(timeKey)) 303 - if err := byCollection.Put(collKey, nil); err != nil { 304 - return err 305 - } 306 - 307 - // Track known DID 308 - knownDIDs := tx.Bucket(BucketKnownDIDs) 309 - if err := knownDIDs.Put([]byte(did), []byte("1")); err != nil { 310 - return err 311 - } 312 - 313 - 
return nil 314 - }) 323 + return nil 315 324 } 316 325 317 326 // DeleteRecord removes a record from the index 318 327 func (idx *FeedIndex) DeleteRecord(did, collection, rkey string) error { 319 328 uri := atproto.BuildATURI(did, collection, rkey) 329 + _, err := idx.db.Exec(`DELETE FROM records WHERE uri = ?`, uri) 330 + return err 331 + } 320 332 321 - return idx.db.Update(func(tx *bolt.Tx) error { 322 - // Get the existing record to find its timestamp 323 - records := tx.Bucket(BucketRecords) 324 - existingData := records.Get([]byte(uri)) 325 - if existingData == nil { 326 - // Record doesn't exist, nothing to delete 327 - return nil 328 - } 333 + // GetRecord retrieves a single record by URI 334 + func (idx *FeedIndex) GetRecord(uri string) (*IndexedRecord, error) { 335 + var rec IndexedRecord 336 + var recordStr, indexedAtStr, createdAtStr string 329 337 330 - var existing IndexedRecord 331 - if err := json.Unmarshal(existingData, &existing); err != nil { 332 - // Can't parse, just delete the main record 333 - return records.Delete([]byte(uri)) 334 - } 338 + err := idx.db.QueryRow(` 339 + SELECT uri, did, collection, rkey, record, cid, indexed_at, created_at 340 + FROM records WHERE uri = ? 
341 + `, uri).Scan(&rec.URI, &rec.DID, &rec.Collection, &rec.RKey, 342 + &recordStr, &rec.CID, &indexedAtStr, &createdAtStr) 343 + if err == sql.ErrNoRows { 344 + return nil, nil 345 + } 346 + if err != nil { 347 + return nil, err 348 + } 335 349 336 - // Delete from records 337 - if err := records.Delete([]byte(uri)); err != nil { 338 - return err 339 - } 340 - 341 - // Delete from by_time index 342 - byTime := tx.Bucket(BucketByTime) 343 - timeKey := makeTimeKey(existing.CreatedAt, uri) 344 - if err := byTime.Delete(timeKey); err != nil { 345 - return err 346 - } 350 + rec.Record = json.RawMessage(recordStr) 351 + rec.IndexedAt, _ = time.Parse(time.RFC3339Nano, indexedAtStr) 352 + rec.CreatedAt, _ = time.Parse(time.RFC3339Nano, createdAtStr) 347 353 348 - // Delete from by_did index 349 - byDID := tx.Bucket(BucketByDID) 350 - didKey := []byte(did + ":" + uri) 351 - if err := byDID.Delete(didKey); err != nil { 352 - return err 353 - } 354 - 355 - // Delete from by_collection index 356 - byCollection := tx.Bucket(BucketByCollection) 357 - collKey := []byte(collection + ":" + string(timeKey)) 358 - if err := byCollection.Delete(collKey); err != nil { 359 - return err 360 - } 361 - 362 - return nil 363 - }) 364 - } 365 - 366 - // GetRecord retrieves a single record by URI 367 - func (idx *FeedIndex) GetRecord(uri string) (*IndexedRecord, error) { 368 - var record *IndexedRecord 369 - err := idx.db.View(func(tx *bolt.Tx) error { 370 - b := tx.Bucket(BucketRecords) 371 - data := b.Get([]byte(uri)) 372 - if data == nil { 373 - return nil 374 - } 375 - record = &IndexedRecord{} 376 - return json.Unmarshal(data, record) 377 - }) 378 - return record, err 354 + return &rec, nil 379 355 } 380 356 381 357 // FeedItem represents an item in the feed (matches feed.FeedItem structure) ··· 404 380 405 381 // GetRecentFeed returns recent feed items from the index 406 382 func (idx *FeedIndex) GetRecentFeed(ctx context.Context, limit int) ([]*FeedItem, error) { 407 - var records 
[]*IndexedRecord 408 - err := idx.db.View(func(tx *bolt.Tx) error { 409 - byTime := tx.Bucket(BucketByTime) 410 - recordsBucket := tx.Bucket(BucketRecords) 411 - 412 - c := byTime.Cursor() 413 - 414 - // Iterate in reverse (newest first) 415 - count := 0 416 - for k, _ := c.First(); k != nil && count < limit*2; k, _ = c.Next() { 417 - // Extract URI from key (format: timestamp:uri) 418 - uri := extractURIFromTimeKey(k) 419 - if uri == "" { 420 - continue 421 - } 422 - 423 - data := recordsBucket.Get([]byte(uri)) 424 - if data == nil { 425 - continue 426 - } 427 - 428 - var record IndexedRecord 429 - if err := json.Unmarshal(data, &record); err != nil { 430 - continue 431 - } 432 - 433 - records = append(records, &record) 434 - count++ 435 - } 436 - 437 - return nil 438 - }) 439 - if err != nil { 440 - return nil, err 441 - } 442 - 443 - // Build lookup maps for reference resolution 444 - recordsByURI := make(map[string]*IndexedRecord) 445 - for _, r := range records { 446 - recordsByURI[r.URI] = r 447 - } 448 - 449 - // Also load additional records we might need for references 450 - err = idx.db.View(func(tx *bolt.Tx) error { 451 - recordsBucket := tx.Bucket(BucketRecords) 452 - return recordsBucket.ForEach(func(k, v []byte) error { 453 - uri := string(k) 454 - if _, exists := recordsByURI[uri]; exists { 455 - return nil 456 - } 457 - var record IndexedRecord 458 - if err := json.Unmarshal(v, &record); err != nil { 459 - return nil 460 - } 461 - // Only load beans, roasters, grinders, brewers for reference resolution 462 - switch record.Collection { 463 - case atproto.NSIDBean, atproto.NSIDRoaster, atproto.NSIDGrinder, atproto.NSIDBrewer: 464 - recordsByURI[uri] = &record 465 - } 466 - return nil 467 - }) 468 - }) 469 - if err != nil { 470 - return nil, err 471 - } 472 - 473 - // Convert to FeedItems 474 - items := make([]*FeedItem, 0, len(records)) 475 - for _, record := range records { 476 - // Skip likes - they're indexed for like counts but not displayed as 
feed items 477 - if record.Collection == atproto.NSIDLike { 478 - continue 479 - } 480 - 481 - item, err := idx.recordToFeedItem(ctx, record, recordsByURI) 482 - if err != nil { 483 - log.Warn().Err(err).Str("uri", record.URI).Msg("failed to convert record to feed item") 484 - continue 485 - } 486 - if !FeedableRecordTypes[item.RecordType] { 487 - continue 488 - } 489 - items = append(items, item) 490 - } 491 - 492 - // Sort by timestamp descending 493 - sort.Slice(items, func(i, j int) bool { 494 - return items[i].Timestamp.After(items[j].Timestamp) 495 - }) 496 - 497 - // Apply limit 498 - if len(items) > limit { 499 - items = items[:limit] 500 - } 501 - 502 - return items, nil 383 + return idx.getFeedItems(ctx, "", limit, "") 503 384 } 504 385 505 386 // recordTypeToNSID maps a lexicons.RecordType to its NSID collection string ··· 511 392 lexicons.RecordTypeBrewer: atproto.NSIDBrewer, 512 393 } 513 394 395 + // feedableCollections is the set of collection NSIDs that appear in the feed 396 + var feedableCollections = func() []string { 397 + out := make([]string, 0, len(recordTypeToNSID)) 398 + for _, nsid := range recordTypeToNSID { 399 + out = append(out, nsid) 400 + } 401 + return out 402 + }() 403 + 514 404 // GetFeedWithQuery returns feed items matching the given query with cursor-based pagination 515 405 func (idx *FeedIndex) GetFeedWithQuery(ctx context.Context, q FeedQuery) (*FeedResult, error) { 516 406 if q.Limit <= 0 { ··· 520 410 q.Sort = FeedSortRecent 521 411 } 522 412 523 - // For type-filtered queries, use BucketByCollection for efficiency 524 - // For unfiltered queries, use BucketByTime 525 - var records []*IndexedRecord 526 - var lastTimeKey []byte 527 - 528 - // Decode cursor if provided 529 - var cursorBytes []byte 530 - if q.Cursor != "" { 531 - var err error 532 - cursorBytes, err = decodeCursor(q.Cursor) 533 - if err != nil { 534 - return nil, fmt.Errorf("invalid cursor: %w", err) 413 + var collectionFilter string 414 + if q.TypeFilter != 
"" { 415 + nsid, ok := recordTypeToNSID[q.TypeFilter] 416 + if !ok { 417 + return nil, fmt.Errorf("unknown record type: %s", q.TypeFilter) 535 418 } 419 + collectionFilter = nsid 536 420 } 537 421 538 - // Fetch more than needed to account for filtering 539 - fetchLimit := q.Limit + 10 540 - 541 - err := idx.db.View(func(tx *bolt.Tx) error { 542 - recordsBucket := tx.Bucket(BucketRecords) 422 + items, err := idx.getFeedItems(ctx, collectionFilter, q.Limit+1, q.Cursor) 423 + if err != nil { 424 + return nil, err 425 + } 543 426 544 - if q.TypeFilter != "" { 545 - // Use BucketByCollection for filtered queries 546 - nsid, ok := recordTypeToNSID[q.TypeFilter] 547 - if !ok { 548 - return fmt.Errorf("unknown record type: %s", q.TypeFilter) 427 + // Sort based on query 428 + if q.Sort == FeedSortPopular { 429 + sort.Slice(items, func(i, j int) bool { 430 + scoreI := items[i].LikeCount*3 + items[i].CommentCount*2 431 + scoreJ := items[j].LikeCount*3 + items[j].CommentCount*2 432 + if scoreI != scoreJ { 433 + return scoreI > scoreJ 549 434 } 435 + return items[i].Timestamp.After(items[j].Timestamp) 436 + }) 437 + } 550 438 551 - byCollection := tx.Bucket(BucketByCollection) 552 - c := byCollection.Cursor() 439 + result := &FeedResult{Items: items} 440 + if len(items) > q.Limit { 441 + result.Items = items[:q.Limit] 442 + last := result.Items[q.Limit-1] 443 + result.NextCursor = last.Timestamp.Format(time.RFC3339Nano) + "|" + last.SubjectURI 444 + } 553 445 554 - // Collection keys: {collection}:{inverted_timestamp}:{uri} 555 - prefix := []byte(nsid + ":") 446 + return result, nil 447 + } 556 448 557 - var k []byte 558 - if cursorBytes != nil { 559 - // Seek to cursor position (cursor is the full collection key) 560 - k, _ = c.Seek(cursorBytes) 561 - // Skip the cursor key itself (it was the last item of previous page) 562 - if k != nil && string(k) == string(cursorBytes) { 563 - k, _ = c.Next() 564 - } 565 - } else { 566 - k, _ = c.Seek(prefix) 567 - } 449 + // 
getFeedItems fetches records from SQLite, resolves references, and returns FeedItems. 450 + func (idx *FeedIndex) getFeedItems(ctx context.Context, collectionFilter string, limit int, cursor string) ([]*FeedItem, error) { 451 + // Build query for feedable records 452 + var args []any 453 + query := `SELECT uri, did, collection, rkey, record, cid, indexed_at, created_at FROM records WHERE ` 568 454 569 - count := 0 570 - for ; k != nil && count < fetchLimit; k, _ = c.Next() { 571 - if !bytes.HasPrefix(k, prefix) { 572 - break 573 - } 455 + if collectionFilter != "" { 456 + query += `collection = ? ` 457 + args = append(args, collectionFilter) 458 + } else { 459 + // Only feedable collections 460 + placeholders := make([]string, len(feedableCollections)) 461 + for i, c := range feedableCollections { 462 + placeholders[i] = "?" 463 + args = append(args, c) 464 + } 465 + query += `collection IN (` + strings.Join(placeholders, ",") + `) ` 466 + } 574 467 575 - // Extract URI from collection key: {collection}:{timestamp_bytes}:{uri} 576 - uri := extractURIFromCollectionKey(k, nsid) 577 - if uri == "" { 578 - continue 579 - } 468 + // Cursor-based pagination: cursor format is "created_at|uri" 469 + if cursor != "" { 470 + parts := strings.SplitN(cursor, "|", 2) 471 + if len(parts) == 2 { 472 + query += `AND (created_at < ? OR (created_at = ? 
AND uri < ?)) ` 473 + args = append(args, parts[0], parts[0], parts[1]) 474 + } 475 + } 580 476 581 - data := recordsBucket.Get([]byte(uri)) 582 - if data == nil { 583 - continue 584 - } 477 + query += `ORDER BY created_at DESC LIMIT ?` 478 + args = append(args, limit) 585 479 586 - var record IndexedRecord 587 - if err := json.Unmarshal(data, &record); err != nil { 588 - continue 589 - } 590 - records = append(records, &record) 591 - lastTimeKey = make([]byte, len(k)) 592 - copy(lastTimeKey, k) 593 - count++ 594 - } 595 - } else { 596 - // Use BucketByTime for unfiltered queries 597 - byTime := tx.Bucket(BucketByTime) 598 - c := byTime.Cursor() 480 + rows, err := idx.db.QueryContext(ctx, query, args...) 481 + if err != nil { 482 + return nil, err 483 + } 484 + defer rows.Close() 599 485 600 - var k []byte 601 - if cursorBytes != nil { 602 - k, _ = c.Seek(cursorBytes) 603 - if k != nil && string(k) == string(cursorBytes) { 604 - k, _ = c.Next() 605 - } 606 - } else { 607 - k, _ = c.First() 608 - } 486 + var records []*IndexedRecord 487 + refURIs := make(map[string]bool) // URIs we need to resolve 609 488 610 - count := 0 611 - for ; k != nil && count < fetchLimit; k, _ = c.Next() { 612 - uri := extractURIFromTimeKey(k) 613 - if uri == "" { 614 - continue 615 - } 489 + for rows.Next() { 490 + var rec IndexedRecord 491 + var recordStr, indexedAtStr, createdAtStr string 492 + if err := rows.Scan(&rec.URI, &rec.DID, &rec.Collection, &rec.RKey, 493 + &recordStr, &rec.CID, &indexedAtStr, &createdAtStr); err != nil { 494 + continue 495 + } 496 + rec.Record = json.RawMessage(recordStr) 497 + rec.IndexedAt, _ = time.Parse(time.RFC3339Nano, indexedAtStr) 498 + rec.CreatedAt, _ = time.Parse(time.RFC3339Nano, createdAtStr) 499 + records = append(records, &rec) 616 500 617 - data := recordsBucket.Get([]byte(uri)) 618 - if data == nil { 619 - continue 620 - } 621 - 622 - var record IndexedRecord 623 - if err := json.Unmarshal(data, &record); err != nil { 624 - continue 625 - } 
626 - // Skip non-feedable records (likes, comments) so they don't 627 - // consume slots in the fetch limit, which would cause pagination 628 - // to break when many non-feedable records are intermixed. 629 - if record.Collection == atproto.NSIDLike || record.Collection == atproto.NSIDComment { 630 - continue 501 + // Collect reference URIs from the record data 502 + var recordData map[string]any 503 + if err := json.Unmarshal(rec.Record, &recordData); err == nil { 504 + for _, key := range []string{"beanRef", "roasterRef", "grinderRef", "brewerRef"} { 505 + if ref, ok := recordData[key].(string); ok && ref != "" { 506 + refURIs[ref] = true 631 507 } 632 - records = append(records, &record) 633 - lastTimeKey = make([]byte, len(k)) 634 - copy(lastTimeKey, k) 635 - count++ 636 508 } 637 509 } 638 - 639 - return nil 640 - }) 641 - if err != nil { 510 + } 511 + if err := rows.Err(); err != nil { 642 512 return nil, err 643 513 } 644 514 645 - // Build lookup maps for reference resolution 646 - recordsByURI := make(map[string]*IndexedRecord) 515 + // Build lookup map starting with the fetched records 516 + recordsByURI := make(map[string]*IndexedRecord, len(records)) 647 517 for _, r := range records { 648 518 recordsByURI[r.URI] = r 649 519 } 650 520 651 - // Load additional records for reference resolution 652 - err = idx.db.View(func(tx *bolt.Tx) error { 653 - recordsBucket := tx.Bucket(BucketRecords) 654 - return recordsBucket.ForEach(func(k, v []byte) error { 655 - uri := string(k) 656 - if _, exists := recordsByURI[uri]; exists { 657 - return nil 658 - } 659 - var record IndexedRecord 660 - if err := json.Unmarshal(v, &record); err != nil { 661 - return nil 662 - } 663 - switch record.Collection { 664 - case atproto.NSIDBean, atproto.NSIDRoaster, atproto.NSIDGrinder, atproto.NSIDBrewer: 665 - recordsByURI[uri] = &record 521 + // Fetch referenced records that we don't already have 522 + var missingURIs []string 523 + for uri := range refURIs { 524 + if _, ok := 
recordsByURI[uri]; !ok { 525 + missingURIs = append(missingURIs, uri) 526 + } 527 + } 528 + 529 + if len(missingURIs) > 0 { 530 + placeholders := make([]string, len(missingURIs)) 531 + refArgs := make([]any, len(missingURIs)) 532 + for i, uri := range missingURIs { 533 + placeholders[i] = "?" 534 + refArgs[i] = uri 535 + } 536 + refQuery := `SELECT uri, did, collection, rkey, record, cid, indexed_at, created_at FROM records WHERE uri IN (` + strings.Join(placeholders, ",") + `)` 537 + refRows, err := idx.db.QueryContext(ctx, refQuery, refArgs...) 538 + if err == nil { 539 + defer refRows.Close() 540 + for refRows.Next() { 541 + var rec IndexedRecord 542 + var recordStr, indexedAtStr, createdAtStr string 543 + if err := refRows.Scan(&rec.URI, &rec.DID, &rec.Collection, &rec.RKey, 544 + &recordStr, &rec.CID, &indexedAtStr, &createdAtStr); err != nil { 545 + continue 546 + } 547 + rec.Record = json.RawMessage(recordStr) 548 + rec.IndexedAt, _ = time.Parse(time.RFC3339Nano, indexedAtStr) 549 + rec.CreatedAt, _ = time.Parse(time.RFC3339Nano, createdAtStr) 550 + recordsByURI[rec.URI] = &rec 551 + 552 + // If this is a bean, check if it references a roaster we also need 553 + if rec.Collection == atproto.NSIDBean { 554 + var beanData map[string]any 555 + if err := json.Unmarshal(rec.Record, &beanData); err == nil { 556 + if roasterRef, ok := beanData["roasterRef"].(string); ok && roasterRef != "" { 557 + if _, ok := recordsByURI[roasterRef]; !ok { 558 + // Fetch this roaster too 559 + var rRec IndexedRecord 560 + var rStr, rIdxAt, rCreAt string 561 + err := idx.db.QueryRowContext(ctx, 562 + `SELECT uri, did, collection, rkey, record, cid, indexed_at, created_at FROM records WHERE uri = ?`, 563 + roasterRef).Scan(&rRec.URI, &rRec.DID, &rRec.Collection, &rRec.RKey, 564 + &rStr, &rRec.CID, &rIdxAt, &rCreAt) 565 + if err == nil { 566 + rRec.Record = json.RawMessage(rStr) 567 + rRec.IndexedAt, _ = time.Parse(time.RFC3339Nano, rIdxAt) 568 + rRec.CreatedAt, _ = 
time.Parse(time.RFC3339Nano, rCreAt) 569 + recordsByURI[rRec.URI] = &rRec 570 + } 571 + } 572 + } 573 + } 574 + } 666 575 } 667 - return nil 668 - }) 669 - }) 670 - if err != nil { 671 - return nil, err 576 + } 672 577 } 673 578 674 579 // Convert to FeedItems 675 580 items := make([]*FeedItem, 0, len(records)) 676 581 for _, record := range records { 677 - if record.Collection == atproto.NSIDLike || record.Collection == atproto.NSIDComment { 678 - continue 679 - } 680 - 681 582 item, err := idx.recordToFeedItem(ctx, record, recordsByURI) 682 583 if err != nil { 683 584 log.Warn().Err(err).Str("uri", record.URI).Msg("failed to convert record to feed item") ··· 689 590 items = append(items, item) 690 591 } 691 592 692 - // Sort based on query 693 - switch q.Sort { 694 - case FeedSortPopular: 695 - sort.Slice(items, func(i, j int) bool { 696 - scoreI := items[i].LikeCount*3 + items[i].CommentCount*2 697 - scoreJ := items[j].LikeCount*3 + items[j].CommentCount*2 698 - if scoreI != scoreJ { 699 - return scoreI > scoreJ 700 - } 701 - return items[i].Timestamp.After(items[j].Timestamp) 702 - }) 703 - default: // FeedSortRecent 704 - sort.Slice(items, func(i, j int) bool { 705 - return items[i].Timestamp.After(items[j].Timestamp) 706 - }) 707 - } 708 - 709 - // Build result with cursor 710 - result := &FeedResult{Items: items} 711 - 712 - if len(items) > q.Limit { 713 - result.Items = items[:q.Limit] 714 - // Cursor is the last time key we read from the DB 715 - if lastTimeKey != nil { 716 - result.NextCursor = encodeCursor(lastTimeKey) 717 - } 718 - } 719 - 720 - return result, nil 721 - } 722 - 723 - // extractURIFromCollectionKey extracts the URI from a collection key 724 - // Format: {collection}:{inverted_timestamp_8bytes}:{uri} 725 - func extractURIFromCollectionKey(key []byte, collection string) string { 726 - // prefix is collection + ":" 727 - prefixLen := len(collection) + 1 728 - // Then 8 bytes of timestamp + ":" 729 - minLen := prefixLen + 8 + 1 + 1 // prefix 
+ timestamp + ":" + at least 1 char 730 - if len(key) < minLen { 731 - return "" 732 - } 733 - return string(key[prefixLen+9:]) 734 - } 735 - 736 - func encodeCursor(key []byte) string { 737 - return hex.EncodeToString(key) 738 - } 739 - 740 - func decodeCursor(s string) ([]byte, error) { 741 - return hex.DecodeString(s) 593 + return items, nil 742 594 } 743 595 744 596 // recordToFeedItem converts an IndexedRecord to a FeedItem ··· 757 609 profile, err := idx.GetProfile(ctx, record.DID) 758 610 if err != nil { 759 611 log.Warn().Err(err).Str("did", record.DID).Msg("failed to get profile") 760 - // Use a placeholder profile 761 612 profile = &atproto.Profile{ 762 613 DID: record.DID, 763 - Handle: record.DID, // Use DID as handle if we can't resolve 614 + Handle: record.DID, 764 615 } 765 616 } 766 617 item.Author = profile ··· 869 720 item.Brewer = brewer 870 721 871 722 case atproto.NSIDLike: 872 - // This should never be reached - likes are filtered before calling recordToFeedItem 873 723 return nil, fmt.Errorf("unexpected: likes should be filtered before conversion") 874 724 875 725 default: ··· 896 746 idx.profileCacheMu.RUnlock() 897 747 898 748 // Check persistent cache 899 - var cached *CachedProfile 900 - err := idx.db.View(func(tx *bolt.Tx) error { 901 - b := tx.Bucket(BucketProfiles) 902 - data := b.Get([]byte(did)) 903 - if data == nil { 904 - return nil 749 + var dataStr, expiresAtStr string 750 + err := idx.db.QueryRow(`SELECT data, expires_at FROM profiles WHERE did = ?`, did).Scan(&dataStr, &expiresAtStr) 751 + if err == nil { 752 + expiresAt, _ := time.Parse(time.RFC3339Nano, expiresAtStr) 753 + if time.Now().Before(expiresAt) { 754 + cached := &CachedProfile{} 755 + if err := json.Unmarshal([]byte(dataStr), cached); err == nil { 756 + idx.profileCacheMu.Lock() 757 + idx.profileCache[did] = cached 758 + idx.profileCacheMu.Unlock() 759 + return cached.Profile, nil 760 + } 905 761 } 906 - cached = &CachedProfile{} 907 - return json.Unmarshal(data, 
cached) 908 - }) 909 - if err == nil && cached != nil && time.Now().Before(cached.ExpiresAt) { 910 - // Update in-memory cache 911 - idx.profileCacheMu.Lock() 912 - idx.profileCache[did] = cached 913 - idx.profileCacheMu.Unlock() 914 - return cached.Profile, nil 915 762 } 916 763 917 764 // Fetch from API ··· 922 769 923 770 // Cache the result 924 771 now := time.Now() 925 - cached = &CachedProfile{ 772 + cached := &CachedProfile{ 926 773 Profile: profile, 927 774 CachedAt: now, 928 775 ExpiresAt: now.Add(idx.profileTTL), ··· 935 782 936 783 // Persist to database 937 784 data, _ := json.Marshal(cached) 938 - _ = idx.db.Update(func(tx *bolt.Tx) error { 939 - b := tx.Bucket(BucketProfiles) 940 - return b.Put([]byte(did), data) 941 - }) 785 + _, _ = idx.db.Exec(`INSERT OR REPLACE INTO profiles (did, data, expires_at) VALUES (?, ?, ?)`, 786 + did, string(data), cached.ExpiresAt.Format(time.RFC3339Nano)) 942 787 943 788 return profile, nil 944 789 } 945 790 946 791 // GetKnownDIDs returns all DIDs that have created Arabica records 947 792 func (idx *FeedIndex) GetKnownDIDs() ([]string, error) { 793 + rows, err := idx.db.Query(`SELECT did FROM known_dids`) 794 + if err != nil { 795 + return nil, err 796 + } 797 + defer rows.Close() 798 + 948 799 var dids []string 949 - err := idx.db.View(func(tx *bolt.Tx) error { 950 - b := tx.Bucket(BucketKnownDIDs) 951 - return b.ForEach(func(k, v []byte) error { 952 - dids = append(dids, string(k)) 953 - return nil 954 - }) 955 - }) 956 - return dids, err 800 + for rows.Next() { 801 + var did string 802 + if err := rows.Scan(&did); err != nil { 803 + continue 804 + } 805 + dids = append(dids, did) 806 + } 807 + return dids, rows.Err() 808 + } 809 + 810 + // ListRecordsByCollection returns all indexed records for a given collection. 
811 + func (idx *FeedIndex) ListRecordsByCollection(collection string) ([]IndexedRecord, error) { 812 + rows, err := idx.db.Query(` 813 + SELECT uri, did, collection, rkey, record, cid, indexed_at, created_at 814 + FROM records WHERE collection = ? ORDER BY created_at DESC 815 + `, collection) 816 + if err != nil { 817 + return nil, err 818 + } 819 + defer rows.Close() 820 + 821 + var records []IndexedRecord 822 + for rows.Next() { 823 + var rec IndexedRecord 824 + var recordStr, indexedAtStr, createdAtStr string 825 + if err := rows.Scan(&rec.URI, &rec.DID, &rec.Collection, &rec.RKey, 826 + &recordStr, &rec.CID, &indexedAtStr, &createdAtStr); err != nil { 827 + continue 828 + } 829 + rec.Record = json.RawMessage(recordStr) 830 + rec.IndexedAt, _ = time.Parse(time.RFC3339Nano, indexedAtStr) 831 + rec.CreatedAt, _ = time.Parse(time.RFC3339Nano, createdAtStr) 832 + records = append(records, rec) 833 + } 834 + return records, rows.Err() 957 835 } 958 836 959 837 // RecordCount returns the total number of indexed records 960 838 func (idx *FeedIndex) RecordCount() int { 961 839 var count int 962 - _ = idx.db.View(func(tx *bolt.Tx) error { 963 - b := tx.Bucket(BucketRecords) 964 - count = b.Stats().KeyN 965 - return nil 966 - }) 840 + _ = idx.db.QueryRow(`SELECT COUNT(*) FROM records`).Scan(&count) 967 841 return count 968 842 } 969 843 970 844 // KnownDIDCount returns the number of unique DIDs in the index 971 845 func (idx *FeedIndex) KnownDIDCount() int { 972 846 var count int 973 - _ = idx.db.View(func(tx *bolt.Tx) error { 974 - b := tx.Bucket(BucketKnownDIDs) 975 - count = b.Stats().KeyN 976 - return nil 977 - }) 847 + _ = idx.db.QueryRow(`SELECT COUNT(*) FROM known_dids`).Scan(&count) 978 848 return count 979 849 } 980 850 981 851 // TotalLikeCount returns the total number of likes indexed 982 852 func (idx *FeedIndex) TotalLikeCount() int { 983 853 var count int 984 - _ = idx.db.View(func(tx *bolt.Tx) error { 985 - b := tx.Bucket(BucketLikes) 986 - count = 
b.Stats().KeyN 987 - return nil 988 - }) 854 + _ = idx.db.QueryRow(`SELECT COUNT(*) FROM likes`).Scan(&count) 989 855 return count 990 856 } 991 857 992 858 // TotalCommentCount returns the total number of comments indexed 993 859 func (idx *FeedIndex) TotalCommentCount() int { 994 860 var count int 995 - _ = idx.db.View(func(tx *bolt.Tx) error { 996 - b := tx.Bucket(BucketCommentsByActor) 997 - count = b.Stats().KeyN 998 - return nil 999 - }) 861 + _ = idx.db.QueryRow(`SELECT COUNT(*) FROM comments`).Scan(&count) 1000 862 return count 1001 863 } 1002 864 1003 865 // RecordCountByCollection returns a breakdown of record counts by collection type 1004 866 func (idx *FeedIndex) RecordCountByCollection() map[string]int { 1005 867 counts := make(map[string]int) 1006 - _ = idx.db.View(func(tx *bolt.Tx) error { 1007 - records := tx.Bucket(BucketRecords) 1008 - return records.ForEach(func(k, v []byte) error { 1009 - var record IndexedRecord 1010 - if err := json.Unmarshal(v, &record); err != nil { 1011 - return nil 1012 - } 1013 - counts[record.Collection]++ 1014 - return nil 1015 - }) 1016 - }) 868 + rows, err := idx.db.Query(`SELECT collection, COUNT(*) FROM records GROUP BY collection`) 869 + if err != nil { 870 + return counts 871 + } 872 + defer rows.Close() 873 + for rows.Next() { 874 + var collection string 875 + var count int 876 + if err := rows.Scan(&collection, &count); err == nil { 877 + counts[collection] = count 878 + } 879 + } 1017 880 return counts 1018 - } 1019 - 1020 - // Helper functions 1021 - 1022 - func makeTimeKey(t time.Time, uri string) []byte { 1023 - // Format: inverted timestamp (for reverse chronological order) + ":" + uri 1024 - // Use nanoseconds for uniqueness 1025 - inverted := ^uint64(t.UnixNano()) 1026 - buf := make([]byte, 8) 1027 - binary.BigEndian.PutUint64(buf, inverted) 1028 - return append(buf, []byte(":"+uri)...) 
1029 - } 1030 - 1031 - func extractURIFromTimeKey(key []byte) string { 1032 - if len(key) < 10 { // 8 bytes timestamp + ":" + at least 1 char 1033 - return "" 1034 - } 1035 - // Skip 8 bytes timestamp + 1 byte ":" 1036 - return string(key[9:]) 1037 881 } 1038 882 1039 883 func formatTimeAgo(t time.Time) string { ··· 1077 921 1078 922 // IsBackfilled checks if a DID has already been backfilled 1079 923 func (idx *FeedIndex) IsBackfilled(did string) bool { 1080 - var exists bool 1081 - _ = idx.db.View(func(tx *bolt.Tx) error { 1082 - b := tx.Bucket(BucketBackfilled) 1083 - exists = b.Get([]byte(did)) != nil 1084 - return nil 1085 - }) 1086 - return exists 924 + var exists int 925 + err := idx.db.QueryRow(`SELECT 1 FROM backfilled WHERE did = ?`, did).Scan(&exists) 926 + return err == nil 1087 927 } 1088 928 1089 929 // MarkBackfilled marks a DID as backfilled with current timestamp 1090 930 func (idx *FeedIndex) MarkBackfilled(did string) error { 1091 - return idx.db.Update(func(tx *bolt.Tx) error { 1092 - b := tx.Bucket(BucketBackfilled) 1093 - timestamp := []byte(time.Now().Format(time.RFC3339)) 1094 - return b.Put([]byte(did), timestamp) 1095 - }) 931 + _, err := idx.db.Exec(`INSERT OR IGNORE INTO backfilled (did, backfilled_at) VALUES (?, ?)`, 932 + did, time.Now().Format(time.RFC3339)) 933 + return err 1096 934 } 1097 935 1098 936 // BackfillUser fetches all existing records for a DID and adds them to the index 1099 - // Returns early if the DID has already been backfilled 1100 937 func (idx *FeedIndex) BackfillUser(ctx context.Context, did string) error { 1101 - // Check if already backfilled 1102 938 if idx.IsBackfilled(did) { 1103 939 log.Debug().Str("did", did).Msg("DID already backfilled, skipping") 1104 940 return nil ··· 1115 951 } 1116 952 1117 953 for _, record := range records.Records { 1118 - // Extract rkey from URI 1119 954 parts := strings.Split(record.URI, "/") 1120 955 if len(parts) < 3 { 1121 956 continue ··· 1133 968 } 1134 969 recordCount++ 
1135 970 1136 - // Index likes and comments into their specialized buckets 1137 971 switch collection { 1138 972 case atproto.NSIDLike: 1139 973 if subject, ok := record.Value["subject"].(map[string]interface{}); ok { ··· 1170 1004 } 1171 1005 } 1172 1006 1173 - // Mark as backfilled 1174 1007 if err := idx.MarkBackfilled(did); err != nil { 1175 1008 log.Warn().Err(err).Str("did", did).Msg("failed to mark DID as backfilled") 1176 1009 } ··· 1183 1016 1184 1017 // UpsertLike adds or updates a like in the index 1185 1018 func (idx *FeedIndex) UpsertLike(actorDID, rkey, subjectURI string) error { 1186 - return idx.db.Update(func(tx *bolt.Tx) error { 1187 - likes := tx.Bucket(BucketLikes) 1188 - likeCounts := tx.Bucket(BucketLikeCounts) 1189 - likesByActor := tx.Bucket(BucketLikesByActor) 1190 - 1191 - // Key format: {subject_uri}:{actor_did} 1192 - likeKey := []byte(subjectURI + ":" + actorDID) 1193 - 1194 - // Check if this like already exists 1195 - existingRKey := likes.Get(likeKey) 1196 - if existingRKey != nil { 1197 - // Already exists, nothing to do 1198 - return nil 1199 - } 1200 - 1201 - // Store the like mapping 1202 - if err := likes.Put(likeKey, []byte(rkey)); err != nil { 1203 - return err 1204 - } 1205 - 1206 - // Store by actor for reverse lookup 1207 - actorKey := []byte(actorDID + ":" + subjectURI) 1208 - if err := likesByActor.Put(actorKey, []byte(rkey)); err != nil { 1209 - return err 1210 - } 1211 - 1212 - // Increment the like count 1213 - countKey := []byte(subjectURI) 1214 - currentCount := uint64(0) 1215 - if countData := likeCounts.Get(countKey); len(countData) == 8 { 1216 - currentCount = binary.BigEndian.Uint64(countData) 1217 - } 1218 - currentCount++ 1219 - countBuf := make([]byte, 8) 1220 - binary.BigEndian.PutUint64(countBuf, currentCount) 1221 - return likeCounts.Put(countKey, countBuf) 1222 - }) 1019 + _, err := idx.db.Exec(`INSERT OR IGNORE INTO likes (subject_uri, actor_did, rkey) VALUES (?, ?, ?)`, 1020 + subjectURI, actorDID, rkey) 
1021 + return err 1223 1022 } 1224 1023 1225 1024 // DeleteLike removes a like from the index 1226 1025 func (idx *FeedIndex) DeleteLike(actorDID, subjectURI string) error { 1227 - return idx.db.Update(func(tx *bolt.Tx) error { 1228 - likes := tx.Bucket(BucketLikes) 1229 - likeCounts := tx.Bucket(BucketLikeCounts) 1230 - likesByActor := tx.Bucket(BucketLikesByActor) 1231 - 1232 - // Key format: {subject_uri}:{actor_did} 1233 - likeKey := []byte(subjectURI + ":" + actorDID) 1234 - 1235 - // Check if like exists 1236 - if likes.Get(likeKey) == nil { 1237 - // Doesn't exist, nothing to do 1238 - return nil 1239 - } 1240 - 1241 - // Delete the like mapping 1242 - if err := likes.Delete(likeKey); err != nil { 1243 - return err 1244 - } 1245 - 1246 - // Delete by actor lookup 1247 - actorKey := []byte(actorDID + ":" + subjectURI) 1248 - if err := likesByActor.Delete(actorKey); err != nil { 1249 - return err 1250 - } 1251 - 1252 - // Decrement the like count 1253 - countKey := []byte(subjectURI) 1254 - currentCount := uint64(0) 1255 - if countData := likeCounts.Get(countKey); len(countData) == 8 { 1256 - currentCount = binary.BigEndian.Uint64(countData) 1257 - } 1258 - if currentCount > 0 { 1259 - currentCount-- 1260 - } 1261 - if currentCount == 0 { 1262 - return likeCounts.Delete(countKey) 1263 - } 1264 - countBuf := make([]byte, 8) 1265 - binary.BigEndian.PutUint64(countBuf, currentCount) 1266 - return likeCounts.Put(countKey, countBuf) 1267 - }) 1026 + _, err := idx.db.Exec(`DELETE FROM likes WHERE subject_uri = ? 
AND actor_did = ?`, 1027 + subjectURI, actorDID) 1028 + return err 1268 1029 } 1269 1030 1270 1031 // GetLikeCount returns the number of likes for a record 1271 1032 func (idx *FeedIndex) GetLikeCount(subjectURI string) int { 1272 - var count uint64 1273 - _ = idx.db.View(func(tx *bolt.Tx) error { 1274 - likeCounts := tx.Bucket(BucketLikeCounts) 1275 - countData := likeCounts.Get([]byte(subjectURI)) 1276 - if len(countData) == 8 { 1277 - count = binary.BigEndian.Uint64(countData) 1278 - } 1279 - return nil 1280 - }) 1281 - return int(count) 1033 + var count int 1034 + _ = idx.db.QueryRow(`SELECT COUNT(*) FROM likes WHERE subject_uri = ?`, subjectURI).Scan(&count) 1035 + return count 1282 1036 } 1283 1037 1284 1038 // HasUserLiked checks if a user has liked a specific record 1285 1039 func (idx *FeedIndex) HasUserLiked(actorDID, subjectURI string) bool { 1286 - var exists bool 1287 - _ = idx.db.View(func(tx *bolt.Tx) error { 1288 - likesByActor := tx.Bucket(BucketLikesByActor) 1289 - actorKey := []byte(actorDID + ":" + subjectURI) 1290 - exists = likesByActor.Get(actorKey) != nil 1291 - return nil 1292 - }) 1293 - return exists 1040 + var exists int 1041 + err := idx.db.QueryRow(`SELECT 1 FROM likes WHERE actor_did = ? AND subject_uri = ? LIMIT 1`, 1042 + actorDID, subjectURI).Scan(&exists) 1043 + return err == nil 1294 1044 } 1295 1045 1296 1046 // GetUserLikeRKey returns the rkey of a user's like for a specific record, or empty string if not found 1297 1047 func (idx *FeedIndex) GetUserLikeRKey(actorDID, subjectURI string) string { 1298 1048 var rkey string 1299 - _ = idx.db.View(func(tx *bolt.Tx) error { 1300 - likesByActor := tx.Bucket(BucketLikesByActor) 1301 - actorKey := []byte(actorDID + ":" + subjectURI) 1302 - if data := likesByActor.Get(actorKey); data != nil { 1303 - rkey = string(data) 1304 - } 1305 - return nil 1306 - }) 1049 + err := idx.db.QueryRow(`SELECT rkey FROM likes WHERE actor_did = ? 
AND subject_uri = ?`, 1050 + actorDID, subjectURI).Scan(&rkey) 1051 + if err != nil { 1052 + return "" 1053 + } 1307 1054 return rkey 1308 1055 } 1309 1056 ··· 1332 1079 1333 1080 // UpsertComment adds or updates a comment in the index 1334 1081 func (idx *FeedIndex) UpsertComment(actorDID, rkey, subjectURI, parentURI, cid, text string, createdAt time.Time) error { 1335 - return idx.db.Update(func(tx *bolt.Tx) error { 1336 - comments := tx.Bucket(BucketComments) 1337 - commentCounts := tx.Bucket(BucketCommentCounts) 1338 - commentsByActor := tx.Bucket(BucketCommentsByActor) 1339 - commentChildren := tx.Bucket(BucketCommentChildren) 1340 - 1341 - // Key format: {subject_uri}:{timestamp}:{actor_did}:{rkey} 1342 - // Using timestamp for chronological ordering 1343 - commentKey := []byte(subjectURI + ":" + createdAt.Format(time.RFC3339Nano) + ":" + actorDID + ":" + rkey) 1344 - 1345 - // Check if this comment already exists (by actor key) 1346 - actorKey := []byte(actorDID + ":" + rkey) 1347 - existingSubject := commentsByActor.Get(actorKey) 1348 - isNew := existingSubject == nil 1349 - 1350 - // If the comment already exists, delete the old entry from BucketComments 1351 - // to prevent duplicates (the key includes timestamp which may differ between calls) 1352 - if !isNew { 1353 - oldPrefix := []byte(string(existingSubject) + ":") 1354 - suffix := ":" + actorDID + ":" + rkey 1355 - cur := comments.Cursor() 1356 - for k, _ := cur.Seek(oldPrefix); k != nil && strings.HasPrefix(string(k), string(oldPrefix)); k, _ = cur.Next() { 1357 - if strings.HasSuffix(string(k), suffix) { 1358 - _ = comments.Delete(k) 1359 - break 1360 - } 1361 - } 1362 - } 1363 - 1364 - // Extract parent rkey from parent URI if present 1365 - var parentRKey string 1366 - if parentURI != "" { 1367 - parts := strings.Split(parentURI, "/") 1368 - if len(parts) > 0 { 1369 - parentRKey = parts[len(parts)-1] 1370 - } 1371 - } 1372 - 1373 - // Store comment data as JSON 1374 - commentData := 
IndexedComment{ 1375 - RKey: rkey, 1376 - SubjectURI: subjectURI, 1377 - Text: text, 1378 - ActorDID: actorDID, 1379 - CreatedAt: createdAt, 1380 - ParentURI: parentURI, 1381 - ParentRKey: parentRKey, 1382 - CID: cid, 1383 - } 1384 - commentJSON, err := json.Marshal(commentData) 1385 - if err != nil { 1386 - return fmt.Errorf("failed to marshal comment: %w", err) 1387 - } 1388 - 1389 - // Store comment 1390 - if err := comments.Put(commentKey, commentJSON); err != nil { 1391 - return fmt.Errorf("failed to store comment: %w", err) 1392 - } 1393 - 1394 - // Store actor lookup 1395 - if err := commentsByActor.Put(actorKey, []byte(subjectURI)); err != nil { 1396 - return fmt.Errorf("failed to store comment by actor: %w", err) 1397 - } 1398 - 1399 - // Store parent-child relationship if this is a reply 1400 - if parentURI != "" { 1401 - childKey := []byte(parentURI + ":" + rkey) 1402 - if err := commentChildren.Put(childKey, []byte(actorDID)); err != nil { 1403 - return fmt.Errorf("failed to store comment child: %w", err) 1404 - } 1405 - } 1406 - 1407 - // Increment count only if this is a new comment 1408 - if isNew { 1409 - countKey := []byte(subjectURI) 1410 - var count uint64 1411 - if countData := commentCounts.Get(countKey); len(countData) == 8 { 1412 - count = binary.BigEndian.Uint64(countData) 1413 - } 1414 - count++ 1415 - countBytes := make([]byte, 8) 1416 - binary.BigEndian.PutUint64(countBytes, count) 1417 - if err := commentCounts.Put(countKey, countBytes); err != nil { 1418 - return fmt.Errorf("failed to update comment count: %w", err) 1419 - } 1082 + // Extract parent rkey from parent URI if present 1083 + var parentRKey string 1084 + if parentURI != "" { 1085 + parts := strings.Split(parentURI, "/") 1086 + if len(parts) > 0 { 1087 + parentRKey = parts[len(parts)-1] 1420 1088 } 1089 + } 1421 1090 1422 - return nil 1423 - }) 1091 + _, err := idx.db.Exec(` 1092 + INSERT INTO comments (actor_did, rkey, subject_uri, parent_uri, parent_rkey, cid, text, 
created_at) 1093 + VALUES (?, ?, ?, ?, ?, ?, ?, ?) 1094 + ON CONFLICT(actor_did, rkey) DO UPDATE SET 1095 + subject_uri = excluded.subject_uri, 1096 + parent_uri = excluded.parent_uri, 1097 + parent_rkey = excluded.parent_rkey, 1098 + cid = excluded.cid, 1099 + text = excluded.text, 1100 + created_at = excluded.created_at 1101 + `, actorDID, rkey, subjectURI, parentURI, parentRKey, cid, text, createdAt.Format(time.RFC3339Nano)) 1102 + return err 1424 1103 } 1425 1104 1426 1105 // DeleteComment removes a comment from the index 1427 1106 func (idx *FeedIndex) DeleteComment(actorDID, rkey, subjectURI string) error { 1428 - return idx.db.Update(func(tx *bolt.Tx) error { 1429 - comments := tx.Bucket(BucketComments) 1430 - commentCounts := tx.Bucket(BucketCommentCounts) 1431 - commentsByActor := tx.Bucket(BucketCommentsByActor) 1432 - commentChildren := tx.Bucket(BucketCommentChildren) 1433 - 1434 - actorKey := []byte(actorDID + ":" + rkey) 1435 - 1436 - // Get subject URI from the actor index, or use the provided one 1437 - existingSubject := commentsByActor.Get(actorKey) 1438 - if existingSubject != nil && subjectURI == "" { 1439 - subjectURI = string(existingSubject) 1440 - } 1441 - 1442 - // Find and delete the comment from BucketComments 1443 - var parentURI string 1444 - suffix := ":" + actorDID + ":" + rkey 1445 - 1446 - if subjectURI != "" { 1447 - // Fast path: we know the subject URI, scan only that prefix 1448 - prefix := []byte(subjectURI + ":") 1449 - c := comments.Cursor() 1450 - for k, v := c.Seek(prefix); k != nil && strings.HasPrefix(string(k), string(prefix)); k, v = c.Next() { 1451 - if strings.HasSuffix(string(k), suffix) { 1452 - var comment IndexedComment 1453 - if err := json.Unmarshal(v, &comment); err == nil { 1454 - parentURI = comment.ParentURI 1455 - } 1456 - if err := comments.Delete(k); err != nil { 1457 - return fmt.Errorf("failed to delete comment: %w", err) 1458 - } 1459 - break 1460 - } 1461 - } 1462 - } else { 1463 - // Slow path: scan 
all comments to find this actor+rkey 1464 - c := comments.Cursor() 1465 - for k, v := c.First(); k != nil; k, v = c.Next() { 1466 - if strings.HasSuffix(string(k), suffix) { 1467 - var comment IndexedComment 1468 - if err := json.Unmarshal(v, &comment); err == nil { 1469 - parentURI = comment.ParentURI 1470 - subjectURI = comment.SubjectURI 1471 - } 1472 - if err := comments.Delete(k); err != nil { 1473 - return fmt.Errorf("failed to delete comment: %w", err) 1474 - } 1475 - break 1476 - } 1477 - } 1478 - } 1479 - 1480 - // Delete actor lookup 1481 - if existingSubject != nil { 1482 - if err := commentsByActor.Delete(actorKey); err != nil { 1483 - return fmt.Errorf("failed to delete comment by actor: %w", err) 1484 - } 1485 - } 1486 - 1487 - // Delete parent-child relationship if this was a reply 1488 - if parentURI != "" { 1489 - childKey := []byte(parentURI + ":" + rkey) 1490 - if err := commentChildren.Delete(childKey); err != nil { 1491 - return fmt.Errorf("failed to delete comment child: %w", err) 1492 - } 1493 - } 1494 - 1495 - // Decrement count 1496 - countKey := []byte(subjectURI) 1497 - var count uint64 1498 - if countData := commentCounts.Get(countKey); len(countData) == 8 { 1499 - count = binary.BigEndian.Uint64(countData) 1500 - } 1501 - if count > 0 { 1502 - count-- 1503 - } 1504 - countBytes := make([]byte, 8) 1505 - binary.BigEndian.PutUint64(countBytes, count) 1506 - if err := commentCounts.Put(countKey, countBytes); err != nil { 1507 - return fmt.Errorf("failed to update comment count: %w", err) 1508 - } 1509 - 1510 - return nil 1511 - }) 1107 + _, err := idx.db.Exec(`DELETE FROM comments WHERE actor_did = ? 
AND rkey = ?`, actorDID, rkey) 1108 + return err 1512 1109 } 1513 1110 1514 1111 // GetCommentCount returns the number of comments on a record 1515 1112 func (idx *FeedIndex) GetCommentCount(subjectURI string) int { 1516 - var count uint64 1517 - _ = idx.db.View(func(tx *bolt.Tx) error { 1518 - commentCounts := tx.Bucket(BucketCommentCounts) 1519 - countData := commentCounts.Get([]byte(subjectURI)) 1520 - if len(countData) == 8 { 1521 - count = binary.BigEndian.Uint64(countData) 1522 - } 1523 - return nil 1524 - }) 1525 - return int(count) 1113 + var count int 1114 + _ = idx.db.QueryRow(`SELECT COUNT(*) FROM comments WHERE subject_uri = ?`, subjectURI).Scan(&count) 1115 + return count 1526 1116 } 1527 1117 1528 1118 // GetCommentsForSubject returns all comments for a specific record, ordered by creation time 1529 - // This returns a flat list of comments without threading 1530 1119 func (idx *FeedIndex) GetCommentsForSubject(ctx context.Context, subjectURI string, limit int, viewerDID string) []IndexedComment { 1531 - var comments []IndexedComment 1532 - _ = idx.db.View(func(tx *bolt.Tx) error { 1533 - bucket := tx.Bucket(BucketComments) 1534 - prefix := []byte(subjectURI + ":") 1535 - c := bucket.Cursor() 1120 + query := `SELECT actor_did, rkey, subject_uri, parent_uri, parent_rkey, cid, text, created_at 1121 + FROM comments WHERE subject_uri = ? ORDER BY created_at` 1122 + var args []any 1123 + args = append(args, subjectURI) 1124 + if limit > 0 { 1125 + query += ` LIMIT ?` 1126 + args = append(args, limit) 1127 + } 1536 1128 1537 - for k, v := c.Seek(prefix); k != nil && strings.HasPrefix(string(k), string(prefix)); k, v = c.Next() { 1538 - var comment IndexedComment 1539 - if err := json.Unmarshal(v, &comment); err != nil { 1540 - continue 1541 - } 1542 - comments = append(comments, comment) 1543 - if limit > 0 && len(comments) >= limit { 1544 - break 1545 - } 1546 - } 1129 + rows, err := idx.db.QueryContext(ctx, query, args...) 
1130 + if err != nil { 1547 1131 return nil 1548 - }) 1132 + } 1133 + defer rows.Close() 1134 + 1135 + var comments []IndexedComment 1136 + for rows.Next() { 1137 + var c IndexedComment 1138 + var createdAtStr string 1139 + if err := rows.Scan(&c.ActorDID, &c.RKey, &c.SubjectURI, &c.ParentURI, &c.ParentRKey, 1140 + &c.CID, &c.Text, &createdAtStr); err != nil { 1141 + continue 1142 + } 1143 + c.CreatedAt, _ = time.Parse(time.RFC3339Nano, createdAtStr) 1144 + comments = append(comments, c) 1145 + } 1549 1146 1550 1147 // Populate profile and like info for each comment 1551 1148 for i := range comments { 1552 1149 profile, err := idx.GetProfile(ctx, comments[i].ActorDID) 1553 1150 if err != nil { 1554 - // Use DID as fallback handle 1555 1151 comments[i].Handle = comments[i].ActorDID 1556 1152 } else { 1557 1153 comments[i].Handle = profile.Handle ··· 1570 1166 } 1571 1167 1572 1168 // GetThreadedCommentsForSubject returns comments for a record in threaded order with depth 1573 - // Comments are returned in depth-first order (parent followed by children) 1574 - // Visual depth is capped at 2 levels for display purposes 1575 1169 func (idx *FeedIndex) GetThreadedCommentsForSubject(ctx context.Context, subjectURI string, limit int, viewerDID string) []IndexedComment { 1576 - // First get all comments for this subject 1577 - allComments := idx.GetCommentsForSubject(ctx, subjectURI, 0, viewerDID) // Get all, we'll limit after threading 1170 + allComments := idx.GetCommentsForSubject(ctx, subjectURI, 0, viewerDID) 1578 1171 1579 1172 if len(allComments) == 0 { 1580 1173 return nil ··· 1593 1186 for i := range allComments { 1594 1187 comment := &allComments[i] 1595 1188 if comment.ParentRKey == "" { 1596 - // Top-level comment 1597 1189 topLevel = append(topLevel, comment) 1598 1190 } else { 1599 - // Reply - add to parent's children 1600 1191 childrenMap[comment.ParentRKey] = append(childrenMap[comment.ParentRKey], comment) 1601 1192 } 1602 1193 } ··· 1620 1211 if limit > 
0 && len(result) >= limit { 1621 1212 return 1622 1213 } 1623 - // Cap visual depth at 2 for display 1624 1214 visualDepth := depth 1625 1215 if visualDepth > 2 { 1626 1216 visualDepth = 2 ··· 1628 1218 comment.Depth = visualDepth 1629 1219 result = append(result, *comment) 1630 1220 1631 - // Add children (if any) 1632 1221 if children, ok := childrenMap[comment.RKey]; ok { 1633 1222 for _, child := range children { 1634 1223 flatten(child, depth+1)
+75 -147
internal/firehose/notifications.go
··· 1 1 package firehose 2 2 3 3 import ( 4 - "encoding/json" 5 4 "fmt" 6 5 "strings" 7 6 "time" ··· 9 8 "arabica/internal/models" 10 9 11 10 "github.com/rs/zerolog/log" 12 - bolt "go.etcd.io/bbolt" 13 - ) 14 - 15 - // Bucket names for notifications 16 - var ( 17 - // BucketNotifications stores notifications: {target_did}:{inverted_timestamp}:{id} -> {Notification JSON} 18 - BucketNotifications = []byte("notifications") 19 - 20 - // BucketNotificationsMeta stores per-user metadata: {target_did}:last_read -> {timestamp RFC3339} 21 - BucketNotificationsMeta = []byte("notifications_meta") 22 11 ) 23 12 24 13 // CreateNotification stores a notification for the target user. 25 - // Deduplicates by (type + actorDID + subjectURI) to prevent duplicates from backfills. 14 + // Deduplicates by (type + actorDID + subjectURI) via unique index. 26 15 // Self-notifications (actorDID == targetDID) are silently skipped. 27 16 func (idx *FeedIndex) CreateNotification(targetDID string, notif models.Notification) error { 28 17 if targetDID == "" || targetDID == notif.ActorDID { 29 18 return nil // skip self-notifications 30 19 } 31 20 32 - return idx.db.Update(func(tx *bolt.Tx) error { 33 - b := tx.Bucket(BucketNotifications) 34 - 35 - // Deduplication: scan for existing notification with same type+actor+subject 36 - prefix := []byte(targetDID + ":") 37 - c := b.Cursor() 38 - for k, v := c.Seek(prefix); k != nil && strings.HasPrefix(string(k), string(prefix)); k, v = c.Next() { 39 - var existing models.Notification 40 - if err := json.Unmarshal(v, &existing); err != nil { 41 - continue 42 - } 43 - if existing.Type == notif.Type && existing.ActorDID == notif.ActorDID && existing.SubjectURI == notif.SubjectURI { 44 - return nil // duplicate, skip 45 - } 46 - } 47 - 48 - // Generate ID from timestamp 49 - if notif.ID == "" { 50 - notif.ID = fmt.Sprintf("%d", notif.CreatedAt.UnixNano()) 51 - } 52 - 53 - data, err := json.Marshal(notif) 54 - if err != nil { 55 - return fmt.Errorf("failed 
to marshal notification: %w", err) 56 - } 21 + // Generate ID from timestamp 22 + if notif.ID == "" { 23 + notif.ID = fmt.Sprintf("%d", notif.CreatedAt.UnixNano()) 24 + } 57 25 58 - // Key: {target_did}:{inverted_timestamp}:{id} for reverse chronological order 59 - inverted := ^uint64(notif.CreatedAt.UnixNano()) 60 - key := fmt.Sprintf("%s:%016x:%s", targetDID, inverted, notif.ID) 61 - return b.Put([]byte(key), data) 62 - }) 26 + // INSERT OR IGNORE deduplicates via the unique index on (target_did, type, actor_did, subject_uri) 27 + _, err := idx.db.Exec(` 28 + INSERT OR IGNORE INTO notifications (id, target_did, type, actor_did, subject_uri, created_at) 29 + VALUES (?, ?, ?, ?, ?, ?) 30 + `, notif.ID, targetDID, string(notif.Type), notif.ActorDID, notif.SubjectURI, 31 + notif.CreatedAt.Format(time.RFC3339Nano)) 32 + return err 63 33 } 64 34 65 35 // GetNotifications returns notifications for a user, newest first. 66 36 // Uses cursor-based pagination. Returns notifications, next cursor, and error. 
67 37 func (idx *FeedIndex) GetNotifications(targetDID string, limit int, cursor string) ([]models.Notification, string, error) { 68 - var notifications []models.Notification 69 - var nextCursor string 70 - 71 38 if limit <= 0 { 72 39 limit = 20 73 40 } 74 41 75 - // Get last_read timestamp for marking read status 76 42 lastRead := idx.getLastRead(targetDID) 77 43 78 - err := idx.db.View(func(tx *bolt.Tx) error { 79 - b := tx.Bucket(BucketNotifications) 80 - c := b.Cursor() 44 + var args []any 45 + query := `SELECT id, type, actor_did, subject_uri, created_at 46 + FROM notifications WHERE target_did = ?` 47 + args = append(args, targetDID) 81 48 82 - prefix := []byte(targetDID + ":") 83 - var k, v []byte 49 + if cursor != "" { 50 + query += ` AND created_at < ?` 51 + args = append(args, cursor) 52 + } 84 53 85 - if cursor != "" { 86 - // Seek to cursor position, then advance past it 87 - k, v = c.Seek([]byte(cursor)) 88 - if k != nil && string(k) == cursor { 89 - k, v = c.Next() 90 - } 91 - } else { 92 - k, v = c.Seek(prefix) 93 - } 54 + query += ` ORDER BY created_at DESC LIMIT ?` 55 + // Fetch one extra to determine if there's a next page 56 + args = append(args, limit+1) 94 57 95 - var lastKey []byte 96 - count := 0 97 - for ; k != nil && strings.HasPrefix(string(k), string(prefix)); k, v = c.Next() { 98 - if count >= limit { 99 - // There are more items beyond our limit 100 - nextCursor = string(lastKey) 101 - break 102 - } 58 + rows, err := idx.db.Query(query, args...) 
59 + if err != nil { 60 + return nil, "", err 61 + } 62 + defer rows.Close() 103 63 104 - var notif models.Notification 105 - if err := json.Unmarshal(v, &notif); err != nil { 106 - continue 107 - } 108 - 109 - // Determine read status based on last_read timestamp 110 - if !lastRead.IsZero() && !notif.CreatedAt.After(lastRead) { 111 - notif.Read = true 112 - } 64 + var notifications []models.Notification 65 + for rows.Next() { 66 + var notif models.Notification 67 + var typeStr, createdAtStr string 68 + if err := rows.Scan(&notif.ID, &typeStr, &notif.ActorDID, &notif.SubjectURI, &createdAtStr); err != nil { 69 + continue 70 + } 71 + notif.Type = models.NotificationType(typeStr) 72 + notif.CreatedAt, _ = time.Parse(time.RFC3339Nano, createdAtStr) 113 73 114 - notifications = append(notifications, notif) 115 - lastKey = make([]byte, len(k)) 116 - copy(lastKey, k) 117 - count++ 74 + if !lastRead.IsZero() && !notif.CreatedAt.After(lastRead) { 75 + notif.Read = true 118 76 } 119 77 120 - return nil 121 - }) 78 + notifications = append(notifications, notif) 79 + } 122 80 123 - return notifications, nextCursor, err 81 + var nextCursor string 82 + if len(notifications) > limit { 83 + // There are more results 84 + last := notifications[limit-1] 85 + nextCursor = last.CreatedAt.Format(time.RFC3339Nano) 86 + notifications = notifications[:limit] 87 + } 88 + 89 + return notifications, nextCursor, rows.Err() 124 90 } 125 91 126 92 // GetUnreadCount returns the number of unread notifications for a user. 
··· 132 98 lastRead := idx.getLastRead(targetDID) 133 99 134 100 var count int 135 - _ = idx.db.View(func(tx *bolt.Tx) error { 136 - b := tx.Bucket(BucketNotifications) 137 - c := b.Cursor() 138 - 139 - prefix := []byte(targetDID + ":") 140 - for k, v := c.Seek(prefix); k != nil && strings.HasPrefix(string(k), string(prefix)); k, v = c.Next() { 141 - var notif models.Notification 142 - if err := json.Unmarshal(v, &notif); err != nil { 143 - continue 144 - } 145 - // If no last_read set, all are unread 146 - if lastRead.IsZero() || notif.CreatedAt.After(lastRead) { 147 - count++ 148 - } else { 149 - // Since keys are in reverse chronological order, 150 - // once we hit a read notification, all remaining are also read 151 - break 152 - } 153 - } 154 - return nil 155 - }) 101 + if lastRead.IsZero() { 102 + _ = idx.db.QueryRow(`SELECT COUNT(*) FROM notifications WHERE target_did = ?`, targetDID).Scan(&count) 103 + } else { 104 + _ = idx.db.QueryRow(`SELECT COUNT(*) FROM notifications WHERE target_did = ? AND created_at > ?`, 105 + targetDID, lastRead.Format(time.RFC3339Nano)).Scan(&count) 106 + } 156 107 157 108 return count 158 109 } 159 110 160 111 // MarkAllRead updates the last_read timestamp to now for the user. 161 112 func (idx *FeedIndex) MarkAllRead(targetDID string) error { 162 - return idx.db.Update(func(tx *bolt.Tx) error { 163 - b := tx.Bucket(BucketNotificationsMeta) 164 - key := []byte(targetDID + ":last_read") 165 - return b.Put(key, []byte(time.Now().Format(time.RFC3339Nano))) 166 - }) 113 + _, err := idx.db.Exec(`INSERT OR REPLACE INTO notifications_meta (target_did, last_read) VALUES (?, ?)`, 114 + targetDID, time.Now().Format(time.RFC3339Nano)) 115 + return err 167 116 } 168 117 169 118 // getLastRead returns the last_read timestamp for a user. 
170 119 func (idx *FeedIndex) getLastRead(targetDID string) time.Time { 171 - var lastRead time.Time 172 - _ = idx.db.View(func(tx *bolt.Tx) error { 173 - b := tx.Bucket(BucketNotificationsMeta) 174 - v := b.Get([]byte(targetDID + ":last_read")) 175 - if v != nil { 176 - if t, err := time.Parse(time.RFC3339Nano, string(v)); err == nil { 177 - lastRead = t 178 - } 179 - } 180 - return nil 181 - }) 182 - return lastRead 120 + var lastReadStr string 121 + err := idx.db.QueryRow(`SELECT last_read FROM notifications_meta WHERE target_did = ?`, targetDID).Scan(&lastReadStr) 122 + if err != nil { 123 + return time.Time{} 124 + } 125 + t, _ := time.Parse(time.RFC3339Nano, lastReadStr) 126 + return t 183 127 } 184 128 185 129 // parseTargetDID extracts the DID from an AT-URI (at://did:plc:xxx/collection/rkey) ··· 206 150 return 207 151 } 208 152 209 - err := idx.db.Update(func(tx *bolt.Tx) error { 210 - b := tx.Bucket(BucketNotifications) 211 - prefix := []byte(targetDID + ":") 212 - c := b.Cursor() 213 - for k, v := c.Seek(prefix); k != nil && strings.HasPrefix(string(k), string(prefix)); k, v = c.Next() { 214 - var existing models.Notification 215 - if err := json.Unmarshal(v, &existing); err != nil { 216 - continue 217 - } 218 - if existing.Type == notifType && existing.ActorDID == actorDID && existing.SubjectURI == subjectURI { 219 - return b.Delete(k) 220 - } 221 - } 222 - return nil 223 - }) 153 + _, err := idx.db.Exec(` 154 + DELETE FROM notifications 155 + WHERE target_did = ? AND type = ? AND actor_did = ? AND subject_uri = ? 156 + `, targetDID, string(notifType), actorDID, subjectURI) 224 157 if err != nil { 225 158 log.Warn().Err(err).Str("target", targetDID).Str("actor", actorDID).Msg("failed to delete notification") 226 159 } ··· 251 184 // Returns empty string if not found. 
252 185 func (idx *FeedIndex) GetCommentSubjectURI(actorDID, rkey string) string { 253 186 var subjectURI string 254 - _ = idx.db.View(func(tx *bolt.Tx) error { 255 - b := tx.Bucket(BucketCommentsByActor) 256 - v := b.Get([]byte(actorDID + ":" + rkey)) 257 - if v != nil { 258 - subjectURI = string(v) 259 - } 260 - return nil 261 - }) 187 + err := idx.db.QueryRow(`SELECT subject_uri FROM comments WHERE actor_did = ? AND rkey = ?`, 188 + actorDID, rkey).Scan(&subjectURI) 189 + if err != nil { 190 + return "" 191 + } 262 192 return subjectURI 263 193 } 264 194 ··· 301 231 } 302 232 303 233 // If this is a reply, also notify the parent comment's author. 304 - // We store the brew's subjectURI (not the parent comment URI) so the 305 - // notification links directly to the brew page with comments. 306 234 if parentURI != "" { 307 235 parentAuthorDID := parseTargetDID(parentURI) 308 236 if parentAuthorDID != "" && parentAuthorDID != actorDID && parentAuthorDID != targetDID {
-220
internal/firehose/suggestions.go
··· 1 - package firehose 2 - 3 - import ( 4 - "encoding/json" 5 - "sort" 6 - "strings" 7 - 8 - "arabica/internal/atproto" 9 - 10 - bolt "go.etcd.io/bbolt" 11 - ) 12 - 13 - // EntitySuggestion represents a suggestion for auto-completing an entity 14 - type EntitySuggestion struct { 15 - Name string `json:"name"` 16 - SourceURI string `json:"source_uri"` 17 - Fields map[string]string `json:"fields"` 18 - Count int `json:"count"` 19 - } 20 - 21 - // entityFieldConfig defines which fields to extract and search for each entity type 22 - type entityFieldConfig struct { 23 - allFields []string 24 - searchFields []string 25 - nameField string 26 - } 27 - 28 - var entityConfigs = map[string]entityFieldConfig{ 29 - atproto.NSIDRoaster: { 30 - allFields: []string{"name", "location", "website"}, 31 - searchFields: []string{"name", "location", "website"}, 32 - nameField: "name", 33 - }, 34 - atproto.NSIDGrinder: { 35 - allFields: []string{"name", "grinderType", "burrType"}, 36 - searchFields: []string{"name", "grinderType", "burrType"}, 37 - nameField: "name", 38 - }, 39 - atproto.NSIDBrewer: { 40 - allFields: []string{"name", "brewerType", "description"}, 41 - searchFields: []string{"name", "brewerType"}, 42 - nameField: "name", 43 - }, 44 - atproto.NSIDBean: { 45 - allFields: []string{"name", "origin", "roastLevel", "process"}, 46 - searchFields: []string{"name", "origin", "roastLevel"}, 47 - nameField: "name", 48 - }, 49 - } 50 - 51 - // SearchEntitySuggestions searches indexed records for entity suggestions matching a query. 52 - // It scans BucketByCollection for the given collection, matches against searchable fields, 53 - // deduplicates by normalized name, and returns results sorted by popularity. 
54 - func (idx *FeedIndex) SearchEntitySuggestions(collection, query string, limit int) ([]EntitySuggestion, error) { 55 - if limit <= 0 { 56 - limit = 10 57 - } 58 - 59 - config, ok := entityConfigs[collection] 60 - if !ok { 61 - return nil, nil 62 - } 63 - 64 - queryLower := strings.ToLower(strings.TrimSpace(query)) 65 - if len(queryLower) < 2 { 66 - return nil, nil 67 - } 68 - 69 - // dedupKey -> aggregated suggestion 70 - type candidate struct { 71 - suggestion EntitySuggestion 72 - fieldCount int // number of non-empty fields (to pick best representative) 73 - dids map[string]struct{} 74 - } 75 - candidates := make(map[string]*candidate) 76 - 77 - err := idx.db.View(func(tx *bolt.Tx) error { 78 - byCollection := tx.Bucket(BucketByCollection) 79 - recordsBucket := tx.Bucket(BucketRecords) 80 - 81 - prefix := []byte(collection + ":") 82 - c := byCollection.Cursor() 83 - 84 - for k, _ := c.Seek(prefix); k != nil; k, _ = c.Next() { 85 - if !hasPrefix(k, prefix) { 86 - break 87 - } 88 - 89 - // Extract URI from collection key 90 - uri := extractURIFromCollectionKey(k, collection) 91 - if uri == "" { 92 - continue 93 - } 94 - 95 - data := recordsBucket.Get([]byte(uri)) 96 - if data == nil { 97 - continue 98 - } 99 - 100 - var indexed IndexedRecord 101 - if err := json.Unmarshal(data, &indexed); err != nil { 102 - continue 103 - } 104 - 105 - var recordData map[string]interface{} 106 - if err := json.Unmarshal(indexed.Record, &recordData); err != nil { 107 - continue 108 - } 109 - 110 - // Extract fields 111 - fields := make(map[string]string) 112 - for _, f := range config.allFields { 113 - if v, ok := recordData[f].(string); ok && v != "" { 114 - fields[f] = v 115 - } 116 - } 117 - 118 - name := fields[config.nameField] 119 - if name == "" { 120 - continue 121 - } 122 - 123 - // Check if any searchable field matches the query 124 - matched := false 125 - for _, sf := range config.searchFields { 126 - val := strings.ToLower(fields[sf]) 127 - if val == "" { 128 - 
continue 129 - } 130 - if strings.HasPrefix(val, queryLower) || strings.Contains(val, queryLower) { 131 - matched = true 132 - break 133 - } 134 - } 135 - if !matched { 136 - continue 137 - } 138 - 139 - // Deduplicate by normalized name 140 - normalizedName := strings.ToLower(strings.TrimSpace(name)) 141 - 142 - if existing, ok := candidates[normalizedName]; ok { 143 - existing.dids[indexed.DID] = struct{}{} 144 - // Keep the record with more complete fields 145 - nonEmpty := 0 146 - for _, v := range fields { 147 - if v != "" { 148 - nonEmpty++ 149 - } 150 - } 151 - if nonEmpty > existing.fieldCount { 152 - existing.suggestion.Name = name 153 - existing.suggestion.Fields = fields 154 - existing.suggestion.SourceURI = indexed.URI 155 - existing.fieldCount = nonEmpty 156 - } 157 - } else { 158 - nonEmpty := 0 159 - for _, v := range fields { 160 - if v != "" { 161 - nonEmpty++ 162 - } 163 - } 164 - candidates[normalizedName] = &candidate{ 165 - suggestion: EntitySuggestion{ 166 - Name: name, 167 - SourceURI: indexed.URI, 168 - Fields: fields, 169 - }, 170 - fieldCount: nonEmpty, 171 - dids: map[string]struct{}{indexed.DID: {}}, 172 - } 173 - } 174 - } 175 - 176 - return nil 177 - }) 178 - if err != nil { 179 - return nil, err 180 - } 181 - 182 - // Build results with counts 183 - results := make([]EntitySuggestion, 0, len(candidates)) 184 - for _, c := range candidates { 185 - c.suggestion.Count = len(c.dids) 186 - results = append(results, c.suggestion) 187 - } 188 - 189 - // Sort: prefix matches first, then by count desc, then alphabetically 190 - sort.Slice(results, func(i, j int) bool { 191 - iPrefix := strings.HasPrefix(strings.ToLower(results[i].Name), queryLower) 192 - jPrefix := strings.HasPrefix(strings.ToLower(results[j].Name), queryLower) 193 - if iPrefix != jPrefix { 194 - return iPrefix 195 - } 196 - if results[i].Count != results[j].Count { 197 - return results[i].Count > results[j].Count 198 - } 199 - return strings.ToLower(results[i].Name) < 
strings.ToLower(results[j].Name) 200 - }) 201 - 202 - if len(results) > limit { 203 - results = results[:limit] 204 - } 205 - 206 - return results, nil 207 - } 208 - 209 - // hasPrefix checks if a byte slice starts with a prefix (avoids import of bytes) 210 - func hasPrefix(s, prefix []byte) bool { 211 - if len(s) < len(prefix) { 212 - return false 213 - } 214 - for i, b := range prefix { 215 - if s[i] != b { 216 - return false 217 - } 218 - } 219 - return true 220 - }
-224
internal/firehose/suggestions_test.go
··· 1 - package firehose 2 - 3 - import ( 4 - "encoding/json" 5 - "os" 6 - "path/filepath" 7 - "testing" 8 - "time" 9 - 10 - "arabica/internal/atproto" 11 - 12 - "github.com/stretchr/testify/assert" 13 - ) 14 - 15 - func newTestFeedIndex(t *testing.T) *FeedIndex { 16 - t.Helper() 17 - dir := t.TempDir() 18 - path := filepath.Join(dir, "test-index.db") 19 - idx, err := NewFeedIndex(path, 1*time.Hour) 20 - assert.NoError(t, err) 21 - t.Cleanup(func() { 22 - idx.Close() 23 - os.Remove(path) 24 - }) 25 - return idx 26 - } 27 - 28 - func insertRecord(t *testing.T, idx *FeedIndex, did, collection, rkey string, fields map[string]interface{}) { 29 - t.Helper() 30 - fields["$type"] = collection 31 - fields["createdAt"] = time.Now().Format(time.RFC3339) 32 - data, err := json.Marshal(fields) 33 - assert.NoError(t, err) 34 - err = idx.UpsertRecord(did, collection, rkey, "cid-"+rkey, data, 0) 35 - assert.NoError(t, err) 36 - } 37 - 38 - func TestSearchEntitySuggestions_PrefixMatch(t *testing.T) { 39 - idx := newTestFeedIndex(t) 40 - 41 - insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{ 42 - "name": "Black & White Coffee", 43 - "location": "Raleigh, NC", 44 - }) 45 - insertRecord(t, idx, "did:plc:bob", atproto.NSIDRoaster, "r2", map[string]interface{}{ 46 - "name": "Blue Bottle", 47 - "location": "Oakland, CA", 48 - }) 49 - 50 - results, err := idx.SearchEntitySuggestions(atproto.NSIDRoaster, "bl", 10) 51 - assert.NoError(t, err) 52 - assert.Len(t, results, 2) 53 - // Both match prefix "bl" 54 - assert.Equal(t, "Black & White Coffee", results[0].Name) 55 - assert.Equal(t, "Blue Bottle", results[1].Name) 56 - } 57 - 58 - func TestSearchEntitySuggestions_CaseInsensitive(t *testing.T) { 59 - idx := newTestFeedIndex(t) 60 - 61 - insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{ 62 - "name": "Stumptown Coffee", 63 - }) 64 - 65 - results, err := idx.SearchEntitySuggestions(atproto.NSIDRoaster, "STUMP", 10) 
66 - assert.NoError(t, err) 67 - assert.Len(t, results, 1) 68 - assert.Equal(t, "Stumptown Coffee", results[0].Name) 69 - } 70 - 71 - func TestSearchEntitySuggestions_SubstringMatch(t *testing.T) { 72 - idx := newTestFeedIndex(t) 73 - 74 - insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{ 75 - "name": "Red Rooster Coffee", 76 - "location": "Floyd, VA", 77 - }) 78 - 79 - // Search by location substring 80 - results, err := idx.SearchEntitySuggestions(atproto.NSIDRoaster, "floyd", 10) 81 - assert.NoError(t, err) 82 - assert.Len(t, results, 1) 83 - assert.Equal(t, "Red Rooster Coffee", results[0].Name) 84 - } 85 - 86 - func TestSearchEntitySuggestions_Deduplication(t *testing.T) { 87 - idx := newTestFeedIndex(t) 88 - 89 - // Two users have the same roaster (different case/whitespace) 90 - insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{ 91 - "name": "Counter Culture", 92 - "location": "Durham, NC", 93 - "website": "https://counterculturecoffee.com", 94 - }) 95 - insertRecord(t, idx, "did:plc:bob", atproto.NSIDRoaster, "r2", map[string]interface{}{ 96 - "name": "Counter Culture", 97 - }) 98 - 99 - results, err := idx.SearchEntitySuggestions(atproto.NSIDRoaster, "counter", 10) 100 - assert.NoError(t, err) 101 - assert.Len(t, results, 1) 102 - assert.Equal(t, 2, results[0].Count) 103 - // Should keep the more complete record (alice's with location + website) 104 - assert.Equal(t, "Durham, NC", results[0].Fields["location"]) 105 - } 106 - 107 - func TestSearchEntitySuggestions_Limit(t *testing.T) { 108 - idx := newTestFeedIndex(t) 109 - 110 - for i := 0; i < 5; i++ { 111 - rkey := "r" + string(rune('0'+i)) 112 - insertRecord(t, idx, "did:plc:alice", atproto.NSIDGrinder, rkey, map[string]interface{}{ 113 - "name": "Grinder " + string(rune('A'+i)), 114 - "grinderType": "hand", 115 - }) 116 - } 117 - 118 - results, err := idx.SearchEntitySuggestions(atproto.NSIDGrinder, "grinder", 3) 119 - 
assert.NoError(t, err) 120 - assert.Len(t, results, 3) 121 - } 122 - 123 - func TestSearchEntitySuggestions_ShortQuery(t *testing.T) { 124 - idx := newTestFeedIndex(t) 125 - 126 - insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{ 127 - "name": "ABC", 128 - }) 129 - 130 - // Query too short (< 2 chars) 131 - results, err := idx.SearchEntitySuggestions(atproto.NSIDRoaster, "a", 10) 132 - assert.NoError(t, err) 133 - assert.Empty(t, results) 134 - 135 - // 2 chars should work 136 - results, err = idx.SearchEntitySuggestions(atproto.NSIDRoaster, "ab", 10) 137 - assert.NoError(t, err) 138 - assert.Len(t, results, 1) 139 - } 140 - 141 - func TestSearchEntitySuggestions_EmptyQuery(t *testing.T) { 142 - idx := newTestFeedIndex(t) 143 - 144 - results, err := idx.SearchEntitySuggestions(atproto.NSIDRoaster, "", 10) 145 - assert.NoError(t, err) 146 - assert.Empty(t, results) 147 - } 148 - 149 - func TestSearchEntitySuggestions_UnknownCollection(t *testing.T) { 150 - idx := newTestFeedIndex(t) 151 - 152 - results, err := idx.SearchEntitySuggestions("unknown.collection", "test", 10) 153 - assert.NoError(t, err) 154 - assert.Empty(t, results) 155 - } 156 - 157 - func TestSearchEntitySuggestions_GrinderFields(t *testing.T) { 158 - idx := newTestFeedIndex(t) 159 - 160 - insertRecord(t, idx, "did:plc:alice", atproto.NSIDGrinder, "g1", map[string]interface{}{ 161 - "name": "1Zpresso JX Pro", 162 - "grinderType": "hand", 163 - "burrType": "conical", 164 - }) 165 - 166 - results, err := idx.SearchEntitySuggestions(atproto.NSIDGrinder, "1zp", 10) 167 - assert.NoError(t, err) 168 - assert.Len(t, results, 1) 169 - assert.Equal(t, "hand", results[0].Fields["grinderType"]) 170 - assert.Equal(t, "conical", results[0].Fields["burrType"]) 171 - } 172 - 173 - func TestSearchEntitySuggestions_BeanFields(t *testing.T) { 174 - idx := newTestFeedIndex(t) 175 - 176 - insertRecord(t, idx, "did:plc:alice", atproto.NSIDBean, "b1", map[string]interface{}{ 177 - 
"name": "Ethiopian Yirgacheffe", 178 - "origin": "Ethiopia", 179 - "roastLevel": "Light", 180 - "process": "Washed", 181 - }) 182 - 183 - // Search by origin 184 - results, err := idx.SearchEntitySuggestions(atproto.NSIDBean, "ethiopia", 10) 185 - assert.NoError(t, err) 186 - assert.Len(t, results, 1) 187 - assert.Equal(t, "Ethiopian Yirgacheffe", results[0].Name) 188 - assert.Equal(t, "Light", results[0].Fields["roastLevel"]) 189 - } 190 - 191 - func TestSearchEntitySuggestions_BrewerFields(t *testing.T) { 192 - idx := newTestFeedIndex(t) 193 - 194 - insertRecord(t, idx, "did:plc:alice", atproto.NSIDBrewer, "br1", map[string]interface{}{ 195 - "name": "Hario V60", 196 - "brewerType": "Pour-Over", 197 - }) 198 - 199 - results, err := idx.SearchEntitySuggestions(atproto.NSIDBrewer, "hario", 10) 200 - assert.NoError(t, err) 201 - assert.Len(t, results, 1) 202 - assert.Equal(t, "Pour-Over", results[0].Fields["brewerType"]) 203 - } 204 - 205 - func TestSearchEntitySuggestions_SortOrder(t *testing.T) { 206 - idx := newTestFeedIndex(t) 207 - 208 - // "Alpha Roasters" used by 3 people 209 - insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{"name": "Alpha Roasters"}) 210 - insertRecord(t, idx, "did:plc:bob", atproto.NSIDRoaster, "r2", map[string]interface{}{"name": "Alpha Roasters"}) 211 - insertRecord(t, idx, "did:plc:charlie", atproto.NSIDRoaster, "r3", map[string]interface{}{"name": "Alpha Roasters"}) 212 - 213 - // "Alpha Beta" used by 1 person 214 - insertRecord(t, idx, "did:plc:dave", atproto.NSIDRoaster, "r4", map[string]interface{}{"name": "Alpha Beta"}) 215 - 216 - results, err := idx.SearchEntitySuggestions(atproto.NSIDRoaster, "alpha", 10) 217 - assert.NoError(t, err) 218 - assert.Len(t, results, 2) 219 - // More popular first 220 - assert.Equal(t, "Alpha Roasters", results[0].Name) 221 - assert.Equal(t, 3, results[0].Count) 222 - assert.Equal(t, "Alpha Beta", results[1].Name) 223 - assert.Equal(t, 1, results[1].Count) 224 - 
}
+2 -2
internal/handlers/handlers.go
··· 47 47 48 48 // Moderation dependencies (optional) 49 49 moderationService *moderation.Service 50 - moderationStore *boltstore.ModerationStore 50 + moderationStore moderation.Store 51 51 52 52 // Join request dependencies (optional) 53 53 emailSender *email.Sender ··· 83 83 } 84 84 85 85 // SetModeration configures the handler with moderation service and store 86 - func (h *Handler) SetModeration(svc *moderation.Service, store *boltstore.ModerationStore) { 86 + func (h *Handler) SetModeration(svc *moderation.Service, store moderation.Store) { 87 87 h.moderationService = svc 88 88 h.moderationStore = store 89 89 }
+5 -5
internal/handlers/suggestions.go
··· 6 6 "strconv" 7 7 8 8 "arabica/internal/atproto" 9 - "arabica/internal/firehose" 9 + "arabica/internal/suggestions" 10 10 11 11 "github.com/rs/zerolog/log" 12 12 ) ··· 57 57 return 58 58 } 59 59 60 - suggestions, err := h.feedIndex.SearchEntitySuggestions(nsid, query, limit) 60 + results, err := suggestions.Search(h.feedIndex, nsid, query, limit) 61 61 if err != nil { 62 62 log.Error().Err(err).Str("entity", entityType).Str("query", query).Msg("Failed to search suggestions") 63 63 http.Error(w, "Failed to search suggestions", http.StatusInternalServerError) 64 64 return 65 65 } 66 66 67 - if suggestions == nil { 68 - suggestions = []firehose.EntitySuggestion{} 67 + if results == nil { 68 + results = []suggestions.EntitySuggestion{} 69 69 } 70 70 71 71 w.Header().Set("Content-Type", "application/json") 72 - if err := json.NewEncoder(w).Encode(suggestions); err != nil { 72 + if err := json.NewEncoder(w).Encode(results); err != nil { 73 73 log.Error().Err(err).Msg("Failed to encode suggestions response") 74 74 } 75 75 }
+44
internal/moderation/store.go
package moderation

import (
	"context"
	"time"
)

// Store defines the persistence interface for moderation data.
// Implementations must be safe for concurrent use.
//
// Note the asymmetry: IsRecordHidden and IsBlacklisted return bare booleans
// (suitable for hot-path filtering), while the Get* variants return the full
// entry plus an error.
type Store interface {
	// Hidden records: individual records (by AT-URI) removed from public feeds.
	HideRecord(ctx context.Context, entry HiddenRecord) error
	UnhideRecord(ctx context.Context, atURI string) error
	IsRecordHidden(ctx context.Context, atURI string) bool
	GetHiddenRecord(ctx context.Context, atURI string) (*HiddenRecord, error)
	ListHiddenRecords(ctx context.Context) ([]HiddenRecord, error)

	// Blacklist: whole accounts (by DID) excluded from the service.
	BlacklistUser(ctx context.Context, entry BlacklistedUser) error
	UnblacklistUser(ctx context.Context, did string) error
	IsBlacklisted(ctx context.Context, did string) bool
	GetBlacklistedUser(ctx context.Context, did string) (*BlacklistedUser, error)
	ListBlacklistedUsers(ctx context.Context) ([]BlacklistedUser, error)

	// Reports: user-submitted moderation reports and their lifecycle.
	// The Count*/Has* queries support rate limiting and auto-hide thresholds.
	CreateReport(ctx context.Context, report Report) error
	GetReport(ctx context.Context, id string) (*Report, error)
	ListPendingReports(ctx context.Context) ([]Report, error)
	ListAllReports(ctx context.Context) ([]Report, error)
	ResolveReport(ctx context.Context, id string, status ReportStatus, resolvedBy string) error
	CountReportsForURI(ctx context.Context, atURI string) (int, error)
	CountReportsForDID(ctx context.Context, did string) (int, error)
	CountReportsForDIDSince(ctx context.Context, did string, since time.Time) (int, error)
	HasReportedURI(ctx context.Context, reporterDID, subjectURI string) (bool, error)
	CountReportsFromUserSince(ctx context.Context, reporterDID string, since time.Time) (int, error)

	// Audit log: append-only record of moderator actions.
	LogAction(ctx context.Context, entry AuditEntry) error
	ListAuditLog(ctx context.Context, limit int) ([]AuditEntry, error)

	// Auto-hide resets: per-DID timestamps that re-arm automatic hiding.
	SetAutoHideReset(ctx context.Context, did string, resetAt time.Time) error
	GetAutoHideReset(ctx context.Context, did string) (time.Time, error)
}
+319
internal/suggestions/suggestions.go
// Package suggestions provides fuzzy auto-complete suggestions for coffee
// entities (roasters, grinders, brewers, beans) backed by the firehose index.
package suggestions

import (
	"encoding/json"
	"regexp"
	"sort"
	"strings"

	"arabica/internal/atproto"
	"arabica/internal/firehose"
)

// EntitySuggestion represents a suggestion for auto-completing an entity.
// Count is a popularity signal; Fields carries the extracted per-entity
// attributes for display alongside the name.
type EntitySuggestion struct {
	Name      string            `json:"name"`
	SourceURI string            `json:"source_uri"`
	Fields    map[string]string `json:"fields"`
	Count     int               `json:"count"`
}

// RecordSource provides read access to indexed records.
// Satisfied by *firehose.FeedIndex; defined here so Search can be tested
// against a fake source.
type RecordSource interface {
	ListRecordsByCollection(collection string) ([]firehose.IndexedRecord, error)
}

// entityFieldConfig defines which fields to extract and search for each entity type.
type entityFieldConfig struct {
	allFields    []string                          // fields copied into EntitySuggestion.Fields
	searchFields []string                          // fields matched against the query
	nameField    string                            // field used as the display name
	dedupKey     func(fields map[string]string) string // groups "same entity" records
}

// entityConfigs maps each supported collection NSID to its field configuration.
// Collections not listed here yield no suggestions.
var entityConfigs = map[string]entityFieldConfig{
	atproto.NSIDRoaster: {
		allFields:    []string{"name", "location", "website"},
		searchFields: []string{"name", "location", "website"},
		nameField:    "name",
		dedupKey:     roasterDedupKey,
	},
	atproto.NSIDGrinder: {
		allFields:    []string{"name", "grinderType", "burrType"},
		searchFields: []string{"name", "grinderType", "burrType"},
		nameField:    "name",
		dedupKey:     grinderDedupKey,
	},
	atproto.NSIDBrewer: {
		allFields:    []string{"name", "brewerType", "description"},
		// description is displayed but deliberately not searched.
		searchFields: []string{"name", "brewerType"},
		nameField:    "name",
		dedupKey:     brewerDedupKey,
	},
	atproto.NSIDBean: {
		allFields:    []string{"name", "origin", "roastLevel", "process"},
		// process is displayed and used for dedup, but not searched.
		searchFields: []string{"name", "origin", "roastLevel"},
		nameField:    "name",
		dedupKey:     beanDedupKey,
	},
}

// --- Dedup key functions ---
// Each returns a string that groups "same entity" records together.
63 + // Records with the same dedup key are merged; different keys stay separate. 64 + 65 + // roasterDedupKey: fuzzy name + normalized location. 66 + // "Counter Culture Coffee" in "Durham, NC" vs "Counter Culture" in "Durham" → same. 67 + // "Stumptown" in "Portland" vs "Stumptown" in "NYC" → different. 68 + // Website is not included because it's too sparse — many records lack one, 69 + // causing false splits. Website is still kept in Fields for display. 70 + func roasterDedupKey(fields map[string]string) string { 71 + parts := []string{fuzzyName(fields["name"])} 72 + if loc := normalize(fields["location"]); loc != "" { 73 + parts = append(parts, loc) 74 + } 75 + return strings.Join(parts, "|") 76 + } 77 + 78 + // grinderDedupKey: exact name + grinder type + burr type. 79 + // "1Zpresso JX Pro" hand/conical vs "1Zpresso JX Pro" electric/flat → different. 80 + func grinderDedupKey(fields map[string]string) string { 81 + parts := []string{normalize(fields["name"])} 82 + if gt := normalize(fields["grinderType"]); gt != "" { 83 + parts = append(parts, gt) 84 + } 85 + if bt := normalize(fields["burrType"]); bt != "" { 86 + parts = append(parts, bt) 87 + } 88 + return strings.Join(parts, "|") 89 + } 90 + 91 + // brewerDedupKey: exact name + brewer type. 92 + // "Hario V60" pour-over vs "Hario V60" dripper → different (if someone miscategorized). 93 + func brewerDedupKey(fields map[string]string) string { 94 + parts := []string{normalize(fields["name"])} 95 + if bt := normalize(fields["brewerType"]); bt != "" { 96 + parts = append(parts, bt) 97 + } 98 + return strings.Join(parts, "|") 99 + } 100 + 101 + // beanDedupKey: exact name + origin + process. 102 + // "Yirgacheffe" from Ethiopia/washed vs "Yirgacheffe" from Ethiopia/natural → different. 
// beanDedupKey builds the dedup key for a bean record: exact name plus
// origin and process when present. Beans with the same name but a
// different origin or process are genuinely different coffees.
func beanDedupKey(fields map[string]string) string {
	parts := []string{normalize(fields["name"])}
	if o := normalize(fields["origin"]); o != "" {
		parts = append(parts, o)
	}
	if p := normalize(fields["process"]); p != "" {
		parts = append(parts, p)
	}
	return strings.Join(parts, "|")
}

// --- Normalization helpers ---

// normalize lowercases, trims whitespace, and collapses internal whitespace.
func normalize(s string) string {
	return collapseSpaces(strings.ToLower(strings.TrimSpace(s)))
}

// Common suffixes stripped during fuzzy name normalization for roasters/brewers.
// Order matters: longer suffixes first to avoid partial stripping.
var commonSuffixes = []string{
	"coffee roasters",
	"coffee roasting",
	"coffee company",
	"coffee co",
	"roasting company",
	"roasting co",
	"roasters",
	"roasting",
	"coffee",
	"co.",
}

// fuzzyName normalizes a name by lowercasing, stripping common coffee-industry
// suffixes, punctuation, and extra whitespace. This lets "Counter Culture Coffee"
// and "Counter Culture" merge, while still keeping genuinely different names apart.
//
// A suffix is only stripped at a word boundary (preceded by a space), so a
// name whose last word merely *ends* in a suffix is left intact. A name that
// consists solely of a suffix (e.g. "Coffee") is also left intact rather than
// being stripped to the empty string — an empty fuzzy name would otherwise
// merge every such record into one dedup bucket.
func fuzzyName(name string) string {
	s := strings.ToLower(strings.TrimSpace(name))

	// Strip at most one suffix, longest candidates first.
	for _, suffix := range commonSuffixes {
		if s == suffix {
			// The whole name is the suffix; stripping would leave nothing.
			break
		}
		if strings.HasSuffix(s, " "+suffix) {
			s = strings.TrimSpace(s[:len(s)-len(suffix)])
			break // only strip one suffix
		}
	}

	// Remove punctuation (keep letters, digits, spaces)
	s = stripPunctuation(s)

	return collapseSpaces(s)
}

var nonAlphanumSpace = regexp.MustCompile(`[^a-z0-9\s]`)

// stripPunctuation removes every character that is not a lowercase letter,
// digit, or whitespace. Callers lowercase the input first.
func stripPunctuation(s string) string {
	return nonAlphanumSpace.ReplaceAllString(s, "")
}

var multiSpace = regexp.MustCompile(`\s+`)

// collapseSpaces squeezes runs of whitespace into single spaces and trims the ends.
func collapseSpaces(s string) string {
	return strings.TrimSpace(multiSpace.ReplaceAllString(s, " "))
}

// extractDomain pulls the domain from a URL for normalization.
// "https://www.counterculturecoffee.com/shop" → "counterculturecoffee.com"
func extractDomain(rawURL string) string {
	if rawURL == "" {
		return ""
	}
	s := strings.ToLower(strings.TrimSpace(rawURL))
	// Strip scheme
	if i := strings.Index(s, "://"); i >= 0 {
		s = s[i+3:]
	}
	// Strip www.
	s = strings.TrimPrefix(s, "www.")
	// Strip path
	if i := strings.IndexByte(s, '/'); i >= 0 {
		s = s[:i]
	}
	// Strip port
	if i := strings.IndexByte(s, ':'); i >= 0 {
		s = s[:i]
	}
	return s
}

// Search searches indexed records for entity suggestions matching a query.
// It matches against searchable fields, deduplicates using entity-specific
// keys, and returns results sorted by popularity.
195 + func Search(source RecordSource, collection, query string, limit int) ([]EntitySuggestion, error) { 196 + if limit <= 0 { 197 + limit = 10 198 + } 199 + 200 + config, ok := entityConfigs[collection] 201 + if !ok { 202 + return nil, nil 203 + } 204 + 205 + queryLower := strings.ToLower(strings.TrimSpace(query)) 206 + if len(queryLower) < 2 { 207 + return nil, nil 208 + } 209 + 210 + records, err := source.ListRecordsByCollection(collection) 211 + if err != nil { 212 + return nil, err 213 + } 214 + 215 + // dedupKey -> aggregated suggestion 216 + type candidate struct { 217 + suggestion EntitySuggestion 218 + fieldCount int // number of non-empty fields (to pick best representative) 219 + dids map[string]struct{} 220 + } 221 + candidates := make(map[string]*candidate) 222 + 223 + for _, indexed := range records { 224 + var recordData map[string]any 225 + if err := json.Unmarshal(indexed.Record, &recordData); err != nil { 226 + continue 227 + } 228 + 229 + // Extract fields 230 + fields := make(map[string]string) 231 + for _, f := range config.allFields { 232 + if v, ok := recordData[f].(string); ok && v != "" { 233 + fields[f] = v 234 + } 235 + } 236 + 237 + name := fields[config.nameField] 238 + if name == "" { 239 + continue 240 + } 241 + 242 + // Check if any searchable field matches the query 243 + matched := false 244 + for _, sf := range config.searchFields { 245 + val := strings.ToLower(fields[sf]) 246 + if val == "" { 247 + continue 248 + } 249 + if strings.HasPrefix(val, queryLower) || strings.Contains(val, queryLower) { 250 + matched = true 251 + break 252 + } 253 + } 254 + if !matched { 255 + continue 256 + } 257 + 258 + // Deduplicate using entity-specific key 259 + key := config.dedupKey(fields) 260 + 261 + if existing, ok := candidates[key]; ok { 262 + existing.dids[indexed.DID] = struct{}{} 263 + // Keep the record with more complete fields 264 + nonEmpty := countNonEmpty(fields) 265 + if nonEmpty > existing.fieldCount { 266 + 
existing.suggestion.Name = name 267 + existing.suggestion.Fields = fields 268 + existing.suggestion.SourceURI = indexed.URI 269 + existing.fieldCount = nonEmpty 270 + } 271 + } else { 272 + candidates[key] = &candidate{ 273 + suggestion: EntitySuggestion{ 274 + Name: name, 275 + SourceURI: indexed.URI, 276 + Fields: fields, 277 + }, 278 + fieldCount: countNonEmpty(fields), 279 + dids: map[string]struct{}{indexed.DID: {}}, 280 + } 281 + } 282 + } 283 + 284 + // Build results with counts 285 + results := make([]EntitySuggestion, 0, len(candidates)) 286 + for _, c := range candidates { 287 + c.suggestion.Count = len(c.dids) 288 + results = append(results, c.suggestion) 289 + } 290 + 291 + // Sort: prefix matches first, then by count desc, then alphabetically 292 + sort.Slice(results, func(i, j int) bool { 293 + iPrefix := strings.HasPrefix(strings.ToLower(results[i].Name), queryLower) 294 + jPrefix := strings.HasPrefix(strings.ToLower(results[j].Name), queryLower) 295 + if iPrefix != jPrefix { 296 + return iPrefix 297 + } 298 + if results[i].Count != results[j].Count { 299 + return results[i].Count > results[j].Count 300 + } 301 + return strings.ToLower(results[i].Name) < strings.ToLower(results[j].Name) 302 + }) 303 + 304 + if len(results) > limit { 305 + results = results[:limit] 306 + } 307 + 308 + return results, nil 309 + } 310 + 311 + func countNonEmpty(fields map[string]string) int { 312 + n := 0 313 + for _, v := range fields { 314 + if v != "" { 315 + n++ 316 + } 317 + } 318 + return n 319 + }
+460
internal/suggestions/suggestions_test.go
package suggestions

import (
	"encoding/json"
	"os"
	"path/filepath"
	"testing"
	"time"

	"arabica/internal/atproto"
	"arabica/internal/firehose"

	"github.com/stretchr/testify/assert"
)

// newTestFeedIndex opens a throwaway on-disk FeedIndex that is closed and
// removed when the test finishes.
func newTestFeedIndex(t *testing.T) *firehose.FeedIndex {
	t.Helper()
	dir := t.TempDir()
	path := filepath.Join(dir, "test-index.db")
	idx, err := firehose.NewFeedIndex(path, 1*time.Hour)
	assert.NoError(t, err)
	t.Cleanup(func() {
		idx.Close()
		os.Remove(path)
	})
	return idx
}

// insertRecord marshals fields as a record of the given collection and
// upserts it into the index. NOTE: it mutates the caller's map by adding
// "$type" and "createdAt".
func insertRecord(t *testing.T, idx *firehose.FeedIndex, did, collection, rkey string, fields map[string]interface{}) {
	t.Helper()
	fields["$type"] = collection
	fields["createdAt"] = time.Now().Format(time.RFC3339)
	data, err := json.Marshal(fields)
	assert.NoError(t, err)
	err = idx.UpsertRecord(did, collection, rkey, "cid-"+rkey, data, 0)
	assert.NoError(t, err)
}

// --- Helper unit tests ---

func TestFuzzyName(t *testing.T) {
	tests := []struct {
		input string
		want  string
	}{
		{"Counter Culture Coffee", "counter culture"},
		{"Counter Culture", "counter culture"},
		{"counter culture coffee roasters", "counter culture"},
		{"Stumptown Coffee Roasters", "stumptown"},
		{"Stumptown", "stumptown"},
		{"Black & White Coffee", "black white"},
		{"  Some Roasting Company  ", "some"},
		{"Heart Coffee Roasters", "heart"},
		{"Heart Roasters", "heart"},
		{"Heart", "heart"},
	}
	for _, tt := range tests {
		assert.Equal(t, tt.want, fuzzyName(tt.input), "fuzzyName(%q)", tt.input)
	}
}

func TestExtractDomain(t *testing.T) {
	tests := []struct {
		input string
		want  string
	}{
		{"https://www.counterculturecoffee.com/shop", "counterculturecoffee.com"},
		{"http://example.com", "example.com"},
		{"https://example.com:8080/path", "example.com"},
		{"www.example.com", "example.com"},
		{"example.com", "example.com"},
		{"", ""},
	}
	for _, tt := range tests {
		assert.Equal(t, tt.want, extractDomain(tt.input), "extractDomain(%q)", tt.input)
	}
}

func TestNormalize(t *testing.T) {
	assert.Equal(t, "durham, nc", normalize("  Durham, NC  "))
	assert.Equal(t, "oakland ca", normalize("Oakland CA"))
	assert.Equal(t, "", normalize(""))
}

// --- Roaster dedup tests ---

func TestRoasterDedup_SameNameDifferentLocation(t *testing.T) {
	idx := newTestFeedIndex(t)

	insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{
		"name":     "Stumptown Coffee",
		"location": "Portland, OR",
	})
	insertRecord(t, idx, "did:plc:bob", atproto.NSIDRoaster, "r2", map[string]interface{}{
		"name":     "Stumptown Coffee",
		"location": "New York, NY",
	})

	results, err := Search(idx, atproto.NSIDRoaster, "stumptown", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 2, "different locations should produce separate suggestions")
}

func TestRoasterDedup_FuzzyNameMerge(t *testing.T) {
	idx := newTestFeedIndex(t)

	// Same roaster, different name variations, same location
	insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{
		"name":     "Counter Culture Coffee",
		"location": "Durham, NC",
		"website":  "https://counterculturecoffee.com",
	})
	insertRecord(t, idx, "did:plc:bob", atproto.NSIDRoaster, "r2", map[string]interface{}{
		"name":     "Counter Culture Coffee Roasters",
		"location": "Durham, NC",
	})

	results, err := Search(idx, atproto.NSIDRoaster, "counter", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 1, "fuzzy name + same location should merge")
	assert.Equal(t, 2, results[0].Count)
}

func TestRoasterDedup_NoLocationMerges(t *testing.T) {
	idx := newTestFeedIndex(t)

	// Both have no location — should merge on fuzzy name alone
	insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{
		"name": "Blue Bottle Coffee",
	})
	insertRecord(t, idx, "did:plc:bob", atproto.NSIDRoaster, "r2", map[string]interface{}{
		"name": "Blue Bottle",
	})

	results, err := Search(idx, atproto.NSIDRoaster, "blue", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 1, "same fuzzy name with no location should merge")
	assert.Equal(t, 2, results[0].Count)
}

// --- Grinder dedup tests ---

func TestGrinderDedup_SameNameDifferentType(t *testing.T) {
	idx := newTestFeedIndex(t)

	insertRecord(t, idx, "did:plc:alice", atproto.NSIDGrinder, "g1", map[string]interface{}{
		"name":        "Baratza Encore",
		"grinderType": "electric",
		"burrType":    "conical",
	})
	insertRecord(t, idx, "did:plc:bob", atproto.NSIDGrinder, "g2", map[string]interface{}{
		"name":        "Baratza Encore",
		"grinderType": "electric",
		"burrType":    "flat",
	})

	results, err := Search(idx, atproto.NSIDGrinder, "baratza", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 2, "different burr types should produce separate suggestions")
}

func TestGrinderDedup_SameEverythingMerges(t *testing.T) {
	idx := newTestFeedIndex(t)

	insertRecord(t, idx, "did:plc:alice", atproto.NSIDGrinder, "g1", map[string]interface{}{
		"name":        "1Zpresso JX Pro",
		"grinderType": "hand",
		"burrType":    "conical",
	})
	insertRecord(t, idx, "did:plc:bob", atproto.NSIDGrinder, "g2", map[string]interface{}{
		"name":        "1Zpresso JX Pro",
		"grinderType": "hand",
		"burrType":    "conical",
	})

	results, err := Search(idx, atproto.NSIDGrinder, "1zp", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 1)
	assert.Equal(t, 2, results[0].Count)
}

// --- Brewer dedup tests ---

func TestBrewerDedup_SameNameDifferentType(t *testing.T) {
	idx := newTestFeedIndex(t)

	insertRecord(t, idx, "did:plc:alice", atproto.NSIDBrewer, "br1", map[string]interface{}{
		"name":       "Hario V60",
		"brewerType": "pour-over",
	})
	insertRecord(t, idx, "did:plc:bob", atproto.NSIDBrewer, "br2", map[string]interface{}{
		"name":       "Hario V60",
		"brewerType": "dripper",
	})

	results, err := Search(idx, atproto.NSIDBrewer, "hario", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 2, "different brewer types should produce separate suggestions")
}

func TestBrewerDedup_SameNameSameTypeMerges(t *testing.T) {
	idx := newTestFeedIndex(t)

	insertRecord(t, idx, "did:plc:alice", atproto.NSIDBrewer, "br1", map[string]interface{}{
		"name":       "AeroPress",
		"brewerType": "immersion",
	})
	insertRecord(t, idx, "did:plc:bob", atproto.NSIDBrewer, "br2", map[string]interface{}{
		"name":       "AeroPress",
		"brewerType": "immersion",
	})

	results, err := Search(idx, atproto.NSIDBrewer, "aero", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 1)
	assert.Equal(t, 2, results[0].Count)
}

// --- Bean dedup tests ---

func TestBeanDedup_SameNameDifferentProcess(t *testing.T) {
	idx := newTestFeedIndex(t)

	insertRecord(t, idx, "did:plc:alice", atproto.NSIDBean, "b1", map[string]interface{}{
		"name":    "Yirgacheffe",
		"origin":  "Ethiopia",
		"process": "Washed",
	})
	insertRecord(t, idx, "did:plc:bob", atproto.NSIDBean, "b2", map[string]interface{}{
		"name":    "Yirgacheffe",
		"origin":  "Ethiopia",
		"process": "Natural",
	})

	results, err := Search(idx, atproto.NSIDBean, "yirga", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 2, "different processes should produce separate suggestions")
}

func TestBeanDedup_SameNameDifferentOrigin(t *testing.T) {
	idx := newTestFeedIndex(t)

	insertRecord(t, idx, "did:plc:alice", atproto.NSIDBean, "b1", map[string]interface{}{
		"name":   "Gesha",
		"origin": "Panama",
	})
	insertRecord(t, idx, "did:plc:bob", atproto.NSIDBean, "b2", map[string]interface{}{
		"name":   "Gesha",
		"origin": "Ethiopia",
	})

	results, err := Search(idx, atproto.NSIDBean, "gesha", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 2, "different origins should produce separate suggestions")
}

func TestBeanDedup_SameEverythingMerges(t *testing.T) {
	idx := newTestFeedIndex(t)

	// roastLevel differs in completeness only; it is not part of the bean
	// dedup key, so these two records merge.
	insertRecord(t, idx, "did:plc:alice", atproto.NSIDBean, "b1", map[string]interface{}{
		"name":       "Ethiopian Yirgacheffe",
		"origin":     "Ethiopia",
		"roastLevel": "Light",
		"process":    "Washed",
	})
	insertRecord(t, idx, "did:plc:bob", atproto.NSIDBean, "b2", map[string]interface{}{
		"name":    "Ethiopian Yirgacheffe",
		"origin":  "Ethiopia",
		"process": "Washed",
	})

	results, err := Search(idx, atproto.NSIDBean, "ethiopia", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 1)
	assert.Equal(t, 2, results[0].Count)
}

// --- General search tests ---

func TestSearch_PrefixMatch(t *testing.T) {
	idx := newTestFeedIndex(t)

	insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{
		"name":     "Black & White Coffee",
		"location": "Raleigh, NC",
	})
	insertRecord(t, idx, "did:plc:bob", atproto.NSIDRoaster, "r2", map[string]interface{}{
		"name":     "Blue Bottle",
		"location": "Oakland, CA",
	})

	results, err := Search(idx, atproto.NSIDRoaster, "bl", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 2)
	assert.Equal(t, "Black & White Coffee", results[0].Name)
	assert.Equal(t, "Blue Bottle", results[1].Name)
}

func TestSearch_CaseInsensitive(t *testing.T) {
	idx := newTestFeedIndex(t)

	insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{
		"name": "Stumptown Coffee",
	})

	results, err := Search(idx, atproto.NSIDRoaster, "STUMP", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 1)
	assert.Equal(t, "Stumptown Coffee", results[0].Name)
}

func TestSearch_SubstringMatch(t *testing.T) {
	idx := newTestFeedIndex(t)

	// Query matches the location field, not the name.
	insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{
		"name":     "Red Rooster Coffee",
		"location": "Floyd, VA",
	})

	results, err := Search(idx, atproto.NSIDRoaster, "floyd", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 1)
	assert.Equal(t, "Red Rooster Coffee", results[0].Name)
}

func TestSearch_Deduplication(t *testing.T) {
	idx := newTestFeedIndex(t)

	// Two users have the same roaster, same location (one with website, one without)
	insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{
		"name":     "Counter Culture Coffee",
		"location": "Durham, NC",
		"website":  "https://counterculturecoffee.com",
	})
	insertRecord(t, idx, "did:plc:bob", atproto.NSIDRoaster, "r2", map[string]interface{}{
		"name":     "Counter Culture",
		"location": "Durham, NC",
	})

	results, err := Search(idx, atproto.NSIDRoaster, "counter", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 1)
	assert.Equal(t, 2, results[0].Count)
	assert.Equal(t, "Durham, NC", results[0].Fields["location"])
}

func TestSearch_Limit(t *testing.T) {
	idx := newTestFeedIndex(t)

	for i := 0; i < 5; i++ {
		rkey := "r" + string(rune('0'+i))
		insertRecord(t, idx, "did:plc:alice", atproto.NSIDGrinder, rkey, map[string]interface{}{
			"name":        "Grinder " + string(rune('A'+i)),
			"grinderType": "hand",
		})
	}

	results, err := Search(idx, atproto.NSIDGrinder, "grinder", 3)
	assert.NoError(t, err)
	assert.Len(t, results, 3)
}

func TestSearch_ShortQuery(t *testing.T) {
	idx := newTestFeedIndex(t)

	insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{
		"name": "ABC",
	})

	// One-character queries are rejected; two characters is the minimum.
	results, err := Search(idx, atproto.NSIDRoaster, "a", 10)
	assert.NoError(t, err)
	assert.Empty(t, results)

	results, err = Search(idx, atproto.NSIDRoaster, "ab", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 1)
}

func TestSearch_EmptyQuery(t *testing.T) {
	idx := newTestFeedIndex(t)

	results, err := Search(idx, atproto.NSIDRoaster, "", 10)
	assert.NoError(t, err)
	assert.Empty(t, results)
}

func TestSearch_UnknownCollection(t *testing.T) {
	idx := newTestFeedIndex(t)

	results, err := Search(idx, "unknown.collection", "test", 10)
	assert.NoError(t, err)
	assert.Empty(t, results)
}

func TestSearch_GrinderFields(t *testing.T) {
	idx := newTestFeedIndex(t)

	insertRecord(t, idx, "did:plc:alice", atproto.NSIDGrinder, "g1", map[string]interface{}{
		"name":        "1Zpresso JX Pro",
		"grinderType": "hand",
		"burrType":    "conical",
	})

	results, err := Search(idx, atproto.NSIDGrinder, "1zp", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 1)
	assert.Equal(t, "hand", results[0].Fields["grinderType"])
	assert.Equal(t, "conical", results[0].Fields["burrType"])
}

func TestSearch_BeanFields(t *testing.T) {
	idx := newTestFeedIndex(t)

	insertRecord(t, idx, "did:plc:alice", atproto.NSIDBean, "b1", map[string]interface{}{
		"name":       "Ethiopian Yirgacheffe",
		"origin":     "Ethiopia",
		"roastLevel": "Light",
		"process":    "Washed",
	})

	results, err := Search(idx, atproto.NSIDBean, "ethiopia", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 1)
	assert.Equal(t, "Ethiopian Yirgacheffe", results[0].Name)
	assert.Equal(t, "Light", results[0].Fields["roastLevel"])
}

func TestSearch_BrewerFields(t *testing.T) {
	idx := newTestFeedIndex(t)

	insertRecord(t, idx, "did:plc:alice", atproto.NSIDBrewer, "br1", map[string]interface{}{
		"name":       "Hario V60",
		"brewerType": "Pour-Over",
	})

	results, err := Search(idx, atproto.NSIDBrewer, "hario", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 1)
	assert.Equal(t, "Pour-Over", results[0].Fields["brewerType"])
}

func TestSearch_SortOrder(t *testing.T) {
	idx := newTestFeedIndex(t)

	// "Alpha Roasters" used by 3 people (no location on any record, so they merge on fuzzy name)
	insertRecord(t, idx, "did:plc:alice", atproto.NSIDRoaster, "r1", map[string]interface{}{"name": "Alpha Roasters"})
	insertRecord(t, idx, "did:plc:bob", atproto.NSIDRoaster, "r2", map[string]interface{}{"name": "Alpha Roasters"})
	insertRecord(t, idx, "did:plc:charlie", atproto.NSIDRoaster, "r3", map[string]interface{}{"name": "Alpha Roasters"})

	// "Alpha Beta" used by 1 person
	insertRecord(t, idx, "did:plc:dave", atproto.NSIDRoaster, "r4", map[string]interface{}{"name": "Alpha Beta"})

	results, err := Search(idx, atproto.NSIDRoaster, "alpha", 10)
	assert.NoError(t, err)
	assert.Len(t, results, 2)
	assert.Equal(t, "Alpha Roasters", results[0].Name)
	assert.Equal(t, 3, results[0].Count)
	assert.Equal(t, "Alpha Beta", results[1].Name)
	assert.Equal(t, 1, results[1].Count)
}