Like malachite (atproto-lastfm-importer), but written in Go and bluer.
go spotify tealfm lastfm atproto

fix process_new func (sync.FilterNew now takes a tolerance and matches existing records within that time tolerance)

karitham.dev d4de39c1 e96e9547

verified
+246 -79
+1 -1
main.go
··· 553 553 a.log.Info("Fetched existing records", slog.Int("count", len(existingRecords))) 554 554 555 555 published, _ := storage.GetPublished(authClient.DID()) 556 - newRecords := sync.FilterNew(records, existingRecords, published) 556 + newRecords := sync.FilterNew(records, existingRecords, published, tolerance) 557 557 skippedCount := len(records) - len(newRecords) 558 558 a.log.Info("Filtered to new records", 559 559 slog.Int("count", len(newRecords)),
+7 -3
sync/publish_test.go
··· 351 351 return 352 352 } 353 353 354 - for i, key := range gotKeys { 355 - if key != tt.wantKeys[i] { 356 - t.Errorf("IteratePublished() key[%d] = %s, want %s", i, key, tt.wantKeys[i]) 354 + wantSet := make(map[string]bool) 355 + for _, k := range tt.wantKeys { 356 + wantSet[k] = true 357 + } 358 + for _, k := range gotKeys { 359 + if !wantSet[k] { 360 + t.Errorf("IteratePublished() got unexpected key %s", k) 357 361 } 358 362 } 359 363 })
+19 -10
sync/record.go
··· 149 149 return keys 150 150 } 151 151 152 - func FilterNew(records []*PlayRecord, existing []ExistingRecord, processed map[string]bool) []*PlayRecord { 153 - existingKeys := make(map[string]bool) 152 + func FilterNew(records []*PlayRecord, existing []ExistingRecord, processed map[string]bool, tolerance time.Duration) []*PlayRecord { 153 + existingSet := make(map[*PlayRecord]bool) 154 154 for _, rec := range existing { 155 - key := CreateRecordKey(rec.Value) 156 - if key == "|||" { 157 - continue 158 - } 159 - existingKeys[key] = true 155 + existingSet[rec.Value] = true 160 156 } 161 157 162 158 var newRecords []*PlayRecord 163 159 for _, record := range records { 164 - key := CreateRecordKey(record) 165 - if !existingKeys[key] && !processed[key] { 166 - newRecords = append(newRecords, record) 160 + if processed != nil && processed[CreateRecordKey(record)] { 161 + continue 162 + } 163 + 164 + if len(existingSet) > 0 { 165 + isDup := false 166 + for existingRec := range existingSet { 167 + if record.sameAs(existingRec, tolerance) { 168 + isDup = true 169 + break 170 + } 171 + } 172 + if isDup { 173 + continue 174 + } 167 175 } 176 + newRecords = append(newRecords, record) 168 177 } 169 178 return newRecords 170 179 }
+219 -65
sync/sync_test.go
··· 126 126 } 127 127 } 128 128 129 - func TestFilterNewExcludesExisting(t *testing.T) { 130 - records := []*PlayRecord{ 129 + func TestFilterNew(t *testing.T) { 130 + baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 131 + processedKey := CreateRecordKey(&PlayRecord{TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}) 132 + 133 + tests := []struct { 134 + name string 135 + records []*PlayRecord 136 + existing []ExistingRecord 137 + processed map[string]bool 138 + tolerance time.Duration 139 + wantNewCount int 140 + wantNewTracks []string 141 + }{ 131 142 { 132 - TrackName: "Song A", 133 - Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, 134 - PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC)}, 143 + name: "excludes exact matches", 144 + records: []*PlayRecord{ 145 + {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}, 146 + {TrackName: "Song B", Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Minute)}}, 147 + {TrackName: "Song C", Artists: []PlayRecordArtist{{ArtistName: "Artist C"}}, PlayedTime: Timestamp{Time: baseTime.Add(2 * time.Minute)}}, 148 + }, 149 + existing: []ExistingRecord{ 150 + {URI: "at://did:example/user/play/abc", Value: &PlayRecord{TrackName: "Song B", Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Minute)}}}, 151 + }, 152 + tolerance: 5 * time.Minute, 153 + wantNewCount: 2, 154 + wantNewTracks: []string{"Song A", "Song C"}, 135 155 }, 136 156 { 137 - TrackName: "Song B", 138 - Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, 139 - PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 1, 0, 0, time.UTC)}, 157 + name: "returns all when none exist", 158 + records: []*PlayRecord{ 159 + {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: 
Timestamp{Time: baseTime}}, 160 + {TrackName: "Song B", Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Minute)}}, 161 + }, 162 + existing: []ExistingRecord{}, 163 + tolerance: 5 * time.Minute, 164 + wantNewCount: 2, 165 + wantNewTracks: []string{"Song A", "Song B"}, 140 166 }, 141 167 { 142 - TrackName: "Song C", 143 - Artists: []PlayRecordArtist{{ArtistName: "Artist C"}}, 144 - PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 2, 0, 0, time.UTC)}, 168 + name: "detects duplicates within tolerance", 169 + records: []*PlayRecord{ 170 + {TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(30 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 171 + {TrackName: "Different Song", Artists: []PlayRecordArtist{{ArtistName: "Different Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Hour)}, MusicServiceBaseDomain: MusicServiceSpotify}, 172 + }, 173 + existing: []ExistingRecord{ 174 + {URI: "at://did:example/user/play/abc", Value: &PlayRecord{TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}}, 175 + }, 176 + tolerance: 5 * time.Minute, 177 + wantNewCount: 1, 178 + wantNewTracks: []string{"Different Song"}, 145 179 }, 146 - } 147 - 148 - existing := []ExistingRecord{ 149 180 { 150 - URI: "at://did:example:user/fm.teal.alpha.feed.play/abc123", 151 - CID: "bafyreabc123", 152 - Value: &PlayRecord{ 153 - TrackName: "Song B", 154 - Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, 155 - PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 1, 0, 0, time.UTC)}, 181 + name: "excludes via processed map", 182 + records: []*PlayRecord{ 183 + {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}, 184 + {TrackName: "Song B", Artists: 
[]PlayRecordArtist{{ArtistName: "Artist B"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Minute)}}, 156 185 }, 186 + existing: []ExistingRecord{}, 187 + processed: map[string]bool{ 188 + processedKey: true, 189 + }, 190 + tolerance: 5 * time.Minute, 191 + wantNewCount: 1, 192 + wantNewTracks: []string{"Song B"}, 157 193 }, 158 - } 159 - 160 - newRecords := FilterNew(records, existing, nil) 161 - 162 - if len(newRecords) != 2 { 163 - t.Errorf("len(newRecords) = %d, want 2", len(newRecords)) 164 - } 165 - 166 - foundSongA := false 167 - foundSongB := false 168 - foundSongC := false 169 - for _, rec := range newRecords { 170 - switch rec.TrackName { 171 - case "Song A": 172 - foundSongA = true 173 - case "Song B": 174 - foundSongB = true 175 - case "Song C": 176 - foundSongC = true 177 - } 178 - } 179 - 180 - if !foundSongA { 181 - t.Error("Song A should be in new records") 182 - } 183 - if foundSongB { 184 - t.Error("Song B should not be in new records (it exists)") 185 - } 186 - if !foundSongC { 187 - t.Error("Song C should be in new records") 188 - } 189 - } 190 - 191 - func TestFilterNewReturnsAllWhenNoneExist(t *testing.T) { 192 - records := []*PlayRecord{ 194 + { 195 + name: "empty records returns nothing", 196 + records: []*PlayRecord{}, 197 + existing: []ExistingRecord{{URI: "at://did:example/user/play/abc", Value: &PlayRecord{TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}}}, 198 + tolerance: 5 * time.Minute, 199 + wantNewCount: 0, 200 + wantNewTracks: []string{}, 201 + }, 193 202 { 194 - TrackName: "Song A", 195 - Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, 196 - PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC)}, 203 + name: "nil processed map works", 204 + records: []*PlayRecord{ 205 + {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}, 206 + }, 207 + existing: []ExistingRecord{}, 208 + 
processed: nil, 209 + tolerance: 5 * time.Minute, 210 + wantNewCount: 1, 211 + wantNewTracks: []string{"Song A"}, 197 212 }, 198 213 { 199 - TrackName: "Song B", 200 - Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, 201 - PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 1, 0, 0, time.UTC)}, 214 + name: "nil existing records works", 215 + records: []*PlayRecord{ 216 + {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}, 217 + }, 218 + existing: nil, 219 + tolerance: 5 * time.Minute, 220 + wantNewCount: 1, 221 + wantNewTracks: []string{"Song A"}, 222 + }, 223 + { 224 + name: "zero tolerance requires exact time match", 225 + records: []*PlayRecord{ 226 + {TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime}}, 227 + {TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Second)}}, 228 + }, 229 + existing: []ExistingRecord{ 230 + {URI: "at://did:example/user/play/abc", Value: &PlayRecord{TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime}}}, 231 + }, 232 + tolerance: 0, 233 + wantNewCount: 1, 234 + wantNewTracks: []string{"Same Song"}, 235 + }, 236 + { 237 + name: "matches multiple existing records", 238 + records: []*PlayRecord{ 239 + {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}, 240 + {TrackName: "Song B", Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Minute)}}, 241 + {TrackName: "Song C", Artists: []PlayRecordArtist{{ArtistName: "Artist C"}}, PlayedTime: Timestamp{Time: baseTime.Add(2 * time.Minute)}}, 242 + }, 243 + existing: []ExistingRecord{ 244 + {URI: "at://did:example/user/play/abc", Value: &PlayRecord{TrackName: "Song A", Artists: 
[]PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}}, 245 + {URI: "at://did:example/user/play/def", Value: &PlayRecord{TrackName: "Song C", Artists: []PlayRecordArtist{{ArtistName: "Artist C"}}, PlayedTime: Timestamp{Time: baseTime.Add(2 * time.Minute)}}}, 246 + }, 247 + tolerance: 5 * time.Minute, 248 + wantNewCount: 1, 249 + wantNewTracks: []string{"Song B"}, 250 + }, 251 + { 252 + name: "time at exact tolerance boundary matches", 253 + records: []*PlayRecord{ 254 + {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime.Add(5 * time.Minute)}}, 255 + }, 256 + existing: []ExistingRecord{ 257 + {URI: "at://did:example/user/play/abc", Value: &PlayRecord{TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}}, 258 + }, 259 + tolerance: 5 * time.Minute, 260 + wantNewCount: 0, 261 + wantNewTracks: []string{}, 262 + }, 263 + { 264 + name: "time just beyond tolerance does not match", 265 + records: []*PlayRecord{ 266 + {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime.Add(5*time.Minute + time.Second)}}, 267 + }, 268 + existing: []ExistingRecord{ 269 + {URI: "at://did:example/user/play/abc", Value: &PlayRecord{TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}}, 270 + }, 271 + tolerance: 5 * time.Minute, 272 + wantNewCount: 1, 273 + wantNewTracks: []string{"Song A"}, 274 + }, 275 + { 276 + name: "different artist does not match", 277 + records: []*PlayRecord{ 278 + {TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}, 279 + }, 280 + existing: []ExistingRecord{ 281 + {URI: "at://did:example/user/play/abc", Value: &PlayRecord{TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Different Artist"}}, PlayedTime: 
Timestamp{Time: baseTime}}}, 282 + }, 283 + tolerance: 5 * time.Minute, 284 + wantNewCount: 1, 285 + wantNewTracks: []string{"Same Song"}, 286 + }, 287 + { 288 + name: "different track does not match", 289 + records: []*PlayRecord{ 290 + {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime}}, 291 + }, 292 + existing: []ExistingRecord{ 293 + {URI: "at://did:example/user/play/abc", Value: &PlayRecord{TrackName: "Song B", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime}}}, 294 + }, 295 + tolerance: 5 * time.Minute, 296 + wantNewCount: 1, 297 + wantNewTracks: []string{"Song A"}, 298 + }, 299 + { 300 + name: "processed takes precedence over existing check", 301 + records: []*PlayRecord{ 302 + {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}, 303 + }, 304 + existing: []ExistingRecord{ 305 + {URI: "at://did:example/user/play/abc", Value: &PlayRecord{TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}}, 306 + }, 307 + processed: map[string]bool{ 308 + processedKey: true, 309 + }, 310 + tolerance: 5 * time.Minute, 311 + wantNewCount: 0, 312 + wantNewTracks: []string{}, 313 + }, 314 + { 315 + name: "same_record_processed_and_matches_existing_returns_nothing", 316 + records: []*PlayRecord{ 317 + {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}, 318 + }, 319 + existing: []ExistingRecord{ 320 + {URI: "at://did:example/user/play/abc", Value: &PlayRecord{TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}}, 321 + }, 322 + processed: map[string]bool{ 323 + processedKey: true, 324 + }, 325 + tolerance: 5 * time.Minute, 326 + wantNewCount: 0, 327 + wantNewTracks: []string{}, 328 + }, 329 + { 330 + name: 
"processed_skips_record_regardless_of_existing", 331 + records: []*PlayRecord{ 332 + {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}, 333 + {TrackName: "Song B", Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Minute)}}, 334 + }, 335 + existing: []ExistingRecord{ 336 + {URI: "at://did:example/user/play/abc", Value: &PlayRecord{TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}}, 337 + }, 338 + processed: map[string]bool{ 339 + processedKey: true, 340 + }, 341 + tolerance: 5 * time.Minute, 342 + wantNewCount: 1, 343 + wantNewTracks: []string{"Song B"}, 202 344 }, 203 345 } 204 346 205 - existing := []ExistingRecord{} 347 + for _, tt := range tests { 348 + t.Run(tt.name, func(t *testing.T) { 349 + newRecords := FilterNew(tt.records, tt.existing, tt.processed, tt.tolerance) 206 350 207 - newRecords := FilterNew(records, existing, nil) 351 + if len(newRecords) != tt.wantNewCount { 352 + t.Errorf("FilterNew() returned %d records, want %d", len(newRecords), tt.wantNewCount) 353 + } 208 354 209 - if len(newRecords) != 2 { 210 - t.Errorf("len(newRecords) = %d, want 2", len(newRecords)) 355 + wantSet := make(map[string]bool) 356 + for _, tr := range tt.wantNewTracks { 357 + wantSet[tr] = true 358 + } 359 + for _, rec := range newRecords { 360 + if !wantSet[rec.TrackName] { 361 + t.Errorf("FilterNew() returned unexpected track %q", rec.TrackName) 362 + } 363 + } 364 + }) 211 365 } 212 366 } 213 367