A third party ATProto appview

Refactor worker transaction handling and data conversion

Co-authored-by: dollspacegay <dollspacegay@gmail.com>

+82 -40
+4 -1
python-firehose/backfill_service.py
··· 416 416 """Save backfill progress to database""" 417 417 try: 418 418 async with self.db_pool.acquire() as conn: 419 + # Convert timezone-aware datetime to naive for PostgreSQL compatibility 420 + last_update_naive = self.progress.last_update_time.replace(tzinfo=None) if self.progress.last_update_time.tzinfo else self.progress.last_update_time 421 + 419 422 # Upsert progress to firehose_cursor table 420 423 await conn.execute(""" 421 424 INSERT INTO firehose_cursor (service, cursor, last_event_time, updated_at) ··· 428 431 """, 429 432 "backfill", 430 433 str(self.progress.current_cursor) if self.progress.current_cursor else None, 431 - self.progress.last_update_time 434 + last_update_naive 432 435 ) 433 436 except Exception as e: 434 437 logger.error(f"[BACKFILL] Error saving progress: {e}")
+55 -23
python-firehose/redis_consumer_worker.py
··· 414 414 # Extract avatar/banner CIDs 415 415 avatar = record.get('avatar') 416 416 if avatar and isinstance(avatar, dict): 417 - avatar_url = avatar.get('ref', {}).get('$link') if isinstance(avatar.get('ref'), dict) else avatar.get('ref') 417 + ref = avatar.get('ref') 418 + if isinstance(ref, dict): 419 + avatar_url = ref.get('$link') 420 + else: 421 + avatar_url = ref 422 + # Convert CID objects to strings 423 + if avatar_url and not isinstance(avatar_url, str): 424 + avatar_url = str(avatar_url) 418 425 419 426 banner = record.get('banner') 420 427 if banner and isinstance(banner, dict): 421 - banner_url = banner.get('ref', {}).get('$link') if isinstance(banner.get('ref'), dict) else banner.get('ref') 428 + ref = banner.get('ref') 429 + if isinstance(ref, dict): 430 + banner_url = ref.get('$link') 431 + else: 432 + banner_url = ref 433 + # Convert CID objects to strings 434 + if banner_url and not isinstance(banner_url, str): 435 + banner_url = str(banner_url) 422 436 423 437 profile_json = json.dumps(record) 424 438 ··· 458 472 459 473 avatar = record.get('avatar') 460 474 if avatar and isinstance(avatar, dict): 461 - avatar_url = avatar.get('ref', {}).get('$link') if isinstance(avatar.get('ref'), dict) else avatar.get('ref') 475 + ref = avatar.get('ref') 476 + if isinstance(ref, dict): 477 + avatar_url = ref.get('$link') 478 + else: 479 + avatar_url = ref 480 + # Convert CID objects to strings 481 + if avatar_url and not isinstance(avatar_url, str): 482 + avatar_url = str(avatar_url) 462 483 463 484 created_at = self.safe_date(record.get('createdAt')) 464 485 ··· 522 543 523 544 avatar = record.get('avatar') 524 545 if avatar and isinstance(avatar, dict): 525 - avatar_url = avatar.get('ref', {}).get('$link') if isinstance(avatar.get('ref'), dict) else avatar.get('ref') 546 + ref = avatar.get('ref') 547 + if isinstance(ref, dict): 548 + avatar_url = ref.get('$link') 549 + else: 550 + avatar_url = ref 551 + # Convert CID objects to strings 552 + if avatar_url and not isinstance(avatar_url, str): 553 + avatar_url = str(avatar_url) 526 554 527 555 created_at = self.safe_date(record.get('createdAt')) 528 556 ··· 800 828 if not repo or not ops: 801 829 return 802 830 803 - # Acquire database connection and process in transaction 831 + # Acquire database connection 804 832 async with self.db.acquire() as conn: 805 - async with conn.transaction(): 806 - for op in ops: 807 - action = op.get('action') 808 - path = op.get('path') 809 - record = op.get('record') 810 - cid = op.get('cid') 811 - 812 - if not path: 813 - continue 814 - 815 - collection = path.split("/")[0] 816 - uri = f"at://{repo}/{path}" 817 - 818 - try: 833 + # Process each operation in its own transaction to prevent 834 + # errors in one operation from aborting subsequent operations 835 + for op in ops: 836 + action = op.get('action') 837 + path = op.get('path') 838 + record = op.get('record') 839 + cid = op.get('cid') 840 + 841 + if not path: 842 + continue 843 + 844 + collection = path.split("/")[0] 845 + uri = f"at://{repo}/{path}" 846 + 847 + try: 848 + async with conn.transaction(): 819 849 if action in ["create", "update"] and record: 820 850 record_type = record.get('$type') 821 851 ··· 891 921 892 922 elif action == "delete": 893 923 await self.process_delete(conn, uri, collection) 894 - 895 - except Exception as e: 896 - logger.error(f"Error processing {action} {uri}: {e}") 897 - continue 924 + 925 + except Exception as e: 926 + # Log error and continue with next operation 927 + # Transaction will be automatically rolled back 928 + logger.error(f"Error processing {action} {uri}: {e}") 929 + continue 898 930 899 931 self.event_count += 1 900 932 if self.event_count % 1000 == 0:
+23 -16
python-firehose/unified_worker.py
··· 677 677 return ref if ref != 'undefined' else None 678 678 if hasattr(ref, '$link'): 679 679 link = ref.get('$link') if isinstance(ref, dict) else getattr(ref, '$link', None) 680 - return link if link != 'undefined' else None 681 - if hasattr(ref, 'toString'): 680 + if link and link != 'undefined': 681 + return str(link) if not isinstance(link, str) else link 682 + # Convert ref to string if it's a CID object 683 + if ref and ref != 'undefined': 682 684 return str(ref) 683 685 684 686 if hasattr(blob, 'cid'): 685 - return blob.cid if blob.cid != 'undefined' else None 687 + cid = blob.cid 688 + if cid and cid != 'undefined': 689 + return str(cid) if not isinstance(cid, str) else cid 686 690 687 691 return None 688 692 ··· 1853 1857 1854 1858 # Acquire database connection 1855 1859 async with self.db.acquire() as conn: 1856 - # Process operations in a transaction 1857 - async with conn.transaction(): 1858 - for op in commit.ops: 1859 - action = op.action 1860 - path = op.path 1861 - collection = path.split("/")[0] 1862 - uri = f"at://{repo}/{path}" 1863 - 1864 - try: 1860 + # Process each operation in its own transaction to prevent 1861 + # errors in one operation from aborting subsequent operations 1862 + for op in commit.ops: 1863 + action = op.action 1864 + path = op.path 1865 + collection = path.split("/")[0] 1866 + uri = f"at://{repo}/{path}" 1867 + 1868 + try: 1869 + async with conn.transaction(): 1865 1870 if action in ["create", "update"]: 1866 1871 # Extract record from CAR blocks 1867 1872 if not car or not hasattr(op, 'cid') or not op.cid: ··· 1960 1965 1961 1966 elif action == "delete": 1962 1967 await self.process_delete(conn, uri, collection) 1963 - 1964 - except Exception as e: 1965 - logger.error(f"Error processing {action} {uri}: {e}") 1966 - continue 1968 + 1969 + except Exception as e: 1970 + # Log error and continue with next operation 1971 + # Transaction will be automatically rolled back 1972 + logger.error(f"Error processing {action} {uri}: {e}") 1973 + continue 1967 1974 1968 1975 self.event_count += 1 1969 1976 if self.event_count % 1000 == 0: