this repo has no description

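Bunch of fixes: drop unused metrics/GPU ENV and EXPOSE lines from the Dockerfile; rename the `ch_*` config fields to `clickhouse_*`; bind published docker-compose ports to 127.0.0.1, point the indexer at `kafka:29092`, and set `TAP_RESYNC_PARALLELISM`; also insert follows into `follows_reverse`, and stop returning early on `update` events in indexer.py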

+23 -32
-7
Dockerfile
··· 22 23 COPY *.py ./ 24 25 - ENV METRICS_PORT=6009 26 - ENV EMBEDDING_DEVICE=cuda 27 - ENV NVIDIA_VISIBLE_DEVICES=all 28 - ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility 29 - 30 - EXPOSE 6009 31 - 32 ENTRYPOINT ["/usr/bin/dumb-init", "--"] 33 CMD ["uv", "run", "main.py"]
··· 22 23 COPY *.py ./ 24 25 ENTRYPOINT ["/usr/bin/dumb-init", "--"] 26 CMD ["uv", "run", "main.py"]
+4 -4
config.py
··· 4 5 6 class Config(BaseSettings): 7 - ch_host: str = "localhost" 8 - ch_port: int = 8123 9 - ch_user: str = "default" 10 - ch_pass: str = "clickhouse" 11 12 batch_size: int = 1000 13
··· 4 5 6 class Config(BaseSettings): 7 + clickhouse_host: str = "localhost" 8 + clickhouse_port: int = 8123 9 + clickhouse_user: str = "default" 10 + clickhouse_pass: str = "clickhouse" 11 12 batch_size: int = 1000 13
+9 -8
docker-compose.yaml
··· 30 hostname: zookeeper 31 container_name: zookeeper 32 ports: 33 - - "2181:2181" 34 environment: 35 ZOOKEEPER_CLIENT_PORT: 2181 36 ZOOKEEPER_TICK_TIME: 2000 ··· 45 depends_on: 46 - zookeeper 47 ports: 48 - - "9092:9092" 49 - - "9101:9101" 50 environment: 51 KAFKA_BROKER_ID: 1 52 KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181' ··· 78 kafka: 79 condition: service_healthy 80 ports: 81 - - "2480:2480" 82 - - "6010:6010" 83 environment: 84 TAP_BIND: ":2480" 85 TAP_FULL_NETWORK: true 86 TAP_DISABLE_ACKS: false 87 TAP_COLLECTION_FILTERS: "app.bsky.graph.follow" 88 TAP_METRICS_LISTEN: ":6010" 89 volumes: 90 - tap-data:/data 91 restart: unless-stopped ··· 99 condition: service_healthy 100 ports: 101 # metrics port 102 - - "6011:6009" 103 command: ["tap-mode"] 104 environment: 105 ATKAFKA_TAP_HOST: "ws://tap:2480" ··· 111 indexer: 112 build: . 113 ports: 114 - - "6012:6009" 115 depends_on: 116 clickhouse: 117 condition: service_healthy ··· 119 condition: service_healthy 120 command: ["uv", "run", "indexer.py"] 121 environment: 122 - KAFKA_BOOTSTRAP_SERVERS: "localhost:9092" 123 KAFKA_INPUT_TOPIC: "tap-events" 124 CLICKHOUSE_HOST: "clickhouse" 125 restart: unless-stopped
··· 30 hostname: zookeeper 31 container_name: zookeeper 32 ports: 33 + - "127.0.0.1:2181:2181" 34 environment: 35 ZOOKEEPER_CLIENT_PORT: 2181 36 ZOOKEEPER_TICK_TIME: 2000 ··· 45 depends_on: 46 - zookeeper 47 ports: 48 + - "127.0.0.1:9092:9092" 49 + - "127.0.0.1:9101:9101" 50 environment: 51 KAFKA_BROKER_ID: 1 52 KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181' ··· 78 kafka: 79 condition: service_healthy 80 ports: 81 + - "127.0.0.1:2480:2480" 82 + - "127.0.0.1:6010:6010" 83 environment: 84 TAP_BIND: ":2480" 85 TAP_FULL_NETWORK: true 86 TAP_DISABLE_ACKS: false 87 TAP_COLLECTION_FILTERS: "app.bsky.graph.follow" 88 TAP_METRICS_LISTEN: ":6010" 89 + TAP_RESYNC_PARALLELISM: 50 90 volumes: 91 - tap-data:/data 92 restart: unless-stopped ··· 100 condition: service_healthy 101 ports: 102 # metrics port 103 + - "127.0.0.1:6011:6009" 104 command: ["tap-mode"] 105 environment: 106 ATKAFKA_TAP_HOST: "ws://tap:2480" ··· 112 indexer: 113 build: . 114 ports: 115 + - "127.0.0.1:8050:8050" 116 depends_on: 117 clickhouse: 118 condition: service_healthy ··· 120 condition: service_healthy 121 command: ["uv", "run", "indexer.py"] 122 environment: 123 + KAFKA_BOOTSTRAP_SERVERS: '["kafka:29092"]' 124 KAFKA_INPUT_TOPIC: "tap-events" 125 CLICKHOUSE_HOST: "clickhouse" 126 restart: unless-stopped
+10 -13
indexer.py
··· 118 column_names=["did", "subject", "uri", "created_at"], 119 ) 120 121 status = "ok" 122 except Exception as e: 123 # TODO: handle errors gracefully ··· 231 232 if op.action == "update": 233 kind = "update" 234 - status = "ok" 235 - return 236 elif op.action == "create": 237 kind = "create" 238 - 239 rec = FollowRecord.model_validate(op.record) 240 created_at = isoparse(rec.created_at) 241 - 242 follow = Follow( 243 uri=op.uri, did=evt.did, subject=rec.subject, created_at=created_at 244 ) 245 - 246 self.indexer.insert_follow(follow) 247 else: 248 kind = "delete" 249 - 250 - kind = "delete" 251 - 252 unfollow = Unfollow(uri=op.uri, created_at=datetime.now()) 253 - 254 self.indexer.insert_unfollow(unfollow) 255 256 status = "ok" ··· 343 ) 344 345 indexer = FollowIndexer( 346 - clickhouse_host=ch_host or CONFIG.ch_host, 347 - clickhouse_port=ch_port or CONFIG.ch_port, 348 - clickhouse_user=ch_user or CONFIG.ch_user, 349 - clickhouse_pass=ch_pass or CONFIG.ch_pass, 350 batch_size=batch_size or CONFIG.batch_size, 351 ) 352 indexer.init_schema()
··· 118 column_names=["did", "subject", "uri", "created_at"], 119 ) 120 121 + self.client.insert( 122 + "follows_reverse", 123 + follows_data, 124 + column_names=["did", "subject", "uri", "created_at"], 125 + ) 126 + 127 status = "ok" 128 except Exception as e: 129 # TODO: handle errors gracefully ··· 237 238 if op.action == "update": 239 kind = "update" 240 elif op.action == "create": 241 kind = "create" 242 rec = FollowRecord.model_validate(op.record) 243 created_at = isoparse(rec.created_at) 244 follow = Follow( 245 uri=op.uri, did=evt.did, subject=rec.subject, created_at=created_at 246 ) 247 self.indexer.insert_follow(follow) 248 else: 249 kind = "delete" 250 unfollow = Unfollow(uri=op.uri, created_at=datetime.now()) 251 self.indexer.insert_unfollow(unfollow) 252 253 status = "ok" ··· 340 ) 341 342 indexer = FollowIndexer( 343 + clickhouse_host=ch_host or CONFIG.clickhouse_host, 344 + clickhouse_port=ch_port or CONFIG.clickhouse_port, 345 + clickhouse_user=ch_user or CONFIG.clickhouse_user, 346 + clickhouse_pass=ch_pass or CONFIG.clickhouse_pass, 347 batch_size=batch_size or CONFIG.batch_size, 348 ) 349 indexer.init_schema()