tangled
alpha
login
or
join now
hailey.at
/
followgraph
0
fork
atom
this repo has no description
0
fork
atom
overview
issues
pulls
pipelines
bunch of fixes
hailey.at
2 months ago
01fec516
345ef963
+23
-32
4 changed files
expand all
collapse all
unified
split
Dockerfile
config.py
docker-compose.yaml
indexer.py
-7
Dockerfile
···
22
23
COPY *.py ./
24
25
-
ENV METRICS_PORT=6009
26
-
ENV EMBEDDING_DEVICE=cuda
27
-
ENV NVIDIA_VISIBLE_DEVICES=all
28
-
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
29
-
30
-
EXPOSE 6009
31
-
32
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
33
CMD ["uv", "run", "main.py"]
···
22
23
COPY *.py ./
24
0
0
0
0
0
0
0
25
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
26
CMD ["uv", "run", "main.py"]
+4
-4
config.py
···
4
5
6
class Config(BaseSettings):
7
-
ch_host: str = "localhost"
8
-
ch_port: int = 8123
9
-
ch_user: str = "default"
10
-
ch_pass: str = "clickhouse"
11
12
batch_size: int = 1000
13
···
4
5
6
class Config(BaseSettings):
7
+
clickhouse_host: str = "localhost"
8
+
clickhouse_port: int = 8123
9
+
clickhouse_user: str = "default"
10
+
clickhouse_pass: str = "clickhouse"
11
12
batch_size: int = 1000
13
+9
-8
docker-compose.yaml
···
30
hostname: zookeeper
31
container_name: zookeeper
32
ports:
33
-
- "2181:2181"
34
environment:
35
ZOOKEEPER_CLIENT_PORT: 2181
36
ZOOKEEPER_TICK_TIME: 2000
···
45
depends_on:
46
- zookeeper
47
ports:
48
-
- "9092:9092"
49
-
- "9101:9101"
50
environment:
51
KAFKA_BROKER_ID: 1
52
KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
···
78
kafka:
79
condition: service_healthy
80
ports:
81
-
- "2480:2480"
82
-
- "6010:6010"
83
environment:
84
TAP_BIND: ":2480"
85
TAP_FULL_NETWORK: true
86
TAP_DISABLE_ACKS: false
87
TAP_COLLECTION_FILTERS: "app.bsky.graph.follow"
88
TAP_METRICS_LISTEN: ":6010"
0
89
volumes:
90
- tap-data:/data
91
restart: unless-stopped
···
99
condition: service_healthy
100
ports:
101
# metrics port
102
-
- "6011:6009"
103
command: ["tap-mode"]
104
environment:
105
ATKAFKA_TAP_HOST: "ws://tap:2480"
···
111
indexer:
112
build: .
113
ports:
114
-
- "6012:6009"
115
depends_on:
116
clickhouse:
117
condition: service_healthy
···
119
condition: service_healthy
120
command: ["uv", "run", "indexer.py"]
121
environment:
122
-
KAFKA_BOOTSTRAP_SERVERS: "localhost:9092"
123
KAFKA_INPUT_TOPIC: "tap-events"
124
CLICKHOUSE_HOST: "clickhouse"
125
restart: unless-stopped
···
30
hostname: zookeeper
31
container_name: zookeeper
32
ports:
33
+
- "127.0.0.1:2181:2181"
34
environment:
35
ZOOKEEPER_CLIENT_PORT: 2181
36
ZOOKEEPER_TICK_TIME: 2000
···
45
depends_on:
46
- zookeeper
47
ports:
48
+
- "127.0.0.1:9092:9092"
49
+
- "127.0.0.1:9101:9101"
50
environment:
51
KAFKA_BROKER_ID: 1
52
KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
···
78
kafka:
79
condition: service_healthy
80
ports:
81
+
- "127.0.0.1:2480:2480"
82
+
- "127.0.0.1:6010:6010"
83
environment:
84
TAP_BIND: ":2480"
85
TAP_FULL_NETWORK: true
86
TAP_DISABLE_ACKS: false
87
TAP_COLLECTION_FILTERS: "app.bsky.graph.follow"
88
TAP_METRICS_LISTEN: ":6010"
89
+
TAP_RESYNC_PARALLELISM: 50
90
volumes:
91
- tap-data:/data
92
restart: unless-stopped
···
100
condition: service_healthy
101
ports:
102
# metrics port
103
+
- "127.0.0.1:6011:6009"
104
command: ["tap-mode"]
105
environment:
106
ATKAFKA_TAP_HOST: "ws://tap:2480"
···
112
indexer:
113
build: .
114
ports:
115
+
- "127.0.0.1:8050:8050"
116
depends_on:
117
clickhouse:
118
condition: service_healthy
···
120
condition: service_healthy
121
command: ["uv", "run", "indexer.py"]
122
environment:
123
+
KAFKA_BOOTSTRAP_SERVERS: '["kafka:29092"]'
124
KAFKA_INPUT_TOPIC: "tap-events"
125
CLICKHOUSE_HOST: "clickhouse"
126
restart: unless-stopped
+10
-13
indexer.py
···
118
column_names=["did", "subject", "uri", "created_at"],
119
)
120
0
0
0
0
0
0
121
status = "ok"
122
except Exception as e:
123
# TODO: handle errors gracefully
···
231
232
if op.action == "update":
233
kind = "update"
234
-
status = "ok"
235
-
return
236
elif op.action == "create":
237
kind = "create"
238
-
239
rec = FollowRecord.model_validate(op.record)
240
created_at = isoparse(rec.created_at)
241
-
242
follow = Follow(
243
uri=op.uri, did=evt.did, subject=rec.subject, created_at=created_at
244
)
245
-
246
self.indexer.insert_follow(follow)
247
else:
248
kind = "delete"
249
-
250
-
kind = "delete"
251
-
252
unfollow = Unfollow(uri=op.uri, created_at=datetime.now())
253
-
254
self.indexer.insert_unfollow(unfollow)
255
256
status = "ok"
···
343
)
344
345
indexer = FollowIndexer(
346
-
clickhouse_host=ch_host or CONFIG.ch_host,
347
-
clickhouse_port=ch_port or CONFIG.ch_port,
348
-
clickhouse_user=ch_user or CONFIG.ch_user,
349
-
clickhouse_pass=ch_pass or CONFIG.ch_pass,
350
batch_size=batch_size or CONFIG.batch_size,
351
)
352
indexer.init_schema()
···
118
column_names=["did", "subject", "uri", "created_at"],
119
)
120
121
+
self.client.insert(
122
+
"follows_reverse",
123
+
follows_data,
124
+
column_names=["did", "subject", "uri", "created_at"],
125
+
)
126
+
127
status = "ok"
128
except Exception as e:
129
# TODO: handle errors gracefully
···
237
238
if op.action == "update":
239
kind = "update"
0
0
240
elif op.action == "create":
241
kind = "create"
0
242
rec = FollowRecord.model_validate(op.record)
243
created_at = isoparse(rec.created_at)
0
244
follow = Follow(
245
uri=op.uri, did=evt.did, subject=rec.subject, created_at=created_at
246
)
0
247
self.indexer.insert_follow(follow)
248
else:
249
kind = "delete"
0
0
0
250
unfollow = Unfollow(uri=op.uri, created_at=datetime.now())
0
251
self.indexer.insert_unfollow(unfollow)
252
253
status = "ok"
···
340
)
341
342
indexer = FollowIndexer(
343
+
clickhouse_host=ch_host or CONFIG.clickhouse_host,
344
+
clickhouse_port=ch_port or CONFIG.clickhouse_port,
345
+
clickhouse_user=ch_user or CONFIG.clickhouse_user,
346
+
clickhouse_pass=ch_pass or CONFIG.clickhouse_pass,
347
batch_size=batch_size or CONFIG.batch_size,
348
)
349
indexer.init_schema()