tangled
alpha
login
or
join now
hailey.at
/
followgraph
0
fork
atom
this repo has no description
0
fork
atom
overview
issues
pulls
pipelines
small indexer tweaks
hailey.at
2 months ago
5d2cf6d5
d06a19ed
+75
-45
1 changed file
expand all
collapse all
unified
split
indexer.py
+75
-45
indexer.py
···
216
216
bootstrap_servers: List[str],
217
217
input_topic: str,
218
218
group_id: str,
219
219
+
max_concurrent_tasks: int = 100,
219
220
):
220
221
self.indexer = indexer
221
222
self.bootstrap_servers = bootstrap_servers
222
223
self.input_topic = input_topic
223
224
self.group_id = group_id
225
225
+
self.max_concurrent_tasks = max_concurrent_tasks
224
226
self.consumer: Optional[AIOKafkaConsumer] = None
225
227
self._flush_task: Optional[asyncio.Task[Any]] = None
228
228
+
self._semaphore: Optional[asyncio.Semaphore] = None
229
229
+
self._shutdown_event: Optional[asyncio.Event] = None
226
230
227
231
async def stop(self):
232
232
+
if self._shutdown_event:
233
233
+
self._shutdown_event.set()
234
234
+
228
235
if self._flush_task:
229
236
self._flush_task.cancel()
230
237
try:
···
282
289
finally:
283
290
prom_metrics.events_handled.labels(kind=kind, status=status).inc()
284
291
292
292
+
async def _handle_event_with_semaphore(self, message: ConsumerRecord[Any, Any]):
    """Handle one consumed message while holding a slot of ``self._semaphore``.

    ``run()`` creates the semaphore with ``max_concurrent_tasks`` permits
    before any message is dispatched, so at most that many ``_handle_event``
    calls execute at the same time; additional tasks wait here for a slot.

    Args:
        message: The consumed record, forwarded unchanged to
            ``_handle_event``.
    """
    gate = self._semaphore
    # The semaphore only exists once run() has started; this also narrows
    # the Optional type for static checkers.
    assert gate is not None
    async with gate:
        await self._handle_event(message)
296
296
+
285
297
async def run(self):
298
298
+
self._semaphore = asyncio.Semaphore(self.max_concurrent_tasks)
299
299
+
self._shutdown_event = asyncio.Event()
300
300
+
286
301
self.consumer = AIOKafkaConsumer(
287
302
self.input_topic,
288
303
bootstrap_servers=",".join(self.bootstrap_servers),
···
296
311
)
297
312
await self.consumer.start()
298
313
logger.info(
299
299
-
f"Started Kafak consumer for topic: {self.bootstrap_servers}, {self.input_topic}"
314
314
+
f"Started Kafka consumer for topic: {self.bootstrap_servers}, {self.input_topic}"
300
315
)
301
316
302
302
-
if not self.consumer:
303
303
-
raise RuntimeError("Consumer not started, call start() first.")
317
317
+
self._flush_task = asyncio.create_task(self._periodic_flush())
318
318
+
319
319
+
pending_tasks: set[asyncio.Task[Any]] = set()
304
320
305
321
try:
306
322
async for message in self.consumer:
307
307
-
asyncio.ensure_future(self._handle_event(message))
308
323
prom_metrics.events_received.inc()
324
324
+
325
325
+
task = asyncio.create_task(self._handle_event_with_semaphore(message))
326
326
+
pending_tasks.add(task)
327
327
+
task.add_done_callback(pending_tasks.discard)
328
328
+
329
329
+
if len(pending_tasks) >= self.max_concurrent_tasks * 2:
330
330
+
done, pending_tasks_set = await asyncio.wait(
331
331
+
pending_tasks, timeout=0, return_when=asyncio.FIRST_COMPLETED
332
332
+
)
333
333
+
pending_tasks = pending_tasks_set
334
334
+
for t in done:
335
335
+
if t.exception():
336
336
+
logger.error(f"Task failed with exception: {t.exception()}")
337
337
+
309
338
except Exception as e:
310
339
logger.error(f"Error consuming messages: {e}")
311
340
raise
312
341
finally:
342
342
+
if pending_tasks:
343
343
+
logger.info(
344
344
+
f"Waiting for {len(pending_tasks)} pending tasks to complete..."
345
345
+
)
346
346
+
await asyncio.gather(*pending_tasks, return_exceptions=True)
313
347
self.indexer.flush_all()
314
348
315
349
316
350
@click.command()
317
317
-
@click.option(
318
318
-
"--ch-host",
319
319
-
)
320
320
-
@click.option(
321
321
-
"--ch-port",
322
322
-
type=int,
323
323
-
)
324
324
-
@click.option(
325
325
-
"--ch-user",
326
326
-
)
327
327
-
@click.option(
328
328
-
"--ch-pass",
329
329
-
)
330
330
-
@click.option(
331
331
-
"--batch-size",
332
332
-
type=int,
333
333
-
)
334
334
-
@click.option(
335
335
-
"--bootstrap-servers",
336
336
-
type=List[str],
337
337
-
)
338
338
-
@click.option(
339
339
-
"--input-topic",
340
340
-
)
341
341
-
@click.option(
342
342
-
"--group-id",
343
343
-
)
344
344
-
@click.option(
345
345
-
"--metrics-host",
346
346
-
)
351
351
+
@click.option("--ch-host")
352
352
+
@click.option("--ch-port", type=int)
353
353
+
@click.option("--ch-user")
354
354
+
@click.option("--ch-pass")
355
355
+
@click.option("--batch-size", type=int)
347
356
@click.option(
348
348
-
"--metrics-port",
349
349
-
type=int,
357
357
+
"--bootstrap-servers", help="Comma-separated list of Kafka bootstrap servers"
350
358
)
359
359
+
@click.option("--input-topic")
360
360
+
@click.option("--group-id")
361
361
+
@click.option("--metrics-host")
362
362
+
@click.option("--metrics-port", type=int)
351
363
def main(
352
364
ch_host: Optional[str],
353
365
ch_port: Optional[int],
354
366
ch_user: Optional[str],
355
367
ch_pass: Optional[str],
356
368
batch_size: Optional[int],
357
357
-
bootstrap_servers: Optional[List[str]],
369
369
+
bootstrap_servers: Optional[str],
358
370
input_topic: Optional[str],
359
371
group_id: Optional[str],
360
372
metrics_host: Optional[str],
···
374
386
)
375
387
indexer.init_schema()
376
388
389
389
+
kafka_servers = (
390
390
+
bootstrap_servers.split(",")
391
391
+
if bootstrap_servers
392
392
+
else CONFIG.kafka_bootstrap_servers
393
393
+
)
394
394
+
377
395
consumer = Consumer(
378
396
indexer=indexer,
379
379
-
bootstrap_servers=bootstrap_servers or CONFIG.kafka_bootstrap_servers,
397
397
+
bootstrap_servers=kafka_servers,
380
398
input_topic=input_topic or CONFIG.kafka_input_topic,
381
399
group_id=group_id or CONFIG.kafka_group_id,
382
400
)
383
401
384
384
-
try:
385
385
-
asyncio.run(consumer.run())
386
386
-
except KeyboardInterrupt:
387
387
-
logger.info("Shutting down...")
388
388
-
finally:
389
389
-
asyncio.run(consumer.stop())
402
402
+
async def run_with_shutdown():
    """Run the consumer until it finishes or a shutdown signal arrives.

    Registers SIGTERM and SIGINT handlers on the running event loop that
    schedule ``consumer.stop()``, then awaits ``consumer.run()``.
    ``consumer.stop()`` is awaited again in ``finally`` so cleanup happens
    on every exit path, including when no signal was received.
    """
    import signal

    # We are inside a coroutine, so a loop is guaranteed to be running;
    # get_running_loop() is the documented way to obtain it from here.
    loop = asyncio.get_running_loop()

    # The event loop keeps only weak references to tasks. Hold strong
    # references so a scheduled stop() cannot be garbage-collected before
    # it has finished.
    stop_tasks: set[asyncio.Task[Any]] = set()

    def handle_signal():
        """Log the signal and schedule consumer shutdown on the loop."""
        logger.info("Received shutdown signal...")
        stop_task = asyncio.create_task(consumer.stop())
        stop_tasks.add(stop_task)
        stop_task.add_done_callback(stop_tasks.discard)

    for sig in (signal.SIGTERM, signal.SIGINT):
        loop.add_signal_handler(sig, handle_signal)

    try:
        await consumer.run()
    except asyncio.CancelledError:
        # Cancellation is treated as a normal shutdown path here.
        pass
    finally:
        await consumer.stop()
420
420
+
421
421
+
asyncio.run(run_with_shutdown())
392
422
393
423
394
424
if __name__ == "__main__":