tangled
alpha
login
or
join now
hailey.at
/
skyembed
1
fork
atom
A lil service that creates embeddings of posts, profiles, and avatars to store them in Qdrant
1
fork
atom
overview
issues
pulls
pipelines
add dockerfile
hailey.at
2 months ago
1e6881a2
ecd229ed
+122
-1
5 changed files
expand all
collapse all
unified
split
.dockerignore
Dockerfile
config.py
docker-compose.yaml
main.py
+53
.dockerignore
···
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
···
1
+
# Python-generated files
2
+
__pycache__/
3
+
*.py[oc]
4
+
*.pyo
5
+
*.pyd
6
+
.Python
7
+
build/
8
+
dist/
9
+
wheels/
10
+
*.egg-info
11
+
*.egg
12
+
13
+
# Virtual environments
14
+
.venv/
15
+
venv/
16
+
ENV/
17
+
env/
18
+
19
+
# Environment files
20
+
.env
21
+
.env.*
22
+
23
+
# Git files
24
+
.git/
25
+
.gitignore
26
+
.gitattributes
27
+
28
+
# IDE files
29
+
.vscode/
30
+
.idea/
31
+
*.swp
32
+
*.swo
33
+
*~
34
+
35
+
# Documentation
36
+
README.md
37
+
*.md
38
+
LICENSE
39
+
40
+
# CI/CD
41
+
.github/
42
+
.gitlab-ci.yml
43
+
44
+
# Testing
45
+
.pytest_cache/
46
+
.coverage
47
+
htmlcov/
48
+
*.cover
49
+
.hypothesis/
50
+
51
+
# Other
52
+
.DS_Store
53
+
Thumbs.db
+39
Dockerfile
···
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
···
1
+
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base
2
+
3
+
ENV DEBIAN_FRONTEND=noninteractive
4
+
ENV TZ=UTC
5
+
ENV PYTHONUNBUFFERED=1
6
+
7
+
RUN apt-get update && apt-get install -y --no-install-recommends \
8
+
dumb-init \
9
+
curl \
10
+
build-essential \
11
+
libsnappy-dev \
12
+
zlib1g-dev \
13
+
ca-certificates \
14
+
&& apt-get clean \
15
+
&& rm -rf /var/lib/apt/lists/*
16
+
17
+
ADD https://astral.sh/uv/install.sh /uv-installer.sh
18
+
19
+
RUN sh /uv-installer.sh && rm /uv-installer.sh
20
+
21
+
ENV PATH="/root/.local/bin/:$PATH"
22
+
23
+
WORKDIR /app
24
+
25
+
COPY pyproject.toml uv.lock ./
26
+
27
+
RUN uv sync --frozen --no-dev
28
+
29
+
COPY *.py ./
30
+
31
+
ENV METRICS_PORT=6009
32
+
ENV EMBEDDING_DEVICE=cuda
33
+
ENV NVIDIA_VISIBLE_DEVICES=all
34
+
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
35
+
36
+
EXPOSE 6009
37
+
38
+
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
39
+
CMD ["uv", "run", "main.py"]
+3
config.py
···
18
19
retina_host: str = "http://localhost:7075"
20
0
0
0
21
model_config = SettingsConfigDict(
22
env_file=".env",
23
env_file_encoding="utf-8",
···
18
19
retina_host: str = "http://localhost:7075"
20
21
+
metrics_port: int = 8500
22
+
metrics_host: str = "0.0.0.0"
23
+
24
model_config = SettingsConfigDict(
25
env_file=".env",
26
env_file_encoding="utf-8",
+26
docker-compose.yaml
···
16
- "7075:8080"
17
restart: unless-stopped
18
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
19
volumes:
20
qdrant_storage:
···
16
- "7075:8080"
17
restart: unless-stopped
18
19
+
skyembed:
20
+
build: .
21
+
ports:
22
+
- "6009:6009"
23
+
environment:
24
+
- QDRANT_URL=http://qdrant:6333
25
+
- RETINA_HOST=http://retina:8080
26
+
- METRICS_PORT=6009
27
+
- METRICS_HOST=0.0.0.0
28
+
- EMBEDDING_DEVICE=cuda
29
+
- KAFKA_BOOTSTRAP_SERVERS=${KAFKA_BOOTSTRAP_SERVERS:-localhost:9092}
30
+
- KAFKA_TOPIC=${KAFKA_TOPIC:-firehose_events}
31
+
- KAFKA_GROUP_ID=${KAFKA_GROUP_ID:-skyembed}
32
+
- NVIDIA_VISIBLE_DEVICES=all
33
+
deploy:
34
+
resources:
35
+
reservations:
36
+
devices:
37
+
- driver: nvidia
38
+
count: all
39
+
capabilities: [gpu]
40
+
depends_on:
41
+
- qdrant
42
+
- retina
43
+
restart: unless-stopped
44
+
45
volumes:
46
qdrant_storage:
+1
-1
main.py
···
94
signal.signal(signal.SIGINT, signal_handler)
95
signal.signal(signal.SIGTERM, signal_handler)
96
97
-
prom_metrics.start_http(8500, "0.0.0.0")
98
99
EMBEDDING_SERVICE.initialize()
100
···
94
signal.signal(signal.SIGINT, signal_handler)
95
signal.signal(signal.SIGTERM, signal_handler)
96
97
+
prom_metrics.start_http(CONFIG.metrics_port, CONFIG.metrics_host)
98
99
EMBEDDING_SERVICE.initialize()
100