tangled
alpha
login
or
join now
hailey.at
/
skyembed
1
fork
atom
A lil service that creates embeddings of posts, profiles, and avatars to store them in Qdrant
1
fork
atom
overview
issues
pulls
pipelines
add dockerfile
hailey.at
2 months ago
1e6881a2
ecd229ed
+122
-1
5 changed files
expand all
collapse all
unified
split
.dockerignore
Dockerfile
config.py
docker-compose.yaml
main.py
+53
.dockerignore
···
1
1
+
# Python-generated files
2
2
+
__pycache__/
3
3
+
*.py[oc]
4
4
+
*.pyo
5
5
+
*.pyd
6
6
+
.Python
7
7
+
build/
8
8
+
dist/
9
9
+
wheels/
10
10
+
*.egg-info
11
11
+
*.egg
12
12
+
13
13
+
# Virtual environments
14
14
+
.venv/
15
15
+
venv/
16
16
+
ENV/
17
17
+
env/
18
18
+
19
19
+
# Environment files
20
20
+
.env
21
21
+
.env.*
22
22
+
23
23
+
# Git files
24
24
+
.git/
25
25
+
.gitignore
26
26
+
.gitattributes
27
27
+
28
28
+
# IDE files
29
29
+
.vscode/
30
30
+
.idea/
31
31
+
*.swp
32
32
+
*.swo
33
33
+
*~
34
34
+
35
35
+
# Documentation
36
36
+
README.md
37
37
+
*.md
38
38
+
LICENSE
39
39
+
40
40
+
# CI/CD
41
41
+
.github/
42
42
+
.gitlab-ci.yml
43
43
+
44
44
+
# Testing
45
45
+
.pytest_cache/
46
46
+
.coverage
47
47
+
htmlcov/
48
48
+
*.cover
49
49
+
.hypothesis/
50
50
+
51
51
+
# Other
52
52
+
.DS_Store
53
53
+
Thumbs.db
+39
Dockerfile
···
1
1
+
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base
2
2
+
3
3
+
ENV DEBIAN_FRONTEND=noninteractive
4
4
+
ENV TZ=UTC
5
5
+
ENV PYTHONUNBUFFERED=1
6
6
+
7
7
+
RUN apt-get update && apt-get install -y --no-install-recommends \
8
8
+
dumb-init \
9
9
+
curl \
10
10
+
build-essential \
11
11
+
libsnappy-dev \
12
12
+
zlib1g-dev \
13
13
+
ca-certificates \
14
14
+
&& apt-get clean \
15
15
+
&& rm -rf /var/lib/apt/lists/*
16
16
+
17
17
+
ADD https://astral.sh/uv/install.sh /uv-installer.sh
18
18
+
19
19
+
RUN sh /uv-installer.sh && rm /uv-installer.sh
20
20
+
21
21
+
ENV PATH="/root/.local/bin/:$PATH"
22
22
+
23
23
+
WORKDIR /app
24
24
+
25
25
+
COPY pyproject.toml uv.lock ./
26
26
+
27
27
+
RUN uv sync --frozen --no-dev
28
28
+
29
29
+
COPY *.py ./
30
30
+
31
31
+
ENV METRICS_PORT=6009
32
32
+
ENV EMBEDDING_DEVICE=cuda
33
33
+
ENV NVIDIA_VISIBLE_DEVICES=all
34
34
+
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
35
35
+
36
36
+
EXPOSE 6009
37
37
+
38
38
+
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
39
39
+
CMD ["uv", "run", "main.py"]
+3
config.py
···
18
18
19
19
retina_host: str = "http://localhost:7075"
20
20
21
21
+
metrics_port: int = 8500
22
22
+
metrics_host: str = "0.0.0.0"
23
23
+
21
24
model_config = SettingsConfigDict(
22
25
env_file=".env",
23
26
env_file_encoding="utf-8",
+26
docker-compose.yaml
···
16
16
- "7075:8080"
17
17
restart: unless-stopped
18
18
19
19
+
skyembed:
20
20
+
build: .
21
21
+
ports:
22
22
+
- "6009:6009"
23
23
+
environment:
24
24
+
- QDRANT_URL=http://qdrant:6333
25
25
+
- RETINA_HOST=http://retina:8080
26
26
+
- METRICS_PORT=6009
27
27
+
- METRICS_HOST=0.0.0.0
28
28
+
- EMBEDDING_DEVICE=cuda
29
29
+
- KAFKA_BOOTSTRAP_SERVERS=${KAFKA_BOOTSTRAP_SERVERS:-localhost:9092}
30
30
+
- KAFKA_TOPIC=${KAFKA_TOPIC:-firehose_events}
31
31
+
- KAFKA_GROUP_ID=${KAFKA_GROUP_ID:-skyembed}
32
32
+
- NVIDIA_VISIBLE_DEVICES=all
33
33
+
deploy:
34
34
+
resources:
35
35
+
reservations:
36
36
+
devices:
37
37
+
- driver: nvidia
38
38
+
count: all
39
39
+
capabilities: [gpu]
40
40
+
depends_on:
41
41
+
- qdrant
42
42
+
- retina
43
43
+
restart: unless-stopped
44
44
+
19
45
volumes:
20
46
qdrant_storage:
+1
-1
main.py
···
94
94
signal.signal(signal.SIGINT, signal_handler)
95
95
signal.signal(signal.SIGTERM, signal_handler)
96
96
97
97
-
prom_metrics.start_http(8500, "0.0.0.0")
97
97
+
prom_metrics.start_http(CONFIG.metrics_port, CONFIG.metrics_host)
98
98
99
99
EMBEDDING_SERVICE.initialize()
100
100